diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4ec73c935fa..4501f704be0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -29,11 +29,10 @@ A clear and concise description of what you expected to happen. **Screenshots or Pasted Text** If applicable, add screenshots to help explain your problem. For text, please cut and paste the text here, delimited by lines consisting of three backtics to render it verbatim, like this: -
+
 ```
 paste output here
 ```
-
**Versions** - What version of DynamoRIO are you using? diff --git a/.github/workflows/ci-aarch64-native.yml b/.github/workflows/ci-aarch64-native.yml deleted file mode 100644 index 23d8913dc59..00000000000 --- a/.github/workflows/ci-aarch64-native.yml +++ /dev/null @@ -1,103 +0,0 @@ -# ********************************************************** -# Copyright (c) 2020-2023 Google, Inc. All rights reserved. -# Copyright (c) 2023 Arm Limited All rights reserved. -# ********************************************************** - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of Google, Inc. nor the names of its contributors may be -# used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. 
OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -# DAMAGE. - -# Github Actions workflow for aarch64 Continuous Integration testing. - -name: ci-aarch64-native - -on: - # Run on pushes to master and on pull request changes, including from a - # forked repo with no "push" trigger, while avoiding duplicate triggers. - push: - branches: - - master - pull_request: - types: [opened, reopened, synchronize] - merge_group: - - workflow_dispatch: - -jobs: - aarch64-native: - runs-on: [self-hosted, linux, ARM64] - steps: - - name: Check out repository code - uses: actions/checkout@v3 - with: - submodules: true - - # Cancel any prior runs for a PR (but do not cancel master branch runs). - - name: Cancel previous runs - uses: n1hility/cancel-previous-runs@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} - - # We also need origin/master for pre-commit source file checks in runsuite.cmake. - # But fetching multiple branches isn't supported yet: actions/checkout#214 - # Pending PR that adds this support actions/checkout#155 - # TODO i#4549: When necessary support is available, remove/replace the - # workaround here and from every job in other Github Actions CI workflows. 
- - name: Fetch master - run: git fetch --no-tags --depth=1 origin master - - - name: Create build directory - run: mkdir build - - - name: Run Suite - working-directory: build - run: ../suite/runsuite_wrapper.pl travis - env: - CI_BRANCH: ${{ github.ref }} - - - name: Send failure mail to dynamorio-devs - if: failure() && github.ref == 'refs/heads/master' - uses: dawidd6/action-send-mail@v2 - with: - server_address: smtp.gmail.com - server_port: 465 - username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} - password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} - subject: | - [${{github.repository}}] ${{github.workflow}} FAILED - on ${{github.event_name}} at ${{github.ref}} - body: | - Github Actions CI workflow run FAILED! - Workflow: ${{github.workflow}}/x86-32 - Repository: ${{github.repository}} - Branch ref: ${{github.ref}} - SHA: ${{github.sha}} - Triggering actor: ${{github.actor}} - Triggering event: ${{github.event_name}} - Run Id: ${{github.run_id}} - See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} - to: dynamorio-devs@googlegroups.com - from: Github Action CI diff --git a/.github/workflows/ci-aarchxx-cross.yml b/.github/workflows/ci-aarchxx-cross.yml new file mode 100644 index 00000000000..3d564d17e9b --- /dev/null +++ b/.github/workflows/ci-aarchxx-cross.yml @@ -0,0 +1,308 @@ +# ********************************************************** +# Copyright (c) 2020-2024 Google, Inc. All rights reserved. +# ********************************************************** + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Google, Inc. nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. + +# Github Actions workflow for AArchXX Continuous Integration testing. + +name: ci-aarchxx-cross +on: + # Run on pushes to master and on pull request changes, including from a + # forked repo with no "push" trigger, while avoiding duplicate triggers. + push: + branches: + - master + pull_request: + types: [opened, reopened, synchronize] + merge_group: + + # Manual trigger using the Actions page. May remove when integration complete. + workflow_dispatch: + +defaults: + run: + shell: bash + +jobs: + # AArch64 cross-compile with gcc, with some tests run under QEMU. + # We use a more recent Ubuntu for a more recent QEMU. 
+ aarch64-cross-compile: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + # Cancel any prior runs for a PR (but do not cancel master branch runs). + - uses: n1hility/cancel-previous-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + if: ${{ github.event_name == 'pull_request' }} + + - run: git fetch --no-tags --depth=1 origin master + + # Install cross-compiler for cross-compiling Linux build. + # Unfortunately there are no libunwind or compression cross-compile + # packages so we unpack the native versions and copy their files. + - name: Create Build Environment + run: | + sudo apt-get update + sudo apt-get -y install doxygen vera++ cmake g++-aarch64-linux-gnu \ + qemu-user qemu-user-binfmt + sudo add-apt-repository 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports focal main' + apt download libunwind8:arm64 libunwind-dev:arm64 liblzma5:arm64 \ + zlib1g:arm64 zlib1g-dev:arm64 libsnappy1v5:arm64 libsnappy-dev:arm64 \ + liblz4-1:arm64 liblz4-dev:arm64 + mkdir ../extract + for i in *.deb; do dpkg-deb -x $i ../extract; done + for i in include lib; do sudo rsync -av ../extract/usr/${i}/aarch64-linux-gnu/ /usr/aarch64-linux-gnu/${i}/; done + sudo rsync -av ../extract/usr/include/ /usr/aarch64-linux-gnu/include/ + sudo rsync -av ../extract/lib/aarch64-linux-gnu/ /usr/aarch64-linux-gnu/lib/ + + - name: Run Suite + working-directory: ${{ github.workspace }} + run: ./suite/runsuite_wrapper.pl automated_ci 64_only + env: + DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY: yes + CI_TRIGGER: ${{ github.event_name }} + CI_BRANCH: ${{ github.ref }} + + - name: Send failure mail to dynamorio-devs + if: failure() && github.ref == 'refs/heads/master' + uses: dawidd6/action-send-mail@v2 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} + password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} + subject: | + [${{github.repository}}] ${{github.workflow}} FAILED + on 
${{github.event_name}} at ${{github.ref}} + body: | + Github Actions CI workflow run FAILED! + Workflow: ${{github.workflow}}/aarchxx-cross-compile + Repository: ${{github.repository}} + Branch ref: ${{github.ref}} + SHA: ${{github.sha}} + Triggering actor: ${{github.actor}} + Triggering event: ${{github.event_name}} + Run Id: ${{github.run_id}} + See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} + to: dynamorio-devs@googlegroups.com + from: Github Action CI + + # ARM cross-compile with gcc, with some tests run under QEMU. + # We use a more recent Ubuntu for a more recent QEMU. + arm-cross-compile: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + # Cancel any prior runs for a PR (but do not cancel master branch runs). + - uses: n1hility/cancel-previous-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + if: ${{ github.event_name == 'pull_request' }} + + - run: git fetch --no-tags --depth=1 origin master + + # Install cross-compiler for cross-compiling Linux build. + # Unfortunately there are no libunwind or compression cross-compile + # packages so we unpack the native versions and copy their files. 
+ - name: Create Build Environment + run: | + sudo apt-get update + sudo apt-get -y install doxygen vera++ cmake g++-arm-linux-gnueabihf \ + qemu-user qemu-user-binfmt + sudo add-apt-repository 'deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports focal main' + apt download libunwind8:armhf libunwind-dev:armhf liblzma5:armhf \ + zlib1g:armhf zlib1g-dev:armhf libsnappy1v5:armhf libsnappy-dev:armhf \ + liblz4-1:armhf liblz4-dev:armhf + mkdir ../extract + for i in *.deb; do dpkg-deb -x $i ../extract; done + for i in include lib; do sudo rsync -av ../extract/usr/${i}/arm-linux-gnueabihf/ /usr/arm-linux-gnueabihf/${i}/; done + sudo rsync -av ../extract/usr/include/ /usr/arm-linux-gnueabihf/include/ + sudo rsync -av ../extract/lib/arm-linux-gnueabihf/ /usr/arm-linux-gnueabihf/lib/ + + - name: Run Suite + working-directory: ${{ github.workspace }} + run: ./suite/runsuite_wrapper.pl automated_ci 32_only + env: + DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY: yes + CI_TRIGGER: ${{ github.event_name }} + CI_BRANCH: ${{ github.ref }} + + - name: Send failure mail to dynamorio-devs + if: failure() && github.ref == 'refs/heads/master' + uses: dawidd6/action-send-mail@v2 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} + password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} + subject: | + [${{github.repository}}] ${{github.workflow}} FAILED + on ${{github.event_name}} at ${{github.ref}} + body: | + Github Actions CI workflow run FAILED! 
+ Workflow: ${{github.workflow}}/aarchxx-cross-compile + Repository: ${{github.repository}} + Branch ref: ${{github.ref}} + SHA: ${{github.sha}} + Triggering actor: ${{github.actor}} + Triggering event: ${{github.event_name}} + Run Id: ${{github.run_id}} + See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} + to: dynamorio-devs@googlegroups.com + from: Github Action CI + + # Android ARM cross-compile with gcc, no tests: + android-arm-cross-compile: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + # Cancel any prior runs for a PR (but do not cancel master branch runs). + - uses: n1hility/cancel-previous-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + if: ${{ github.event_name == 'pull_request' }} + + - run: git fetch --no-tags --depth=1 origin master + + # Fetch and install Android NDK for Android cross-compile build. + - name: Create Build Environment + run: | + sudo apt-get update + sudo apt-get -y install doxygen vera++ cmake + cd /tmp + wget https://dl.google.com/android/repository/android-ndk-r10e-linux-x86_64.zip + unzip -q android-ndk-r10e-linux-x86_64.zip + export ANDROID_NDK_ROOT=/tmp/android-ndk-r10e + android-ndk-r10e/build/tools/make-standalone-toolchain.sh --arch=arm \ + --toolchain=arm-linux-androideabi-4.9 --platform=android-21 \ + --install-dir=/tmp/android-gcc-arm-ndk-10e + # Manually force using ld.bfd, setting CMAKE_LINKER does not work. 
+ ln -sf ld.bfd /tmp/android-gcc-arm-ndk-10e/arm-linux-androideabi/bin/ld + ln -sf arm-linux-androideabi-ld.bfd \ + /tmp/android-gcc-arm-ndk-10e/bin/arm-linux-androideabi-ld + + - name: Run Suite + working-directory: ${{ github.workspace }} + env: + DYNAMORIO_CROSS_ANDROID_ONLY: yes + DYNAMORIO_ANDROID_TOOLCHAIN: /tmp/android-gcc-arm-ndk-10e + CI_TRIGGER: ${{ github.event_name }} + CI_BRANCH: ${{ github.ref }} + run: ./suite/runsuite_wrapper.pl automated_ci + + - name: Send failure mail to dynamorio-devs + if: failure() && github.ref == 'refs/heads/master' + uses: dawidd6/action-send-mail@v2 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} + password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} + subject: | + [${{github.repository}}] ${{github.workflow}} FAILED + on ${{github.event_name}} at ${{github.ref}} + body: | + Github Actions CI workflow run FAILED! + Workflow: ${{github.workflow}}/android-arm-cross-compile + Repository: ${{github.repository}} + Branch ref: ${{github.ref}} + SHA: ${{github.sha}} + Triggering actor: ${{github.actor}} + Triggering event: ${{github.event_name}} + Run Id: ${{github.run_id}} + See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} + to: dynamorio-devs@googlegroups.com + from: Github Action CI + + # AArch64 drdecode and drmemtrace on x86: + a64-on-x86: + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + # Cancel any prior runs for a PR (but do not cancel master branch runs). 
+ - uses: n1hility/cancel-previous-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + if: ${{ github.event_name == 'pull_request' }} + + - run: git fetch --no-tags --depth=1 origin master + + - name: Create Build Environment + run: | + sudo apt-get update + sudo apt-get -y install doxygen vera++ cmake zlib1g-dev libsnappy-dev \ + liblz4-dev + + - name: Run Suite + working-directory: ${{ github.workspace }} + run: ./suite/runsuite_wrapper.pl automated_ci + env: + DYNAMORIO_A64_ON_X86_ONLY: yes + CI_TRIGGER: ${{ github.event_name }} + CI_BRANCH: ${{ github.ref }} + + - name: Send failure mail to dynamorio-devs + if: failure() && github.ref == 'refs/heads/master' + uses: dawidd6/action-send-mail@v2 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} + password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} + subject: | + [${{github.repository}}] ${{github.workflow}} FAILED + on ${{github.event_name}} at ${{github.ref}} + body: | + Github Actions CI workflow run FAILED! + Workflow: ${{github.workflow}}/a64-on-x86 + Repository: ${{github.repository}} + Branch ref: ${{github.ref}} + SHA: ${{github.sha}} + Triggering actor: ${{github.actor}} + Triggering event: ${{github.event_name}} + Run Id: ${{github.run_id}} + See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} + to: dynamorio-devs@googlegroups.com + from: Github Action CI diff --git a/.github/workflows/ci-aarchxx.yml b/.github/workflows/ci-aarchxx.yml index 49bd04c479b..22b42ef0e34 100644 --- a/.github/workflows/ci-aarchxx.yml +++ b/.github/workflows/ci-aarchxx.yml @@ -1,5 +1,6 @@ # ********************************************************** # Copyright (c) 2020-2023 Google, Inc. All rights reserved. +# Copyright (c) 2023 Arm Limited All rights reserved. 
# ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH # DAMAGE. -# Github Actions workflow for AArchXX Continuous Integration testing. +# Github Actions workflow for aarch64 Continuous Integration testing. name: ci-aarchxx on: @@ -41,268 +42,66 @@ on: types: [opened, reopened, synchronize] merge_group: - # Manual trigger using the Actions page. May remove when integration complete. workflow_dispatch: -defaults: - run: - shell: bash - jobs: - # AArch64 cross-compile with gcc, with some tests run under QEMU. - # We use a more recent Ubuntu for a more recent QEMU. - aarch64-cross-compile: - runs-on: ubuntu-20.04 - - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - # Cancel any prior runs for a PR (but do not cancel master branch runs). - - uses: n1hility/cancel-previous-runs@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} - - - run: git fetch --no-tags --depth=1 origin master - - # Install cross-compiler for cross-compiling Linux build. - # Unfortunately there are no libunwind or compression cross-compile - # packages so we unpack the native versions and copy their files. 
- - name: Create Build Environment - run: | - sudo apt-get update - sudo apt-get -y install doxygen vera++ cmake g++-aarch64-linux-gnu \ - qemu-user qemu-user-binfmt - sudo add-apt-repository 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports focal main' - apt download libunwind8:arm64 libunwind-dev:arm64 liblzma5:arm64 \ - zlib1g:arm64 zlib1g-dev:arm64 libsnappy1v5:arm64 libsnappy-dev:arm64 \ - liblz4-1:arm64 liblz4-dev:arm64 - mkdir ../extract - for i in *.deb; do dpkg-deb -x $i ../extract; done - for i in include lib; do sudo rsync -av ../extract/usr/${i}/aarch64-linux-gnu/ /usr/aarch64-linux-gnu/${i}/; done - sudo rsync -av ../extract/usr/include/ /usr/aarch64-linux-gnu/include/ - sudo rsync -av ../extract/lib/aarch64-linux-gnu/ /usr/aarch64-linux-gnu/lib/ - - - name: Run Suite - working-directory: ${{ github.workspace }} - run: ./suite/runsuite_wrapper.pl automated_ci 64_only - env: - DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY: yes - CI_TRIGGER: ${{ github.event_name }} - CI_BRANCH: ${{ github.ref }} - - - name: Send failure mail to dynamorio-devs - if: failure() && github.ref == 'refs/heads/master' - uses: dawidd6/action-send-mail@v2 - with: - server_address: smtp.gmail.com - server_port: 465 - username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} - password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} - subject: | - [${{github.repository}}] ${{github.workflow}} FAILED - on ${{github.event_name}} at ${{github.ref}} - body: | - Github Actions CI workflow run FAILED! - Workflow: ${{github.workflow}}/aarchxx-cross-compile - Repository: ${{github.repository}} - Branch ref: ${{github.ref}} - SHA: ${{github.sha}} - Triggering actor: ${{github.actor}} - Triggering event: ${{github.event_name}} - Run Id: ${{github.run_id}} - See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} - to: dynamorio-devs@googlegroups.com - from: Github Action CI - - # ARM cross-compile with gcc, with some tests run under QEMU. 
- # We use a more recent Ubuntu for a more recent QEMU. - arm-cross-compile: - runs-on: ubuntu-20.04 - + aarch64-precommit: + strategy: + fail-fast: false + matrix: + # This job will run in parallel. + os: [ubuntu-20-arm64, ubuntu-20-arm64-sve] + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - with: - submodules: true - - # Cancel any prior runs for a PR (but do not cancel master branch runs). - - uses: n1hility/cancel-previous-runs@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} - - - run: git fetch --no-tags --depth=1 origin master - - # Install cross-compiler for cross-compiling Linux build. - # Unfortunately there are no libunwind or compression cross-compile - # packages so we unpack the native versions and copy their files. - - name: Create Build Environment - run: | - sudo apt-get update - sudo apt-get -y install doxygen vera++ cmake g++-arm-linux-gnueabihf \ - qemu-user qemu-user-binfmt - sudo add-apt-repository 'deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports focal main' - apt download libunwind8:armhf libunwind-dev:armhf liblzma5:armhf \ - zlib1g:armhf zlib1g-dev:armhf libsnappy1v5:armhf libsnappy-dev:armhf \ - liblz4-1:armhf liblz4-dev:armhf - mkdir ../extract - for i in *.deb; do dpkg-deb -x $i ../extract; done - for i in include lib; do sudo rsync -av ../extract/usr/${i}/arm-linux-gnueabihf/ /usr/arm-linux-gnueabihf/${i}/; done - sudo rsync -av ../extract/usr/include/ /usr/arm-linux-gnueabihf/include/ - sudo rsync -av ../extract/lib/arm-linux-gnueabihf/ /usr/arm-linux-gnueabihf/lib/ - - - name: Run Suite - working-directory: ${{ github.workspace }} - run: ./suite/runsuite_wrapper.pl automated_ci 32_only - env: - DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY: yes - CI_TRIGGER: ${{ github.event_name }} - CI_BRANCH: ${{ github.ref }} - - - name: Send failure mail to dynamorio-devs - if: failure() && github.ref == 'refs/heads/master' - uses: dawidd6/action-send-mail@v2 - with: - server_address: 
smtp.gmail.com - server_port: 465 - username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} - password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} - subject: | - [${{github.repository}}] ${{github.workflow}} FAILED - on ${{github.event_name}} at ${{github.ref}} - body: | - Github Actions CI workflow run FAILED! - Workflow: ${{github.workflow}}/aarchxx-cross-compile - Repository: ${{github.repository}} - Branch ref: ${{github.ref}} - SHA: ${{github.sha}} - Triggering actor: ${{github.actor}} - Triggering event: ${{github.event_name}} - Run Id: ${{github.run_id}} - See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} - to: dynamorio-devs@googlegroups.com - from: Github Action CI - - # Android ARM cross-compile with gcc, no tests: - android-arm-cross-compile: - runs-on: ubuntu-20.04 - - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - # Cancel any prior runs for a PR (but do not cancel master branch runs). - - uses: n1hility/cancel-previous-runs@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} - - - run: git fetch --no-tags --depth=1 origin master - - # Fetch and install Android NDK for Andoid cross-compile build. - - name: Create Build Environment - run: | - sudo apt-get update - sudo apt-get -y install doxygen vera++ cmake - cd /tmp - wget https://dl.google.com/android/repository/android-ndk-r10e-linux-x86_64.zip - unzip -q android-ndk-r10e-linux-x86_64.zip - export ANDROID_NDK_ROOT=/tmp/android-ndk-r10e - android-ndk-r10e/build/tools/make-standalone-toolchain.sh --arch=arm \ - --toolchain=arm-linux-androideabi-4.9 --platform=android-21 \ - --install-dir=/tmp/android-gcc-arm-ndk-10e - # Manually force using ld.bfd, setting CMAKE_LINKER does not work. 
- ln -sf ld.bfd /tmp/android-gcc-arm-ndk-10e/arm-linux-androideabi/bin/ld - ln -sf arm-linux-androideabi-ld.bfd \ - /tmp/android-gcc-arm-ndk-10e/bin/arm-linux-androideabi-ld - - - name: Run Suite - working-directory: ${{ github.workspace }} - env: - DYNAMORIO_CROSS_ANDROID_ONLY: yes - DYNAMORIO_ANDROID_TOOLCHAIN: /tmp/android-gcc-arm-ndk-10e - CI_TRIGGER: ${{ github.event_name }} - CI_BRANCH: ${{ github.ref }} - run: ./suite/runsuite_wrapper.pl automated_ci - - - name: Send failure mail to dynamorio-devs - if: failure() && github.ref == 'refs/heads/master' - uses: dawidd6/action-send-mail@v2 - with: - server_address: smtp.gmail.com - server_port: 465 - username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} - password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} - subject: | - [${{github.repository}}] ${{github.workflow}} FAILED - on ${{github.event_name}} at ${{github.ref}} - body: | - Github Actions CI workflow run FAILED! - Workflow: ${{github.workflow}}/android-arm-cross-compile - Repository: ${{github.repository}} - Branch ref: ${{github.ref}} - SHA: ${{github.sha}} - Triggering actor: ${{github.actor}} - Triggering event: ${{github.event_name}} - Run Id: ${{github.run_id}} - See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} - to: dynamorio-devs@googlegroups.com - from: Github Action CI - - # AArch64 drdecode and drmemtrace on x86: - a64-on-x86: - runs-on: ubuntu-20.04 - - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - # Cancel any prior runs for a PR (but do not cancel master branch runs). 
- - uses: n1hility/cancel-previous-runs@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} - - - run: git fetch --no-tags --depth=1 origin master - - - name: Create Build Environment - run: | - sudo apt-get update - sudo apt-get -y install doxygen vera++ cmake zlib1g-dev libsnappy-dev \ - liblz4-dev - - - name: Run Suite - working-directory: ${{ github.workspace }} - run: ./suite/runsuite_wrapper.pl automated_ci - env: - DYNAMORIO_A64_ON_X86_ONLY: yes - CI_TRIGGER: ${{ github.event_name }} - CI_BRANCH: ${{ github.ref }} - - - name: Send failure mail to dynamorio-devs - if: failure() && github.ref == 'refs/heads/master' - uses: dawidd6/action-send-mail@v2 - with: - server_address: smtp.gmail.com - server_port: 465 - username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} - password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} - subject: | - [${{github.repository}}] ${{github.workflow}} FAILED - on ${{github.event_name}} at ${{github.ref}} - body: | - Github Actions CI workflow run FAILED! - Workflow: ${{github.workflow}}/a64-on-x86 - Repository: ${{github.repository}} - Branch ref: ${{github.ref}} - SHA: ${{github.sha}} - Triggering actor: ${{github.actor}} - Triggering event: ${{github.event_name}} - Run Id: ${{github.run_id}} - See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} - to: dynamorio-devs@googlegroups.com - from: Github Action CI + - name: Check out repository code + uses: actions/checkout@v3 + with: + submodules: true + + # Cancel any prior runs for a PR (but do not cancel master branch runs). + - name: Cancel previous runs + uses: n1hility/cancel-previous-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + if: ${{ github.event_name == 'pull_request' }} + + # We also need origin/master for pre-commit source file checks in runsuite.cmake. 
+ # But fetching multiple branches isn't supported yet: actions/checkout#214 + # Pending PR that adds this support actions/checkout#155 + # TODO i#4549: When necessary support is available, remove/replace the + # workaround here and from every job in other Github Actions CI workflows. + - name: Fetch master + run: git fetch --no-tags --depth=1 origin master + + - name: Create build directory + run: mkdir build + + - name: Run Suite + working-directory: build + run: ../suite/runsuite_wrapper.pl travis + env: + CI_BRANCH: ${{ github.ref }} + + - name: Send failure mail to dynamorio-devs + if: failure() && github.ref == 'refs/heads/master' + uses: dawidd6/action-send-mail@v2 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}} + password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}} + subject: | + [${{github.repository}}] ${{github.workflow}} FAILED + on ${{github.event_name}} at ${{github.ref}} + body: | + Github Actions CI workflow run FAILED! + Workflow: ${{github.workflow}}/aarch64-precommit + Repository: ${{github.repository}} + Branch ref: ${{github.ref}} + SHA: ${{github.sha}} + Triggering actor: ${{github.actor}} + Triggering event: ${{github.event_name}} + Run Id: ${{github.run_id}} + See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}} + to: dynamorio-devs@googlegroups.com + from: Github Action CI diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index e93f9018675..62e88e22689 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2020-2023 Google, Inc. All rights reserved. +# Copyright (c) 2020-2024 Google, Inc. All rights reserved. 
# ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -81,7 +81,8 @@ jobs: sudo apt-get -y install doxygen vera++ zlib1g-dev libsnappy-dev \ liblz4-dev g++-multilib libunwind-dev sudo add-apt-repository 'deb [arch=i386] http://us.archive.ubuntu.com/ubuntu focal main' - apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 + apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 \ + zlib1g:i386 zlib1g-dev:i386 mkdir ../extract for i in *.deb; do dpkg-deb -x $i ../extract; done sudo rsync -av ../extract/usr/lib/i386-linux-gnu/ /usr/lib/i386-linux-gnu/ @@ -470,7 +471,7 @@ jobs: 7z x c:\projects\install\doxygen.zip -oc:\projects\install\doxygen > nul set PATH=c:\projects\install\doxygen;%PATH% dir "c:\Program Files (x86)\WiX Toolset"* - set PATH=C:\Program Files (x86)\WiX Toolset v3.11\bin;%PATH% + set PATH=C:\Program Files (x86)\WiX Toolset v3.14\bin;%PATH% call "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars32.bat" echo ------ Running suite ------ echo PATH is "%PATH%" diff --git a/.github/workflows/ci-x86.yml b/.github/workflows/ci-x86.yml index e7e81b1de4c..e62a3f0e676 100644 --- a/.github/workflows/ci-x86.yml +++ b/.github/workflows/ci-x86.yml @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2020-2023 Google, Inc. All rights reserved. +# Copyright (c) 2020-2024 Google, Inc. All rights reserved. 
# ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -89,7 +89,8 @@ jobs: sudo apt-get -y install doxygen vera++ zlib1g-dev libsnappy-dev \ liblz4-dev g++-multilib libunwind-dev sudo add-apt-repository 'deb [arch=i386] http://us.archive.ubuntu.com/ubuntu focal main' - apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 + apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 \ + zlib1g:i386 zlib1g-dev:i386 mkdir ../extract for i in *.deb; do dpkg-deb -x $i ../extract; done sudo rsync -av ../extract/usr/lib/i386-linux-gnu/ /usr/lib/i386-linux-gnu/ @@ -286,7 +287,8 @@ jobs: sudo apt-get -y install doxygen vera++ zlib1g-dev libsnappy-dev \ liblz4-dev g++-multilib libunwind-dev sudo add-apt-repository 'deb [arch=i386] http://us.archive.ubuntu.com/ubuntu focal main' - apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 + apt download libunwind8:i386 libunwind-dev:i386 liblzma5:i386 \ + zlib1g:i386 zlib1g-dev:i386 mkdir ../extract for i in *.deb; do dpkg-deb -x $i ../extract; done sudo rsync -av ../extract/usr/lib/i386-linux-gnu/ /usr/lib/i386-linux-gnu/ diff --git a/.gitmodules b/.gitmodules index 3cab4c7bcc2..78458224ddc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "third_party/zlib"] path = third_party/zlib url = https://github.com/madler/zlib.git +[submodule "third_party/elfutils"] + path = third_party/elfutils + url = https://sourceware.org/git/elfutils.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a0173c0cb2..9bf0a929dcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -629,10 +629,14 @@ if (X86 AND UNIX) endif () set(proc_supports_sve OFF) +set(proc_supports_sve2 OFF) if (AARCH64 AND UNIX) set(CFLAGS_SVE "-march=armv8-a+sve") + set(CFLAGS_SVE2 "-march=armv8-a+sve2") set(ASMFLAGS_SVE "-march=armv8-a+sve") + set(ASMFLAGS_SVE2 "-march=armv8-a+sve2") check_sve_processor_and_compiler_support(proc_supports_sve proc_sve_vl) + 
check_sve2_processor_and_compiler_support(proc_supports_sve2) endif () # Ensure that _AMD64_ or _X86_ are defined on Microsoft Windows, as otherwise diff --git a/License.txt b/License.txt index 7a677684f4b..ebdea6687a4 100644 --- a/License.txt +++ b/License.txt @@ -4,7 +4,7 @@ Primary DynamoRIO License: BSD All of DynamoRIO except certain portions called out in subsequent sections on this page is licensed under the following BSD license: -Copyright (c) 2010-2014 Google, Inc. licensed under the terms of the BSD. All other rights reserved. +Copyright (c) 2010-2024 Google, Inc. licensed under the terms of the BSD. All other rights reserved. Copyright (c) 2000-2009 VMware, Inc. licensed under the terms of the BSD. All other rights reserved. @@ -550,6 +550,183 @@ DAMAGES. END OF TERMS AND CONDITIONS +=========================================================================== +The drsyms Extension is linked with elfutils libraries under the LGPL 3 License + +The drsyms Extension on Linux is linked with static libraries from the +elfutils project. The source code for elfutils is available at +git://sourceware.org/git/elfutils.git. We choose the LGPL 3 license +(elfutils offers that as a choice) for our use of these libraries. The +drsyms Extension and the elfutils static libraries are provided as +libraries distinct from the rest of DynamoRIO. The details of the LGPL 3 +license are below: + + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+ =========================================================================== The valgrind.h and memcheck.h files in third_party/valgrind/ are also BSD but with 4 clauses: diff --git a/README b/README index 3bfb8e8445d..ba53b84cecd 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -Copyright (c) 2010-2021 Google, Inc. licensed under the terms of the BSD. All other rights reserved. +Copyright (c) 2010-2024 Google, Inc. licensed under the terms of the BSD. All other rights reserved. Copyright (c) 2000-2010 VMware, Inc. licensed under the terms of the BSD. All other rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index ddd1bd0c230..ac720a2ff0c 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ IA-32, AMD64, ARM, and AArch64 hardware. Mac OSX support is in progress. ## Existing DynamoRIO-based tools DynamoRIO is the basis for some well-known external tools: -- The [Arm Instruction Emulator (ArmIE)](https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-instruction-emulator) +- The [Arm Instruction Emulator (ArmIE)](https://developer.arm.com/Tools%20and%20Software/Arm%20Instruction%20Emulator) - [WinAFL](https://github.com/googleprojectzero/winafl), the Windows fuzzing tool, as an instrumentation and code coverage engine - The fine-grained profiler for ARM [DrCCTProf](https://xl10.github.io/blog/drcctprof.html) - The portable and efficient framework for fine-grained value profilers [VClinic](https://github.com/VClinic/VClinic) @@ -36,14 +36,14 @@ Tools built on DynamoRIO and available in the [release package](https://dynamori - [Function call tracing](https://dynamorio.org/sec_drcachesim_tools.html#sec_tool_func_view) - The legacy processor emulator [drcpusim](https://dynamorio.org/page_drcpusim.html) -- The "strace for Windows" tool [drstrace](http://drmemory.org/strace_for_windows.html) +- The "strace for Windows" tool 
[drstrace](https://drmemory.org/page_drstrace.html) - The code coverage tool [drcov](https://dynamorio.org/page_drcov.html) - The library tracing tool [drltrace](http://dynamorio.org/page_drltrace.html) - The memory address tracing tool [memtrace](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/memtrace_x86.c) ([drmemtrace](https://dynamorio.org/page_drcachesim.html)'s offline traces are faster with more surrounding infrastructure, but this is a simpler starting point for customized memory address tracing) - The memory value tracing tool [memval](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/memval_simple.c) - The instruction tracing tool [instrace](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/instrace_x86.c) ([drmemtrace](https://dynamorio.org/page_drcachesim.html)'s offline traces are faster with more surrounding infrastructure, but this is a simpler starting point for customized instruction tracing) - The basic block tracing tool [bbbuf](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/bbbuf.c) -- The instruction counting tool [inscount](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/inscount.c) +- The instruction counting tool [inscount](https://github.com/DynamoRIO/dynamorio/blob/master/api/samples/inscount.cpp) - The dynamic fuzz testing tool [Dr. Fuzz](http://drmemory.org/page_drfuzz.html) - The disassembly tool [drdisas](https://dynamorio.org/page_drdisas.html) - And more, including opcode counts, branch instrumentation, etc.: see [API samples](https://dynamorio.org/API_samples.html) diff --git a/api/docs/bt.dox b/api/docs/bt.dox index e03518a04f7..5767503453f 100644 --- a/api/docs/bt.dox +++ b/api/docs/bt.dox @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2023 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2007-2009 VMware, Inc. All rights reserved. 
* **********************************************************/ @@ -64,7 +64,8 @@ following sections: \section sec_IR Instruction Representation The primary data structures involved in instruction manipulation are -the #instr_t, which represents a single instruction, and the \c +the #opnd_t, which represents one operand; the #instr_t, which +represents a single instruction; and the \c #instrlist_t, which is a linked list of instructions. The header files dr_ir_instrlist.h and dr_ir_instr.h list a number of functions that operate on these data structures, including: @@ -89,6 +90,20 @@ a primary contributor to DynamoRIO's efficiency. The instruction representation includes all of the operands, whether implicit or explicit, and the condition code effects of each instruction. This allows for analysis of liveness of registers and condition codes. +The operands are split into sources and destinations. + +A memory reference is treated as one operand even when it uses +registers to compute its address: those constituent registers are not +listed as their own separate source operands (unless they are read for +other reasons such as updating the index register). This means that a +store to memory will have that store as a destination operand without +listing the store's addressing mode registers as source operands in +their own right. Tools interested in all registers inside such +operands can use opnd_get_num_regs_used() and opnd_get_reg_used() to +generically walk the registers inside an operand, or +instr_reads_from_reg() to determine whether an instruction reads a +register either as a source operand or as a component of a destination +memory reference. DynamoRIO's IR is mostly opaque to clients. Key data structures have their sizes exposed to allow for stack allocation, but their fields are opaque. 
In diff --git a/api/docs/building.dox b/api/docs/building.dox index b30f0b239e8..15cd669a666 100644 --- a/api/docs/building.dox +++ b/api/docs/building.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2023 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * ******************************************************************************/ /* @@ -47,7 +47,7 @@ To build DynamoRIO on Linux, use the following commands as a guide. This builds # other distributions (in particular, use "cmake3" for Ubuntu Trusty). $ sudo apt-get install cmake g++ g++-multilib doxygen git zlib1g-dev libunwind-dev libsnappy-dev liblz4-dev # Get sources and initialize the submodules. - $ git clone --recursive https://github.com/DynamoRIO/dynamorio.git + $ git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git # Make a separate build directory. Building in the source directory is not # supported. $ cd dynamorio && mkdir build && cd build @@ -78,7 +78,7 @@ To build 64-bit DynamoRIO in release mode, launch the `Visual Studio 2019 > x64 ``` # Get sources. - $ git clone https://github.com/DynamoRIO/dynamorio.git + $ git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git # Make a separate build directory. Building in the source directory is not # supported. $ cd dynamorio && mkdir build && cd build @@ -152,7 +152,12 @@ In order to build the documentation, you will additionally need: - doxygen -We have tested the following versions of gcc: 4.4.3, 4.3.0, 4.1.2, and 3.4.3. +To see which versions of these packages we have tested, look up the +versions for the Github Actions runner images that execute our +continuous integration tests at +https://github.com/actions/runner-images. You can find the images we +are currently using in our [workflow +files](https://github.com/DynamoRIO/dynamorio/tree/master/.github/workflows). 
If your machine does not have support for running 32-bit applications and its version of binutils is older than 2.18.50 then you'll need to set the @@ -198,7 +203,29 @@ If you wish to run the test suite, you should enable BUILD_TESTS. ---------------- -## Cross-Compiling for ARM on Linux +## Cross-Compiling for 64-bit ARM (AArch64) on Linux + +Install the cross compiler for the `aarch64-linux-gnu` target: + +``` +$ sudo apt-get install g++-aarch64-linux-gnu +``` + +Check out the sources as normal, and point at our toolchain CMake file: + +``` +$ git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git +$ mkdir build_aarch64 +$ cd build_aarch64 +$ cmake -DCMAKE_TOOLCHAIN_FILE=../dynamorio/make/toolchain-arm64.cmake ../dynamorio +$ make -j +``` + +To build a client, again use the toolchain file, as well as pointing at a DynamoRIO installation using `-DDynamoRIO_DIR=` as described in the package documentation. + +---------------- + +## Cross-Compiling for 32-bit ARM (AArch32) on Linux Install the cross compiler for the `gnueabihf` target: @@ -209,7 +236,7 @@ $ sudo apt-get install gcc-arm-linux-gnueabihf binutils-arm-linux-gnueabihf g++- Check out the sources as normal, and point at our toolchain CMake file: ``` -$ git clone https://github.com/DynamoRIO/dynamorio.git +$ git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git $ mkdir build_arm $ cd build_arm $ cmake -DCMAKE_TOOLCHAIN_FILE=../dynamorio/make/toolchain-arm32.cmake ../dynamorio @@ -241,7 +268,7 @@ Now check out the sources as normal, and point at our toolchain CMake file. 
If ``` -$ git clone https://github.com/DynamoRIO/dynamorio.git +$ git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git $ mkdir build_android $ cd build_android $ cmake -DCMAKE_TOOLCHAIN_FILE=../dynamorio/make/toolchain-android.cmake -DANDROID_TOOLCHAIN=/mytooldir/android-ndk-21 -DDR_COPY_TO_DEVICE=ON ../dynamorio diff --git a/api/docs/deployment.dox b/api/docs/deployment.dox index bb779ea39ab..63215f62cb4 100644 --- a/api/docs/deployment.dox +++ b/api/docs/deployment.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2022 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved. * Copyright (c) 2007-2010 VMware, Inc. All rights reserved. * ******************************************************************************/ @@ -318,7 +318,7 @@ There are two methods for invoking an application under DynamoRIO: As an example of the simpler method, the following command runs \c ls under DynamoRIO with the bbsize sample client: \code -% bin32/drrun -c samples/bin32/libbbsize.so -- ls +% bin64/drrun -c samples/bin64/libbbsize.so -- ls \endcode Alternatively, you can first run the target, and then use \c drrun @@ -328,7 +328,7 @@ In particular, if the application is in the middle of a blocking syscall, DynamoRIO will wait for that to finish. To instead force interruption of the syscall, additionally pass -skip_syscall. \code -% bin32/drrun -attach -c samples/bin32/libbbsize.so +% bin64/drrun -attach -c samples/bin64/libbbsize.so \endcode This attach feature requires ptrace capabilities, which can be enabled @@ -337,6 +337,12 @@ with this command: % echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope \endcode +Then, you can also detach DynamoRIO from the target process +without affecting the normal execution of the application. 
+\code +% bin64/drconfig -detach <target_pid> +\endcode + Run \c drrun with no options to get a list of the options and environment variable shortcuts it supports. To disable following across child execve calls, use the \ref op_children "-no_follow_children" runtime @@ -384,7 +390,7 @@ To \ref sec_comm "nudge" a process with pid \c targetpid running under DynamoRIO and pass argument "5" to the nudge callback, use the \c drnudgeunix tool: \code -bin32/drnudgeunix -pid targetpid -client 0 5 +bin64/drnudgeunix -pid targetpid -client 0 5 \endcode This will result in a nudge event with argument=5 delivered to the client callback registered with dr_register_nudge_event() in the diff --git a/api/docs/home.dox b/api/docs/home.dox index cb3be6342ac..0382697bc8d 100644 --- a/api/docs/home.dox +++ b/api/docs/home.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2022 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * ******************************************************************************/ /* @@ -34,7 +34,8 @@ **************************************************************************** \mainpage Home -\copydoc page_home +\brief \copybrief page_home +\details \copydetails page_home
\subpage page_home diff --git a/api/docs/license.dox b/api/docs/license.dox index 1a9f20ea5ca..511c0bce5b2 100644 --- a/api/docs/license.dox +++ b/api/docs/license.dox @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2021 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2009-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -46,7 +46,7 @@ on this page is licensed under the following BSD license: \verbatim -Copyright (c) 2010-2013 Google, Inc. licensed under the terms of the BSD. All other rights reserved. +Copyright (c) 2010-2024 Google, Inc. licensed under the terms of the BSD. All other rights reserved. Copyright (c) 2000-2010 VMware, Inc. licensed under the terms of the BSD. All other rights reserved. @@ -599,6 +599,189 @@ DAMAGES. END OF TERMS AND CONDITIONS \endverbatim + +*************************************************************************** +\section sec_lgpl3_licenses drsyms Extension use of elfutils: LGPL 3 + +The \p drsyms Extension (see \ref page_drsyms) on Linux is linked with +static libraries from the [elfutils +project](https://sourceware.org/elfutils/). The source code for +elfutils is available at git://sourceware.org/git/elfutils.git. We +choose the LGPL 3 license (elfutils offers that as a choice) for our +use of these libraries. The \p drsyms Extension and the elfutils static +libraries are provided as libraries distinct from the rest of +DynamoRIO. The details of the LGPL 3 license are below: + +\verbatim + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+ +\endverbatim + + *************************************************************************** \section sec_gpl_licenses Code Coverage genhtml: GPL 2 diff --git a/api/docs/release.dox b/api/docs/release.dox index 1fa147bd9c7..d73b1ed0848 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2023 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved. * Copyright (c) 2008-2010 VMware, Inc. All rights reserved. * ******************************************************************************/ @@ -142,8 +142,15 @@ changes: refers to timestamps and direct switches, which is what most users should want. - Rename the macro INSTR_CREATE_mul_sve to INSTR_CREATE_mul_sve_imm to differentiate it from the other SVE MUL instructions. + - Renamed a protected data member in #dynamorio::drmemtrace::analyzer_tmpl_t from + merged_interval_snapshots_ to whole_trace_interval_snapshots_ (may be relevant for + users sub-classing analyzer_tmpl_t). + - Converted #dynamorio::drmemtrace::analysis_tool_tmpl_t::interval_state_snapshot_t + into a class with all its data members marked private with public accessor functions. Further non-compatibility-affecting changes include: + - Added DWARF-5 support to the drsyms library by linking in 4 static libraries + from elfutils. These libraries have LGPL licenses. - Added raw2trace support to inject system call kernel trace templates collected from elsewhere (e.g., QEMU, Gem5) into the user-space drmemtrace traces at the corresponding system call number marker. This is done by specifying the path to the @@ -189,6 +196,22 @@ Further non-compatibility-affecting changes include: - Added opportunity to run multiple drcachesim analysis tools simultaneously. 
- Added support of loading separately-built analysis tools to drcachesim dynamically. - Added instr_is_opnd_store_source(). + - Added kernel context switch sequence injection support to the drmemtrace scheduler. + - Added dr_running_under_dynamorio(). + - Added instr_get_category_name() API that returns the string version (as char*) of a + category. + - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the + current vector length for architectures with a hardware defined or runtime changeable + vector length (such as AArch64's SVE scalable vectors). + - Added a new drmemtrace analyzer option \p -interval_instr_count that enables trace + analyzer interval results for every given count of instrs in each shard. This mode + does not support merging the shard interval snapshots to output the whole-trace + interval snapshots. Instead, the print_interval_results() API is called separately + for each shard with the interval state snapshots of that shard. + - Added a new finalize_interval_snapshots() API to + #dynamorio::drmemtrace::analysis_tool_t to allow the tool to make holistic + adjustments to the interval snapshots after all have been generated, and before + they are used for merging across shards (potentially), and printing the results. **************************************************
@@ -791,7 +814,7 @@ Further non-compatibility-affecting changes include: executed on along with an optional simulator scheduling feature to schedule threads on simulated cores to match the recorded execution on physical cpus. - - Added #DR_DISALLOW_UNSAFE_STATIC and dr_disallow_unsafe_static_behavior() + - Added #DR_DISALLOW_UNSAFE_STATIC and dr_allow_unsafe_static_behavior() for sanity checks to help support statically-linked clients. - Added drmgr_register_pre_syscall_event_user_data() and drmgr_unregister_pre_syscall_event_user_data() to enable passing of user data. diff --git a/api/docs/test_suite.dox b/api/docs/test_suite.dox index 81dd856f9e0..c6f6e396ca7 100644 --- a/api/docs/test_suite.dox +++ b/api/docs/test_suite.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2021 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * ******************************************************************************/ /* @@ -71,7 +71,76 @@ Our CI setups provide "trybot" functionality for nearly every platform via pull ## Debugging Tests on Github Actions Runner -Test failures that happen only on Github Actions and are not reproducible locally can be hard to debug. Fortunately, there's a way to SSH into a Github Actions runner to debug the test. This can be done using `tmate`: https://github.com/marketplace/actions/debugging-with-tmate. Follow instructions on the page to make a temporary change to the Github Actions workflow config in your branch, and use the link output by `tmate` to ssh into the runner. You can install `gdb` if needed on the runner. `tmate` also allows web shell access; note that you may need to press `q` one time if the web page doesn't show anything. +Test failures that happen only on Github Actions and are not reproducible +locally can be hard to debug. Fortunately, there's a way to SSH into a Github +Actions runner to debug the test. 
This can be done using `tmate`: +https://github.com/marketplace/actions/debugging-with-tmate. + +Using tmate requires sudo on the Actions runners which is only available with +pull requests on branches within the repository; it will not work with pull +requests created from external forks of the repository. + +First, identify the Actions workflow file which contains the job with the +failure. From the Actions run page with the failing test (usually reached from +the links in the job runs for a pull request), click on "Workflow file" at the +bottom of the left sidebar. It will present the file contents with its path at +the top. It will be something like ".github/workflows/ci-windows.yml". That is +the path within the git repository. + +Next, go and edit that file in your branch. Delete all jobs except the failing +one (just remove those lines from the file). For the failing one, add these 3 +lines as a new step right after the "Run Suite" step but before the "Send +failure email" step. Be sure to match the surrounding indentation as +indentation matters for .yml files. + + - name: Setup tmate session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + +Next, delete all the other workflow files in the ".github/workflows/" directory. +This will save you time and save resources in general, to only run the single +target job. + +You can see an example of these workflow file deletions and edits in this +commit: +https://github.com/DynamoRIO/dynamorio/pull/6414/commits/08b96200cdb9fd4d39a4c89e2aa9eafed92027f4 + +If you want to focus on just one test, you can use a label like the +`TMATE_DEBUG` label in the linked commit to run only that one test, but that is +not necessary.
Pasting the key lines for that here: + +In runsuite.cmake after the arg parsing: + +``` +set(extra_ctest_args INCLUDE_LABEL TMATE_DEBUG) # TEMPORARY +``` + +At the bottom of suite/tests/CMakeLists.txt, add the label to the target test: + +``` +set_tests_properties(code_api|tool.drcacheoff.burst_traceopts PROPERTIES + LABELS TMATE_DEBUG) # TEMPORARY +``` + +Commit these changes with a title that starts with "DO NOT COMMIT" so it’s clear +these are temporary debugging changes, and send to Github with `git review`. + +Now go to your pull request page and click on the details for the target +workflow. Wait for it to reach the "Setup tmate session" step (this may take +from a few minutes to 15-20 minutes depending on the job; you can look at prior +instances of jobs to see how long they typically take). It will print a command +like " ssh JhJp879nThXEKUAPWEGuatJ3J@sfo2.tmate.io". Run that command and you +will have an interactive shell in the base build directory. + +By default, it may start the ssh in tmux review mode. You would need to quit +out of that to get to the terminal. The shell should last until the action +times out which is after 6 hours, or until you terminate the initial connection. + +The connection is through tmux, so you can create new panes and shells using +tmux commands. You can install `gdb` if needed. + +`tmate` also allows web shell access; note that you may need to press `q` one +time if the web page doesn't show anything. # Regression Test Suite @@ -315,6 +384,18 @@ The comments at the top of runsuite_ssh.cmake describe additional options. Unfortunately our test suite is not as clean as it could be. Some tests can be flaky and while they pass on the machines of the existing developers and on our automated test machines, they may fail occasionally on a new machine. Please search the issue tracker before filing a new issue to see if a test failure has already been seen once before. We welcome contributions to fix flaky tests. 
+Flaky tests are marked in one of two ways: + + - Append "_FLAKY" to the test's name + - See [suite/tests/CMakeLists.txt](https://github.com/DynamoRIO/dynamorio/blob/master/suite/tests/CMakeLists.txt) for examples + - Mention the test in runsuite_wrapper.pl + - See [suite/runsuite_wrapper.pl](https://github.com/DynamoRIO/dynamorio/blob/master/suite/runsuite_wrapper.pl) for examples + +In both cases make sure an issue is filed to fix the test and mention the issue +at the place the test is marked as flaky. + +The latter is preferred for tests that should be fixed first. + ## Missing Tests Some features that were tested in our pre-cmake infrastructure have not been ported to cmake. We welcome contributions in this area: diff --git a/api/docs/tool.gendox b/api/docs/tool.gendox index f3a90dfa5b6..9ecf3890d89 100644 --- a/api/docs/tool.gendox +++ b/api/docs/tool.gendox @@ -83,23 +83,30 @@ should point at the local documentation provided with the release package. /** \page page_drstrace System Call Tracer for Windows -\p drstrace is a system call tracing tool for Windows. It is part of the -Dr. Memory tool suite. It is also -included with DynamoRIO versions 5.0.0 and higher. If this documentation -is part of a DynamoRIO public release, this link should +\p drstrace is a system call tracing tool for Windows. +It is part of the +Dr. Memory tool suite. It is also +included with DynamoRIO versions 5.0.0 and higher. + +If this documentation is part of a DynamoRIO public release, +this link should point at the local documentation provided with the release package. +This one points to the online +documentation. */ /** \page page_drltrace Library Call Tracer \p drltrace is a library call tracing tool for all platforms. It is part of the -Dr. Memory tool suite. It is also -included with DynamoRIO versions 5.0.0 and higher. If this documentation -is part of a DynamoRIO public release, this link should -point at the local documentation provided with the release package. +Dr. 
Memory tool suite. It is also +included with DynamoRIO versions 5.0.0 and higher. + +If this documentation is part of a DynamoRIO public release, +this link + should point at the local documentation provided with the release package. +This one points to the online +documentation. */ /** @@ -107,10 +114,13 @@ point at the local documentation provided with the release package. \p symquery is a symbol querying tool that operates on Linux, Mac, and Windows and supports the Windows PDB, Linux ELF, Mac Mach-O, and Windows -PECOFF formats with DWARF2 line information. It is part of the Dr. Memory tool suite. It is also included -with DynamoRIO versions 5.0.0 and higher. If this documentation is part of -a DynamoRIO public release, this link +PECOFF formats with DWARF2 line information. It is part of the +Dr. Memory tool suite. It is also included +with DynamoRIO versions 5.0.0 and higher. + +If this documentation is part of a DynamoRIO public release, +this link should point at the local documentation provided with the release package. +This one points to the online +documentation. */ diff --git a/api/docs/workflow.dox b/api/docs/workflow.dox index 84c1e5aab9b..468639508b9 100644 --- a/api/docs/workflow.dox +++ b/api/docs/workflow.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2021 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. 
* ******************************************************************************/ /* @@ -53,13 +53,13 @@ Clone the repository, either via ssh if you've set up ssh keys in your Github profile: ~~~{.unparsed} -git clone git@github.com:DynamoRIO/dynamorio.git +git clone --recurse-submodules -j4 git@github.com:DynamoRIO/dynamorio.git ~~~ Or via https: ~~~{.unparsed} -git clone https://github.com/DynamoRIO/dynamorio.git +git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git ~~~ # Configuring Author Information and Aliases diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 540e789200b..75b366ed495 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -277,7 +277,7 @@ target_link_libraries(drcachesim drmemtrace_simulator drmemtrace_reuse_distance drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker - drmemtrace_schedule_stats) + drmemtrace_schedule_stats drmemtrace_record_filter) if (UNIX) target_link_libraries(drcachesim dl) endif () @@ -512,6 +512,9 @@ macro(add_drmemtrace name type) if (liblz4) target_link_libraries(${name} lz4) endif () + if (RISCV64) + target_link_libraries(${name} atomic) + endif () add_dependencies(${name} api_headers) install_target(${name} ${INSTALL_CLIENTS_LIB}) endmacro() @@ -819,7 +822,7 @@ if (BUILD_TESTS) drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker - drmemtrace_schedule_stats drmemtrace_analyzer) + drmemtrace_schedule_stats drmemtrace_analyzer drmemtrace_record_filter) if (UNIX) target_link_libraries(tool.drcachesim.core_sharded dl) endif () diff --git a/clients/drcachesim/analysis_tool.h 
b/clients/drcachesim/analysis_tool.h index 16c4df7e8a8..24306cd7534 100644 --- a/clients/drcachesim/analysis_tool.h +++ b/clients/drcachesim/analysis_tool.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -189,14 +189,24 @@ template class analysis_tool_tmpl_t { print_results() = 0; /** - * Struct that stores details of a tool's state snapshot at an interval. This is + * Type that stores details of a tool's state snapshot at an interval. This is * useful for computing and combining interval results. Tools should inherit from - * this struct to define their own state snapshot structs. Tools do not need to - * supply any values to construct this base struct; they can simply use the + * this type to define their own state snapshot types. Tools do not need to + * supply any values to construct this base class; they can simply use the * default constructor. The members of this base class will be set by the - * framework automatically. + * framework automatically, and must not be modified by the tool at any point. + * To enforce this, the members are private and tools get read-only access to them + * via the public accessor functions. */ - struct interval_state_snapshot_t { + class interval_state_snapshot_t { + // Allow the analyzer framework access to private data members to set them + // during trace interval analysis. Tools have read-only access via the public + // accessor functions. + // Note that we expect X to be same as RecordType. But friend declarations + // cannot refer to partial specializations so we go with the separate template + // parameter X. + template friend class analyzer_tmpl_t; + + public: // This constructor is only for convenience in unit tests. The tool does not // need to provide these values, and can simply use the default constructor // below.
@@ -204,63 +214,98 @@ template class analysis_tool_tmpl_t { uint64_t interval_end_timestamp, uint64_t instr_count_cumulative, uint64_t instr_count_delta) - : shard_id(shard_id) - , interval_id(interval_id) - , interval_end_timestamp(interval_end_timestamp) - , instr_count_cumulative(instr_count_cumulative) - , instr_count_delta(instr_count_delta) + : shard_id_(shard_id) + , interval_id_(interval_id) + , interval_end_timestamp_(interval_end_timestamp) + , instr_count_cumulative_(instr_count_cumulative) + , instr_count_delta_(instr_count_delta) { } + // This constructor should be used by tools that subclass + // interval_state_snapshot_t. The data members will be set by the framework + // automatically when the tool returns a pointer to their created object from + // generate_*interval_snapshot or combine_interval_snapshots. interval_state_snapshot_t() { } + virtual ~interval_state_snapshot_t() = default; + int64_t + get_shard_id() const + { + return shard_id_; + } + uint64_t + get_interval_id() const + { + return interval_id_; + } + uint64_t + get_interval_end_timestamp() const + { + return interval_end_timestamp_; + } + uint64_t + get_instr_count_cumulative() const + { + return instr_count_cumulative_; + } + uint64_t + get_instr_count_delta() const + { + return instr_count_delta_; + } + + static constexpr int64_t WHOLE_TRACE_SHARD_ID = -1; + + private: // The following fields are set automatically by the analyzer framework after // the tool returns the interval_state_snapshot_t* in the // generate_*interval_snapshot APIs. So they'll be available to the tool in - // the combine_interval_snapshots and print_interval_results APIs. + // the finalize_interval_snapshots(), combine_interval_snapshots() (for the + // parameter snapshots), and print_interval_results() APIs via the above + // public accessor functions. // Identifier for the shard to which this interval belongs. Currently, shards // map only to threads, so this is the thread id. 
Set to WHOLE_TRACE_SHARD_ID // for the whole trace interval snapshots. - int64_t shard_id = 0; - uint64_t interval_id = 0; + int64_t shard_id_ = 0; + uint64_t interval_id_ = 0; // Stores the timestamp (exclusive) when the above interval ends. Note // that this is not the last timestamp actually seen in the trace interval, // but simply the abstract boundary of the interval. This will be aligned // to the specified -interval_microseconds. - uint64_t interval_end_timestamp = 0; - - // Count of instructions: cumulative till this interval, and the incremental - // delta in this interval vs the previous one. May be useful for tools to - // compute PKI (per kilo instruction) metrics; obviates the need for each - // tool to duplicate this. - uint64_t instr_count_cumulative = 0; - uint64_t instr_count_delta = 0; + uint64_t interval_end_timestamp_ = 0; - static constexpr int64_t WHOLE_TRACE_SHARD_ID = -1; - - virtual ~interval_state_snapshot_t() = default; + // Count of instructions: cumulative till this interval's end, and the + // incremental delta in this interval vs the previous one. May be useful for + // tools to compute PKI (per kilo instruction) metrics; obviates the need for + // each tool to duplicate this. + uint64_t instr_count_cumulative_ = 0; + uint64_t instr_count_delta_ = 0; }; /** * Notifies the analysis tool that the given trace \p interval_id has ended so - * that it can generate a snapshot of its internal state in a struct derived + * that it can generate a snapshot of its internal state in a type derived * from \p interval_state_snapshot_t, and return a pointer to it. The returned - * pointer will be provided to the tool in later combine_interval_snapshots() + * pointer will be provided to the tool in later finalize_interval_snapshots(), * and print_interval_result() calls. * * \p interval_id is a positive ordinal of the trace interval that just ended. 
- * Trace intervals have a length equal to the \p -interval_microseconds specified - * to the framework. Trace intervals are measured using the value of the - * #TRACE_MARKER_TYPE_TIMESTAMP markers. The provided \p interval_id - * values will be monotonically increasing but may not be continuous, - * i.e. the tool may not see some \p interval_id if the trace did not have - * any activity in that interval. + * Trace intervals have a length equal to either \p -interval_microseconds or + * \p -interval_instr_count. Time-based intervals are measured using the value + * of the #TRACE_MARKER_TYPE_TIMESTAMP markers. Instruction count intervals are + * measured in terms of shard-local instrs. * - * The returned \p interval_state_snapshot_t* will be passed to the - * combine_interval_snapshots() API which is invoked by the framework to merge - * multiple \p interval_state_snapshot_t from different shards in the parallel - * mode of the analyzer. + * The provided \p interval_id values will be monotonically increasing. For + * \p -interval_microseconds intervals, these values may not be continuous, + * i.e. the tool may not see some \p interval_id if the trace did not have any + * activity in that interval. + * + * After all interval state snapshots are generated, the list of all returned + * \p interval_state_snapshot_t* is passed to finalize_interval_snapshots() + * to allow the tool the opportunity to make any holistic adjustments to the + * snapshots. * * Finally, the print_interval_result() API is invoked with a list of * \p interval_state_snapshot_t* representing interval snapshots for the @@ -277,6 +322,40 @@ template class analysis_tool_tmpl_t { { return nullptr; } + /** + * Finalizes the interval snapshots in the given \p interval_snapshots list. + * This callback provides an opportunity for tools to make any holistic + * adjustments to the snapshot list now that we have all of them together. 
This + * may include, for example, computing the diff with the previous snapshot. + * + * Tools can modify the individual snapshots and also the list of snapshots itself. + * If some snapshots are removed, release_interval_snapshot() will not be invoked + * for them and the tool is responsible for de-allocating their resources. Adding new + * snapshots to the list is undefined behavior; tools should operate only on the + * provided snapshots which were generated in prior generate_*interval_snapshot + * calls. + * + * Tools cannot modify any data set by the framework in the base + * \p interval_state_snapshot_t; note that only read-only access is allowed anyway + * to those private data members via public accessor functions. + * + * In the parallel mode, this is invoked for each list of shard-local snapshots + * before they are possibly merged to create whole-trace snapshots using + * combine_interval_snapshots() and passed to print_interval_results(). In the + * serial mode, this is invoked with the list of whole-trace snapshots before it + * is passed to print_interval_results(). + * + * This is an optional API. If a tool chooses to not override this, the snapshot + * list will simply continue unmodified. + * + * Returns whether it was successful. + */ + virtual bool + finalize_interval_snapshots( + std::vector &interval_snapshots) + { + return true; + } /** * Invoked by the framework to combine the shard-local \p interval_state_snapshot_t * objects pointed at by \p latest_shard_snapshots, to create the combined @@ -302,6 +381,10 @@ template class analysis_tool_tmpl_t { * \p interval_end_timestamp) * - or if the tool mixes cumulative and delta metrics: some field-specific logic that * combines the above two strategies. + * + * Note that after the given snapshots have been combined to create the whole-trace + * snapshot using this API, any change made by the tool to the snapshot contents will + * not have any effect.
*/ virtual interval_state_snapshot_t * combine_interval_snapshots( @@ -314,14 +397,14 @@ template class analysis_tool_tmpl_t { * Prints the interval results for the given series of interval state snapshots in * \p interval_snapshots. * - * This is currently invoked with the list of whole-trace interval snapshots (for - * the parallel mode, these are the snapshots created by merging the shard-local - * snapshots). + * This is invoked with the list of whole-trace interval snapshots (for the + * parallel mode, these are the snapshots created by merging the shard-local + * snapshots). For the \p -interval_instr_count snapshots in parallel mode, this is + * invoked separately for the snapshots of each shard. * * The framework should be able to invoke this multiple times, possibly with a * different list of interval snapshots. So it should avoid free-ing memory or - * changing global state. This is to keep open the possibility of the framework - * printing interval results for each shard separately in future. + * changing global state. */ virtual bool print_interval_results( @@ -334,6 +417,10 @@ template class analysis_tool_tmpl_t { * by \p interval_snapshot is no longer needed by the framework. The tool may * de-allocate it right away or later, as it needs. Returns whether it was * successful. + * + * Note that if the tool removed some snapshot from the list passed to + * finalize_interval_snapshots(), then release_interval_snapshot() will not be + * invoked for that snapshot. */ virtual bool release_interval_snapshot(interval_state_snapshot_t *interval_snapshot) @@ -387,7 +474,8 @@ template class analysis_tool_tmpl_t { /** * Invoked once for each trace shard prior to calling parallel_shard_memref() for * that shard, this allows a tool to create data local to a shard. 
The \p - * shard_index is a unique identifier allowing shard data to be stored into a global + * shard_index is the 0-based ordinal of the shard, serving as a unique identifier + * allowing shard data to be stored into a global * table if desired (typically for aggregation use in print_results()). The \p * worker_data is the return value of parallel_worker_init() for the worker thread * who will exclusively operate on this shard. The \p shard_stream allows tools to @@ -439,10 +527,10 @@ template class analysis_tool_tmpl_t { /** * Notifies the analysis tool that the given trace \p interval_id in the shard * represented by the given \p shard_data has ended, so that it can generate a - * snapshot of its internal state in a struct derived from \p + * snapshot of its internal state in a type derived from \p * interval_state_snapshot_t, and return a pointer to it. The returned pointer will - * be provided to the tool in later combine_interval_snapshots() and - * print_interval_result() calls. + * be provided to the tool in later finalize_interval_snapshots(), + * combine_interval_snapshots(), and print_interval_results() calls. * * Note that the provided \p interval_id is local to the shard that is * represented by the given \p shard_data, and not the whole-trace interval. The @@ -451,30 +539,22 @@ template class analysis_tool_tmpl_t { * shard-local \p interval_state_snapshot_t corresponding to that whole-trace * interval. * - * \p interval_id is a positive ordinal of the trace interval that just ended. - * Trace intervals have a length equal to the \p -interval_microseconds specified - * to the framework. Trace intervals are measured using the value of the - * #TRACE_MARKER_TYPE_TIMESTAMP markers. The provided \p interval_id - * values will be monotonically increasing but may not be continuous, - * i.e. the tool may not see some \p interval_id if the trace shard did not - * have any activity in that interval.
+ * The \p interval_id parameter is defined similarly to the same parameter of + * generate_interval_snapshot(). * - * The returned \p interval_state_snapshot_t* will be passed to the - * combine_interval_snapshot() API which is invoked by the framework to merge - * multiple \p interval_state_snapshot_t from different shards in the parallel - * mode of the analyzer. - * - * Finally, the print_interval_result() API is invoked with a list of - * \p interval_state_snapshot_t* representing interval snapshots for the - * whole trace. In the parallel mode of the analyzer, this list is computed by - * combining the shard-local \p interval_state_snapshot_t using the tool's - * combine_interval_snapshot() API. + * The returned \p interval_state_snapshot_t* is treated in the same manner as + * the one returned by generate_interval_snapshot(), with the following additions: * - * The tool must not de-allocate the state snapshot until - * release_interval_snapshot() is invoked by the framework. + * In case of \p -interval_microseconds in the parallel mode: after + * finalize_interval_snapshots() has been invoked, the \p interval_state_snapshot_t* + * objects generated for the same time period across different shards are passed to + * the combine_interval_snapshots() API by the framework to merge them to create the + * whole-trace interval snapshots. The print_interval_results() API is then invoked + * with the list of whole-trace \p interval_state_snapshot_t* thus obtained. * - * An example use case of this API is to create a time series of some output - * metric over the whole trace. + * In case of \p -interval_instr_count in the parallel mode: no merging across + * shards is done, and the print_interval_results() API is invoked for each list + * of shard-local \p interval_state_snapshot_t*.
*/ virtual interval_state_snapshot_t * generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index c158c00cd84..97159ed342a 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -32,13 +32,6 @@ #include "analyzer.h" -#ifdef WINDOWS -# define WIN32_LEAN_AND_MEAN -# include -#else -# include -#endif - #include #include @@ -47,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -122,6 +114,13 @@ analyzer_t::record_is_timestamp(const memref_t &record) record.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP; } +template <> +bool +analyzer_t::record_is_instr(const memref_t &record) +{ + return type_is_instr(record.instr.type); +} + template <> memref_t analyzer_t::create_wait_marker() @@ -182,6 +181,13 @@ record_analyzer_t::record_is_timestamp(const trace_entry_t &record) return record.type == TRACE_TYPE_MARKER && record.size == TRACE_MARKER_TYPE_TIMESTAMP; } +template <> +bool +record_analyzer_t::record_is_instr(const trace_entry_t &record) +{ + return type_is_instr(static_cast(record.type)); +} + template <> trace_entry_t record_analyzer_t::create_wait_marker() @@ -223,7 +229,7 @@ template bool analyzer_tmpl_t::init_scheduler( const std::string &trace_path, memref_tid_t only_thread, int verbosity, - typename sched_type_t::scheduler_options_t *options) + typename sched_type_t::scheduler_options_t options) { verbosity_ = verbosity; if (trace_path.empty()) { @@ -242,14 +248,14 @@ analyzer_tmpl_t::init_scheduler( if (only_thread != INVALID_THREAD_ID) { workload.only_threads.insert(only_thread); } - return init_scheduler_common(workload, options); + return 
init_scheduler_common(workload, std::move(options)); } template bool analyzer_tmpl_t::init_scheduler( std::unique_ptr reader, std::unique_ptr reader_end, - int verbosity, typename sched_type_t::scheduler_options_t *options) + int verbosity, typename sched_type_t::scheduler_options_t options) { verbosity_ = verbosity; if (!reader || !reader_end) { @@ -257,20 +263,21 @@ analyzer_tmpl_t::init_scheduler( return false; } std::vector readers; - // With no modifiers or only_threads the tid doesn't matter. - readers.emplace_back(std::move(reader), std::move(reader_end), /*tid=*/1); + // Use a sentinel for the tid so the scheduler will use the memref record tid. + readers.emplace_back(std::move(reader), std::move(reader_end), + /*tid=*/INVALID_THREAD_ID); std::vector regions; if (skip_instrs_ > 0) regions.emplace_back(skip_instrs_ + 1, 0); typename sched_type_t::input_workload_t workload(std::move(readers), regions); - return init_scheduler_common(workload, options); + return init_scheduler_common(workload, std::move(options)); } template bool analyzer_tmpl_t::init_scheduler_common( typename sched_type_t::input_workload_t &workload, - typename sched_type_t::scheduler_options_t *options) + typename sched_type_t::scheduler_options_t options) { for (int i = 0; i < num_tools_; ++i) { if (parallel_ && !tools_[i]->parallel_shard_supported()) { @@ -282,25 +289,37 @@ analyzer_tmpl_t::init_scheduler_common( sched_inputs[0] = std::move(workload); typename sched_type_t::scheduler_options_t sched_ops; + int output_count = worker_count_; if (shard_type_ == SHARD_BY_CORE) { // Subclass must pass us options and set worker_count_ to # cores. 
- if (options == nullptr || worker_count_ <= 0) { + if (worker_count_ <= 0) { error_string_ = "For -core_sharded, core count must be > 0"; return false; } - sched_ops = *options; + sched_ops = std::move(options); if (sched_ops.quantum_unit == sched_type_t::QUANTUM_TIME) sched_by_time_ = true; + if (!parallel_) { + // output_count remains the # of virtual cores, but we have just + // one worker thread. The scheduler multiplexes the output_count output + // cores onto a single stream for us with this option: + sched_ops.single_lockstep_output = true; + worker_count_ = 1; + } } else if (parallel_) { sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_); + sched_ops.read_inputs_in_init = options.read_inputs_in_init; if (worker_count_ <= 0) worker_count_ = std::thread::hardware_concurrency(); + output_count = worker_count_; } else { sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_); + sched_ops.read_inputs_in_init = options.read_inputs_in_init; worker_count_ = 1; + output_count = 1; } - int output_count = worker_count_; - if (scheduler_.init(sched_inputs, output_count, sched_ops) != + sched_mapping_ = options.mapping; + if (scheduler_.init(sched_inputs, output_count, std::move(sched_ops)) != sched_type_t::STATUS_SUCCESS) { ERRMSG("Failed to initialize scheduler: %s\n", scheduler_.get_error_string().c_str()); @@ -309,6 +328,14 @@ analyzer_tmpl_t::init_scheduler_common( for (int i = 0; i < worker_count_; ++i) { worker_data_.push_back(analyzer_worker_data_t(i, scheduler_.get_stream(i))); + if (options.read_inputs_in_init) { + // The docs say we can query the filetype up front. 
+ uint64_t filetype = scheduler_.get_stream(i)->get_filetype(); + VPRINT(this, 2, "Worker %d filetype %" PRIx64 "\n", i, filetype); + if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, filetype)) { + shard_type_ = SHARD_BY_CORE; + } + } } return true; @@ -318,7 +345,7 @@ template analyzer_tmpl_t::analyzer_tmpl_t( const std::string &trace_path, analysis_tool_tmpl_t **tools, int num_tools, int worker_count, uint64_t skip_instrs, uint64_t interval_microseconds, - int verbosity) + uint64_t interval_instr_count, int verbosity) : success_(true) , num_tools_(num_tools) , tools_(tools) @@ -326,11 +353,18 @@ analyzer_tmpl_t::analyzer_tmpl_t( , worker_count_(worker_count) , skip_instrs_(skip_instrs) , interval_microseconds_(interval_microseconds) + , interval_instr_count_(interval_instr_count) , verbosity_(verbosity) { + if (interval_microseconds_ > 0 && interval_instr_count_ > 0) { + success_ = false; + error_string_ = "Cannot enable both kinds of interval analysis"; + return; + } // The scheduler will call reader_t::init() for each input file. We assume // that won't block (analyzer_multi_t separates out IPC readers). 
- if (!init_scheduler(trace_path, INVALID_THREAD_ID, verbosity)) { + typename sched_type_t::scheduler_options_t sched_ops; + if (!init_scheduler(trace_path, INVALID_THREAD_ID, verbosity, std::move(sched_ops))) { success_ = false; error_string_ = "Failed to create scheduler"; return; @@ -376,28 +410,16 @@ template uint64_t analyzer_tmpl_t::get_current_microseconds() { -#ifdef UNIX - struct timeval time; - if (gettimeofday(&time, nullptr) != 0) - return 0; - return time.tv_sec * 1000000 + time.tv_usec; -#else - SYSTEMTIME sys_time; - GetSystemTime(&sys_time); - FILETIME file_time; - if (!SystemTimeToFileTime(&sys_time, &file_time)) - return 0; - return file_time.dwLowDateTime + - (static_cast(file_time.dwHighDateTime) << 32); -#endif + return get_microsecond_timestamp(); } template uint64_t -analyzer_tmpl_t::compute_interval_id(uint64_t first_timestamp, - uint64_t latest_timestamp) +analyzer_tmpl_t::compute_timestamp_interval_id( + uint64_t first_timestamp, uint64_t latest_timestamp) { assert(first_timestamp <= latest_timestamp); + assert(interval_microseconds_ > 0); // We keep the interval end timestamps independent of the first timestamp of the // trace. For the parallel mode, where we need to merge intervals from different // shards that were active during the same final whole-trace interval, having aligned @@ -408,17 +430,34 @@ analyzer_tmpl_t::compute_interval_id(uint64_t first_time first_timestamp / interval_microseconds_ + 1; } +template +uint64_t +analyzer_tmpl_t::compute_instr_count_interval_id( + uint64_t cur_instr_count) +{ + assert(interval_instr_count_ > 0); + if (cur_instr_count == 0) + return 1; + // We want all memory access entries following an instr to stay in the same + // interval as the instr, so we increment interval_id at instr entries. Also, + // we want the last instr in each interval to have an ordinal that's a multiple + // of interval_instr_count_. 
+ return (cur_instr_count - 1) / interval_instr_count_ + 1; +} + template uint64_t analyzer_tmpl_t::compute_interval_end_timestamp( uint64_t first_timestamp, uint64_t interval_id) { + assert(interval_microseconds_ > 0); assert(interval_id >= 1); uint64_t end_timestamp = (first_timestamp / interval_microseconds_ + interval_id) * interval_microseconds_; // Since the interval's end timestamp is exclusive, the end_timestamp would actually // fall under the next interval. - assert(compute_interval_id(first_timestamp, end_timestamp) == interval_id + 1); + assert(compute_timestamp_interval_id(first_timestamp, end_timestamp) == + interval_id + 1); return end_timestamp; } @@ -427,19 +466,33 @@ bool analyzer_tmpl_t::advance_interval_id( typename scheduler_tmpl_t::stream_t *stream, analyzer_shard_data_t *shard, uint64_t &prev_interval_index, - uint64_t &prev_interval_init_instr_count) + uint64_t &prev_interval_init_instr_count, bool at_instr_record) { - if (interval_microseconds_ == 0) { + uint64_t next_interval_index = 0; + if (interval_microseconds_ > 0) { + next_interval_index = compute_timestamp_interval_id(stream->get_first_timestamp(), + stream->get_last_timestamp()); + } else if (interval_instr_count_ > 0) { + // The interval callbacks are invoked just prior to the process_memref or + // parallel_shard_memref callback for the first instr of the new interval; This + // keeps the instr's memory accesses in the same interval as the instr. 
+ next_interval_index = + compute_instr_count_interval_id(stream->get_instruction_ordinal()); + } else { return false; } - uint64_t next_interval_index = - compute_interval_id(stream->get_first_timestamp(), stream->get_last_timestamp()); if (next_interval_index != shard->cur_interval_index) { assert(next_interval_index > shard->cur_interval_index); prev_interval_index = shard->cur_interval_index; prev_interval_init_instr_count = shard->cur_interval_init_instr_count; shard->cur_interval_index = next_interval_index; - shard->cur_interval_init_instr_count = stream->get_instruction_ordinal(); + // If the next record to be presented to the tools is an instr record, we need to + // adjust for the fact that the record has already been read from the stream. + // Since we know that the next record is a part of the new interval and + // cur_interval_init_instr_count is supposed to be the count just prior to the + // new interval, we need to subtract one count for the instr. + shard->cur_interval_init_instr_count = + stream->get_instruction_ordinal() - (at_instr_record ? 1 : 0); return true; } return false; @@ -452,7 +505,7 @@ analyzer_tmpl_t::process_serial(analyzer_worker_data_t & std::vector user_worker_data(num_tools_); worker.shard_data[0].tool_data.resize(num_tools_); - if (interval_microseconds_ != 0) + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) worker.shard_data[0].cur_interval_index = 1; for (int i = 0; i < num_tools_; ++i) { worker.error = tools_[i]->initialize_stream(worker.stream); @@ -469,7 +522,12 @@ analyzer_tmpl_t::process_serial(analyzer_worker_data_t & uint64_t cur_micros = sched_by_time_ ? 
get_current_microseconds() : 0; typename sched_type_t::stream_status_t status = worker.stream->next_record(record, cur_micros); - if (status != sched_type_t::STATUS_OK) { + if (status == sched_type_t::STATUS_WAIT) { + record = create_wait_marker(); + } else if (status == sched_type_t::STATUS_IDLE) { + assert(shard_type_ == SHARD_BY_CORE); + record = create_idle_marker(); + } else if (status != sched_type_t::STATUS_OK) { if (status != sched_type_t::STATUS_EOF) { if (status == sched_type_t::STATUS_REGION_INVALID) { worker.error = @@ -478,21 +536,24 @@ analyzer_tmpl_t::process_serial(analyzer_worker_data_t & worker.error = "Failed to read from trace: " + worker.stream->get_stream_name(); } - } else if (interval_microseconds_ != 0) { - process_interval(worker.shard_data[0].cur_interval_index, - worker.shard_data[0].cur_interval_init_instr_count, - &worker, - /*parallel=*/false); + } else if (interval_microseconds_ != 0 || interval_instr_count_ != 0) { + if (!process_interval(worker.shard_data[0].cur_interval_index, + worker.shard_data[0].cur_interval_init_instr_count, + &worker, + /*parallel=*/false, /*at_instr_record=*/false) || + !finalize_interval_snapshots(&worker, /*parallel=*/false)) + return; } return; } uint64_t prev_interval_index; uint64_t prev_interval_init_instr_count; - if (record_is_timestamp(record) && + if ((record_is_timestamp(record) || record_is_instr(record)) && advance_interval_id(worker.stream, &worker.shard_data[0], prev_interval_index, - prev_interval_init_instr_count) && + prev_interval_init_instr_count, + record_is_instr(record)) && !process_interval(prev_interval_index, prev_interval_init_instr_count, - &worker, /*parallel=*/false)) { + &worker, /*parallel=*/false, record_is_instr(record))) { return; } for (int i = 0; i < num_tools_; ++i) { @@ -515,11 +576,12 @@ analyzer_tmpl_t::process_shard_exit( VPRINT(this, 1, "Worker %d finished trace shard %s\n", worker->index, worker->stream->get_stream_name().c_str()); 
worker->shard_data[shard_index].exited = true; - if (interval_microseconds_ != 0 && - !process_interval(worker->shard_data[shard_index].cur_interval_index, - worker->shard_data[shard_index].cur_interval_init_instr_count, - worker, - /*parallel=*/true, shard_index)) + if ((interval_microseconds_ != 0 || interval_instr_count_ != 0) && + (!process_interval(worker->shard_data[shard_index].cur_interval_index, + worker->shard_data[shard_index].cur_interval_init_instr_count, + worker, + /*parallel=*/true, /*at_instr_record=*/false, shard_index) || + !finalize_interval_snapshots(worker, /*parallel=*/true, shard_index))) return false; for (int i = 0; i < num_tools_; ++i) { if (!tools_[i]->parallel_shard_exit( @@ -536,8 +598,9 @@ analyzer_tmpl_t::process_shard_exit( } template -void -analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *worker) +bool +analyzer_tmpl_t::process_tasks_internal( + analyzer_worker_data_t *worker) { std::vector user_worker_data(num_tools_); @@ -573,16 +636,14 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w worker->error = "Failed to read from trace: " + worker->stream->get_stream_name(); } - return; + return false; } - int shard_index = shard_type_ == SHARD_BY_CORE - ? 
worker->index - : worker->stream->get_input_stream_ordinal(); + int shard_index = worker->stream->get_shard_index(); if (worker->shard_data.find(shard_index) == worker->shard_data.end()) { VPRINT(this, 1, "Worker %d starting on trace shard %d stream is %p\n", worker->index, shard_index, worker->stream); worker->shard_data[shard_index].tool_data.resize(num_tools_); - if (interval_microseconds_ != 0) + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) worker->shard_data[shard_index].cur_interval_index = 1; for (int i = 0; i < num_tools_; ++i) { worker->shard_data[shard_index].tool_data[i].shard_data = @@ -600,12 +661,13 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w } uint64_t prev_interval_index; uint64_t prev_interval_init_instr_count; - if (record_is_timestamp(record) && + if ((record_is_timestamp(record) || record_is_instr(record)) && advance_interval_id(worker->stream, &worker->shard_data[shard_index], - prev_interval_index, prev_interval_init_instr_count) && + prev_interval_index, prev_interval_init_instr_count, + record_is_instr(record)) && !process_interval(prev_interval_index, prev_interval_init_instr_count, worker, - /*parallel=*/true, shard_index)) { - return; + /*parallel=*/true, record_is_instr(record), shard_index)) { + return false; } for (int i = 0; i < num_tools_; ++i) { if (!tools_[i]->parallel_shard_memref( @@ -615,24 +677,27 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w VPRINT(this, 1, "Worker %d hit shard memref error %s on trace shard %s\n", worker->index, worker->error.c_str(), worker->stream->get_stream_name().c_str()); - return; + return false; } } if (record_is_thread_final(record) && shard_type_ != SHARD_BY_CORE) { - if (!process_shard_exit(worker, shard_index)) - return; + if (!process_shard_exit(worker, shard_index)) { + return false; + } } } if (shard_type_ == SHARD_BY_CORE) { if (worker->shard_data.find(worker->index) != worker->shard_data.end()) { - if (!process_shard_exit(worker, worker->index)) 
- return; + if (!process_shard_exit(worker, worker->index)) { + return false; + } } } for (const auto &keyval : worker->shard_data) { if (!keyval.second.exited) { - if (!process_shard_exit(worker, keyval.second.shard_index)) - return; + if (!process_shard_exit(worker, keyval.second.shard_index)) { + return false; + } } } for (int i = 0; i < num_tools_; ++i) { @@ -641,7 +706,28 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w worker->error = error; VPRINT(this, 1, "Worker %d hit worker exit error %s\n", worker->index, error.c_str()); - return; + return false; + } + } + return true; +} + +template +void +analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *worker) +{ + if (!process_tasks_internal(worker)) { + if (sched_mapping_ == sched_type_t::MAP_TO_ANY_OUTPUT) { + // Avoid a hang in the scheduler if we leave our current input stranded. + // XXX: Better to just do a global exit and not let the other threads + // keep running? That breaks the current model where errors are + // propagated to the user to decide what to do. + // We could perhaps add thread synch points to have other threads + // exit earlier: but maybe some uses cases consider one shard error + // to not affect others and not be fatal? 
+ if (worker->stream->set_active(false) != sched_type_t::STATUS_OK) { + ERRMSG("Failed to set failing worker to inactive; may hang"); + } } } } @@ -658,20 +744,21 @@ analyzer_tmpl_t::combine_interval_snapshots( result = tools_[tool_idx]->combine_interval_snapshots(latest_shard_snapshots, interval_end_timestamp); if (result == nullptr) { - error_string_ = "combine_interval_snapshots unexpectedly returned nullptr"; + error_string_ = "combine_interval_snapshots unexpectedly returned nullptr: " + + tools_[tool_idx]->get_error_string(); return false; } - result->instr_count_delta = 0; - result->instr_count_cumulative = 0; + result->instr_count_delta_ = 0; + result->instr_count_cumulative_ = 0; for (auto snapshot : latest_shard_snapshots) { if (snapshot == nullptr) continue; // As discussed in the doc for analysis_tool_t::combine_interval_snapshots, // we combine all shard's latest snapshots for cumulative metrics, whereas // we combine only the shards active in current interval for delta metrics. - result->instr_count_cumulative += snapshot->instr_count_cumulative; - if (snapshot->interval_end_timestamp == interval_end_timestamp) - result->instr_count_delta += snapshot->instr_count_delta; + result->instr_count_cumulative_ += snapshot->instr_count_cumulative_; + if (snapshot->interval_end_timestamp_ == interval_end_timestamp) + result->instr_count_delta_ += snapshot->instr_count_delta_; } return true; } @@ -679,11 +766,9 @@ analyzer_tmpl_t::combine_interval_snapshots( template bool analyzer_tmpl_t::merge_shard_interval_results( - // intervals[shard_idx] is a queue of interval_state_snapshot_t* - // representing the interval snapshots for that shard. This is a queue as we - // process the intervals here in a FIFO manner. Using a queue also makes code - // a bit simpler. 
- std::vector::interval_state_snapshot_t *>> &intervals, // This function will write the resulting whole-trace intervals to @@ -698,6 +783,7 @@ analyzer_tmpl_t::merge_shard_interval_results( // numbered by the earliest shard's timestamp. uint64_t earliest_ever_interval_end_timestamp = std::numeric_limits::max(); size_t shard_count = intervals.size(); + std::vector at_idx(shard_count, 0); bool any_shard_has_results_left = true; std::vector::interval_state_snapshot_t *> last_snapshot_per_shard(shard_count, nullptr); @@ -706,11 +792,11 @@ analyzer_tmpl_t::merge_shard_interval_results( // one with the earliest interval-end timestamp. uint64_t earliest_interval_end_timestamp = std::numeric_limits::max(); for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) { - if (intervals[shard_idx].empty()) + if (at_idx[shard_idx] == intervals[shard_idx].size()) continue; - earliest_interval_end_timestamp = - std::min(earliest_interval_end_timestamp, - intervals[shard_idx].front()->interval_end_timestamp); + earliest_interval_end_timestamp = std::min( + earliest_interval_end_timestamp, + intervals[shard_idx][at_idx[shard_idx]]->interval_end_timestamp_); } // We're done if no shard has any interval left unprocessed. if (earliest_interval_end_timestamp == std::numeric_limits::max()) { @@ -725,10 +811,10 @@ analyzer_tmpl_t::merge_shard_interval_results( // Update last_snapshot_per_shard for shards that were active during this // interval, which have a timestamp == earliest_interval_end_timestamp. 
for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) { - if (intervals[shard_idx].empty()) + if (at_idx[shard_idx] == intervals[shard_idx].size()) continue; uint64_t cur_interval_end_timestamp = - intervals[shard_idx].front()->interval_end_timestamp; + intervals[shard_idx][at_idx[shard_idx]]->interval_end_timestamp_; assert(cur_interval_end_timestamp >= earliest_interval_end_timestamp); if (cur_interval_end_timestamp > earliest_interval_end_timestamp) continue; @@ -741,8 +827,8 @@ analyzer_tmpl_t::merge_shard_interval_results( return false; } } - last_snapshot_per_shard[shard_idx] = intervals[shard_idx].front(); - intervals[shard_idx].pop(); + last_snapshot_per_shard[shard_idx] = intervals[shard_idx][at_idx[shard_idx]]; + ++at_idx[shard_idx]; } // Merge last_snapshot_per_shard to form the result of the current // whole-trace interval. @@ -759,10 +845,10 @@ analyzer_tmpl_t::merge_shard_interval_results( cur_merged_interval)) return false; // Add the merged interval to the result list of whole trace intervals. 
- cur_merged_interval->shard_id = analysis_tool_tmpl_t< + cur_merged_interval->shard_id_ = analysis_tool_tmpl_t< RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; - cur_merged_interval->interval_end_timestamp = earliest_interval_end_timestamp; - cur_merged_interval->interval_id = compute_interval_id( + cur_merged_interval->interval_end_timestamp_ = earliest_interval_end_timestamp; + cur_merged_interval->interval_id_ = compute_timestamp_interval_id( earliest_ever_interval_end_timestamp, earliest_interval_end_timestamp); merged_intervals.push_back(cur_merged_interval); } @@ -776,31 +862,77 @@ analyzer_tmpl_t::merge_shard_interval_results( return true; } +template +void +analyzer_tmpl_t::populate_unmerged_shard_interval_results() +{ + for (auto &worker : worker_data_) { + for (auto &shard_data : worker.shard_data) { + assert(static_cast(shard_data.second.tool_data.size()) == num_tools_); + for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { + key_tool_shard_t tool_shard_key = { tool_idx, + shard_data.second.shard_index }; + per_shard_interval_snapshots_[tool_shard_key] = std::move( + shard_data.second.tool_data[tool_idx].interval_snapshot_data); + } + } + } +} + +template +void +analyzer_tmpl_t::populate_serial_interval_results() +{ + assert(whole_trace_interval_snapshots_.empty()); + whole_trace_interval_snapshots_.resize(num_tools_); + assert(worker_data_.size() == 1); + assert(worker_data_[0].shard_data.size() == 1 && + worker_data_[0].shard_data.count(0) == 1); + assert(static_cast(worker_data_[0].shard_data[0].tool_data.size()) == + num_tools_); + for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { + whole_trace_interval_snapshots_[tool_idx] = std::move( + worker_data_[0].shard_data[0].tool_data[tool_idx].interval_snapshot_data); + } +} + template bool analyzer_tmpl_t::collect_and_maybe_merge_shard_interval_results() { - // all_intervals[tool_idx][shard_idx] contains a queue of the + assert(interval_microseconds_ != 0 || 
interval_instr_count_ != 0); + if (!parallel_) { + populate_serial_interval_results(); + return true; + } + if (interval_instr_count_ > 0) { + // We do not merge interval state snapshots across shards. See comment by + // per_shard_interval_snapshots for more details. + populate_unmerged_shard_interval_results(); + return true; + } + // all_intervals[tool_idx][shard_idx] contains a vector of the // interval_state_snapshot_t* that were output by that tool for that shard. - std::vector::interval_state_snapshot_t *>>> all_intervals(num_tools_); for (const auto &worker : worker_data_) { for (const auto &shard_data : worker.shard_data) { + assert(static_cast(shard_data.second.tool_data.size()) == num_tools_); for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { all_intervals[tool_idx].emplace_back(std::move( shard_data.second.tool_data[tool_idx].interval_snapshot_data)); } } } - assert(merged_interval_snapshots_.empty()); - merged_interval_snapshots_.resize(num_tools_); + assert(whole_trace_interval_snapshots_.empty()); + whole_trace_interval_snapshots_.resize(num_tools_); for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { // We need to do this separately per tool because all tools may not // generate an interval_state_snapshot_t for the same intervals (even though // the framework notifies all tools of all intervals). 
if (!merge_shard_interval_results(all_intervals[tool_idx], - merged_interval_snapshots_[tool_idx], + whole_trace_interval_snapshots_[tool_idx], tool_idx)) { return false; } @@ -848,12 +980,20 @@ analyzer_tmpl_t::run() } } } - if (interval_microseconds_ != 0) { + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) { return collect_and_maybe_merge_shard_interval_results(); } return true; } +static void +print_output_separator() +{ + + std::cerr << "\n==========================================================" + "=================\n"; +} + template bool analyzer_tmpl_t::print_stats() @@ -865,25 +1005,84 @@ analyzer_tmpl_t::print_stats() error_string_ = tools_[i]->get_error_string(); return false; } - if (interval_microseconds_ != 0 && !merged_interval_snapshots_.empty()) { - // merged_interval_snapshots_ may be empty depending on the derived class's - // implementation of collect_and_maybe_merge_shard_interval_results. - if (!merged_interval_snapshots_[i].empty() && - !tools_[i]->print_interval_results(merged_interval_snapshots_[i])) { + if (i + 1 < num_tools_) { + // Separate tool output. + print_output_separator(); + } + } + // Now print interval results. + // Should not have both whole-trace or per-shard interval snapshots. + assert(whole_trace_interval_snapshots_.empty() || + per_shard_interval_snapshots_.empty()); + // We may have whole-trace intervals snapshots for instr count intervals in serial + // mode, and for timestamp (microsecond) intervals in both serial and parallel mode. + if (!whole_trace_interval_snapshots_.empty()) { + // Separate non-interval and interval outputs. + print_output_separator(); + std::cerr << "Printing whole-trace interval results:\n"; + for (int i = 0; i < num_tools_; ++i) { + // whole_trace_interval_snapshots_[i] may be empty if the corresponding tool + // did not produce any interval results. 
+ if (!whole_trace_interval_snapshots_[i].empty() && + !tools_[i]->print_interval_results(whole_trace_interval_snapshots_[i])) { error_string_ = tools_[i]->get_error_string(); return false; } - for (auto snapshot : merged_interval_snapshots_[i]) { + for (auto snapshot : whole_trace_interval_snapshots_[i]) { if (!tools_[i]->release_interval_snapshot(snapshot)) { error_string_ = tools_[i]->get_error_string(); return false; } } + if (i + 1 < num_tools_) { + // Separate tool output. + print_output_separator(); + } } - if (i + 1 < num_tools_) { - // Separate tool output. - std::cerr << "\n==========================================================" - "=================\n"; + } else if (!per_shard_interval_snapshots_.empty()) { + // Separate non-interval and interval outputs. + print_output_separator(); + std::cerr << "Printing unmerged per-shard interval results:\n"; + for (auto &interval_snapshots : per_shard_interval_snapshots_) { + int tool_idx = interval_snapshots.first.tool_idx; + if (!interval_snapshots.second.empty() && + !tools_[tool_idx]->print_interval_results(interval_snapshots.second)) { + error_string_ = tools_[tool_idx]->get_error_string(); + return false; + } + for (auto snapshot : interval_snapshots.second) { + if (!tools_[tool_idx]->release_interval_snapshot(snapshot)) { + error_string_ = tools_[tool_idx]->get_error_string(); + return false; + } + } + print_output_separator(); + } + } + return true; +} + +template +bool +analyzer_tmpl_t::finalize_interval_snapshots( + analyzer_worker_data_t *worker, bool parallel, int shard_idx) +{ + assert(parallel || + shard_idx == 0); // Only parallel mode supports a non-zero shard_idx. 
+ for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { + if (!worker->shard_data[shard_idx] + .tool_data[tool_idx] + .interval_snapshot_data.empty() && + !tools_[tool_idx]->finalize_interval_snapshots(worker->shard_data[shard_idx] + .tool_data[tool_idx] + .interval_snapshot_data)) { + worker->error = tools_[tool_idx]->get_error_string(); + VPRINT(this, 1, + "Worker %d hit finalize_interval_snapshots error %s during %s " + "analysis in trace shard %s\n", + worker->index, worker->error.c_str(), parallel ? "parallel" : "serial", + worker->stream->get_stream_name().c_str()); + return false; } } return true; @@ -893,9 +1092,10 @@ template bool analyzer_tmpl_t::process_interval( uint64_t interval_id, uint64_t interval_init_instr_count, - analyzer_worker_data_t *worker, bool parallel, int shard_idx) + analyzer_worker_data_t *worker, bool parallel, bool at_instr_record, int shard_idx) { - assert(parallel || shard_idx == 0); // Default to zero for the serial mode. + assert(parallel || + shard_idx == 0); // Only parallel mode supports a non-zero shard_idx. for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { typename analysis_tool_tmpl_t::interval_state_snapshot_t *snapshot; if (parallel) { @@ -916,18 +1116,31 @@ analyzer_tmpl_t::process_interval( return false; } if (snapshot != nullptr) { - snapshot->shard_id = parallel + snapshot->shard_id_ = parallel ? 
worker->shard_data[shard_idx].shard_id : analysis_tool_tmpl_t< RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; - snapshot->interval_id = interval_id; - snapshot->interval_end_timestamp = compute_interval_end_timestamp( - worker->stream->get_first_timestamp(), interval_id); - snapshot->instr_count_cumulative = worker->stream->get_instruction_ordinal(); - snapshot->instr_count_delta = - snapshot->instr_count_cumulative - interval_init_instr_count; - worker->shard_data[shard_idx].tool_data[tool_idx].interval_snapshot_data.push( - snapshot); + snapshot->interval_id_ = interval_id; + if (interval_microseconds_ > 0) { + // For timestamp intervals, the interval_end_timestamp is the abstract + // non-inclusive end timestamp for the interval_id. This is to make it + // easier to line up the corresponding shard interval snapshots so that + // we can merge them to form the whole-trace interval snapshots. + snapshot->interval_end_timestamp_ = compute_interval_end_timestamp( + worker->stream->get_first_timestamp(), interval_id); + } else { + snapshot->interval_end_timestamp_ = worker->stream->get_last_timestamp(); + } + // instr_count_cumulative for the interval snapshot is supposed to be + // inclusive, so if the first record after the interval (that is, the record + // we're at right now) is an instr, it must be subtracted. + snapshot->instr_count_cumulative_ = + worker->stream->get_instruction_ordinal() - (at_instr_record ? 1 : 0); + snapshot->instr_count_delta_ = + snapshot->instr_count_cumulative_ - interval_init_instr_count; + worker->shard_data[shard_idx] + .tool_data[tool_idx] + .interval_snapshot_data.push_back(snapshot); } } return true; diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index 8ebc10547b2..63a196bed43 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. 
All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -47,7 +47,6 @@ #include #include -#include #include #include #include @@ -119,7 +118,8 @@ template class analyzer_tmpl_t { analyzer_tmpl_t(const std::string &trace_path, analysis_tool_tmpl_t **tools, int num_tools, int worker_count = 0, uint64_t skip_instrs = 0, - uint64_t interval_microseconds = 0, int verbosity = 0); + uint64_t interval_microseconds = 0, uint64_t interval_instr_count = 0, + int verbosity = 0); /** Launches the analysis process. */ virtual bool run(); @@ -144,9 +144,10 @@ template class analyzer_tmpl_t { } void *shard_data; - // This is a queue as merge_shard_interval_results processes the intervals in a - // FIFO manner. Using a queue also makes code a bit simpler. - std::queue::interval_state_snapshot_t *> + // Stores the interval state snapshots generated by this tool for this shard + // in the same order as they are generated. + std::vector< + typename analysis_tool_tmpl_t::interval_state_snapshot_t *> interval_snapshot_data; private: @@ -167,6 +168,8 @@ template class analyzer_tmpl_t { } uint64_t cur_interval_index; + // Cumulative instr count as it was just before the start of the current + // interval. uint64_t cur_interval_init_instr_count; // Identifier for the shard (thread or core id). int64_t shard_id; @@ -213,20 +216,23 @@ template class analyzer_tmpl_t { operator=(const analyzer_worker_data_t &) = delete; }; + // Pass INVALID_THREAD_ID for only_thread to include all threads. 
bool - init_scheduler(const std::string &trace_path, - memref_tid_t only_thread = INVALID_THREAD_ID, int verbosity = 0, - typename sched_type_t::scheduler_options_t *options = nullptr); + init_scheduler(const std::string &trace_path, memref_tid_t only_thread, int verbosity, + typename sched_type_t::scheduler_options_t options); + // For core-sharded, worker_count_ must be set prior to calling this; for parallel + // mode if it is not set it will be set to the underlying core count. + // For core-sharded, all of "options" is used; otherwise, only the + // read_inputs_in_init field is preserved. bool - init_scheduler( - std::unique_ptr reader = std::unique_ptr(nullptr), - std::unique_ptr reader_end = std::unique_ptr(nullptr), - int verbosity = 0, typename sched_type_t::scheduler_options_t *options = nullptr); + init_scheduler(std::unique_ptr reader, + std::unique_ptr reader_end, int verbosity, + typename sched_type_t::scheduler_options_t options); bool init_scheduler_common(typename sched_type_t::input_workload_t &workload, - typename sched_type_t::scheduler_options_t *options); + typename sched_type_t::scheduler_options_t options); // Used for std::thread so we need an rvalue (so no &worker). void @@ -235,6 +241,10 @@ template class analyzer_tmpl_t { void process_serial(analyzer_worker_data_t &worker); + // Helper for process_tasks(). + bool + process_tasks_internal(analyzer_worker_data_t *worker); + // Helper for process_tasks() which calls parallel_shard_exit() in each tool. // Returns false if there was an error and the caller should return early. bool @@ -249,27 +259,45 @@ template class analyzer_tmpl_t { bool record_is_timestamp(const RecordType &record); + bool + record_is_instr(const RecordType &record); + RecordType create_wait_marker(); RecordType create_idle_marker(); + // Invoked after all interval state snapshots have been generated for the given + // shard_idx and before any merging or printing of interval snapshots. 
This + // invokes the finalize_interval_snapshots API for all tools that returned some + // non-null interval snapshot. + bool + finalize_interval_snapshots(analyzer_worker_data_t *worker, bool parallel, + int shard_idx = 0); + // Invoked when the given interval finishes during serial or parallel // analysis of the trace. For parallel analysis, the shard_id // parameter should be set to the shard_id for which the interval // finished. For serial analysis, it should remain the default value. bool process_interval(uint64_t interval_id, uint64_t interval_init_instr_count, - analyzer_worker_data_t *worker, bool parallel, int shard_idx = 0); + analyzer_worker_data_t *worker, bool parallel, bool at_instr_record, + int shard_idx = 0); // Compute interval id for the given latest_timestamp, assuming the trace (or - // trace shard) starts at the given first_timestamp. + // trace shard) starts at the given first_timestamp. This is relevant when + // timestamp intervals are enabled using interval_microseconds_. + uint64_t + compute_timestamp_interval_id(uint64_t first_timestamp, uint64_t latest_timestamp); + + // Compute interval id at the given instr count. This is relevant when instr count + // intervals are enabled using interval_instr_count_. uint64_t - compute_interval_id(uint64_t first_timestamp, uint64_t latest_timestamp); + compute_instr_count_interval_id(uint64_t cur_instr_count); - // Compute the interval end timestamp for the given interval_id, assuming the trace - // (or trace shard) starts at the given first_timestamp. + // Compute the interval end timestamp (non-inclusive) for the given interval_id, + // assuming the trace (or trace shard) starts at the given first_timestamp. uint64_t compute_interval_end_timestamp(uint64_t first_timestamp, uint64_t interval_id); @@ -277,11 +305,13 @@ template class analyzer_tmpl_t { // on the most recent seen timestamp in the trace stream. 
Returns whether the // current interval id was updated, and if so also sets the previous interval index // in prev_interval_index. + // at_instr_record indicates that the next record that will be presented to + // the analysis tools is an instr record. bool advance_interval_id( typename scheduler_tmpl_t::stream_t *stream, analyzer_shard_data_t *shard, uint64_t &prev_interval_index, - uint64_t &prev_interval_init_instr_count); + uint64_t &prev_interval_init_instr_count, bool at_instr_record); // Collects interval results for all shards from the workers, and then optional // merges the shard-local intervals to form the whole-trace interval results using @@ -290,20 +320,30 @@ template class analyzer_tmpl_t { virtual bool collect_and_maybe_merge_shard_interval_results(); - // Computes and stores the interval results in merged_interval_snapshots_. For + // Computes and stores the interval results in whole_trace_interval_snapshots_. For // serial analysis where we already have only a single shard, this involves // simply copying interval_state_snapshot_t* from the input. For parallel // analysis, this involves merging results from multiple shards for intervals // that map to the same final whole-trace interval. bool merge_shard_interval_results( - std::vector::interval_state_snapshot_t *>> &intervals, std::vector::interval_state_snapshot_t *> &merged_intervals, int tool_idx); + // Populates the per_shard_interval_snapshots_ field based on the interval snapshots + // stored in worker_data_. + void + populate_unmerged_shard_interval_results(); + + // Populates the whole_trace_interval_snapshots_ field based on the interval snapshots + // stored in the only entry of worker_data_. + void + populate_serial_interval_results(); + // Combines all interval snapshots in the given vector to create the interval // snapshot for the whole-trace interval ending at interval_end_timestamp and // stores it in 'result'. These snapshots are for the tool at tool_idx. 
Returns @@ -328,24 +368,67 @@ template class analyzer_tmpl_t { std::vector worker_data_; int num_tools_; analysis_tool_tmpl_t **tools_; - // Stores the interval state snapshots for the whole trace, which for the parallel - // mode are the resulting interval state snapshots after merging from all shards - // in merge_shard_interval_results. - // merged_interval_snapshots_[tool_idx] is a vector of the interval snapshots - // (in order of the intervals) for that tool. - // This may not be set, depending on the derived class's implementation of - // collect_and_maybe_merge_shard_interval_results. + // Stores the interval state snapshots, merged across shards. These are + // produced when timestamp intervals are enabled using interval_microseconds_. + // + // whole_trace_interval_snapshots_[tool_idx] is a vector of the interval snapshots + // (in order of the intervals) for that tool. For the parallel mode, these + // interval state snapshots are produced after merging corresponding shard + // interval snapshots using merge_shard_interval_results. std::vector::interval_state_snapshot_t *>> - merged_interval_snapshots_; + whole_trace_interval_snapshots_; + + // Key that combines tool and shard idx for use with an std::unordered_map. + struct key_tool_shard_t { + int tool_idx; + int shard_idx; + bool + operator==(const key_tool_shard_t &rhs) const + { + return tool_idx == rhs.tool_idx && shard_idx == rhs.shard_idx; + } + }; + struct key_tool_shard_hash_t { + std::size_t + operator()(const key_tool_shard_t &t) const + { + return std::hash()(t.tool_idx ^ t.shard_idx); + } + }; + + // Stores the interval state snapshots, unmerged across shards. These are + // produced when instr count intervals are enabled using interval_instr_count_. + // + // per_shard_interval_snapshots_[(tool_idx, shard_idx)] is a vector + // of the interval snapshots for that tool and shard. Note that the snapshots for + // each shard are separate; they are not merged across shards. 
+ // + // TODO i#6643: Figure out a useful way to merge instr count intervals across shards. + // One way is to merge the shard interval snapshots that correspond to the same + // [interval_instr_count_ * interval_id, interval_instr_count_ * (interval_id + 1)) + // shard-local instrs. But it is not clear whether this is useful. + // Another way is to merge the shard interval snapshots that correspond to the same + // [interval_instr_count_ * interval_id, interval_instr_count_ * (interval_id + 1)) + // whole-trace instrs. But that is much harder to compute. We'd need some way to + // identify the whole-trace interval boundaries in each shard's stream (since we + // process each shard separately); this would likely need a pre-processing pass. + std::unordered_map::interval_state_snapshot_t *>, + key_tool_shard_hash_t> + per_shard_interval_snapshots_; + bool parallel_; int worker_count_; const char *output_prefix_ = "[analyzer]"; uint64_t skip_instrs_ = 0; uint64_t interval_microseconds_ = 0; + uint64_t interval_instr_count_ = 0; int verbosity_ = 0; shard_type_t shard_type_ = SHARD_BY_THREAD; bool sched_by_time_ = false; + typename sched_type_t::mapping_t sched_mapping_ = sched_type_t::MAP_TO_ANY_OUTPUT; private: bool diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 9310abe2b02..492e87c499e 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -53,6 +53,7 @@ #include "simulator/cache_simulator_create.h" #include "simulator/tlb_simulator_create.h" #include "tools/basic_counts_create.h" +#include "tools/filter/record_filter_create.h" #include "tools/func_view_create.h" #include "tools/histogram_create.h" #include "tools/invariant_checker.h" @@ -65,6 +66,7 @@ #include "tools/view_create.h" #include "tools/loader/external_config_file.h" #include "tools/loader/external_tool_creator.h" +#include "tools/filter/record_filter_create.h" namespace dynamorio { namespace drmemtrace { @@ -72,6 +74,26 @@ namespace drmemtrace { using ::dynamorio::droption::droption_parser_t; using ::dynamorio::droption::DROPTION_SCOPE_ALL; +/**************************************************************** + * Specializations for analyzer_multi_tmpl_t, + * aka analyzer_multi_t. + */ + +template <> +std::unique_ptr +analyzer_multi_t::create_ipc_reader(const char *name, int verbose) +{ + return std::unique_ptr(new ipc_reader_t(name, verbose)); +} + +template <> +std::unique_ptr +analyzer_multi_t::create_ipc_reader_end() +{ + return std::unique_ptr(new ipc_reader_t()); +} + +template <> analysis_tool_t * analyzer_multi_t::create_external_tool(const std::string &tool_name) { @@ -106,26 +128,245 @@ analyzer_multi_t::create_external_tool(const std::string &tool_name) return tool; } -analyzer_multi_t::analyzer_multi_t() +template <> +analysis_tool_t * +analyzer_multi_t::create_invariant_checker() +{ + if (op_offline.get_value()) { + // TODO i#5538: Locate and open the schedule files and pass to the + // reader(s) for seeking. For now we only read them for this test. + // TODO i#5843: Share this code with scheduler_t or pass in for all + // tools from here for fast skipping in serial and per-cpu modes. 
+ std::string tracedir = + raw2trace_directory_t::tracedir_from_rawdir(op_indir.get_value()); + if (directory_iterator_t::is_directory(tracedir)) { + directory_iterator_t end; + directory_iterator_t iter(tracedir); + if (!iter) { + this->error_string_ = "Failed to list directory: " + iter.error_string(); + return nullptr; + } + for (; iter != end; ++iter) { + const std::string fname = *iter; + const std::string fpath = tracedir + DIRSEP + fname; + if (starts_with(fname, DRMEMTRACE_SERIAL_SCHEDULE_FILENAME)) { + if (ends_with(fname, ".gz")) { +#ifdef HAS_ZLIB + this->serial_schedule_file_ = + std::unique_ptr(new gzip_istream_t(fpath)); +#endif + } else { + this->serial_schedule_file_ = std::unique_ptr( + new std::ifstream(fpath, std::ifstream::binary)); + } + if (this->serial_schedule_file_ && !*serial_schedule_file_) { + this->error_string_ = + "Failed to open serial schedule file " + fpath; + return nullptr; + } + } else if (fname == DRMEMTRACE_CPU_SCHEDULE_FILENAME) { +#ifdef HAS_ZIP + this->cpu_schedule_file_ = + std::unique_ptr(new zipfile_istream_t(fpath)); +#endif + } + } + } + } + return new invariant_checker_t(op_offline.get_value(), op_verbose.get_value(), + op_test_mode_name.get_value(), + serial_schedule_file_.get(), cpu_schedule_file_.get()); +} + +template <> +analysis_tool_t * +analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator_type) +{ + if (simulator_type == CPU_CACHE) { + const std::string &config_file = op_config_file.get_value(); + if (!config_file.empty()) { + return cache_simulator_create(config_file); + } else { + cache_simulator_knobs_t *knobs = get_cache_simulator_knobs(); + return cache_simulator_create(*knobs); + } + } else if (simulator_type == MISS_ANALYZER) { + cache_simulator_knobs_t *knobs = get_cache_simulator_knobs(); + return cache_miss_analyzer_create(*knobs, op_miss_count_threshold.get_value(), + op_miss_frac_threshold.get_value(), + op_confidence_threshold.get_value()); + } else if (simulator_type 
== TLB) { + tlb_simulator_knobs_t knobs; + knobs.num_cores = op_num_cores.get_value(); + knobs.page_size = op_page_size.get_value(); + knobs.TLB_L1I_entries = op_TLB_L1I_entries.get_value(); + knobs.TLB_L1D_entries = op_TLB_L1D_entries.get_value(); + knobs.TLB_L1I_assoc = op_TLB_L1I_assoc.get_value(); + knobs.TLB_L1D_assoc = op_TLB_L1D_assoc.get_value(); + knobs.TLB_L2_entries = op_TLB_L2_entries.get_value(); + knobs.TLB_L2_assoc = op_TLB_L2_assoc.get_value(); + knobs.TLB_replace_policy = op_TLB_replace_policy.get_value(); + knobs.skip_refs = op_skip_refs.get_value(); + knobs.warmup_refs = op_warmup_refs.get_value(); + knobs.warmup_fraction = op_warmup_fraction.get_value(); + knobs.sim_refs = op_sim_refs.get_value(); + knobs.verbose = op_verbose.get_value(); + knobs.cpu_scheduling = op_cpu_scheduling.get_value(); + knobs.use_physical = op_use_physical.get_value(); + return tlb_simulator_create(knobs); + } else if (simulator_type == HISTOGRAM) { + return histogram_tool_create(op_line_size.get_value(), op_report_top.get_value(), + op_verbose.get_value()); + } else if (simulator_type == REUSE_DIST) { + reuse_distance_knobs_t knobs; + knobs.line_size = op_line_size.get_value(); + knobs.report_histogram = op_reuse_distance_histogram.get_value(); + knobs.distance_threshold = op_reuse_distance_threshold.get_value(); + knobs.report_top = op_report_top.get_value(); + knobs.skip_list_distance = op_reuse_skip_dist.get_value(); + knobs.distance_limit = op_reuse_distance_limit.get_value(); + knobs.verify_skip = op_reuse_verify_skip.get_value(); + knobs.histogram_bin_multiplier = op_reuse_histogram_bin_multiplier.get_value(); + if (knobs.histogram_bin_multiplier < 1.0) { + ERRMSG("Usage error: reuse_histogram_bin_multiplier must be >= 1.0\n"); + return nullptr; + } + knobs.verbose = op_verbose.get_value(); + return reuse_distance_tool_create(knobs); + } else if (simulator_type == REUSE_TIME) { + return reuse_time_tool_create(op_line_size.get_value(), op_verbose.get_value()); + } 
else if (simulator_type == BASIC_COUNTS) { + return basic_counts_tool_create(op_verbose.get_value()); + } else if (simulator_type == OPCODE_MIX) { + std::string module_file_path = get_module_file_path(); + if (module_file_path.empty() && op_indir.get_value().empty() && + op_infile.get_value().empty() && !op_instr_encodings.get_value()) { + ERRMSG("Usage error: the opcode_mix tool requires offline traces, or " + "-instr_encodings for online traces.\n"); + return nullptr; + } + return opcode_mix_tool_create(module_file_path, op_verbose.get_value(), + op_alt_module_dir.get_value()); + } else if (simulator_type == SYSCALL_MIX) { + return syscall_mix_tool_create(op_verbose.get_value()); + } else if (simulator_type == VIEW) { + std::string module_file_path = get_module_file_path(); + // The module file is optional so we don't check for emptiness. + return view_tool_create(module_file_path, op_skip_refs.get_value(), + op_sim_refs.get_value(), op_view_syntax.get_value(), + op_verbose.get_value(), op_alt_module_dir.get_value()); + } else if (simulator_type == FUNC_VIEW) { + std::string funclist_file_path = get_aux_file_path( + op_funclist_file.get_value(), DRMEMTRACE_FUNCTION_LIST_FILENAME); + if (funclist_file_path.empty()) { + ERRMSG("Usage error: the func_view tool requires offline traces.\n"); + return nullptr; + } + return func_view_tool_create(funclist_file_path, op_show_func_trace.get_value(), + op_verbose.get_value()); + } else if (simulator_type == INVARIANT_CHECKER) { + return create_invariant_checker(); + } else if (simulator_type == SCHEDULE_STATS) { + return schedule_stats_tool_create(op_schedule_stats_print_every.get_value(), + op_verbose.get_value()); + } else { + auto tool = create_external_tool(simulator_type); + if (tool == nullptr) { + ERRMSG("Usage error: unsupported analyzer type \"%s\". 
" + "Please choose " CPU_CACHE ", " MISS_ANALYZER ", " TLB ", " HISTOGRAM + ", " REUSE_DIST ", " BASIC_COUNTS ", " OPCODE_MIX ", " SYSCALL_MIX + ", " VIEW ", " FUNC_VIEW ", or some external analyzer.\n", + simulator_type.c_str()); + } + return tool; + } +} + +/****************************************************************************** + * Specializations for analyzer_multi_tmpl_t, aka + * record_analyzer_multi_t. + */ + +template <> +std::unique_ptr +record_analyzer_multi_t::create_ipc_reader(const char *name, int verbose) +{ + error_string_ = "Online analysis is not supported for record_filter"; + ERRMSG("%s\n", error_string_.c_str()); + return std::unique_ptr(); +} + +template <> +std::unique_ptr +record_analyzer_multi_t::create_ipc_reader_end() +{ + error_string_ = "Online analysis is not supported for record_filter"; + ERRMSG("%s\n", error_string_.c_str()); + return std::unique_ptr(); +} + +template <> +record_analysis_tool_t * +record_analyzer_multi_t::create_external_tool(const std::string &tool_name) { - worker_count_ = op_jobs.get_value(); - skip_instrs_ = op_skip_instrs.get_value(); - interval_microseconds_ = op_interval_microseconds.get_value(); + error_string_ = "External tools are not supported for record analysis"; + ERRMSG("%s\n", error_string_.c_str()); + return nullptr; +} + +template <> +record_analysis_tool_t * +record_analyzer_multi_t::create_invariant_checker() +{ + error_string_ = "Invariant checker is not supported for record analysis"; + ERRMSG("%s\n", error_string_.c_str()); + return nullptr; +} + +template <> +record_analysis_tool_t * +record_analyzer_multi_t::create_analysis_tool_from_options( + const std::string &simulator_type) +{ + if (simulator_type == RECORD_FILTER) { + return record_filter_tool_create( + op_outdir.get_value(), op_filter_stop_timestamp.get_value(), + op_filter_cache_size.get_value(), op_filter_trace_types.get_value(), + op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(), + 
op_trim_after_timestamp.get_value(), op_verbose.get_value()); + } + ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER + " is supported.\n", + simulator_type.c_str()); + return nullptr; +} + +/******************************************************************** + * Other analyzer_multi_tmpl_t routines that do not need to be specialized. + */ + +template +analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() +{ + this->worker_count_ = op_jobs.get_value(); + this->skip_instrs_ = op_skip_instrs.get_value(); + this->interval_microseconds_ = op_interval_microseconds.get_value(); + this->interval_instr_count_ = op_interval_instr_count.get_value(); // Initial measurements show it's sometimes faster to keep the parallel model // of using single-file readers but use them sequentially, as opposed to // the every-file interleaving reader, but the user can specify -jobs 1, so // we still keep the serial vs parallel split for 0. - if (worker_count_ == 0) - parallel_ = false; + if (this->worker_count_ == 0) + this->parallel_ = false; if (!op_indir.get_value().empty() || !op_infile.get_value().empty()) op_offline.set_value(true); // Some tools check this on post-proc runs. 
// XXX: add a "required" flag to droption to avoid needing this here if (op_indir.get_value().empty() && op_infile.get_value().empty() && op_ipc_name.get_value().empty()) { - error_string_ = + this->error_string_ = "Usage error: -ipc_name or -indir or -infile is required\nUsage:\n" + droption_parser_t::usage_short(DROPTION_SCOPE_ALL); - success_ = false; + this->success_ = false; return; } if (!op_indir.get_value().empty()) { @@ -163,8 +404,8 @@ analyzer_multi_t::analyzer_multi_t() dir.initialize(op_indir.get_value(), "", op_trace_compress.get_value(), op_syscall_template_file.get_value()); if (!dir_err.empty()) { - success_ = false; - error_string_ = "Directory setup failed: " + dir_err; + this->success_ = false; + this->error_string_ = "Directory setup failed: " + dir_err; return; } raw2trace_t raw2trace( @@ -176,69 +417,71 @@ analyzer_multi_t::analyzer_multi_t() std::move(dir.syscall_template_file_reader_)); std::string error = raw2trace.do_conversion(); if (!error.empty()) { - success_ = false; - error_string_ = "raw2trace failed: " + error; + this->success_ = false; + this->error_string_ = "raw2trace failed: " + error; } } } // Create the tools after post-processing so we have the schedule files for // test_mode. if (!create_analysis_tools()) { - success_ = false; - error_string_ = "Failed to create analysis tool:" + error_string_; + this->success_ = false; + this->error_string_ = "Failed to create analysis tool:" + this->error_string_; return; } - scheduler_t::scheduler_options_t sched_ops; - scheduler_t::scheduler_options_t *sched_ops_ptr = nullptr; + typename sched_type_t::scheduler_options_t sched_ops; if (op_core_sharded.get_value() || op_core_serial.get_value()) { if (op_core_serial.get_value()) { - // TODO i#5694: Add serial core-sharded support by having the - // analyzer create #cores streams but walk them in lockstep. - // Then, update drcachesim to use get_output_cpuid(). 
- error_string_ = "-core_serial is not yet implemented"; - success_ = false; - return; + this->parallel_ = false; } sched_ops = init_dynamic_schedule(); - sched_ops_ptr = &sched_ops; } if (!op_indir.get_value().empty()) { std::string tracedir = raw2trace_directory_t::tracedir_from_rawdir(op_indir.get_value()); - if (!init_scheduler(tracedir, op_only_thread.get_value(), op_verbose.get_value(), - sched_ops_ptr)) - success_ = false; + if (!this->init_scheduler(tracedir, op_only_thread.get_value(), + op_verbose.get_value(), std::move(sched_ops))) + this->success_ = false; } else if (op_infile.get_value().empty()) { // XXX i#3323: Add parallel analysis support for online tools. - parallel_ = false; - auto reader = std::unique_ptr( - new ipc_reader_t(op_ipc_name.get_value().c_str(), op_verbose.get_value())); - auto end = std::unique_ptr(new ipc_reader_t()); - if (!init_scheduler(std::move(reader), std::move(end), op_verbose.get_value(), - sched_ops_ptr)) { - success_ = false; + this->parallel_ = false; + auto reader = + create_ipc_reader(op_ipc_name.get_value().c_str(), op_verbose.get_value()); + if (!reader) { + this->error_string_ = "Failed to create IPC reader: " + this->error_string_; + this->success_ = false; + return; + } + auto end = create_ipc_reader_end(); + // We do not want the scheduler's init() to block. + sched_ops.read_inputs_in_init = false; + if (!this->init_scheduler(std::move(reader), std::move(end), + op_verbose.get_value(), std::move(sched_ops))) { + this->success_ = false; } } else { // Legacy file. 
- if (!init_scheduler(op_infile.get_value(), INVALID_THREAD_ID /*all threads*/, - op_verbose.get_value(), sched_ops_ptr)) - success_ = false; + if (!this->init_scheduler(op_infile.get_value(), + INVALID_THREAD_ID /*all threads*/, + op_verbose.get_value(), std::move(sched_ops))) + this->success_ = false; } if (!init_analysis_tools()) { - success_ = false; + this->success_ = false; return; } // We can't call serial_trace_iter_->init() here as it blocks for ipc_reader_t. } -analyzer_multi_t::~analyzer_multi_t() +template +analyzer_multi_tmpl_t::~analyzer_multi_tmpl_t() { #ifdef HAS_ZIP if (!op_record_file.get_value().empty()) { - if (scheduler_.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) { + if (this->scheduler_.write_recorded_schedule() != sched_type_t::STATUS_SUCCESS) { ERRMSG("Failed to write schedule to %s", op_record_file.get_value().c_str()); } } @@ -246,23 +489,25 @@ analyzer_multi_t::~analyzer_multi_t() destroy_analysis_tools(); } -scheduler_t::scheduler_options_t -analyzer_multi_t::init_dynamic_schedule() +template +typename scheduler_tmpl_t::scheduler_options_t +analyzer_multi_tmpl_t::init_dynamic_schedule() { - shard_type_ = SHARD_BY_CORE; - worker_count_ = op_num_cores.get_value(); - scheduler_t::scheduler_options_t sched_ops( - scheduler_t::MAP_TO_ANY_OUTPUT, - op_sched_order_time.get_value() ? scheduler_t::DEPENDENCY_TIMESTAMPS - : scheduler_t::DEPENDENCY_IGNORE, - scheduler_t::SCHEDULER_DEFAULTS, op_verbose.get_value()); + this->shard_type_ = SHARD_BY_CORE; + this->worker_count_ = op_num_cores.get_value(); + typename sched_type_t::scheduler_options_t sched_ops( + sched_type_t::MAP_TO_ANY_OUTPUT, + op_sched_order_time.get_value() ? 
sched_type_t::DEPENDENCY_TIMESTAMPS + : sched_type_t::DEPENDENCY_IGNORE, + sched_type_t::SCHEDULER_DEFAULTS, op_verbose.get_value()); sched_ops.quantum_duration = op_sched_quantum.get_value(); if (op_sched_time.get_value()) - sched_ops.quantum_unit = scheduler_t::QUANTUM_TIME; + sched_ops.quantum_unit = sched_type_t::QUANTUM_TIME; sched_ops.syscall_switch_threshold = op_sched_syscall_switch_us.get_value(); sched_ops.blocking_switch_threshold = op_sched_blocking_switch_us.get_value(); sched_ops.block_time_scale = op_sched_block_scale.get_value(); sched_ops.block_time_max = op_sched_block_max_us.get_value(); + sched_ops.randomize_next_input = op_sched_randomize.get_value(); #ifdef HAS_ZIP if (!op_record_file.get_value().empty()) { record_schedule_zip_.reset(new zipfile_ostream_t(op_record_file.get_value())); @@ -270,32 +515,30 @@ analyzer_multi_t::init_dynamic_schedule() } else if (!op_replay_file.get_value().empty()) { replay_schedule_zip_.reset(new zipfile_istream_t(op_replay_file.get_value())); sched_ops.schedule_replay_istream = replay_schedule_zip_.get(); - sched_ops.mapping = scheduler_t::MAP_AS_PREVIOUSLY; - sched_ops.deps = scheduler_t::DEPENDENCY_TIMESTAMPS; + sched_ops.mapping = sched_type_t::MAP_AS_PREVIOUSLY; + sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; } else if (!op_cpu_schedule_file.get_value().empty()) { cpu_schedule_zip_.reset(new zipfile_istream_t(op_cpu_schedule_file.get_value())); - sched_ops.mapping = scheduler_t::MAP_TO_RECORDED_OUTPUT; - sched_ops.deps = scheduler_t::DEPENDENCY_TIMESTAMPS; + sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT; + sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get(); } #endif + sched_ops.kernel_switch_trace_path = op_sched_switch_file.get_value(); return sched_ops; } +template bool -analyzer_multi_t::create_analysis_tools() +analyzer_multi_tmpl_t::create_analysis_tools() { - /* TODO i#2006: add multiple tool support. 
*/ - /* TODO i#2006: create a single top-level tool for multi-component - * tools. - */ - tools_ = new analysis_tool_t *[max_num_tools_]; + this->tools_ = new analysis_tool_tmpl_t *[this->max_num_tools_]; if (!op_simulator_type.get_value().empty()) { std::stringstream stream(op_simulator_type.get_value()); std::string type; while (std::getline(stream, type, ':')) { - if (num_tools_ >= max_num_tools_ - 1) { - error_string_ = "Only " + std::to_string(max_num_tools_ - 1) + + if (this->num_tools_ >= this->max_num_tools_ - 1) { + this->error_string_ = "Only " + std::to_string(this->max_num_tools_ - 1) + " simulators are allowed simultaneously"; return false; } @@ -306,196 +549,49 @@ analyzer_multi_t::create_analysis_tools() std::string tool_error = tool->get_error_string(); if (tool_error.empty()) tool_error = "no error message provided."; - error_string_ = "Tool failed to initialize: " + tool_error; + this->error_string_ = "Tool failed to initialize: " + tool_error; delete tool; return false; } - tools_[num_tools_++] = tool; + this->tools_[this->num_tools_++] = tool; } } if (op_test_mode.get_value()) { - tools_[num_tools_] = create_invariant_checker(); - if (tools_[num_tools_] == NULL) + // This will return nullptr for record_ instantiation; we just don't support + // -test_mode for record_. 
+ this->tools_[this->num_tools_] = create_invariant_checker(); + if (this->tools_[this->num_tools_] == NULL) return false; - if (!*tools_[num_tools_]) { - error_string_ = tools_[num_tools_]->get_error_string(); - delete tools_[num_tools_]; - tools_[num_tools_] = NULL; + if (!*this->tools_[this->num_tools_]) { + this->error_string_ = this->tools_[this->num_tools_]->get_error_string(); + delete this->tools_[this->num_tools_]; + this->tools_[this->num_tools_] = NULL; return false; } - num_tools_++; + this->num_tools_++; } - return (num_tools_ != 0); + return (this->num_tools_ != 0); } +template bool -analyzer_multi_t::init_analysis_tools() +analyzer_multi_tmpl_t::init_analysis_tools() { // initialize_stream() is now called from analyzer_t::run(). return true; } +template void -analyzer_multi_t::destroy_analysis_tools() +analyzer_multi_tmpl_t::destroy_analysis_tools() { - if (!success_) + if (!this->success_) return; - for (int i = 0; i < num_tools_; i++) - delete tools_[i]; - delete[] tools_; -} - -analysis_tool_t * -analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator_type) -{ - if (simulator_type == CPU_CACHE) { - const std::string &config_file = op_config_file.get_value(); - if (!config_file.empty()) { - return cache_simulator_create(config_file); - } else { - cache_simulator_knobs_t *knobs = get_cache_simulator_knobs(); - return cache_simulator_create(*knobs); - } - } else if (simulator_type == MISS_ANALYZER) { - cache_simulator_knobs_t *knobs = get_cache_simulator_knobs(); - return cache_miss_analyzer_create(*knobs, op_miss_count_threshold.get_value(), - op_miss_frac_threshold.get_value(), - op_confidence_threshold.get_value()); - } else if (simulator_type == TLB) { - tlb_simulator_knobs_t knobs; - knobs.num_cores = op_num_cores.get_value(); - knobs.page_size = op_page_size.get_value(); - knobs.TLB_L1I_entries = op_TLB_L1I_entries.get_value(); - knobs.TLB_L1D_entries = op_TLB_L1D_entries.get_value(); - knobs.TLB_L1I_assoc = 
op_TLB_L1I_assoc.get_value(); - knobs.TLB_L1D_assoc = op_TLB_L1D_assoc.get_value(); - knobs.TLB_L2_entries = op_TLB_L2_entries.get_value(); - knobs.TLB_L2_assoc = op_TLB_L2_assoc.get_value(); - knobs.TLB_replace_policy = op_TLB_replace_policy.get_value(); - knobs.skip_refs = op_skip_refs.get_value(); - knobs.warmup_refs = op_warmup_refs.get_value(); - knobs.warmup_fraction = op_warmup_fraction.get_value(); - knobs.sim_refs = op_sim_refs.get_value(); - knobs.verbose = op_verbose.get_value(); - knobs.cpu_scheduling = op_cpu_scheduling.get_value(); - knobs.use_physical = op_use_physical.get_value(); - return tlb_simulator_create(knobs); - } else if (simulator_type == HISTOGRAM) { - return histogram_tool_create(op_line_size.get_value(), op_report_top.get_value(), - op_verbose.get_value()); - } else if (simulator_type == REUSE_DIST) { - reuse_distance_knobs_t knobs; - knobs.line_size = op_line_size.get_value(); - knobs.report_histogram = op_reuse_distance_histogram.get_value(); - knobs.distance_threshold = op_reuse_distance_threshold.get_value(); - knobs.report_top = op_report_top.get_value(); - knobs.skip_list_distance = op_reuse_skip_dist.get_value(); - knobs.distance_limit = op_reuse_distance_limit.get_value(); - knobs.verify_skip = op_reuse_verify_skip.get_value(); - knobs.histogram_bin_multiplier = op_reuse_histogram_bin_multiplier.get_value(); - if (knobs.histogram_bin_multiplier < 1.0) { - ERRMSG("Usage error: reuse_histogram_bin_multiplier must be >= 1.0\n"); - return nullptr; - } - knobs.verbose = op_verbose.get_value(); - return reuse_distance_tool_create(knobs); - } else if (simulator_type == REUSE_TIME) { - return reuse_time_tool_create(op_line_size.get_value(), op_verbose.get_value()); - } else if (simulator_type == BASIC_COUNTS) { - return basic_counts_tool_create(op_verbose.get_value()); - } else if (simulator_type == OPCODE_MIX) { - std::string module_file_path = get_module_file_path(); - if (module_file_path.empty() && op_indir.get_value().empty() && - 
op_infile.get_value().empty() && !op_instr_encodings.get_value()) { - ERRMSG("Usage error: the opcode_mix tool requires offline traces, or " - "-instr_encodings for online traces.\n"); - return nullptr; - } - return opcode_mix_tool_create(module_file_path, op_verbose.get_value(), - op_alt_module_dir.get_value()); - } else if (simulator_type == SYSCALL_MIX) { - return syscall_mix_tool_create(op_verbose.get_value()); - } else if (simulator_type == VIEW) { - std::string module_file_path = get_module_file_path(); - // The module file is optional so we don't check for emptiness. - return view_tool_create(module_file_path, op_skip_refs.get_value(), - op_sim_refs.get_value(), op_view_syntax.get_value(), - op_verbose.get_value(), op_alt_module_dir.get_value()); - } else if (simulator_type == FUNC_VIEW) { - std::string funclist_file_path = get_aux_file_path( - op_funclist_file.get_value(), DRMEMTRACE_FUNCTION_LIST_FILENAME); - if (funclist_file_path.empty()) { - ERRMSG("Usage error: the func_view tool requires offline traces.\n"); - return nullptr; - } - return func_view_tool_create(funclist_file_path, op_show_func_trace.get_value(), - op_verbose.get_value()); - } else if (simulator_type == INVARIANT_CHECKER) { - return create_invariant_checker(); - } else if (simulator_type == SCHEDULE_STATS) { - return schedule_stats_tool_create(op_schedule_stats_print_every.get_value(), - op_verbose.get_value()); - } else { - auto tool = create_external_tool(simulator_type); - if (tool == nullptr) { - ERRMSG("Usage error: unsupported analyzer type \"%s\". 
" - "Please choose " CPU_CACHE ", " MISS_ANALYZER ", " TLB ", " HISTOGRAM - ", " REUSE_DIST ", " BASIC_COUNTS ", " OPCODE_MIX ", " SYSCALL_MIX - ", " VIEW ", " FUNC_VIEW ", or some external analyzer.\n", - simulator_type.c_str()); - } - return tool; - } -} - -analysis_tool_t * -analyzer_multi_t::create_invariant_checker() -{ - if (op_offline.get_value()) { - // TODO i#5538: Locate and open the schedule files and pass to the - // reader(s) for seeking. For now we only read them for this test. - // TODO i#5843: Share this code with scheduler_t or pass in for all - // tools from here for fast skipping in serial and per-cpu modes. - std::string tracedir = - raw2trace_directory_t::tracedir_from_rawdir(op_indir.get_value()); - if (directory_iterator_t::is_directory(tracedir)) { - directory_iterator_t end; - directory_iterator_t iter(tracedir); - if (!iter) { - error_string_ = "Failed to list directory: " + iter.error_string(); - return nullptr; - } - for (; iter != end; ++iter) { - const std::string fname = *iter; - const std::string fpath = tracedir + DIRSEP + fname; - if (starts_with(fname, DRMEMTRACE_SERIAL_SCHEDULE_FILENAME)) { - if (ends_with(fname, ".gz")) { -#ifdef HAS_ZLIB - serial_schedule_file_ = - std::unique_ptr(new gzip_istream_t(fpath)); -#endif - } else { - serial_schedule_file_ = std::unique_ptr( - new std::ifstream(fpath, std::ifstream::binary)); - } - if (serial_schedule_file_ && !*serial_schedule_file_) { - error_string_ = "Failed to open serial schedule file " + fpath; - return nullptr; - } - } else if (fname == DRMEMTRACE_CPU_SCHEDULE_FILENAME) { -#ifdef HAS_ZIP - cpu_schedule_file_ = - std::unique_ptr(new zipfile_istream_t(fpath)); -#endif - } - } - } - } - return new invariant_checker_t(op_offline.get_value(), op_verbose.get_value(), - op_test_mode_name.get_value(), - serial_schedule_file_.get(), cpu_schedule_file_.get()); + for (int i = 0; i < this->num_tools_; i++) + delete this->tools_[i]; + delete[] this->tools_; } /* Get the path to an 
auxiliary file by examining @@ -504,8 +600,10 @@ analyzer_multi_t::create_invariant_checker() * If a trace file is provided instead of a trace directory, it searches in the * directory which contains the trace file. */ +template std::string -analyzer_multi_t::get_aux_file_path(std::string option_val, std::string default_filename) +analyzer_multi_tmpl_t::get_aux_file_path( + std::string option_val, std::string default_filename) { std::string file_path; if (!option_val.empty()) @@ -543,8 +641,9 @@ analyzer_multi_t::get_aux_file_path(std::string option_val, std::string default_ return file_path; } +template std::string -analyzer_multi_t::get_module_file_path() +analyzer_multi_tmpl_t::get_module_file_path() { return get_aux_file_path(op_module_file.get_value(), DRMEMTRACE_MODULE_LIST_FILENAME); } @@ -552,8 +651,9 @@ analyzer_multi_t::get_module_file_path() /* Get the cache simulator knobs used by the cache simulator * and the cache miss analyzer. */ +template cache_simulator_knobs_t * -analyzer_multi_t::get_cache_simulator_knobs() +analyzer_multi_tmpl_t::get_cache_simulator_knobs() { cache_simulator_knobs_t *knobs = new cache_simulator_knobs_t; knobs->num_cores = op_num_cores.get_value(); @@ -578,5 +678,9 @@ analyzer_multi_t::get_cache_simulator_knobs() return knobs; } +template class analyzer_multi_tmpl_t; +template class analyzer_multi_tmpl_t; + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/analyzer_multi.h b/clients/drcachesim/analyzer_multi.h index 5d8e5068c42..4699e09c4ea 100644 --- a/clients/drcachesim/analyzer_multi.h +++ b/clients/drcachesim/analyzer_multi.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -45,15 +45,18 @@ namespace dynamorio { namespace drmemtrace { -class analyzer_multi_t : public analyzer_t { +template +class analyzer_multi_tmpl_t : public analyzer_tmpl_t { public: // Usage: errors encountered during the constructor will set a flag that should // be queried via operator!. - analyzer_multi_t(); - virtual ~analyzer_multi_t(); + analyzer_multi_tmpl_t(); + virtual ~analyzer_multi_tmpl_t(); protected: - scheduler_t::scheduler_options_t + typedef scheduler_tmpl_t sched_type_t; + + typename scheduler_tmpl_t::scheduler_options_t init_dynamic_schedule(); bool create_analysis_tools(); @@ -62,13 +65,19 @@ class analyzer_multi_t : public analyzer_t { void destroy_analysis_tools(); - analysis_tool_t * + std::unique_ptr + create_ipc_reader(const char *name, int verbose); + + std::unique_ptr + create_ipc_reader_end(); + + analysis_tool_tmpl_t * create_analysis_tool_from_options(const std::string &type); - analysis_tool_t * + analysis_tool_tmpl_t * create_external_tool(const std::string &id); - analysis_tool_t * + analysis_tool_tmpl_t * create_invariant_checker(); std::string @@ -96,6 +105,11 @@ class analyzer_multi_t : public analyzer_t { static const int max_num_tools_ = 8; }; +typedef analyzer_multi_tmpl_t analyzer_multi_t; + +typedef analyzer_multi_tmpl_t + record_analyzer_multi_t; + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h index ac187ff352d..23e4d3af274 100644 --- a/clients/drcachesim/common/memtrace_stream.h +++ b/clients/drcachesim/common/memtrace_stream.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -47,6 +47,7 @@ #include #include +#include /** * @file drmemtrace/memtrace_stream.h @@ -155,10 +156,25 @@ class memtrace_stream_t { return false; } + /** + * Returns the 0-based ordinal for the current shard. For parallel analysis, + * this equals the \p shard_index passed to parallel_shard_init_stream(). + * This is more useful for serial modes where there is no other convenience mechanism + * to determine such an index; it allows a tool to compute per-shard results even in + * serial mode. The shard orderings in serial mode may not always match the ordering + * in parallel mode. If not implemented, -1 is returned. + */ + virtual int + get_shard_index() const + { + return -1; + } + /** * Returns a unique identifier for the current "output cpu". Generally this only * applies when using #SHARD_BY_CORE. For dynamic schedules, the identifier is - * typically an output cpu ordinal. For replaying an as-traced schedule, the + * typically an output cpu ordinal equal to get_shard_index(). For replaying an + * as-traced schedule, the * identifier is typically the original input cpu which is now mapped directly * to this output. If not implemented for the current mode, -1 is returned. */ @@ -192,6 +208,17 @@ class memtrace_stream_t { return -1; } + /** + * Returns the thread identifier for the current input trace. + * This is a convenience method for use in parallel_shard_init_stream() + * prior to access to any #memref_t records. + */ + virtual int64_t + get_tid() const + { + return -1; + } + /** * Returns the stream interface for the current input trace. This differs from * "this" for #SHARD_BY_CORE where multiple inputs are interleaved on one @@ -203,6 +230,16 @@ class memtrace_stream_t { { return nullptr; } + + /** + * Returns whether the current record is from a part of the trace corresponding + * to kernel execution. 
+ */ + virtual bool + is_record_kernel() const + { + return false; + } }; /** @@ -274,8 +311,53 @@ class default_memtrace_stream_t : public memtrace_stream_t { return 0; } + void + set_output_cpuid(int64_t cpuid) + { + cpuid_ = cpuid; + } + int64_t + get_output_cpuid() const override + { + return cpuid_; + } + void + set_shard_index(int index) + { + shard_ = index; + } + int + get_shard_index() const override + { + return shard_; + } + // Also sets the shard index to the dynamic-discovery-order tid ordinal. + void + set_tid(int64_t tid) + { + tid_ = tid; + auto exists = tid2shard_.find(tid); + if (exists == tid2shard_.end()) { + int index = static_cast(tid2shard_.size()); + tid2shard_[tid] = index; + set_shard_index(index); + } else { + set_shard_index(exists->second); + } + } + int64_t + get_tid() const override + { + return tid_; + } + private: - uint64_t *record_ordinal_; + uint64_t *record_ordinal_ = nullptr; + int64_t cpuid_ = 0; + int shard_ = 0; + int64_t tid_ = 0; + // To let a test set just the tid and get a shard index for free. + std::unordered_map tid2shard_; }; } // namespace drmemtrace diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 36b3f9dd994..5bdee7aef05 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -35,6 +35,7 @@ #include "options.h" #include +#include #include #include "dr_api.h" // For IF_X86_ELSE. @@ -289,8 +290,9 @@ droption_t op_cpu_scheduling( "round-robin fashion. 
This option causes the scheduler to instead use the recorded " "cpu that each thread executed on (at a granularity of the trace buffer size) " "for scheduling, mapping traced cpu's to cores and running each segment of each " - "thread " - "on the core that owns the recorded cpu for that segment."); + "thread on the core that owns the recorded cpu for that segment. " + "This option is not supported with -core_serial; use " + "-cpu_schedule_file with -core_serial instead."); droption_t op_max_trace_size( DROPTION_SCOPE_CLIENT, "max_trace_size", 0, @@ -456,13 +458,17 @@ droption_t "Specifies the replacement policy for TLBs. " "Supported policies: LFU (Least Frequently Used)."); +// TODO i#6660: Add "-tool" alias as these are not all "simulators". droption_t op_simulator_type(DROPTION_SCOPE_FRONTEND, "simulator_type", CPU_CACHE, - "Specifies the types of simulators, separated by a colon (\":\").", + "Specifies which trace analysis tool(s) to run. Multiple tools " + "can be specified, separated by a colon (\":\").", "Predefined types: " CPU_CACHE ", " MISS_ANALYZER ", " TLB ", " REUSE_DIST ", " REUSE_TIME ", " HISTOGRAM ", " BASIC_COUNTS - ", " INVARIANT_CHECKER ", or " SCHEDULE_STATS - ". The external types: name of a tool identified by a " + ", " INVARIANT_CHECKER ", " SCHEDULE_STATS ", or " RECORD_FILTER + ". The " RECORD_FILTER " tool cannot be combined with the others " + "as it operates on raw disk records. " + "To invoke an external tool: specify its name as identified by a " "name.drcachesim config file in the DR tools directory."); droption_t op_verbose(DROPTION_SCOPE_ALL, "verbose", 0, 0, 64, @@ -520,7 +526,17 @@ droption_t op_interval_microseconds( "Enable periodic heartbeats for intervals of given microseconds in the trace.", "Desired length of each trace interval, defined in microseconds of trace time. " "Trace intervals are measured using the TRACE_MARKER_TYPE_TIMESTAMP marker values. 
" - "If set, analysis tools receive a callback at the end of each interval."); + "If set, analysis tools receive a callback at the end of each interval, and one " + "at the end of trace analysis to print the whole-trace interval results."); + +droption_t op_interval_instr_count( + DROPTION_SCOPE_FRONTEND, "interval_instr_count", 0, + "Enable periodic heartbeats for intervals of given per-shard instr count. ", + "Desired length of each trace interval, defined in instr count of each shard. " + "With -parallel, this does not support whole trace intervals, only per-shard " + "intervals. If set, analysis tools receive a callback at the end of each interval, " + "and separate callbacks per shard at the end of trace analysis to print each " + "shard's interval results."); droption_t op_only_thread(DROPTION_SCOPE_FRONTEND, "only_thread", 0, @@ -888,6 +904,21 @@ droption_t "Applies to -core_sharded and -core_serial. " "Path with stored as-traced schedule for replay."); #endif +droption_t op_sched_switch_file( + DROPTION_SCOPE_FRONTEND, "sched_switch_file", "", + "Path to file holding context switch sequences", + "Applies to -core_sharded and -core_serial. Path to file holding context switch " + "sequences. The file can contain multiple sequences each with regular trace headers " + "and the sequence proper bracketed by TRACE_MARKER_TYPE_CONTEXT_SWITCH_START and " + "TRACE_MARKER_TYPE_CONTEXT_SWITCH_END markers."); + +droption_t op_sched_randomize( + DROPTION_SCOPE_FRONTEND, "sched_randomize", false, + "Pick next inputs randomly on context switches", + "Applies to -core_sharded and -core_serial. Disables the normal methods of " + "choosing the next input based on priority, timestamps (if -sched_order_time is " + "set), and FIFO order and instead selects the next input randomly. " + "This is intended for experimental use in sensitivity studies."); // Schedule_stats options. 
droption_t @@ -902,5 +933,48 @@ droption_t op_syscall_template_file( "If set, system call traces will be injected from the file " "into the resulting trace."); +// Record filter options. +droption_t op_filter_stop_timestamp( + DROPTION_SCOPE_FRONTEND, "filter_stop_timestamp", 0, 0, + // Wrap max in parens to work around Visual Studio compiler issues with the + // max macro (even despite NOMINMAX defined above). + (std::numeric_limits::max)(), + "Timestamp (in us) in the trace when to stop filtering.", + "Record filtering will be disabled (everything will be output) " + "when the tool sees a TRACE_MARKER_TYPE_TIMESTAMP marker with " + "timestamp greater than the specified value."); + +droption_t op_filter_cache_size( + DROPTION_SCOPE_FRONTEND, "filter_cache_size", 0, + "Enable data cache filter with given size (in bytes).", + "Enable data cache filter with given size (in bytes), with 64 byte " + "line size and a direct mapped LRU cache."); + +droption_t + op_filter_trace_types(DROPTION_SCOPE_FRONTEND, "filter_trace_types", "", + "Comma-separated integers for trace types to remove.", + "Comma-separated integers for trace types to remove. " + "See trace_type_t for the list of trace entry types."); + +droption_t + op_filter_marker_types(DROPTION_SCOPE_FRONTEND, "filter_marker_types", "", + "Comma-separated integers for marker types to remove.", + "Comma-separated integers for marker types to remove. 
" + "See trace_marker_type_t for the list of marker types."); + +droption_t op_trim_before_timestamp( + DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, + (std::numeric_limits::max)(), + "Trim records until this timestamp (in us) in the trace.", + "Removes all records (after headers) before the first TRACE_MARKER_TYPE_TIMESTAMP " + "marker in the trace with timestamp greater than or equal to the specified value."); + +droption_t op_trim_after_timestamp( + DROPTION_SCOPE_ALL, "trim_after_timestamp", (std::numeric_limits::max)(), 0, + (std::numeric_limits::max)(), + "Trim records after this timestamp (in us) in the trace.", + "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " + "timestamp larger than the specified value."); + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index dbf1c57ca47..13b8b4268f3 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -36,6 +36,9 @@ #define _OPTIONS_H_ 1 // Tool names (for -simulator_type option). +// TODO i#6660: When we add "-tool", add "cache_simulator" or "drcachesim" +// instead of just "-tool cache". Ditto for "TLB". +#define CPU_CACHE "cache" #define MISS_ANALYZER "miss_analyzer" #define TLB "TLB" #define HISTOGRAM "histogram" @@ -48,6 +51,7 @@ #define FUNC_VIEW "func_view" #define INVARIANT_CHECKER "invariant_checker" #define SCHEDULE_STATS "schedule_stats" +#define RECORD_FILTER "record_filter" // Constants used by specific tools. 
#define REPLACE_POLICY_NON_SPECIFIED "" @@ -56,7 +60,6 @@ #define REPLACE_POLICY_FIFO "FIFO" #define PREFETCH_POLICY_NEXTLINE "nextline" #define PREFETCH_POLICY_NONE "none" -#define CPU_CACHE "cache" #define CACHE_TYPE_INSTRUCTION "instruction" #define CACHE_TYPE_DATA "data" #define CACHE_TYPE_UNIFIED "unified" @@ -158,6 +161,8 @@ extern dynamorio::droption::droption_t op_tracer_alt; extern dynamorio::droption::droption_t op_tracer_ops; extern dynamorio::droption::droption_t op_interval_microseconds; +extern dynamorio::droption::droption_t + op_interval_instr_count; extern dynamorio::droption::droption_t op_only_thread; extern dynamorio::droption::droption_t op_skip_instrs; extern dynamorio::droption::droption_t op_skip_refs; @@ -200,8 +205,16 @@ extern dynamorio::droption::droption_t op_record_file; extern dynamorio::droption::droption_t op_replay_file; extern dynamorio::droption::droption_t op_cpu_schedule_file; #endif +extern dynamorio::droption::droption_t op_sched_switch_file; +extern dynamorio::droption::droption_t op_sched_randomize; extern dynamorio::droption::droption_t op_schedule_stats_print_every; extern dynamorio::droption::droption_t op_syscall_template_file; +extern dynamorio::droption::droption_t op_filter_stop_timestamp; +extern dynamorio::droption::droption_t op_filter_cache_size; +extern dynamorio::droption::droption_t op_filter_trace_types; +extern dynamorio::droption::droption_t op_filter_marker_types; +extern dynamorio::droption::droption_t op_trim_before_timestamp; +extern dynamorio::droption::droption_t op_trim_after_timestamp; } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 63cadfcb5cb..342ebd8b252 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. 
+ * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -598,6 +598,35 @@ typedef enum { */ TRACE_MARKER_TYPE_CORE_IDLE, + /** + * Indicates a point in the trace where a context switch's kernel trace starts. + * The value of the marker is set to the switch type enum value from + * #dynamorio::drmemtrace::scheduler_tmpl_t::switch_type_t. + */ + TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, + + /** + * Indicates a point in the trace where a context switch's kernel trace ends. + * The value of the marker is set to the switch type enum value from + * #dynamorio::drmemtrace::scheduler_tmpl_t::switch_type_t. + */ + TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, + + /** + * This marker's value is the current thread's vector length in bytes, for + * architectures with a dynamic vector length. It is currently only used on AArch64. + * + * On AArch64 the marker's value contains the SVE vector length. The marker is + * emitted with the thread header to establish the initial vector length for that + * thread. In the future it will also be emitted later in the trace if the app + * changes the vector length at runtime (TODO i#6625). In all cases the vector + * length value is specific to the current thread. + * The vector length affects how some SVE instructions are decoded so any tools which + * decode instructions should clear any cached data and set the vector length used by + * the decoder using dr_set_sve_vector_length(). + */ + TRACE_MARKER_TYPE_VECTOR_LENGTH, + // ... // These values are reserved for future built-in marker types. // ... @@ -894,11 +923,11 @@ typedef enum { */ OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS = 0x800, /** - * Kernel traces of syscalls are included. 
- * The included kernel traces are provided either by the -syscall_template_file to - * raw2trace (see #OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES), or on x86 using - * the -enable_kernel_tracing option that uses Intel® Processor Trace to collect a - * trace for system call execution. + * Kernel traces (both instructions and memory addresses) of syscalls are included. If + * only kernel instructions are included the file type is + * #OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY instead. The included kernel traces + * are provided by the -syscall_template_file to raw2trace (see + * #OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES). */ OFFLINE_FILE_TYPE_KERNEL_SYSCALLS = 0x1000, /** @@ -925,6 +954,19 @@ typedef enum { * the future. */ OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES = 0x4000, + /** + * Kernel instruction traces of syscalls are included. When memory addresses are + * also included for kernel execution, the file type is + * #OFFLINE_FILE_TYPE_KERNEL_SYSCALLS instead. + * On x86, the kernel trace is enabled by the -enable_kernel_tracing option that + * uses Intel® Processor Trace to collect an instruction trace for system call + * execution. + */ + OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY = 0x8000, + /** + * Each trace shard represents one core and contains interleaved software threads. + */ + OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000, } offline_file_type_t; static inline const char * diff --git a/clients/drcachesim/common/utils.h b/clients/drcachesim/common/utils.h index 7bc5bec97ff..f49832443ce 100644 --- a/clients/drcachesim/common/utils.h +++ b/clients/drcachesim/common/utils.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -35,17 +35,34 @@ #ifndef _UTILS_H_ #define _UTILS_H_ 1 +#include #include #include #include #include #include +#if defined(_WIN32) || defined(_WIN64) || defined(WINDOWS) +# define WIN32_LEAN_AND_MEAN +# define UNICODE // For Windows headers. +# define _UNICODE // For C headers. +# define NOMINMAX // Avoid windows.h messing up std::min. +# include +#else +# include +#endif + namespace dynamorio { namespace drmemtrace { // XXX: DR should export this #define INVALID_THREAD_ID 0 +// We avoid collisions with DR's INVALID_PROCESS_ID by using our own name. +#define INVALID_PID -1 +// A separate sentinel for an idle core with no software thread. +// XXX i#6703: Export this in scheduler.h as part of its API when we have +// the scheduler insert synthetic headers. +#define IDLE_THREAD_ID -1 // XXX: perhaps we should use a C++-ish stream approach instead // This cannot be named ERROR as that conflicts with Windows headers. @@ -182,6 +199,27 @@ split_by(std::string s, const std::string &sep) return vec; } +// Returns a timestamp with at least microsecond granularity. +// On UNIX this is an absolute timestamp; but on Windows where we had +// trouble with the GetSystemTime* functions not being granular enough +// it's the timestamp counter from the processor. +// (We avoid dr_get_microseconds() because not all targets link +// in the DR library.) 
+static inline uint64_t +get_microsecond_timestamp() +{ +#if defined(_WIN32) || defined(_WIN64) || defined(WINDOWS) + uint64_t res; + QueryPerformanceCounter((LARGE_INTEGER *)&res); + return res; +#else + struct timeval time; + if (gettimeofday(&time, nullptr) != 0) + return 0; + return time.tv_sec * 1000000 + time.tv_usec; +#endif +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index 447fd74764f..5f1a6751a6c 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -125,7 +125,11 @@ using the drdecode decoder or any other decoder. An additional field information should be invalidated due to possibly changed application code. (For online traces, encodings are not provided unless the option `-instr_encodings` is passed, as encodings add overhead and -are not needed for many tools.) +are not needed for many tools.) Cached decoding information might also +need to be discarded if there is a +#dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker entry +indicating a change of vector length on architectures such as AArch64 +which have a dynamic vector length. Older legacy traces may not contain instruction encodings. For those traces, encodings for static code can be obtained by @@ -272,6 +276,7 @@ tools can also be created, as described in \ref sec_drcachesim_newtool. 
- \ref sec_tool_histogram - \ref sec_tool_invariant_checker - \ref sec_tool_syscall_mix +- \ref sec_tool_record_filter \section sec_tool_cache_sim Cache Simulator @@ -890,6 +895,52 @@ Syscall mix tool results: 1 : 273 \endcode +\section sec_tool_record_filter Record Filter + +The record filter tool modifies a target trace. It contains several varieties of +filters which selectively remove records from the trace. The filters currently provided +include: + +- Removing records of types specified by the -filter_trace_types option. + The types are identified by their #dynamorio::drmemtrace::trace_type_t + enum numeric value. + +- Removing marker records of marker types specified by the -filter_marker_types option. + The types are identified by their #dynamorio::drmemtrace::trace_marker_type_t + enum numeric value. + +- Running a simple data cache filter and removing hits. The cache is enabled + and its size specified by the -filter_cache_size option. + +- Trimming the start (via -trim_before_timestamp) and end (via -trim_after_timestamp) + of a trace. Any now-empty shards are deleted entirely. + +A filter can be applied only to the start of a trace using the -filter_stop_timestamp +option. + +Example of removing function markers: + +\code +$ bin64/drrun -t drcachesim -indir mytracedir -simulator_type basic_counts +... + 9009 total function id markers + 5006 total function return address markers + 6007 total function argument markers + 4003 total function return value markers +... + +$ bin64/drrun -t drcachesim -simulator_type record_filter -filter_marker_types 4,5,6,7 -indir mytracedir -outdir newdir +Output 1280800 entries from 1304825 entries. + +$ bin64/drrun -t drcachesim -indir newdir -simulator_type basic_counts +... + 0 total function id markers + 0 total function return address markers + 0 total function argument markers + 0 total function return value markers +... 
+\endcode + **************************************************************************** \page google_workload_traces Google Workload Traces diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 57e27e931d1..9cf3a33f140 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -32,6 +32,12 @@ #include "ir2trace.h" #include "dr_api.h" +#include "drir.h" +#include "trace_entry.h" + +#include +#include +#include namespace dynamorio { namespace drmemtrace { @@ -59,12 +65,12 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity) { - if (drir == nullptr || drir->get_ilist() == NULL) { + if (drir == nullptr || drir->get_ilist() == nullptr) { return IR2TRACE_CONV_ERROR_INVALID_PARAMETER; } instr_t *instr = instrlist_first(drir->get_ilist()); bool prev_was_repstr = false; - while (instr != NULL) { + while (instr != nullptr) { trace_entry_t entry = {}; entry.size = instr_length(GLOBAL_DCONTEXT, instr); entry.addr = reinterpret_cast(instr_get_app_pc(instr)); @@ -88,28 +94,28 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, * library to raw2trace, the redundancy should be eliminated by removing the * subsequent code. 
*/ - entry.type = TRACE_TYPE_INSTR; + trace_type_t entry_type = TRACE_TYPE_INSTR; if (instr_opcode_valid(instr)) { bool cur_is_repstr = false; if (instr_is_call_direct(instr)) { - entry.type = TRACE_TYPE_INSTR_DIRECT_CALL; + entry_type = TRACE_TYPE_INSTR_DIRECT_CALL; } else if (instr_is_call_indirect(instr)) { - entry.type = TRACE_TYPE_INSTR_INDIRECT_CALL; + entry_type = TRACE_TYPE_INSTR_INDIRECT_CALL; } else if (instr_is_return(instr)) { - entry.type = TRACE_TYPE_INSTR_RETURN; + entry_type = TRACE_TYPE_INSTR_RETURN; } else if (instr_is_ubr(instr)) { - entry.type = TRACE_TYPE_INSTR_DIRECT_JUMP; + entry_type = TRACE_TYPE_INSTR_DIRECT_JUMP; } else if (instr_is_mbr(instr)) { - entry.type = TRACE_TYPE_INSTR_INDIRECT_JUMP; + entry_type = TRACE_TYPE_INSTR_INDIRECT_JUMP; } else if (instr_is_cbr(instr)) { // We update this on the next iteration. - entry.type = TRACE_TYPE_INSTR_CONDITIONAL_JUMP; + entry_type = TRACE_TYPE_INSTR_CONDITIONAL_JUMP; } else if (instr_get_opcode(instr) == OP_sysenter) { - entry.type = TRACE_TYPE_INSTR_SYSENTER; + entry_type = TRACE_TYPE_INSTR_SYSENTER; } else if (instr_is_rep_string_op(instr)) { cur_is_repstr = true; if (prev_was_repstr) { - entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + entry_type = TRACE_TYPE_INSTR_MAYBE_FETCH; } else { prev_was_repstr = true; } @@ -120,7 +126,24 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, } else { VPRINT(1, "Trying to convert an invalid instruction.\n"); } - + entry.type = entry_type; + if (type_is_instr_branch(entry_type) && + !type_is_instr_direct_branch(entry_type)) { + instr_t *next_instr = instr_get_next(instr); + if (next_instr != nullptr) { + trace.push_back( + { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_BRANCH_TARGET, + { reinterpret_cast(instr_get_app_pc(next_instr)) } }); + } + // TODO i#5505: Today PT traces have some noise instructions at the end + // from the ioctl call that we make to disable PT tracing in the + // post-syscall callback. 
After we remove those noise instructions, the + // last instruction in the syscall's trace will likely be an iret that + // returns back to the user-space. We should add a + // TRACE_MARKER_TYPE_BRANCH_TARGET marker with a value equal to the next + // user-space instr. + } trace.push_back(entry); instr = instr_get_next(instr); diff --git a/clients/drcachesim/launcher.cpp b/clients/drcachesim/launcher.cpp index d1ea6dc23dd..3b61905ef6a 100644 --- a/clients/drcachesim/launcher.cpp +++ b/clients/drcachesim/launcher.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -85,6 +85,7 @@ namespace { } while (0) static analyzer_t *analyzer; +static record_analyzer_t *record_analyzer; #ifdef UNIX static pid_t child; #endif @@ -101,8 +102,14 @@ signal_handler(int sig, siginfo_t *info, void *cxt) if (child != 0) kill(child, SIGINT); // Destroy pipe file if it's open. - if (analyzer != NULL) + if (analyzer != nullptr) { delete analyzer; + analyzer = nullptr; + } + if (record_analyzer != nullptr) { + delete record_analyzer; + record_analyzer = nullptr; + } exit(1); } #endif @@ -316,11 +323,20 @@ _tmain(int argc, const TCHAR *targv[]) FATAL_ERROR("invalid -outdir %s", op_outdir.get_value().c_str()); } } else { - analyzer = new analyzer_multi_t; - if (!*analyzer) { - std::string error_string_ = analyzer->get_error_string(); - FATAL_ERROR("failed to initialize analyzer%s%s", - error_string_.empty() ? "" : ": ", error_string_.c_str()); + if (op_simulator_type.get_value() == RECORD_FILTER) { + record_analyzer = new record_analyzer_multi_t; + if (!*record_analyzer) { + std::string error_string_ = record_analyzer->get_error_string(); + FATAL_ERROR("failed to initialize record analyzer%s%s", + error_string_.empty() ? 
"" : ": ", error_string_.c_str()); + } + } else { + analyzer = new analyzer_multi_t; + if (!*analyzer) { + std::string error_string_ = analyzer->get_error_string(); + FATAL_ERROR("failed to initialize analyzer%s%s", + error_string_.empty() ? "" : ": ", error_string_.c_str()); + } } } @@ -364,10 +380,18 @@ _tmain(int argc, const TCHAR *targv[]) } if (!op_offline.get_value() || have_trace_file) { - if (!analyzer->run()) { - std::string error_string_ = analyzer->get_error_string(); - FATAL_ERROR("failed to run analyzer%s%s", error_string_.empty() ? "" : ": ", - error_string_.c_str()); + if (analyzer != nullptr) { + if (!analyzer->run()) { + std::string error_string_ = analyzer->get_error_string(); + FATAL_ERROR("failed to run analyzer%s%s", + error_string_.empty() ? "" : ": ", error_string_.c_str()); + } + } else { + if (!record_analyzer->run()) { + std::string error_string_ = record_analyzer->get_error_string(); + FATAL_ERROR("failed to run analyzer%s%s", + error_string_.empty() ? "" : ": ", error_string_.c_str()); + } } } @@ -403,6 +427,14 @@ _tmain(int argc, const TCHAR *targv[]) } // release analyzer's space delete analyzer; + } else if (record_analyzer != nullptr) { + if (!record_analyzer->print_stats()) { + std::string error_string_ = record_analyzer->get_error_string(); + FATAL_ERROR("failed to print results%s%s", error_string_.empty() ? "" : ": ", + error_string_.c_str()); + } + // release analyzer's space + delete record_analyzer; } sc = drfront_cleanup_args(argv, argc); diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 783d2a44bb8..d192b3d43d8 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -88,6 +88,12 @@ reader_t::operator++() // We've already presented the thread exit entry to the analyzer. continue; } + if (input_entry_->type == TRACE_TYPE_HEADER) { + // We support complete traces being packaged in archives and then read + // sequentially. We just keep going past the header. + VPRINT(this, 2, "Assuming header is part of concatenated traces\n"); + continue; + } VPRINT(this, 5, "RECV: type=%s (%d), size=%d, addr=0x%zx\n", trace_type_names[input_entry_->type], input_entry_->type, input_entry_->size, input_entry_->addr); @@ -129,7 +135,7 @@ reader_t::process_input_entry() case TRACE_TYPE_PREFETCH_WRITE_L3: case TRACE_TYPE_PREFETCH_WRITE_L3_NT: have_memref = true; - assert(cur_tid_ != 0 && cur_pid_ != 0); + assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_); cur_ref_.data.pid = cur_pid_; cur_ref_.data.tid = cur_tid_; cur_ref_.data.type = (trace_type_t)input_entry_->type; @@ -171,7 +177,7 @@ reader_t::process_input_entry() case TRACE_TYPE_INSTR_RETURN: case TRACE_TYPE_INSTR_SYSENTER: case TRACE_TYPE_INSTR_NO_FETCH: - assert(cur_tid_ != 0 && cur_pid_ != 0); + assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_); if (input_entry_->size == 0) { // Just an entry to tell us the PC of the subsequent memref, // used with -L0_filter where we don't reliably have icache @@ -198,8 +204,8 @@ reader_t::process_input_entry() // Look for encoding bits that belong to this instr. 
if (last_encoding_.size > 0) { if (last_encoding_.size != cur_ref_.instr.size) { - ERRMSG("Encoding size %zu != instr size %zu\n", last_encoding_.size, - cur_ref_.instr.size); + ERRMSG("Encoding size %zu != instr size %zu for PC 0x%zx\n", + last_encoding_.size, cur_ref_.instr.size, cur_ref_.instr.addr); assert(false); } memcpy(cur_ref_.instr.encoding, last_encoding_.bits, last_encoding_.size); @@ -210,7 +216,11 @@ reader_t::process_input_entry() const auto &it = encodings_.find(cur_ref_.instr.addr); if (it != encodings_.end()) { memcpy(cur_ref_.instr.encoding, it->second.bits, it->second.size); - } else if (!expect_no_encodings_) { + } else if (!expect_no_encodings_ && + // A thread can migrate after encoding records are seen. + // It is up to the user to properly handle encodings + // in this mode. + !core_sharded_) { ERRMSG("Missing encoding for 0x%zx\n", cur_ref_.instr.addr); assert(false); } @@ -244,7 +254,7 @@ reader_t::process_input_entry() break; case TRACE_TYPE_INSTR_FLUSH: case TRACE_TYPE_DATA_FLUSH: - assert(cur_tid_ != 0 && cur_pid_ != 0); + assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_); cur_ref_.flush.pid = cur_pid_; cur_ref_.flush.tid = cur_tid_; cur_ref_.flush.type = (trace_type_t)input_entry_->type; @@ -268,7 +278,7 @@ reader_t::process_input_entry() case TRACE_TYPE_THREAD_EXIT: cur_tid_ = (memref_tid_t)input_entry_->addr; cur_pid_ = tid2pid_[cur_tid_]; - assert(cur_tid_ != 0 && cur_pid_ != 0); + assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_); // We do pass this to the caller but only some fields are valid: cur_ref_.exit.pid = cur_pid_; cur_ref_.exit.tid = cur_tid_; @@ -282,7 +292,9 @@ reader_t::process_input_entry() break; case TRACE_TYPE_MARKER: cur_ref_.marker.type = (trace_type_t)input_entry_->type; - assert(cur_tid_ != 0 && cur_pid_ != 0); + assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_ || + // We have to wait for the filetype to see whether we're core-sharded. 
+ !found_filetype_); cur_ref_.marker.pid = cur_pid_; cur_ref_.marker.tid = cur_tid_; cur_ref_.marker.marker_type = (trace_marker_type_t)input_entry_->size; @@ -321,9 +333,13 @@ reader_t::process_input_entry() version_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { filetype_ = cur_ref_.marker.marker_value; + found_filetype_ = true; if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) { expect_no_encodings_ = false; } + if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, filetype_)) { + core_sharded_ = true; + } } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) cache_line_size_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE) @@ -332,6 +348,21 @@ reader_t::process_input_entry() chunk_instr_count_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CHUNK_FOOTER) skip_chunk_header_.insert(cur_tid_); + else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START || + cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) { + in_kernel_trace_ = true; + } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_END || + cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_END) { + in_kernel_trace_ = false; + } + break; + case TRACE_TYPE_HEADER: + // We support complete traces being packaged in archives and then read + // sequentially, or core-sharded record_filter operation. + // We just keep going past the header. 
+ VPRINT( + this, 2, + "Assuming header is part of concatenated or on-disk-core-sharded traces\n"); break; default: ERRMSG("Unknown trace entry type %s (%d)\n", trace_type_names[input_entry_->type], diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h index 42c38051b89..a10f614d848 100644 --- a/clients/drcachesim/reader/reader.h +++ b/clients/drcachesim/reader/reader.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -190,6 +190,11 @@ class reader_t : public std::iterator, return page_size_; } bool + is_record_kernel() const override + { + return in_kernel_trace_; + } + bool is_record_synthetic() const override { if (cur_ref_.marker.type == TRACE_TYPE_MARKER && @@ -267,6 +272,10 @@ class reader_t : public std::iterator, }; std::unordered_map encodings_; + // Whether this reader's input stream interleaves software threads and thus + // some thread-based checks may not apply. + bool core_sharded_ = false; + bool found_filetype_ = false; private: memref_t cur_ref_; @@ -280,6 +289,7 @@ class reader_t : public std::iterator, bool expect_no_encodings_ = true; encoding_info_t last_encoding_; addr_t last_branch_target_ = 0; + bool in_kernel_trace_ = false; }; } // namespace drmemtrace diff --git a/clients/drcachesim/reader/record_file_reader.h b/clients/drcachesim/reader/record_file_reader.h index 7d4a8cb0205..e966bc51f4e 100644 --- a/clients/drcachesim/reader/record_file_reader.h +++ b/clients/drcachesim/reader/record_file_reader.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -126,7 +126,7 @@ class record_reader_t : public std::iteratortype == TRACE_TYPE_ENCODING || + // The branch target marker sits between any encodings and the instr. + (record->type == TRACE_TYPE_MARKER && + record->size == TRACE_MARKER_TYPE_BRANCH_TARGET); + } + record_reader_t & operator++() { @@ -149,7 +158,13 @@ class record_reader_t : public std::iterator(cur_entry_.type))) + // We increment the instr count at the encoding as that avoids multiple + // problems with separating encodings from instrs when skipping (including + // for scheduler regions of interest) and when replaying schedules: anything + // using instr ordinals as boundaries. + if (!prev_record_was_pre_instr_ && + (record_is_pre_instr(&cur_entry_) || + type_is_instr(static_cast(cur_entry_.type)))) ++cur_instr_count_; else if (cur_entry_.type == TRACE_TYPE_MARKER) { switch (cur_entry_.size) { @@ -167,8 +182,17 @@ class record_reader_t : public std::iterator= (level)) { \ + fprintf(stderr, __VA_ARGS__); \ + } \ + } while (0) +#else +# define ZPRINT(verbosity, level, ...) /* nothing */ +#endif + +bool +open_single_file_common(const std::string &path, zipfile_reader_t &zread) +{ + unzFile file = unzOpen(path.c_str()); + if (file == nullptr) + return false; + zread = zipfile_reader_t(file, path); + if (unzGoToFirstFile(file) != UNZ_OK || unzOpenCurrentFile(file) != UNZ_OK) + return false; + return true; +} + +bool +read_if_at_end_of_buffer(zipfile_reader_t &zipfile, bool &at_eof, + trace_entry_t last_entry) +{ + if (zipfile.cur_buf >= zipfile.max_buf) { + int num_read = unzReadCurrentFile(zipfile.file, zipfile.buf, sizeof(zipfile.buf)); + if (num_read == 0) { +#ifdef DEBUG + if (zipfile.verbosity >= 3) { + zipfile.name[0] = '\0'; /* Just in case. */ + // This call is expensive if we do it every time. 
+ unzGetCurrentFileInfo64(zipfile.file, nullptr, zipfile.name, + sizeof(zipfile.name), nullptr, 0, nullptr, 0); + ZPRINT(zipfile.verbosity, 3, + "Hit end of component %s; opening next component in %s\n", + zipfile.name, zipfile.path.c_str()); + } +#endif + if ((last_entry.type != TRACE_TYPE_MARKER || + last_entry.size != TRACE_MARKER_TYPE_CHUNK_FOOTER) && + last_entry.type != TRACE_TYPE_FOOTER) { + zipfile.name[0] = '\0'; /* Just in case. */ + unzGetCurrentFileInfo64(zipfile.file, nullptr, zipfile.name, + sizeof(zipfile.name), nullptr, 0, nullptr, 0); + ZPRINT(zipfile.verbosity, 1, + "Chunk is missing footer: truncation detected in %s %s\n", + zipfile.path.c_str(), zipfile.name); + return false; + } + if (unzCloseCurrentFile(zipfile.file) != UNZ_OK) + return false; + int res = unzGoToNextFile(zipfile.file); + if (res != UNZ_OK) { + if (res == UNZ_END_OF_LIST_OF_FILE) { + ZPRINT(zipfile.verbosity, 2, "Hit EOF in %s\n", zipfile.path.c_str()); + at_eof = true; + } + return false; + } + if (unzOpenCurrentFile(zipfile.file) != UNZ_OK) + return false; + num_read = unzReadCurrentFile(zipfile.file, zipfile.buf, sizeof(zipfile.buf)); + } + if (num_read < static_cast(sizeof(trace_entry_t))) { + ZPRINT(zipfile.verbosity, 1, "Failed to read: returned %d in %s\n", num_read, + zipfile.path.c_str()); + return false; + } + zipfile.cur_buf = zipfile.buf; + zipfile.max_buf = zipfile.buf + (num_read / sizeof(*zipfile.max_buf)); + } + return true; +} + +} // namespace + +/************************************************** + * zipfile_reader_t specializations for file_reader_t. 
+ */ + /* clang-format off */ /* (make vera++ newline-after-type check happy) */ template <> /* clang-format on */ @@ -63,13 +155,10 @@ template <> bool file_reader_t::open_single_file(const std::string &path) { - unzFile file = unzOpen(path.c_str()); - if (file == nullptr) - return false; - input_file_ = zipfile_reader_t(file, path); - if (unzGoToFirstFile(file) != UNZ_OK || unzOpenCurrentFile(file) != UNZ_OK) + if (!open_single_file_common(path, input_file_)) return false; VPRINT(this, 1, "Opened input file %s\n", path.c_str()); + input_file_.verbosity = verbosity_; return true; } @@ -80,60 +169,13 @@ file_reader_t::read_next_entry() trace_entry_t *from_queue = read_queued_entry(); if (from_queue != nullptr) return from_queue; - zipfile_reader_t *zipfile = &input_file_; - if (zipfile->cur_buf >= zipfile->max_buf) { - int num_read = - unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf)); - if (num_read == 0) { -#ifdef DEBUG - if (verbosity_ >= 3) { - zipfile->name[0] = '\0'; /* Just in case. */ - // This call is expensive if we do it every time. - unzGetCurrentFileInfo64(zipfile->file, nullptr, zipfile->name, - sizeof(zipfile->name), nullptr, 0, nullptr, 0); - VPRINT(this, 3, "Hit end of component %s; opening next component in %s\n", - zipfile->name, zipfile->path.c_str()); - } -#endif - // read_next_entry() stored the last-read entry into entry_copy_. - if ((entry_copy_.type != TRACE_TYPE_MARKER || - entry_copy_.size != TRACE_MARKER_TYPE_CHUNK_FOOTER) && - entry_copy_.type != TRACE_TYPE_FOOTER) { - zipfile->name[0] = '\0'; /* Just in case. 
*/ - unzGetCurrentFileInfo64(zipfile->file, nullptr, zipfile->name, - sizeof(zipfile->name), nullptr, 0, nullptr, 0); - VPRINT(this, 1, "Chunk is missing footer: truncation detected in %s %s\n", - zipfile->path.c_str(), zipfile->name); - return nullptr; - } - if (unzCloseCurrentFile(zipfile->file) != UNZ_OK) - return nullptr; - int res = unzGoToNextFile(zipfile->file); - if (res != UNZ_OK) { - if (res == UNZ_END_OF_LIST_OF_FILE) { - VPRINT(this, 2, "Hit EOF in %s\n", zipfile->path.c_str()); - at_eof_ = true; - } - return nullptr; - } - if (unzOpenCurrentFile(zipfile->file) != UNZ_OK) - return nullptr; - num_read = - unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf)); - } - if (num_read < static_cast(sizeof(entry_copy_))) { - VPRINT(this, 1, "Failed to read: returned %d in %s\n", num_read, - zipfile->path.c_str()); - return nullptr; - } - zipfile->cur_buf = zipfile->buf; - zipfile->max_buf = zipfile->buf + (num_read / sizeof(*zipfile->max_buf)); - } - entry_copy_ = *zipfile->cur_buf; - ++zipfile->cur_buf; - VPRINT(this, 5, "Read %s: type=%s (%d), size=%d, addr=%zu\n", zipfile->path.c_str(), - trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size, - entry_copy_.addr); + if (!read_if_at_end_of_buffer(input_file_, at_eof_, entry_copy_)) + return nullptr; + entry_copy_ = *input_file_.cur_buf; + ++input_file_.cur_buf; + VPRINT(this, 5, "Read %s: type=%s (%d), size=%d, addr=%zu\n", + input_file_.path.c_str(), trace_type_names[entry_copy_.type], entry_copy_.type, + entry_copy_.size, entry_copy_.addr); return &entry_copy_; } @@ -203,5 +245,53 @@ file_reader_t::skip_instructions(uint64_t instruction_count) return skip_instructions_with_timestamp(stop_count - 1); } +/********************************************************* + * zipfile_reader_t specializations for record_file_reader_t. 
+ */ + +/* clang-format off */ /* (make vera++ newline-after-type check happy) */ +template <> +/* clang-format on */ +record_file_reader_t::record_file_reader_t() +{ + input_file_->file = nullptr; +} + +template <> +record_file_reader_t::~record_file_reader_t() +{ + if (input_file_->file != nullptr) { + unzClose(input_file_->file); + input_file_->file = nullptr; + } +} + +template <> +bool +record_file_reader_t::open_single_file(const std::string &path) +{ + zipfile_reader_t zread; + if (!open_single_file_common(path, zread)) + return false; + input_file_ = std::unique_ptr(new zipfile_reader_t(zread)); + VPRINT(this, 1, "Opened input file %s\n", path.c_str()); + input_file_->verbosity = verbosity_; + return true; +} + +template <> +bool +record_file_reader_t::read_next_entry() +{ + if (!read_if_at_end_of_buffer(*input_file_, eof_, cur_entry_)) + return false; + cur_entry_ = *input_file_->cur_buf; + ++input_file_->cur_buf; + VPRINT(this, 5, "Read %s: type=%s (%d), size=%d, addr=%zu\n", + input_file_->path.c_str(), trace_type_names[cur_entry_.type], cur_entry_.type, + cur_entry_.size, cur_entry_.addr); + return true; +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/reader/zipfile_file_reader.h b/clients/drcachesim/reader/zipfile_file_reader.h index 813623b6fe3..695f3a63b95 100644 --- a/clients/drcachesim/reader/zipfile_file_reader.h +++ b/clients/drcachesim/reader/zipfile_file_reader.h @@ -38,6 +38,7 @@ #include #include "minizip/unzip.h" #include "file_reader.h" +#include "record_file_reader.h" namespace dynamorio { namespace drmemtrace { @@ -66,9 +67,11 @@ struct zipfile_reader_t { // Store the path and component names for debug messages. std::string path; char name[128]; + int verbosity = 0; }; typedef file_reader_t zipfile_file_reader_t; +typedef record_file_reader_t zipfile_record_file_reader_t; /* Declare this so the compiler knows not to use the default implementation in the * class declaration. 
diff --git a/clients/drcachesim/scheduler/flexible_queue.h b/clients/drcachesim/scheduler/flexible_queue.h index 699d70ff94d..897e21ae8dc 100644 --- a/clients/drcachesim/scheduler/flexible_queue.h +++ b/clients/drcachesim/scheduler/flexible_queue.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -41,8 +41,10 @@ */ #define NOMINMAX // Avoid windows.h messing up std::max. +#include #include #include +#include #include #include #include @@ -66,10 +68,10 @@ class flexible_queue_t { // max macro (even despite NOMINMAX defined above). static constexpr index_t INVALID_INDEX = (std::numeric_limits::max)(); - flexible_queue_t() = default; - explicit flexible_queue_t(int verbose) + flexible_queue_t(int rand_seed = 0, int verbose = 0) : verbose_(verbose) { + rand_gen_.seed(rand_seed); } bool push(T entry) @@ -93,9 +95,19 @@ class flexible_queue_t { T top() const { + assert(!empty()); return entries_[0]; // Undefined if empty. } + T + get_random_entry() // Not const as it change rand_gen's state. + { + assert(!empty()); + // minstd_rand returns uint_fast32_t. We do not support get_random_entry() + // for queues with >2^32 entries. + return entries_[rand_gen_() % size()]; // Undefined if empty. + } + bool empty() const { @@ -223,6 +235,7 @@ class flexible_queue_t { comparator_t compare_; std::unordered_map entry2index_; int verbose_ = 0; + std::minstd_rand rand_gen_; }; } // namespace drmemtrace diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index df4d8dbeff3..b8b918d9de2 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. 
+ * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -32,6 +32,7 @@ #include "scheduler.h" +#include #include #include @@ -70,12 +71,6 @@ #endif #include "directory_iterator.h" #include "utils.h" -#ifdef UNIX -# include -#else -# define WIN32_LEAN_AND_MEAN -# include -#endif #undef VPRINT #ifdef DEBUG @@ -111,6 +106,23 @@ typedef dynamorio::drmemtrace::record_file_reader_t default_record_file_reader_t; #endif +std::string +replay_file_checker_t::check(archive_istream_t *infile) +{ + // Ensure we don't have repeated idle records, which balloon the file size. + scheduler_t::schedule_record_t record; + bool prev_was_idle = false; + while (infile->read(reinterpret_cast(&record), sizeof(record))) { + if (record.type == scheduler_t::schedule_record_t::IDLE) { + if (prev_was_idle) + return "Error: consecutive idle records"; + prev_was_idle = true; + } else + prev_was_idle = false; + } + return ""; +} + /**************************************************************** * Specializations for scheduler_tmpl_t, aka scheduler_t. */ @@ -195,6 +207,25 @@ scheduler_tmpl_t::record_type_has_tid(memref_t record, return true; } +template <> +bool +scheduler_tmpl_t::record_type_has_pid(memref_t record, + memref_pid_t &pid) +{ + if (record.marker.pid == INVALID_PID) + return false; + pid = record.marker.pid; + return true; +} + +template <> +void +scheduler_tmpl_t::record_type_set_tid(memref_t &record, + memref_tid_t tid) +{ + record.marker.tid = tid; +} + template <> bool scheduler_tmpl_t::record_type_is_instr(memref_t record) @@ -202,6 +233,23 @@ scheduler_tmpl_t::record_type_is_instr(memref_t record) return type_is_instr(record.instr.type); } +template <> +bool +scheduler_tmpl_t::record_type_is_encoding(memref_t record) +{ + // There are no separate memref_t encoding records: encoding info is + // inside instruction records. 
+ return false; +} + +template <> +bool +scheduler_tmpl_t::record_type_is_instr_boundary(memref_t record, + memref_t prev_record) +{ + return record_type_is_instr(record); +} + template <> bool scheduler_tmpl_t::record_type_is_marker(memref_t record, @@ -276,12 +324,19 @@ scheduler_tmpl_t::print_record(const memref_t &record) if (type_is_instr(record.instr.type)) fprintf(stderr, " pc=0x%zx size=%zu", record.instr.addr, record.instr.size); else if (record.marker.type == TRACE_TYPE_MARKER) { - fprintf(stderr, " marker=0x%d val=%zu", record.marker.marker_type, + fprintf(stderr, " marker=%d val=%zu", record.marker.marker_type, record.marker.marker_value); } fprintf(stderr, "\n"); } +template <> +void +scheduler_tmpl_t::insert_switch_tid_pid(input_info_t &info) +{ + // We do nothing, as every record has a tid from the separate inputs. +} + /****************************************************************************** * Specializations for scheduler_tmpl_t, aka record_scheduler_t. */ @@ -299,10 +354,15 @@ std::unique_ptr scheduler_tmpl_t::get_reader(const std::string &path, int verbosity) { - // TODO i#5675: Add support for other file formats, particularly - // .zip files. - if (ends_with(path, ".sz") || ends_with(path, ".zip")) + // TODO i#5675: Add support for other file formats. 
+ if (ends_with(path, ".sz")) return nullptr; +#ifdef HAS_ZIP + if (ends_with(path, ".zip")) { + return std::unique_ptr( + new zipfile_record_file_reader_t(path, verbosity)); + } +#endif return std::unique_ptr( new default_record_file_reader_t(path, verbosity)); } @@ -318,6 +378,27 @@ scheduler_tmpl_t::record_type_has_tid( return true; } +template <> +bool +scheduler_tmpl_t::record_type_has_pid( + trace_entry_t record, memref_pid_t &pid) +{ + if (record.type != TRACE_TYPE_PID) + return false; + pid = static_cast(record.addr); + return true; +} + +template <> +void +scheduler_tmpl_t::record_type_set_tid( + trace_entry_t &record, memref_tid_t tid) +{ + if (record.type != TRACE_TYPE_THREAD) + return; + record.addr = static_cast(tid); +} + template <> bool scheduler_tmpl_t::record_type_is_instr( @@ -326,6 +407,24 @@ scheduler_tmpl_t::record_type_is_instr( return type_is_instr(static_cast(record.type)); } +template <> +bool +scheduler_tmpl_t::record_type_is_encoding( + trace_entry_t record) +{ + return static_cast(record.type) == TRACE_TYPE_ENCODING; +} + +template <> +typename scheduler_tmpl_t::stream_status_t +scheduler_tmpl_t::unread_last_record( + output_ordinal_t output, trace_entry_t &record, input_info_t *&input) +{ + // See the general unread_last_record() below: we don't support this as + // we can't provide the prev-prev record for record_type_is_instr_boundary(). + return STATUS_NOT_IMPLEMENTED; +} + template <> bool scheduler_tmpl_t::record_type_is_marker( @@ -338,6 +437,18 @@ scheduler_tmpl_t::record_type_is_marker( return true; } +template <> +bool +scheduler_tmpl_t::record_type_is_instr_boundary( + trace_entry_t record, trace_entry_t prev_record) +{ + // Don't advance past encodings or target markers and split them from their + // associated instr. 
+ return (record_type_is_instr(record) || + record_reader_t::record_is_pre_instr(&record)) && + !record_reader_t::record_is_pre_instr(&prev_record); +} + template <> bool scheduler_tmpl_t::record_type_is_timestamp( @@ -402,6 +513,27 @@ scheduler_tmpl_t::print_record( record.addr); } +template <> +void +scheduler_tmpl_t::insert_switch_tid_pid( + input_info_t &input) +{ + // We need explicit tid,pid records so reader_t will see the new context. + // We insert at the front, so we have reverse order. + trace_entry_t pid; + pid.type = TRACE_TYPE_PID; + pid.size = 0; + pid.addr = static_cast(input.pid); + + trace_entry_t tid; + tid.type = TRACE_TYPE_THREAD; + tid.size = 0; + tid.addr = static_cast(input.tid); + + input.queue.push_front(pid); + input.queue.push_front(tid); +} + /*************************************************************************** * Scheduled stream. */ @@ -418,6 +550,11 @@ typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::stream_t::next_record(RecordType &record, uint64_t cur_time) { + if (max_ordinal_ > 0) { + ++ordinal_; + if (ordinal_ >= max_ordinal_) + ordinal_ = 0; + } input_info_t *input = nullptr; sched_type_t::stream_status_t res = scheduler_->next_record(ordinal_, record, input, cur_time); @@ -428,7 +565,7 @@ scheduler_tmpl_t::stream_t::next_record(RecordType &reco std::lock_guard guard(*input->lock); if (!input->reader->is_record_synthetic()) ++cur_ref_count_; - if (scheduler_->record_type_is_instr(record)) + if (scheduler_->record_type_is_instr_boundary(record, prev_record_)) ++cur_instr_count_; VPRINT(scheduler_, 4, "stream record#=%" PRId64 ", instr#=%" PRId64 " (cur input %" PRId64 @@ -462,6 +599,7 @@ scheduler_tmpl_t::stream_t::next_record(RecordType &reco break; } } + prev_record_ = record; return sched_type_t::STATUS_OK; } @@ -515,7 +653,7 @@ scheduler_tmpl_t::init( std::vector &workload_inputs, int output_count, scheduler_options_t options) { - options_ = options; + options_ = std::move(options); verbosity_ = 
options_.verbosity; // workload_inputs is not const so we can std::move readers out of it. std::unordered_map> workload2inputs(workload_inputs.size()); @@ -615,6 +753,10 @@ scheduler_tmpl_t::init( static_cast(sched_type_t::SCHEDULER_SPECULATE_NOPS)); outputs_.reserve(output_count); + if (options_.single_lockstep_output) { + global_stream_ = std::unique_ptr( + new sched_type_t::stream_t(this, 0, verbosity_, output_count)); + } for (int i = 0; i < output_count; ++i) { outputs_.emplace_back(this, i, TESTANY(SCHEDULER_SPECULATE_NOPS, options_.flags) @@ -622,6 +764,8 @@ scheduler_tmpl_t::init( // TODO i#5843: Add more flags for other options. : spec_type_t::LAST_FROM_TRACE, create_invalid_record(), verbosity_); + if (options_.single_lockstep_output) + outputs_.back().stream = global_stream_.get(); if (options_.schedule_record_ostream != nullptr) { sched_type_t::stream_status_t status = record_schedule_segment( i, schedule_record_t::VERSION, schedule_record_t::VERSION_CURRENT, 0, 0); @@ -633,6 +777,11 @@ scheduler_tmpl_t::init( } VPRINT(this, 1, "%zu inputs\n", inputs_.size()); live_input_count_.store(static_cast(inputs_.size()), std::memory_order_release); + + sched_type_t::scheduler_status_t res = read_switch_sequences(); + if (res != sched_type_t::STATUS_SUCCESS) + return STATUS_ERROR_INVALID_PARAMETER; + return set_initial_schedule(workload2inputs); } @@ -641,6 +790,40 @@ typename scheduler_tmpl_t::scheduler_status_t scheduler_tmpl_t::set_initial_schedule( std::unordered_map> &workload2inputs) { + // Determine whether we need to read ahead in the inputs. There are cases where we + // do not want to do that as it would block forever if the inputs are not available + // (e.g., online analysis IPC readers); it also complicates ordinals so we avoid it + // if we can and enumerate all the cases that do need it. 
+ bool gather_timestamps = false; + if (((options_.mapping == MAP_AS_PREVIOUSLY || + options_.mapping == MAP_TO_ANY_OUTPUT) && + options_.deps == DEPENDENCY_TIMESTAMPS) || + (options_.mapping == MAP_TO_RECORDED_OUTPUT && + options_.replay_as_traced_istream == nullptr && inputs_.size() > 1)) { + gather_timestamps = true; + if (!options_.read_inputs_in_init) { + error_string_ = "Timestamp dependencies require read_inputs_in_init"; + return STATUS_ERROR_INVALID_PARAMETER; + } + } + // The filetype, if present, is before the first timestamp. If we only need the + // filetype we avoid going as far as the timestamp. + bool gather_filetype = options_.read_inputs_in_init; + // Avoid reading ahead for replay as it makes the input ords not match in tests. + if (options_.mapping == MAP_TO_RECORDED_OUTPUT && + options_.replay_as_traced_istream != nullptr) + gather_filetype = false; + if (gather_filetype || gather_timestamps) { + sched_type_t::scheduler_status_t res = + get_initial_input_content(gather_timestamps); + if (res != STATUS_SUCCESS) { + error_string_ = "Failed to read initial input contents for filetype"; + if (gather_timestamps) + error_string_ += " and initial timestamps"; + return res; + } + } + if (options_.mapping == MAP_AS_PREVIOUSLY) { live_replay_output_count_.store(static_cast(outputs_.size()), std::memory_order_release); @@ -652,9 +835,7 @@ scheduler_tmpl_t::set_initial_schedule( return STATUS_ERROR_INVALID_PARAMETER; if (options_.deps == DEPENDENCY_TIMESTAMPS) { // Match the ordinals from the original run by pre-reading the timestamps. - sched_type_t::scheduler_status_t res = get_initial_timestamps(); - if (res != STATUS_SUCCESS) - return res; + assert(gather_timestamps); } } else if (options_.schedule_replay_istream != nullptr) { return STATUS_ERROR_INVALID_PARAMETER; @@ -689,9 +870,7 @@ scheduler_tmpl_t::set_initial_schedule( // thread first and then pick the oldest timestamp once it reached a // timestamp. 
We instead queue those headers so we can start directly with the // oldest timestamp's thread. - sched_type_t::scheduler_status_t res = get_initial_timestamps(); - if (res != STATUS_SUCCESS) - return res; + assert(gather_timestamps); uint64_t min_time = std::numeric_limits::max(); input_ordinal_t min_input = -1; for (int i = 0; i < static_cast(inputs_.size()); ++i) { @@ -707,9 +886,7 @@ scheduler_tmpl_t::set_initial_schedule( } else { // Assign initial inputs. if (options_.deps == DEPENDENCY_TIMESTAMPS) { - sched_type_t::scheduler_status_t res = get_initial_timestamps(); - if (res != STATUS_SUCCESS) - return res; + assert(gather_timestamps); // Compute the min timestamp (==base_timestamp) per workload and sort // all inputs by relative time from the base. for (int workload_idx = 0; @@ -914,9 +1091,9 @@ scheduler_tmpl_t::read_traced_schedule() uint64_t cur_cpu = std::numeric_limits::max(); // We also want to collapse same-cpu consecutive records so we start with // a temporary local vector. - std::vector> all_sched(outputs_.size()); + std::vector> all_sched(outputs_.size()); // Work around i#6107 by tracking counts sorted by timestamp for each input. - std::vector> input_sched(inputs_.size()); + std::vector> input_sched(inputs_.size()); while (options_.replay_as_traced_istream->read(reinterpret_cast(&entry), sizeof(entry))) { if (entry.cpu != cur_cpu) { @@ -938,24 +1115,24 @@ scheduler_tmpl_t::read_traced_schedule() uint64_t timestamp = entry.timestamp; // Some entries have no instructions (there is an entry for each timestamp, and // a signal can come in after a prior timestamp with no intervening instrs). 
- assert(all_sched[cur_output].empty() || - all_sched[cur_output].back().type == schedule_record_t::DEFAULT); if (!all_sched[cur_output].empty() && - input == all_sched[cur_output].back().key.input && - start == all_sched[cur_output].back().value.start_instruction) { + input == all_sched[cur_output].back().input && + start == all_sched[cur_output].back().start_instruction) { VPRINT(this, 3, "Output #%d: as-read segment #%zu has no instructions: skipping\n", cur_output, all_sched[cur_output].size() - 1); continue; } - all_sched[cur_output].emplace_back(schedule_record_t::DEFAULT, input, start, 0, - timestamp); + all_sched[cur_output].emplace_back(true, input, start, timestamp); start2stop[input].insert(start); - input_sched[input].emplace_back(schedule_record_t::DEFAULT, input, start, 0, - timestamp); + input_sched[input].emplace_back(cur_output, all_sched[cur_output].size() - 1, + start, timestamp); } sched_type_t::scheduler_status_t res = check_and_fix_modulo_problem_in_schedule(input_sched, start2stop, all_sched); + if (res != sched_type_t::STATUS_SUCCESS) + return res; + res = remove_zero_instruction_segments(input_sched, all_sched); if (res != sched_type_t::STATUS_SUCCESS) return res; for (int output_idx = 0; output_idx < static_cast(outputs_.size()); @@ -968,41 +1145,40 @@ scheduler_tmpl_t::read_traced_schedule() for (int sched_idx = 0; sched_idx < static_cast(all_sched[output_idx].size()); ++sched_idx) { auto &segment = all_sched[output_idx][sched_idx]; - auto find = - start2stop[segment.key.input].find(segment.value.start_instruction); + if (!segment.valid) + continue; + auto find = start2stop[segment.input].find(segment.start_instruction); ++find; - if (find == start2stop[segment.key.input].end()) + if (find == start2stop[segment.input].end()) segment.stop_instruction = std::numeric_limits::max(); else segment.stop_instruction = *find; VPRINT(this, 4, "as-read segment #%d: input=%d start=%" PRId64 " stop=%" PRId64 " time=%" PRId64 "\n", - sched_idx, 
segment.key.input, segment.value.start_instruction, + sched_idx, segment.input, segment.start_instruction, segment.stop_instruction, segment.timestamp); if (sched_idx + 1 < static_cast(all_sched[output_idx].size()) && - segment.key.input == all_sched[output_idx][sched_idx + 1].key.input && + segment.input == all_sched[output_idx][sched_idx + 1].input && segment.stop_instruction > - all_sched[output_idx][sched_idx + 1].value.start_instruction) { + all_sched[output_idx][sched_idx + 1].start_instruction) { // A second sanity check. error_string_ = "Invalid decreasing start field in schedule file"; return STATUS_ERROR_INVALID_PARAMETER; } else if (sched_idx + 1 < static_cast(all_sched[output_idx].size()) && - segment.key.input == - all_sched[output_idx][sched_idx + 1].key.input && + segment.input == all_sched[output_idx][sched_idx + 1].input && segment.stop_instruction == - all_sched[output_idx][sched_idx + 1].value.start_instruction) { + all_sched[output_idx][sched_idx + 1].start_instruction) { // Collapse into next. if (start_consec == -1) start_consec = sched_idx; } else { - schedule_record_t &toadd = start_consec >= 0 + schedule_output_tracker_t &toadd = start_consec >= 0 ? 
all_sched[output_idx][start_consec] : all_sched[output_idx][sched_idx]; outputs_[output_idx].record.emplace_back( - static_cast(toadd.type), - +toadd.key.input, +toadd.value.start_instruction, - +all_sched[output_idx][sched_idx].stop_instruction, +toadd.timestamp); + schedule_record_t::DEFAULT, toadd.input, toadd.start_instruction, + all_sched[output_idx][sched_idx].stop_instruction, toadd.timestamp); start_consec = -1; VDO(this, 3, { auto &added = outputs_[output_idx].record.back(); @@ -1035,12 +1211,71 @@ scheduler_tmpl_t::read_traced_schedule() return STATUS_SUCCESS; } +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::remove_zero_instruction_segments( + std::vector> &input_sched, + std::vector> &all_sched) + +{ + // For a cpuid pair with no instructions in between, our + // instruction-ordinal-based control points cannot model both sides. + // For example: + // 5 0: 1294139 + // 6 0: 1294139 + // 7 0: 1294139 + // 8 0: 1294139 + // 9 0: 1294139 + // 10 0: 1294139 + // 11 0: 1294139 + // 12 0: 1294139 + // 13 1: 1294139 ifetch 3 byte(s) @ 0x0000563642cc5e75 8d 50 0b lea... + // That sequence has 2 different cpu_schedule file entries for that input + // starting at instruction 0, which causes confusion when determining endpoints. + // We just drop the older entry and keep the later one, which is the one bundled + // with actual instructions. + // + // Should we not have instruction-based control points? The skip and + // region-of-interest features were designed thinking about instructions, the more + // natural unit for microarchitectural simulators. It seemed like that was much more + // usable for a user, and translated to other venues like PMU counts. The scheduler + // replay features were also designed that way. But, that makes the infrastructure + // messy as the underlying records are not built that way. Xref i#6716 on an + // instruction-based iterator. 
+ for (int input_idx = 0; input_idx < static_cast(inputs_.size()); + ++input_idx) { + std::sort( + input_sched[input_idx].begin(), input_sched[input_idx].end(), + [](const schedule_input_tracker_t &l, const schedule_input_tracker_t &r) { + return l.timestamp < r.timestamp; + }); + uint64_t prev_start = 0; + for (size_t i = 0; i < input_sched[input_idx].size(); ++i) { + uint64_t start = input_sched[input_idx][i].start_instruction; + assert(start >= prev_start); + if (i > 0 && start == prev_start) { + // Keep the newer one. + VPRINT(this, 1, "Dropping same-input=%d same-start=%" PRIu64 " entry\n", + input_idx, start); + all_sched[input_sched[input_idx][i - 1].output] + [static_cast( + input_sched[input_idx][i - 1].output_array_idx)] + .valid = false; + // If code after this used input_sched we would want to erase the + // entry, but we have no further use so we leave it. + } + prev_start = start; + } + } + return STATUS_SUCCESS; +} + template typename scheduler_tmpl_t::scheduler_status_t scheduler_tmpl_t::check_and_fix_modulo_problem_in_schedule( - std::vector> &input_sched, + std::vector> &input_sched, std::vector> &start2stop, - std::vector> &all_sched) + std::vector> &all_sched) { // Work around i#6107 where the counts in the file are incorrectly modulo the chunk @@ -1063,16 +1298,16 @@ scheduler_tmpl_t::check_and_fix_modulo_problem_in_schedu bool found_i6107 = false; for (int input_idx = 0; input_idx < static_cast(inputs_.size()); ++input_idx) { - std::sort(input_sched[input_idx].begin(), input_sched[input_idx].end(), - [](const schedule_record_t &l, const schedule_record_t &r) { - return l.timestamp < r.timestamp; - }); + std::sort( + input_sched[input_idx].begin(), input_sched[input_idx].end(), + [](const schedule_input_tracker_t &l, const schedule_input_tracker_t &r) { + return l.timestamp < r.timestamp; + }); uint64_t prev_start = 0; uint64_t add_to_start = 0; bool in_order = true; - for (const schedule_record_t &sched : input_sched[input_idx]) { - 
assert(sched.type == schedule_record_t::DEFAULT); - if (sched.value.start_instruction < prev_start) { + for (schedule_input_tracker_t &sched : input_sched[input_idx]) { + if (sched.start_instruction < prev_start) { // If within 50% of the end of the chunk we assume it's i#6107. if (prev_start * 2 > DEFAULT_CHUNK_SIZE) { add_to_start += DEFAULT_CHUNK_SIZE; @@ -1094,9 +1329,10 @@ scheduler_tmpl_t::check_and_fix_modulo_problem_in_schedu error_string_ = "Same timestamps not supported for i#6107 workaround"; return STATUS_ERROR_INVALID_PARAMETER; } - prev_start = sched.value.start_instruction; + prev_start = sched.start_instruction; timestamp2adjust[input_idx][sched.timestamp] = - sched.value.start_instruction + add_to_start; + sched.start_instruction + add_to_start; + sched.start_instruction += add_to_start; } } if (!found_i6107) @@ -1115,21 +1351,23 @@ scheduler_tmpl_t::check_and_fix_modulo_problem_in_schedu for (int sched_idx = 0; sched_idx < static_cast(all_sched[output_idx].size()); ++sched_idx) { auto &segment = all_sched[output_idx][sched_idx]; - auto it = timestamp2adjust[segment.key.input].find(segment.timestamp); - if (it == timestamp2adjust[segment.key.input].end()) { + if (!segment.valid) + continue; + auto it = timestamp2adjust[segment.input].find(segment.timestamp); + if (it == timestamp2adjust[segment.input].end()) { error_string_ = "Failed to find timestamp for i#6107 workaround"; return STATUS_ERROR_INVALID_PARAMETER; } - assert(it->second >= segment.value.start_instruction); - assert(it->second % DEFAULT_CHUNK_SIZE == segment.value.start_instruction); - if (it->second != segment.value.start_instruction) { + assert(it->second >= segment.start_instruction); + assert(it->second % DEFAULT_CHUNK_SIZE == segment.start_instruction); + if (it->second != segment.start_instruction) { VPRINT(this, 2, "Updating all_sched[%d][%d] input %d from %" PRId64 " to %" PRId64 "\n", - output_idx, sched_idx, segment.key.input, - segment.value.start_instruction, it->second); 
+ output_idx, sched_idx, segment.input, segment.start_instruction, + it->second); } - segment.value.start_instruction = it->second; + segment.start_instruction = it->second; } } return STATUS_SUCCESS; @@ -1137,28 +1375,139 @@ scheduler_tmpl_t::check_and_fix_modulo_problem_in_schedu template typename scheduler_tmpl_t::scheduler_status_t -scheduler_tmpl_t::get_initial_timestamps() -{ +scheduler_tmpl_t::read_switch_sequences() +{ + std::unique_ptr reader, reader_end; + if (!options_.kernel_switch_trace_path.empty()) { + reader = get_reader(options_.kernel_switch_trace_path, verbosity_); + if (!reader || !reader->init()) { + error_string_ += + "Failed to open kernel switch file " + options_.kernel_switch_trace_path; + return STATUS_ERROR_FILE_OPEN_FAILED; + } + reader_end = get_default_reader(); + } else if (!options_.kernel_switch_reader) { + // No switch data provided. + return STATUS_SUCCESS; + } else { + if (!options_.kernel_switch_reader_end) { + error_string_ += "Provided kernel switch reader but no end"; + return STATUS_ERROR_INVALID_PARAMETER; + } + reader = std::move(options_.kernel_switch_reader); + reader_end = std::move(options_.kernel_switch_reader_end); + // We own calling init() as it can block. + if (!reader->init()) { + error_string_ += "Failed to init kernel switch reader"; + return STATUS_ERROR_INVALID_PARAMETER; + } + } + // We assume these sequences are small and we can easily read them all into + // memory and don't need to stream them on every use. + // We read a single stream, even if underneath these are split into subfiles + // in an archive. + sched_type_t::switch_type_t switch_type = SWITCH_INVALID; + while (*reader != *reader_end) { + RecordType record = **reader; + // Only remember the records between the markers. 
+ trace_marker_type_t marker_type = TRACE_MARKER_TYPE_RESERVED_END; + uintptr_t marker_value = 0; + if (record_type_is_marker(record, marker_type, marker_value) && + marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) { + switch_type = static_cast(marker_value); + if (!switch_sequence_[switch_type].empty()) { + error_string_ += "Duplicate context switch sequence type found"; + return STATUS_ERROR_INVALID_PARAMETER; + } + } + if (switch_type != SWITCH_INVALID) + switch_sequence_[switch_type].push_back(record); + if (record_type_is_marker(record, marker_type, marker_value) && + marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_END) { + if (static_cast(marker_value) != switch_type) { + error_string_ += "Context switch marker values mismatched"; + return STATUS_ERROR_INVALID_PARAMETER; + } + VPRINT(this, 1, "Read %zu kernel context switch records for type %d\n", + switch_sequence_[switch_type].size(), switch_type); + switch_type = SWITCH_INVALID; + } + ++(*reader); + } + return STATUS_SUCCESS; +} + +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::get_initial_input_content( + bool gather_timestamps) +{ + // For every mode, read ahead until we see a filetype record so the user can + // examine it prior to retrieving any records. + VPRINT(this, 1, "Reading headers from inputs to find filetypes%s\n", + gather_timestamps ? " and timestamps" : ""); + assert(options_.read_inputs_in_init); // Read ahead in each input until we find a timestamp record. // Queue up any skipped records to ensure we present them to the // output stream(s). for (size_t i = 0; i < inputs_.size(); ++i) { input_info_t &input = inputs_[i]; - if (input.next_timestamp <= 0) { + bool found_filetype = false; + bool found_timestamp = !gather_timestamps || input.next_timestamp > 0; + if (!found_filetype || !found_timestamp) { + // First, check any queued records in the input. 
+ // XXX: Can we create a helper to iterate the queue and then the + // reader, and avoid the duplicated loops here? The challenge is + // the non-consuming queue loop vs the consuming and queue-pushback + // reader loop. for (const auto &record : input.queue) { + trace_marker_type_t marker_type; + uintptr_t marker_value; + if (record_type_is_marker(record, marker_type, marker_value) && + marker_type == TRACE_MARKER_TYPE_FILETYPE) { + found_filetype = true; + VPRINT(this, 2, "Input %zu filetype %zu\n", i, marker_value); + } if (record_type_is_timestamp(record, input.next_timestamp)) + found_timestamp = true; + if (found_filetype && found_timestamp) break; } } - if (input.next_timestamp <= 0) { + if (input.next_timestamp > 0) + found_timestamp = true; + if (!found_filetype || !found_timestamp) { + // If we didn't find our targets in the queue, request new records. if (input.needs_init) { input.reader->init(); input.needs_init = false; } - while (input.reader != input.reader_end) { + while (*input.reader != *input.reader_end) { RecordType record = **input.reader; + trace_marker_type_t marker_type; + uintptr_t marker_value; + if (record_type_is_marker(record, marker_type, marker_value) && + marker_type == TRACE_MARKER_TYPE_FILETYPE) { + found_filetype = true; + VPRINT(this, 2, "Input %zu filetype %zu\n", i, marker_value); + } if (record_type_is_timestamp(record, input.next_timestamp)) + found_timestamp = true; + if (found_filetype && found_timestamp) break; + // Don't go too far if only looking for filetype, to avoid reaching + // the first instruction, which causes problems with ordinals when + // there is no filetype as happens in legacy traces (and unit tests). + // Just exit with a 0 filetype. 
+ if (!found_filetype && + (record_type_is_timestamp(record, marker_value) || + (record_type_is_marker(record, marker_type, marker_value) && + marker_type == TRACE_MARKER_TYPE_PAGE_SIZE))) { + VPRINT(this, 2, "No filetype found: assuming unit test input.\n"); + found_filetype = true; + if (!gather_timestamps) + break; + } // If we see an instruction, there may be no timestamp (a malformed // synthetic trace in a test) or we may have to read thousands of records // to find it if it were somehow missing, which we do not want to do. We @@ -1170,7 +1519,7 @@ scheduler_tmpl_t::get_initial_timestamps() ++(*input.reader); } } - if (input.next_timestamp <= 0) + if (gather_timestamps && input.next_timestamp <= 0) return STATUS_ERROR_INVALID_PARAMETER; } return STATUS_SUCCESS; @@ -1199,7 +1548,7 @@ scheduler_tmpl_t::open_reader( // them to reader_t) to find it. std::unique_ptr reader_end = get_default_reader(); memref_tid_t tid = INVALID_THREAD_ID; - while (reader != reader_end) { + while (*reader != *reader_end) { RecordType record = **reader; if (record_type_has_tid(record, tid)) break; @@ -1273,6 +1622,44 @@ scheduler_tmpl_t::get_input_ordinal(output_ordinal_t out return outputs_[output].cur_input; } +template +int64_t +scheduler_tmpl_t::get_tid(output_ordinal_t output) +{ + int index = outputs_[output].cur_input; + if (index < 0) + return -1; + if (inputs_[index].is_combined_stream() || + TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, inputs_[index].reader->get_filetype())) + return inputs_[index].last_record_tid; + return inputs_[index].tid; +} + +template +int +scheduler_tmpl_t::get_shard_index(output_ordinal_t output) +{ + if (output < 0 || output >= static_cast(outputs_.size())) + return -1; + if (TESTANY(sched_type_t::SCHEDULER_USE_INPUT_ORDINALS | + sched_type_t::SCHEDULER_USE_SINGLE_INPUT_ORDINALS, + options_.flags)) { + if (inputs_.size() == 1 && inputs_[0].is_combined_stream()) { + int index; + memref_tid_t tid = get_tid(output); + auto exists = tid2shard_.find(tid); + 
if (exists == tid2shard_.end()) { + index = static_cast(tid2shard_.size()); + tid2shard_[tid] = index; + } else + index = exists->second; + return index; + } + return get_input_ordinal(output); + } + return output; +} + template int scheduler_tmpl_t::get_workload_ordinal(output_ordinal_t output) @@ -1291,15 +1678,21 @@ scheduler_tmpl_t::is_record_synthetic(output_ordinal_t o int index = outputs_[output].cur_input; if (index < 0) return false; + if (outputs_[output].in_context_switch_code) + return true; return inputs_[index].reader->is_record_synthetic(); } template int64_t -scheduler_tmpl_t::get_output_cpuid(output_ordinal_t output) +scheduler_tmpl_t::get_output_cpuid(output_ordinal_t output) const { if (options_.replay_as_traced_istream != nullptr) return outputs_[output].as_traced_cpuid; + int index = outputs_[output].cur_input; + if (index >= 0 && + TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, inputs_[index].reader->get_filetype())) + return outputs_[output].cur_input; return output; } @@ -1315,6 +1708,64 @@ scheduler_tmpl_t::get_input_stream(output_ordinal_t outp return inputs_[index].reader.get(); } +template +uint64_t +scheduler_tmpl_t::get_input_record_ordinal( + output_ordinal_t output) +{ + if (output < 0 || output >= static_cast(outputs_.size())) + return 0; + int index = outputs_[output].cur_input; + if (index < 0) + return 0; + uint64_t ord = inputs_[index].reader->get_record_ordinal(); + if (inputs_[index].reader->get_instruction_ordinal() == 0) { + // Account for get_initial_input_content() readahead for filetype/timestamp. + // If this gets any more complex, the scheduler stream should track its + // own counts for every input and just ignore the input stream's tracking. + ord -= inputs_[index].queue.size() + (inputs_[index].cur_from_queue ? 
1 : 0); + } + return ord; +} + +template +uint64_t +scheduler_tmpl_t::get_input_first_timestamp( + output_ordinal_t output) +{ + if (output < 0 || output >= static_cast(outputs_.size())) + return 0; + int index = outputs_[output].cur_input; + if (index < 0) + return 0; + uint64_t res = inputs_[index].reader->get_first_timestamp(); + if (inputs_[index].reader->get_instruction_ordinal() == 0 && + (!inputs_[index].queue.empty() || inputs_[index].cur_from_queue)) { + // Account for get_initial_input_content() readahead for filetype/timestamp. + res = 0; + } + return res; +} + +template +uint64_t +scheduler_tmpl_t::get_input_last_timestamp( + output_ordinal_t output) +{ + if (output < 0 || output >= static_cast(outputs_.size())) + return 0; + int index = outputs_[output].cur_input; + if (index < 0) + return 0; + uint64_t res = inputs_[index].reader->get_last_timestamp(); + if (inputs_[index].reader->get_instruction_ordinal() == 0 && + (!inputs_[index].queue.empty() || inputs_[index].cur_from_queue)) { + // Account for get_initial_input_content() readahead for filetype/timestamp. + res = 0; + } + return res; +} + template typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::advance_region_of_interest( @@ -1402,7 +1853,9 @@ scheduler_tmpl_t::clear_input_queue(input_info_t &input) // skip it all when skipping ahead in the input stream. int i = 0; while (!input.queue.empty()) { - assert(i == 0 || !record_type_is_instr(input.queue.front())); + assert(i == 0 || + (!record_type_is_instr(input.queue.front()) && + !record_type_is_encoding(input.queue.front()))); ++i; input.queue.pop_front(); } @@ -1422,7 +1875,8 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out // For a skip of 0 we still need to clear non-instrs from the queue, but // should not have an instr in there. 
assert(skip_amount > 0 || input.queue.empty() || - !record_type_is_instr(input.queue.front())); + (!record_type_is_instr(input.queue.front()) && + !record_type_is_encoding(input.queue.front()))); clear_input_queue(input); input.reader->skip_instructions(skip_amount); if (*input.reader == *input.reader_end) { @@ -1432,7 +1886,7 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out return sched_type_t::STATUS_REGION_INVALID; } input.in_cur_region = true; - auto &stream = outputs_[output].stream; + auto *stream = outputs_[output].stream; // We've documented that an output stream's ordinals ignore skips in its input // streams, so we do not need to remember the input's ordinals pre-skip and increase @@ -1440,14 +1894,19 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out // If we skipped from the start we may not have seen the initial headers: // use the input's cached copies. - if (stream.version_ == 0) { - stream.version_ = input.reader->get_version(); - stream.last_timestamp_ = input.reader->get_last_timestamp(); - stream.first_timestamp_ = input.reader->get_first_timestamp(); - stream.filetype_ = input.reader->get_filetype(); - stream.cache_line_size_ = input.reader->get_cache_line_size(); - stream.chunk_instr_count_ = input.reader->get_chunk_instr_count(); - stream.page_size_ = input.reader->get_page_size(); + // We set the version and filetype up front for outputs with + // an initial input, so we check a different field to detect a + // skip. + if (stream->cache_line_size_ == 0 || + // Check the version too as a fallback for inputs with no cache size. 
+ stream->version_ == 0) { + stream->version_ = input.reader->get_version(); + stream->last_timestamp_ = input.reader->get_last_timestamp(); + stream->first_timestamp_ = input.reader->get_first_timestamp(); + stream->filetype_ = input.reader->get_filetype(); + stream->cache_line_size_ = input.reader->get_cache_line_size(); + stream->chunk_instr_count_ = input.reader->get_chunk_instr_count(); + stream->page_size_ = input.reader->get_page_size(); } // We let the user know we've skipped. There's no discontinuity for the // first one so we do not insert a marker there (if we do want to insert one, @@ -1466,24 +1925,7 @@ template uint64_t scheduler_tmpl_t::get_time_micros() { - // XXX i#5843: Should we just use dr_get_microseconds() and avoid split-OS support - // inside here? We will be pulling in drdecode at least for identifying blocking - // syscalls so maybe full DR isn't much more since we're often linked with raw2trace - // which already needs it. If we do we can remove the headers for this code too. -#ifdef UNIX - struct timeval time; - if (gettimeofday(&time, nullptr) != 0) - return sched_type_t::STATUS_RECORD_FAILED; - return time.tv_sec * 1000000 + time.tv_usec; -#else - SYSTEMTIME sys_time; - GetSystemTime(&sys_time); - FILETIME file_time; - if (!SystemTimeToFileTime(&sys_time, &file_time)) - return sched_type_t::STATUS_RECORD_FAILED; - return file_time.dwLowDateTime + - (static_cast(file_time.dwHighDateTime) << 32); -#endif + return get_microsecond_timestamp(); } template @@ -1506,6 +1948,12 @@ scheduler_tmpl_t::record_schedule_segment( // We always use the current wall-clock time, as the time stored in the prior // next_record() call can be out of order across outputs and lead to deadlocks. uint64_t timestamp = get_time_micros(); + if (type == schedule_record_t::IDLE && + outputs_[output].record.back().type == schedule_record_t::IDLE) { + // Merge. We don't need intermediate timestamps when idle, and consecutive + // idle records quickly balloon the file. 
+ return sched_type_t::STATUS_OK; + } outputs_[output].record.emplace_back(type, input, start_instruction, stop_instruction, timestamp); // The stop is typically updated later in close_schedule_segment(). @@ -1601,8 +2049,13 @@ scheduler_tmpl_t::pop_from_ready_queue( sched_type_t::stream_status_t status = STATUS_OK; uint64_t cur_time = (num_blocked_ > 0) ? get_output_time(for_output) : 0; while (!ready_priority_.empty()) { - res = ready_priority_.top(); - ready_priority_.pop(); + if (options_.randomize_next_input) { + res = ready_priority_.get_random_entry(); + ready_priority_.erase(res); + } else { + res = ready_priority_.top(); + ready_priority_.pop(); + } if (res->binding.empty() || res->binding.find(for_output) != res->binding.end()) { // For blocked inputs, as we don't have interrupts or other regular // control points we only check for being unblocked when an input @@ -1729,12 +2182,62 @@ scheduler_tmpl_t::set_cur_input(output_ordinal_t output, if (status != sched_type_t::STATUS_OK) return status; } + if (outputs_[output].cur_input >= 0) + outputs_[output].prev_input = outputs_[output].cur_input; outputs_[output].cur_input = input; if (input < 0) return STATUS_OK; if (prev_input == input) return STATUS_OK; + + int prev_workload = -1; + if (outputs_[output].prev_input >= 0) { + std::lock_guard lock(*inputs_[outputs_[output].prev_input].lock); + prev_workload = inputs_[outputs_[output].prev_input].workload; + } + std::lock_guard lock(*inputs_[input].lock); + + if (prev_input < 0 && outputs_[output].stream->filetype_ == 0) { + // Set the version and filetype up front, to let the user query at init time + // as documented. 
+ outputs_[output].stream->version_ = inputs_[input].reader->get_version(); + outputs_[output].stream->filetype_ = inputs_[input].reader->get_filetype(); + } + + if (inputs_[input].pid != INVALID_PID) { + insert_switch_tid_pid(inputs_[input]); + } + + if (!switch_sequence_.empty() && + outputs_[output].stream->get_instruction_ordinal() > 0) { + sched_type_t::switch_type_t switch_type = SWITCH_INVALID; + if (prev_workload != inputs_[input].workload) + switch_type = SWITCH_PROCESS; + else + switch_type = SWITCH_THREAD; + // Inject kernel context switch code. Since the injected records belong to this + // input (the kernel is acting on behalf of this input) we insert them into the + // input's queue, but ahead of any prior queued items. This is why we walk in + // reverse, for the push_front calls to the deque. We update the tid of the + // records here to match. They are considered as is_record_synthetic() and do + // not affect input stream ordinals. + // XXX: These will appear before the top headers of a new thread which is slightly + // odd to have regular records with the new tid before the top headers. 
+ if (!switch_sequence_[switch_type].empty()) { + for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; + i >= 0; --i) { + RecordType record = switch_sequence_[switch_type][i]; + record_type_set_tid(record, inputs_[input].tid); + inputs_[input].queue.push_front(record); + } + VPRINT(this, 3, + "Inserted %zu switch records for type %d from %d.%d to %d.%d\n", + switch_sequence_[switch_type].size(), switch_type, prev_workload, + outputs_[output].prev_input, inputs_[input].workload, input); + } + } + inputs_[input].prev_time_in_quantum = outputs_[output].cur_time; if (options_.schedule_record_ostream != nullptr) { uint64_t instr_ord = inputs_[input].reader->get_instruction_ordinal(); @@ -2138,7 +2641,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, return sched_type_t::STATUS_OK; } while (true) { - bool from_queue = false; + input->cur_from_queue = false; if (input->needs_init) { // We pay the cost of this conditional to support ipc_reader_t::init() which // blocks and must be called right before reading its first record. @@ -2152,7 +2655,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, if (!input->queue.empty()) { record = input->queue.front(); input->queue.pop_front(); - from_queue = true; + input->cur_from_queue = true; } else { // We again have a flag check because reader_t::init() does an initial ++ // and so we want to skip that on the first record but perform a ++ prior @@ -2220,7 +2723,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, // to get into the trace reading loop and then do something like a skip // from the start rather than adding logic into the setup code). 
if (input->reader->get_instruction_ordinal() >= stop && - (!from_queue || (start == 0 && stop == 0))) { + (!input->cur_from_queue || (start == 0 && stop == 0))) { VPRINT(this, 5, "next_record[%d]: need new input: at end of segment in=%d " "stop=%" PRId64 "\n", @@ -2254,7 +2757,8 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, } else { input->switch_to_input = it->second; } - } else if (record_type_is_instr(record)) { + } else if (record_type_is_instr_boundary(record, + outputs_[output].last_record)) { if (syscall_incurs_switch(input, blocked_time)) { // Model as blocking and should switch to a different input. need_new_input = true; @@ -2274,6 +2778,17 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, input->pre_syscall_timestamp = 0; } } + if (outputs_[output].hit_switch_code_end) { + // We have to delay so the end marker is still in_context_switch_code. + outputs_[output].in_context_switch_code = false; + outputs_[output].hit_switch_code_end = false; + // We're now back "on the clock". + if (options_.quantum_unit == QUANTUM_TIME) + input->prev_time_in_quantum = cur_time; + // XXX: If we add a skip feature triggered on the output stream, + // we'll want to make sure skipping while in these switch and kernel + // sequences is handled correctly. + } if (record_type_is_marker(record, marker_type, marker_value) && marker_type == TRACE_MARKER_TYPE_SYSCALL) { input->processing_syscall = true; @@ -2285,9 +2800,24 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, // syscall marker, but we support tests and other synthetic sequences // with just a maybe-blocking. 
input->pre_syscall_timestamp = input->reader->get_last_timestamp(); + } else if (record_type_is_marker(record, marker_type, marker_value) && + (marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START || + marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START)) { + outputs_[output].in_kernel_code = true; + if (marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) + outputs_[output].in_context_switch_code = true; + } else if (record_type_is_marker(record, marker_type, marker_value) && + (marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_END || + marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_END)) { + outputs_[output].in_kernel_code = false; + if (marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_END) { + // We have to delay until the next record. + outputs_[output].hit_switch_code_end = true; + } } if (options_.quantum_unit == QUANTUM_INSTRUCTIONS && - record_type_is_instr(record)) { + record_type_is_instr_boundary(record, outputs_[output].last_record) && + !outputs_[output].in_kernel_code) { ++input->instrs_in_quantum; if (input->instrs_in_quantum > options_.quantum_duration) { // We again prefer to switch to another input even if the current @@ -2315,7 +2845,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, // We only switch on instruction boundaries. We could possibly switch // in between (e.g., scatter/gather long sequence of reads/writes) by // setting input->switching_pre_instruction. 
- record_type_is_instr(record)) { + record_type_is_instr_boundary(record, outputs_[output].last_record)) { VPRINT(this, 4, "next_record[%d]: hit end of time quantum after %" PRIu64 "\n", output, input->time_spent_in_quantum); @@ -2361,7 +2891,8 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, prev_input, outputs_[output].cur_input); if (!preempt) { if (options_.quantum_unit == QUANTUM_INSTRUCTIONS && - record_type_is_instr(record)) { + record_type_is_instr_boundary(record, + outputs_[output].last_record)) { --inputs_[prev_input].instrs_in_quantum; } else if (options_.quantum_unit == QUANTUM_TIME) { inputs_[prev_input].time_spent_in_quantum -= @@ -2409,6 +2940,8 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, VDO(this, 4, print_record(record);); outputs_[output].last_record = record; + record_type_has_tid(record, input->last_record_tid); + record_type_has_pid(record, input->pid); return sched_type_t::STATUS_OK; } @@ -2429,6 +2962,9 @@ scheduler_tmpl_t::unread_last_record(output_ordinal_t ou VPRINT(this, 4, "next_record[%d]: unreading last record, from %d\n", output, input->index); input->queue.push_back(outinfo.last_record); + // XXX: This should be record_type_is_instr_boundary() but we don't have the pre-prev + // record. For now we don't support unread_last_record() for record_reader_t, + // enforced in a specialization of unread_last_record(). 
if (options_.quantum_unit == QUANTUM_INSTRUCTIONS && record_type_is_instr(record)) --input->instrs_in_quantum; outinfo.last_record = create_invalid_record(); @@ -2513,12 +3049,22 @@ scheduler_tmpl_t::eof_or_idle(output_ordinal_t output) assert(options_.mapping != MAP_AS_PREVIOUSLY || outputs_[output].at_eof); return sched_type_t::STATUS_EOF; } else { - set_cur_input(output, INVALID_INPUT_ORDINAL); outputs_[output].waiting = true; + set_cur_input(output, INVALID_INPUT_ORDINAL); return sched_type_t::STATUS_IDLE; } } +template +bool +scheduler_tmpl_t::is_record_kernel(output_ordinal_t output) +{ + int index = outputs_[output].cur_input; + if (index < 0) + return false; + return inputs_[index].reader->is_record_kernel(); +} + template typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::set_output_active(output_ordinal_t output, diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 3bb5f78dee3..f14e275943d 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -72,6 +72,13 @@ namespace dynamorio { /**< General DynamoRIO namespace. */ namespace drmemtrace { /**< DrMemtrace tracing + simulation infrastructure namespace. */ +/* For testing, where schedule_record_t is not accessible. */ +class replay_file_checker_t { +public: + std::string + check(archive_istream_t *infile); +}; + /** * Schedules traced software threads onto simulated cpus. * Takes in a set of recorded traces and maps them onto a new set of output @@ -241,7 +248,7 @@ template class scheduler_tmpl_t { * A unique identifier to distinguish from other readers for this workload. 
* Typically this will be the thread id but it does not need to be, so long * as it is not 0 (DynamoRIO's INVALID_THREAD_ID sentinel). - * This is used to in the 'thread_modifiers' field of 'input_workload_t' + * This allows the 'thread_modifiers' field of 'input_workload_t' * to refer to this input. */ memref_tid_t tid = INVALID_THREAD_ID; @@ -342,6 +349,11 @@ template class scheduler_tmpl_t { * must be specified. * The original as-traced cpuid that is mapped to each output stream can be * obtained by calling the get_output_cpuid() function on each stream. + * + * An alternative use of this mapping is with a single output to interleave + * inputs in a strict timestamp order, as with make_scheduler_serial_options(), + * without specifying a schedule file and without recreating core mappings: + * only timestamps are honored. */ MAP_TO_RECORDED_OUTPUT, /** @@ -382,6 +394,8 @@ template class scheduler_tmpl_t { * dependencies: thus, timestamp ordering will be followed at context switch * points for picking the next input, but timestamps will not preempt an input. * To precisely follow the recorded timestamps, use #MAP_TO_RECORDED_OUTPUT. + * If this flag is on, #dynamorio::drmemtrace::scheduler_tmpl_t:: + * scheduler_options_t.read_inputs_in_init must be set to true. */ DEPENDENCY_TIMESTAMPS_BITFIELD = 0x01, /** @@ -435,6 +449,8 @@ template class scheduler_tmpl_t { * Causes the get_record_ordinal() and get_instruction_ordinal() results * for an output stream to equal those values for the current input stream * for that output, rather than accumulating across inputs. + * This also changes the behavior of get_shard_index() as documented under that + * function. */ SCHEDULER_USE_INPUT_ORDINALS = 0x4, // This was added for the analyzer view tool on a single trace specified via @@ -442,12 +458,30 @@ template class scheduler_tmpl_t { // whether to request SCHEDULER_USE_INPUT_ORDINALS. 
/** * If there is just one input and just one output stream, this sets - * #SCHEDULER_USE_INPUT_ORDINALS; otherwise, it has no effect. + * #SCHEDULER_USE_INPUT_ORDINALS. In all cases, this changes the behavior + * of get_shard_index() as documented under that function. */ SCHEDULER_USE_SINGLE_INPUT_ORDINALS = 0x8, // TODO i#5843: Add more speculation flags for other strategies. }; + /** + * Types of context switches for + * #dynamorio::drmemtrace::scheduler_tmpl_t::scheduler_options_t:: + * kernel_switch_trace_path and kernel_switch_reader. + * The enum value is the subfile component name in the archive_istream_t. + */ + enum switch_type_t { + /** Invalid value. */ + SWITCH_INVALID = 0, + /** Generic thread context switch. */ + SWITCH_THREAD, + /** + * Generic process context switch. A workload is considered a process. + */ + SWITCH_PROCESS, + }; + /** * Collects the parameters specifying how the scheduler should behave, outside * of the workload inputs and the output count. @@ -540,11 +574,68 @@ template class scheduler_tmpl_t { */ double block_time_scale = 1000.; /** - * The maximum time, in microseconds, for an input to be considered blocked - * for any one system call. This is applied after multiplying by - * #block_time_scale. + * The maximum time, in the units explained by #block_time_scale (either + * #QUANTUM_TIME simulator time or wall-clock microseconds for + * #QUANTUM_INSTRUCTIONS), for an input to be considered blocked for any one + * system call. This is applied after multiplying by #block_time_scale. */ uint64_t block_time_max = 25000000; + // XXX: Should we share the file-to-reader code currently in the scheduler + // with the analyzer and only then need reader interfaces and not pass paths + // to the scheduler? + /** + * Input file containing template sequences of kernel context switch code. + * Each sequence must start with a #TRACE_MARKER_TYPE_CONTEXT_SWITCH_START + * marker and end with #TRACE_MARKER_TYPE_CONTEXT_SWITCH_END. 
+ * The values of each marker must hold a #switch_type_t enum value + * indicating which type of switch it corresponds to. + * Each sequence can be stored as a separate subfile of an archive file, + * or concatenated into a single file. + * Each sequence should be in the regular offline drmemtrace format. + * The sequence is inserted into the output stream on each context switch + * of the indicated type. + * The same file (or reader) must be passed when replaying as this kernel + * code is not stored when recording. + * An alternative to passing the file path is to pass #kernel_switch_reader + * and #kernel_switch_reader_end. + */ + std::string kernel_switch_trace_path; + /** + * An alternative to #kernel_switch_trace_path is to pass a reader and + * #kernel_switch_reader_end. See the description of #kernel_switch_trace_path. + * This field is only examined if #kernel_switch_trace_path is empty. + * The scheduler will call the init() function for the reader. + */ + std::unique_ptr kernel_switch_reader; + /** The end reader for #kernel_switch_reader. */ + std::unique_ptr kernel_switch_reader_end; + /** + * If true, enables a mode where all outputs are serialized into one global outer + * layer output. The single global output stream alternates in round-robin + * lockstep among each core output. The core outputs operate just like they + * would with no serialization, other than timing differences relative to other + * core outputs. + */ + bool single_lockstep_output = false; + /** + * If true, enables a mode where the normal methods of choosing the next input + * based on priority, timestamps (if -sched_order_time is set), and FIFO order + * are disabled. Instead, the scheduler selects the next input randomly. Output + * bindings are still honored. This is intended for experimental use in + * sensitivity studies. + */ + bool randomize_next_input = false; + /** + * If true, the scheduler will read from each input to determine its filetype + * during initialization. 
If false, the filetype will not be available prior + * to explicit record retrieval by the user, but this may be required for + * inputs whose sources are not yet set up at scheduler init time (e.g., + * inputs over blocking pipes with data only becoming available after + * initializing the scheduler, as happens with online trace analyzers). + * This must be true for #DEPENDENCY_TIMESTAMPS as it also requires reading + * ahead. + */ + bool read_inputs_in_init = true; }; /** @@ -582,9 +673,10 @@ template class scheduler_tmpl_t { class stream_t : public memtrace_stream_t { public: stream_t(scheduler_tmpl_t *scheduler, int ordinal, - int verbosity = 0) + int verbosity = 0, int max_ordinal = -1) : scheduler_(scheduler) , ordinal_(ordinal) + , max_ordinal_(max_ordinal) , verbosity_(verbosity) { } @@ -695,11 +787,15 @@ template class scheduler_tmpl_t { { if (TESTANY(sched_type_t::SCHEDULER_USE_INPUT_ORDINALS, scheduler_->options_.flags)) - return scheduler_->get_input_stream(ordinal_)->get_record_ordinal(); + return scheduler_->get_input_record_ordinal(ordinal_); return cur_ref_count_; } /** * Returns the count of instructions from the start of the trace to this point. + * For record_scheduler_t, if any encoding records or the internal record + * TRACE_MARKER_TYPE_BRANCH_TARGET records are present prior to an instruction + * marker, the count will increase at the first of those records as they are + * considered part of the instruction. * If #SCHEDULER_USE_INPUT_ORDINALS is set, then this value matches the * instruction ordinal for the current input stream (and thus might decrease or * not change across records if the input changed). 
Otherwise, if multiple input @@ -758,7 +854,7 @@ template class scheduler_tmpl_t { { if (TESTANY(sched_type_t::SCHEDULER_USE_INPUT_ORDINALS, scheduler_->options_.flags)) - return scheduler_->get_input_stream(ordinal_)->get_last_timestamp(); + return scheduler_->get_input_last_timestamp(ordinal_); return last_timestamp_; } /** @@ -769,12 +865,15 @@ template class scheduler_tmpl_t { { if (TESTANY(sched_type_t::SCHEDULER_USE_INPUT_ORDINALS, scheduler_->options_.flags)) - return scheduler_->get_input_stream(ordinal_)->get_first_timestamp(); + return scheduler_->get_input_first_timestamp(ordinal_); return first_timestamp_; } /** * Returns the #trace_version_t value from the * #TRACE_MARKER_TYPE_VERSION record in the trace header. + * This can be queried prior to explicitly retrieving any records from + * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t:: + * scheduler_options_t.read_inputs_in_init is false. */ uint64_t get_version() const override @@ -786,6 +885,9 @@ template class scheduler_tmpl_t { * #offline_file_type_t identifying the architecture and * other key high-level attributes of the trace from the * #TRACE_MARKER_TYPE_FILETYPE record in the trace header. + * This can be queried prior to explicitly retrieving any records from + * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t:: + * scheduler_options_t.read_inputs_in_init is false. */ uint64_t get_filetype() const override @@ -837,7 +939,9 @@ template class scheduler_tmpl_t { /** * Returns a unique identifier for the current output stream. For * #MAP_TO_RECORDED_OUTPUT, the identifier is the as-traced cpuid mapped to this - * output. For dynamic schedules, the identifier is the output stream ordinal. + * output. For dynamic schedules, the identifier is the output stream ordinal, + * except for #OFFLINE_FILE_TYPE_CORE_SHARDED inputs where the identifier + * is the input stream ordinal. 
*/ int64_t get_output_cpuid() const override @@ -864,6 +968,16 @@ template class scheduler_tmpl_t { return static_cast(get_input_stream_ordinal()); } + /** + * Returns the thread identifier for the current input stream feeding this + * output stream. + */ + int64_t + get_tid() const override + { + return scheduler_->get_tid(ordinal_); + } + /** * Returns the #dynamorio::drmemtrace::memtrace_stream_t interface for the * current input stream feeding this output stream. @@ -874,9 +988,50 @@ template class scheduler_tmpl_t { return scheduler_->get_input_stream_interface(get_input_stream_ordinal()); } + /** + * Returns the ordinal for the current output stream. If + * #dynamorio::drmemtrace::scheduler_tmpl_t::scheduler_options_t:: + * single_lockstep_output + * is set to true, this returns the ordinal of the currently active "inner" + * output stream. Otherwise, this returns the constant ordinal for this output + * stream as there is no concept of inner or outer streams. + */ + output_ordinal_t + get_output_stream_ordinal() const + { + return ordinal_; + } + + /** + * For #SCHEDULER_USE_INPUT_ORDINALS or + * #SCHEDULER_USE_SINGLE_INPUT_ORDINALS, returns the input stream ordinal, except + * for the case of a single combined-stream input with the passed-in thread id + * set to INVALID_THREAD_ID (the serial analysis mode for analyzer tools) in + * which case the last trace record's tid is returned; otherwise returns the + * output stream ordinal. + */ + int + get_shard_index() const override + { + return scheduler_->get_shard_index(ordinal_); + } + + /** + * Returns whether the current record is from a part of the trace corresponding + * to kernel execution. + */ + bool + is_record_kernel() const override + { + return scheduler_->is_record_kernel(ordinal_); + } + protected: scheduler_tmpl_t *scheduler_ = nullptr; int ordinal_ = -1; + // If max_ordinal_ >= 0, ordinal_ is incremented modulo max_ordinal_ at the start + // of every next_record() invocation. 
+ int max_ordinal_ = -1; int verbosity_ = 0; uint64_t cur_ref_count_ = 0; uint64_t cur_instr_count_ = 0; @@ -888,6 +1043,7 @@ template class scheduler_tmpl_t { uint64_t cache_line_size_ = 0; uint64_t chunk_instr_count_ = 0; uint64_t page_size_ = 0; + RecordType prev_record_ = {}; // Let the outer class update our state. friend class scheduler_tmpl_t; @@ -895,6 +1051,7 @@ template class scheduler_tmpl_t { /** Default constructor. */ scheduler_tmpl_t() + : ready_priority_(static_cast(get_time_micros())) { } virtual ~scheduler_tmpl_t() = default; @@ -915,7 +1072,7 @@ template class scheduler_tmpl_t { { if (ordinal < 0 || ordinal >= static_cast(outputs_.size())) return nullptr; - return &outputs_[ordinal].stream; + return outputs_[ordinal].stream; } /** Returns the number of input streams. */ @@ -948,6 +1105,15 @@ template class scheduler_tmpl_t { return inputs_[input].reader->get_stream_name(); } + /** + * Returns the get_output_cpuid() value for the given output. + * This interface is exported so that a user can get the cpuids at initialization + * time when using single_lockstep_output where there is just one output stream + * even with multiple output cpus. + */ + int64_t + get_output_cpuid(output_ordinal_t output) const; + /** Returns a string further describing an error code. */ std::string get_error_string() const @@ -973,6 +1139,14 @@ template class scheduler_tmpl_t { : lock(new std::mutex) { } + // Returns whether the stream mixes threads (online analysis mode) yet + // wants to treat them as separate shards (so not core-sharded-on-disk). + bool + is_combined_stream() + { + // If the tid is invalid, this is a combined stream (online analysis mode). + return tid == INVALID_THREAD_ID; + } int index = -1; // Position in inputs_ vector. std::unique_ptr reader; std::unique_ptr reader_end; @@ -985,11 +1159,16 @@ template class scheduler_tmpl_t { // A tid can be duplicated across workloads so we need the pair of // workload index + tid to identify the original input. 
int workload = -1; + // If left invalid, this is a combined stream (online analysis mode). memref_tid_t tid = INVALID_THREAD_ID; + memref_pid_t pid = INVALID_PID; + // Used for combined streams. + memref_tid_t last_record_tid = INVALID_THREAD_ID; // If non-empty these records should be returned before incrementing the reader. // This is used for read-ahead and inserting synthetic records. // We use a deque so we can iterate over it. std::deque queue; + bool cur_from_queue; std::set binding; int priority = 0; std::vector regions_of_interest; @@ -1104,15 +1283,21 @@ template class scheduler_tmpl_t { output_ordinal_t ordinal, typename spec_type_t::speculator_flags_t speculator_flags, RecordType last_record_init, int verbosity = 0) - : stream(scheduler, ordinal, verbosity) + : self_stream(scheduler, ordinal, verbosity) + , stream(&self_stream) , speculator(speculator_flags, verbosity) , last_record(last_record_init) { } - stream_t stream; + stream_t self_stream; + // Normally stream points to &self_stream, but for single_lockstep_output + // it points to a global stream shared among all outputs. + stream_t *stream; // This is an index into the inputs_ vector so -1 is an invalid value. // This is set to >=0 for all non-empty outputs during init(). input_ordinal_t cur_input = INVALID_INPUT_ORDINAL; + // Holds the prior non-invalid input. + input_ordinal_t prev_input = INVALID_INPUT_ORDINAL; // For static schedules we can populate this up front and avoid needing a // lock for dynamically finding the next input, keeping things parallel. std::vector input_indices; @@ -1133,6 +1318,9 @@ template class scheduler_tmpl_t { int record_index = 0; bool waiting = false; // Waiting or idling. bool active = true; + bool in_kernel_code = false; + bool in_context_switch_code = false; + bool hit_switch_code_end = false; // Used for time-based quanta. uint64_t cur_time = 0; // Used for MAP_TO_RECORDED_OUTPUT get_output_cpuid(). 
@@ -1143,16 +1331,52 @@ template class scheduler_tmpl_t { uint64_t wait_start_time = 0; }; + // Used for reading as-traced schedules. + struct schedule_output_tracker_t { + schedule_output_tracker_t(bool valid, input_ordinal_t input, + uint64_t start_instruction, uint64_t timestamp) + : valid(valid) + , input(input) + , start_instruction(start_instruction) + , stop_instruction(0) + , timestamp(timestamp) + { + } + // To support removing later-discovered-as-redundant entries without + // a linear erase operation we have a 'valid' flag. + bool valid; + input_ordinal_t input; + uint64_t start_instruction; + uint64_t stop_instruction; + uint64_t timestamp; + }; + // Used for reading as-traced schedules. + struct schedule_input_tracker_t { + schedule_input_tracker_t(output_ordinal_t output, uint64_t output_array_idx, + uint64_t start_instruction, uint64_t timestamp) + : output(output) + , output_array_idx(output_array_idx) + , start_instruction(start_instruction) + , timestamp(timestamp) + { + } + output_ordinal_t output; + uint64_t output_array_idx; + uint64_t start_instruction; + uint64_t timestamp; + }; + // Called just once at initialization time to set the initial input-to-output // mappings and state. scheduler_status_t set_initial_schedule(std::unordered_map> &workload2inputs); // Assumed to only be called at initialization time. - // Reads ahead in each input to find its first timestamp (queuing the records - // read to feed to the user's first requests). + // Reads ahead in each input to find its filetype, and if "gather_timestamps" + // is set, to find its first timestamp, queuing all records + // read to feed to the user's first requests. scheduler_status_t - get_initial_timestamps(); + get_initial_input_content(bool gather_timestamps); // Opens up all the readers for each file in 'path' which may be a directory. // Returns a map of the thread id of each file to its index in inputs_. 
@@ -1204,15 +1428,23 @@ template class scheduler_tmpl_t { scheduler_status_t read_traced_schedule(); + scheduler_status_t + remove_zero_instruction_segments( + std::vector> &input_sched, + std::vector> &all_sched); + scheduler_status_t check_and_fix_modulo_problem_in_schedule( - std::vector> &input_sched, + std::vector> &input_sched, std::vector> &start2stop, - std::vector> &all_sched); + std::vector> &all_sched); scheduler_status_t read_recorded_schedule(); + scheduler_status_t + read_switch_sequences(); + uint64_t get_time_micros(); @@ -1256,6 +1488,14 @@ template class scheduler_tmpl_t { bool record_type_has_tid(RecordType record, memref_tid_t &tid); + // If the given record has a process id field, returns true and the value. + bool + record_type_has_pid(RecordType record, memref_pid_t &pid); + + // For trace_entry_t, only sets the tid for record types that have it. + void + record_type_set_tid(RecordType &record, memref_tid_t tid); + // Returns whether the given record is an instruction. bool record_type_is_instr(RecordType record); @@ -1271,6 +1511,12 @@ template class scheduler_tmpl_t { bool record_type_is_invalid(RecordType record); + bool + record_type_is_encoding(RecordType record); + + bool + record_type_is_instr_boundary(RecordType record, RecordType prev_record); + // Creates the marker we insert between regions of interest. RecordType create_region_separator_marker(memref_tid_t tid, uintptr_t value); @@ -1282,6 +1528,11 @@ template class scheduler_tmpl_t { RecordType create_invalid_record(); + // If necessary, inserts context switch info on the incoming pid+tid. + // The lock for 'input' is held by the caller. + void + insert_switch_tid_pid(input_info_t &input); + // Used for diagnostics: prints record fields to stderr. 
void print_record(const RecordType &record); @@ -1296,6 +1547,16 @@ template class scheduler_tmpl_t { input_ordinal_t get_input_ordinal(output_ordinal_t output); + // Returns the thread identifier for the current input stream scheduled on + // the 'output_ordinal'-th output stream. + int64_t + get_tid(output_ordinal_t output); + + // Returns the shard index for the current input stream scheduled on + // the 'output_ordinal'-th output stream. + int + get_shard_index(output_ordinal_t output); + // Returns the workload ordinal value for the current input stream scheduled on // the 'output_ordinal'-th output stream. int @@ -1306,14 +1567,26 @@ template class scheduler_tmpl_t { bool is_record_synthetic(output_ordinal_t output); - int64_t - get_output_cpuid(output_ordinal_t output); - // Returns the direct handle to the current input stream interface for the // 'output_ordinal'-th output stream. memtrace_stream_t * get_input_stream(output_ordinal_t output); + // Returns the record ordinal for the current input stream interface for the + // 'output_ordinal'-th output stream. + uint64_t + get_input_record_ordinal(output_ordinal_t output); + + // Returns the first timestamp for the current input stream interface for the + // 'output_ordinal'-th output stream. + uint64_t + get_input_first_timestamp(output_ordinal_t output); + + // Returns the last timestamp for the current input stream interface for the + // 'output_ordinal'-th output stream. + uint64_t + get_input_last_timestamp(output_ordinal_t output); + stream_status_t start_speculation(output_ordinal_t output, addr_t start_address, bool queue_current_record); @@ -1331,6 +1604,11 @@ template class scheduler_tmpl_t { stream_status_t eof_or_idle(output_ordinal_t output); + // Returns whether the current record for the current input stream scheduled on + // the 'output_ordinal'-th output stream is from a part of the trace corresponding + // to kernel execution. 
+ bool + is_record_kernel(output_ordinal_t output); /////////////////////////////////////////////////////////////////////////// // Support for ready queues for who to schedule next: @@ -1428,6 +1706,23 @@ template class scheduler_tmpl_t { } }; std::unordered_map tid2input_; + struct switch_type_hash_t { + std::size_t + operator()(const switch_type_t &st) const + { + return std::hash()(static_cast(st)); + } + }; + std::unordered_map, switch_type_hash_t> + switch_sequence_; + // For single_lockstep_output. + std::unique_ptr global_stream_; + // For online where we currently have to map dynamically observed thread ids + // to the 0-based shard index. + std::unordered_map tid2shard_; + + // Our testing class needs access to schedule_record_t. + friend class replay_file_checker_t; }; /** See #dynamorio::drmemtrace::scheduler_tmpl_t. */ diff --git a/clients/drcachesim/simulator/cache_simulator.cpp b/clients/drcachesim/simulator/cache_simulator.cpp index 8a91073ef33..f83e53c8f44 100644 --- a/clients/drcachesim/simulator/cache_simulator.cpp +++ b/clients/drcachesim/simulator/cache_simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -458,14 +458,17 @@ cache_simulator_t::process_memref(const memref_t &memref) return true; } - int core; - if (memref.data.tid == last_thread_) - core = last_core_; - else { - core = core_for_thread(memref.data.tid); - last_thread_ = memref.data.tid; - last_core_ = core; - } + int core_index; + if (shard_type_ == SHARD_BY_THREAD) { + if (memref.data.tid == last_thread_) + core_index = last_core_index_; + else { + core_index = core_for_thread(memref.data.tid); + last_thread_ = memref.data.tid; + last_core_index_ = core_index; + } + } else + core_index = core_for_thread(memref.data.tid); // To support swapping to physical addresses without modifying the passed-in // memref (which is also passed to other tools run at the same time) we use @@ -484,7 +487,7 @@ cache_simulator_t::process_memref(const memref_t &memref) << " @" << (void *)simref->instr.addr << " instr x" << simref->instr.size << "\n"; } - l1_icaches_[core]->request(*simref); + l1_icaches_[core_index]->request(*simref); } else if (simref->data.type == TRACE_TYPE_READ || simref->data.type == TRACE_TYPE_WRITE || // We may potentially handle prefetches differently. @@ -496,21 +499,21 @@ cache_simulator_t::process_memref(const memref_t &memref) << trace_type_names[simref->data.type] << " " << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_dcaches_[core]->request(*simref); + l1_dcaches_[core_index]->request(*simref); } else if (simref->flush.type == TRACE_TYPE_INSTR_FLUSH) { if (knobs_.verbose >= 3) { std::cerr << "::" << simref->data.pid << "." << simref->data.tid << ":: " << " @" << (void *)simref->data.pc << " iflush " << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_icaches_[core]->flush(*simref); + l1_icaches_[core_index]->flush(*simref); } else if (simref->flush.type == TRACE_TYPE_DATA_FLUSH) { if (knobs_.verbose >= 3) { std::cerr << "::" << simref->data.pid << "." 
<< simref->data.tid << ":: " << " @" << (void *)simref->data.pc << " dflush " << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_dcaches_[core]->flush(*simref); + l1_dcaches_[core_index]->flush(*simref); } else if (simref->exit.type == TRACE_TYPE_THREAD_EXIT) { handle_thread_exit(simref->exit.tid); last_thread_ = 0; @@ -593,7 +596,7 @@ cache_simulator_t::print_results() // Print core and associated L1 cache stats first. for (unsigned int i = 0; i < knobs_.num_cores; i++) { print_core(i); - if (thread_ever_counts_[i] > 0) { + if (shard_type_ == SHARD_BY_CORE || thread_ever_counts_[i] > 0) { if (l1_icaches_[i] != l1_dcaches_[i]) { std::cerr << " " << l1_icaches_[i]->get_name() << " (" << l1_icaches_[i]->get_description() << ") stats:" << std::endl; diff --git a/clients/drcachesim/simulator/caching_device_stats.h b/clients/drcachesim/simulator/caching_device_stats.h index d27b08b24ff..62d3989f368 100644 --- a/clients/drcachesim/simulator/caching_device_stats.h +++ b/clients/drcachesim/simulator/caching_device_stats.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -36,6 +36,8 @@ #ifndef _CACHING_DEVICE_STATS_H_ #define _CACHING_DEVICE_STATS_H_ 1 +#define NOMINMAX // Avoid windows.h messing up std::max. + #include #ifdef HAS_ZLIB # include @@ -106,7 +108,9 @@ class access_count_t { // Detect the overflow and assign maximum possible value to the addr_end. if (addr_beg > addr_end) { - addr_end = std::numeric_limits::max(); + // Wrap max in parens to work around Visual Studio compiler issues with the + // max macro (even despite NOMINMAX defined above). 
+ addr_end = (std::numeric_limits::max)(); } std::map::reverse_iterator prev_it(next_it); diff --git a/clients/drcachesim/simulator/simulator.cpp b/clients/drcachesim/simulator/simulator.cpp index 3421e1dfe92..3ff54d803f5 100644 --- a/clients/drcachesim/simulator/simulator.cpp +++ b/clients/drcachesim/simulator/simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -74,11 +74,11 @@ simulator_t::init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t war knob_cpu_scheduling_ = cpu_scheduling; knob_use_physical_ = use_physical; knob_verbose_ = verbose; - last_thread_ = 0; - last_core_ = 0; - cpu_counts_.resize(knob_num_cores_, 0); - thread_counts_.resize(knob_num_cores_, 0); - thread_ever_counts_.resize(knob_num_cores_, 0); + if (shard_type_ == SHARD_BY_THREAD) { + cpu_counts_.resize(knob_num_cores_, 0); + thread_counts_.resize(knob_num_cores_, 0); + thread_ever_counts_.resize(knob_num_cores_, 0); + } if (knob_warmup_refs_ > 0 && (knob_warmup_fraction_ > 0.0)) { ERRMSG("Usage error: Either warmup_refs OR warmup_fraction can be set"); @@ -87,13 +87,32 @@ simulator_t::init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t war } } +std::string +simulator_t::initialize_stream(memtrace_stream_t *serial_stream) +{ + serial_stream_ = serial_stream; + return ""; +} + +std::string +simulator_t::initialize_shard_type(shard_type_t shard_type) +{ + shard_type_ = shard_type; + if (shard_type_ == SHARD_BY_CORE && knob_cpu_scheduling_) { + return "Usage error: -cpu_scheduling not supported with -core_serial; use " + "-cpu_schedule_file with -core_serial instead"; + } + return ""; +} + bool simulator_t::process_memref(const memref_t &memref) { if (memref.marker.type != TRACE_TYPE_MARKER) return true; if (memref.marker.marker_type == 
TRACE_MARKER_TYPE_CPU_ID && knob_cpu_scheduling_) { - int cpu = (int)(intptr_t)memref.marker.marker_value; + assert(shard_type_ == SHARD_BY_THREAD); + int64_t cpu = static_cast(memref.marker.marker_value); if (cpu < 0) return true; int min_core; @@ -114,8 +133,8 @@ simulator_t::process_memref(const memref_t &memref) thread2core_[memref.marker.tid] = min_core; ++thread_counts_[min_core]; ++thread_ever_counts_[min_core]; - last_thread_ = -1; - last_core_ = -1; + last_thread_ = INVALID_THREAD_ID; + last_core_index_ = INVALID_CORE_INDEX; } if (!knob_use_physical_) return true; @@ -218,6 +237,17 @@ simulator_t::find_emptiest_core(std::vector &counts) const int simulator_t::core_for_thread(memref_tid_t tid) { + if (shard_type_ == SHARD_BY_CORE) { + int core_index = serial_stream_->get_shard_index(); + if (core_index != last_core_index_) { + // Track the cpuid<->ordinal relationship for our results printout. + int64_t cpu = serial_stream_->get_output_cpuid(); + if (cpu2core_.find(cpu) == cpu2core_.end()) + cpu2core_[cpu] = core_index; + } + last_core_index_ = core_index; + return core_index; + } auto exists = thread2core_.find(tid); if (exists != thread2core_.end()) return exists->second; @@ -242,6 +272,8 @@ simulator_t::core_for_thread(memref_tid_t tid) void simulator_t::handle_thread_exit(memref_tid_t tid) { + if (shard_type_ == SHARD_BY_CORE) + return; std::unordered_map::iterator exists = thread2core_.find(tid); assert(exists != thread2core_.end()); assert(thread_counts_[exists->second] > 0); @@ -256,17 +288,20 @@ simulator_t::handle_thread_exit(memref_tid_t tid) void simulator_t::print_core(int core) const { - if (!knob_cpu_scheduling_) { + if (!knob_cpu_scheduling_ && shard_type_ == SHARD_BY_THREAD) { std::cerr << "Core #" << core << " (" << thread_ever_counts_[core] << " thread(s))" << std::endl; } else { std::cerr << "Core #" << core; - if (cpu_counts_[core] == 0) { + if (shard_type_ == SHARD_BY_THREAD && cpu_counts_[core] == 0) { // We keep the "(s)" mainly to 
simplify test templates. std::cerr << " (0 traced CPU(s))" << std::endl; return; } - std::cerr << " (" << cpu_counts_[core] << " traced CPU(s): "; + std::cerr << " ("; + if (shard_type_ == SHARD_BY_THREAD) // Always 1:1 for SHARD_BY_CORE. + std::cerr << cpu_counts_[core] << " "; + std::cerr << "traced CPU(s): "; bool need_comma = false; for (auto iter = cpu2core_.begin(); iter != cpu2core_.end(); ++iter) { if (iter->second == core) { diff --git a/clients/drcachesim/simulator/simulator.h b/clients/drcachesim/simulator/simulator.h index ccc96505fde..74779291041 100644 --- a/clients/drcachesim/simulator/simulator.h +++ b/clients/drcachesim/simulator/simulator.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -61,6 +61,13 @@ class simulator_t : public analysis_tool_t { double warmup_fraction, uint64_t sim_refs, bool cpu_scheduling, bool use_physical, unsigned int verbose); virtual ~simulator_t() = 0; + + std::string + initialize_stream(memtrace_stream_t *serial_stream) override; + + std::string + initialize_shard_type(shard_type_t shard_type) override; + bool process_memref(const memref_t &memref) override; @@ -95,6 +102,11 @@ class simulator_t : public analysis_tool_t { addr_t synthetic_virt2phys(addr_t virt) const; + // We use -1 instead of INVALID_THREAD_ID==0 because we have many tests + // which set tid to 0 to mean "don't care". 
+ static constexpr memref_tid_t INVALID_LAST_THREAD = -1; + static constexpr int INVALID_CORE_INDEX = -1; + unsigned int knob_num_cores_; uint64_t knob_skip_refs_; uint64_t knob_warmup_refs_; @@ -104,11 +116,14 @@ class simulator_t : public analysis_tool_t { bool knob_use_physical_; unsigned int knob_verbose_; - memref_tid_t last_thread_; - int last_core_; + shard_type_t shard_type_ = SHARD_BY_THREAD; + memtrace_stream_t *serial_stream_ = nullptr; + memref_tid_t last_thread_ = INVALID_LAST_THREAD; // Only used for SHARD_BY_THREAD. + int last_core_index_ = INVALID_CORE_INDEX; // For thread mapping to cores: - std::unordered_map cpu2core_; + std::unordered_map cpu2core_; + // The following fields are only used for SHARD_BY_THREAD. std::unordered_map thread2core_; std::vector cpu_counts_; std::vector thread_counts_; diff --git a/clients/drcachesim/simulator/tlb_simulator.cpp b/clients/drcachesim/simulator/tlb_simulator.cpp index ec3a35aad0c..cdf75eaeb3e 100644 --- a/clients/drcachesim/simulator/tlb_simulator.cpp +++ b/clients/drcachesim/simulator/tlb_simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -158,13 +158,13 @@ tlb_simulator_t::process_memref(const memref_t &memref) // We use a static scheduling of threads to cores, as it is // not practical to measure which core each thread actually // ran on for each memref. 
- int core; + int core_index; if (memref.data.tid == last_thread_) - core = last_core_; + core_index = last_core_index_; else { - core = core_for_thread(memref.data.tid); + core_index = core_for_thread(memref.data.tid); last_thread_ = memref.data.tid; - last_core_ = core; + last_core_index_ = core_index; } // To support swapping to physical addresses without modifying the passed-in @@ -178,10 +178,10 @@ tlb_simulator_t::process_memref(const memref_t &memref) } if (type_is_instr(simref->instr.type)) - itlbs_[core]->request(*simref); + itlbs_[core_index]->request(*simref); else if (simref->data.type == TRACE_TYPE_READ || simref->data.type == TRACE_TYPE_WRITE) - dtlbs_[core]->request(*simref); + dtlbs_[core_index]->request(*simref); else if (simref->exit.type == TRACE_TYPE_THREAD_EXIT) { handle_thread_exit(simref->exit.tid); last_thread_ = 0; diff --git a/clients/drcachesim/tests/CMakeLists.txt b/clients/drcachesim/tests/CMakeLists.txt index ce7b342b548..31ffb8b0f43 100644 --- a/clients/drcachesim/tests/CMakeLists.txt +++ b/clients/drcachesim/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2015-2017 Google, Inc. All rights reserved. +# Copyright (c) 2015-2024 Google, Inc. All rights reserved. # ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -35,16 +35,13 @@ cmake_minimum_required(VERSION 3.7) project(DynamoRIO_drmemtrace) -if (UNIX) # The shipped drmemtrace on Windows has no zlib support. +if (UNIX) # The shipped drmemtrace on Windows has no compression support. + # The CMake exported target rule for drmemtrace_analyzer includes linking in + # external libraries like zlib and lz4, but those libraries may not be on primary + # search paths, so we set up those paths if available. 
find_package(ZLIB) - if (NOT ZLIB_FOUND) - # Not a fatal error to succeed on AArch64 pre-commit and other places where - # zlib is not installed. - # XXX i#2006: Can we automate the zlib link somehow? Should we provide two versions - # of our libs, one with and one without? Can we include some version of zlib.a? - message(WARNING "zlib not found: linking will fail if drmemtrace has compressed " - "file support built-in") - endif() + find_library(liblz4 lz4) + find_library(libsnappy snappy) endif () find_package(DynamoRIO) @@ -59,6 +56,3 @@ use_DynamoRIO_extension(analyzer_separate droption) # for droption.h use_DynamoRIO_drmemtrace(analyzer_separate) # for analysis framework headers target_link_libraries(analyzer_separate drmemtrace_analyzer drmemtrace_histogram drfrontendlib) -if (ZLIB_FOUND) - target_link_libraries(analyzer_separate ${ZLIB_LIBRARIES}) -endif () diff --git a/clients/drcachesim/tests/allasm-scattergather-basic-counts-aarch64.templatex b/clients/drcachesim/tests/allasm-scattergather-basic-counts-aarch64.templatex index be2854f5170..6dda568fada 100644 --- a/clients/drcachesim/tests/allasm-scattergather-basic-counts-aarch64.templatex +++ b/clients/drcachesim/tests/allasm-scattergather-basic-counts-aarch64.templatex @@ -2,10 +2,27 @@ Hello, world! 
---- ---- Basic counts tool results: Total counts: +#ifdef __ARM_FEATURE_SVE2 + 724 total \(fetched\) instructions + 270 total unique \(fetched\) instructions +#else 685 total \(fetched\) instructions 255 total unique \(fetched\) instructions +#endif 0 total non-fetched instructions 0 total prefetches +#ifdef __ARM_FEATURE_SVE2 +#if (__ARM_FEATURE_SVE_BITS == 128) + 1158 total data loads + 873 total data stores +#elif (__ARM_FEATURE_SVE_BITS == 256) + 2070 total data loads + 1615 total data stores +#elif (__ARM_FEATURE_SVE_BITS == 512) + 3894 total data loads + 3099 total data stores +#endif /* __ARM_FEATURE_SVE_BITS */ +#else #if (__ARM_FEATURE_SVE_BITS == 128) 1137 total data loads 861 total data stores @@ -15,17 +32,35 @@ Total counts: #elif (__ARM_FEATURE_SVE_BITS == 512) 3831 total data loads 3063 total data stores -#endif +#endif /* __ARM_FEATURE_SVE_BITS */ +#endif /* __ARM_FEATURE_SVE2 */ 0 total icache flushes 0 total dcache flushes 1 total threads .* total scheduling markers .* Thread .* counts: +#ifdef __ARM_FEATURE_SVE2 + 724 \(fetched\) instructions + 270 unique \(fetched\) instructions +#else 685 \(fetched\) instructions 255 unique \(fetched\) instructions +#endif 0 non-fetched instructions 0 prefetches +#ifdef __ARM_FEATURE_SVE2 +#if (__ARM_FEATURE_SVE_BITS == 128) + 1158 data loads + 873 data stores +#elif (__ARM_FEATURE_SVE_BITS == 256) + 2070 data loads + 1615 data stores +#elif (__ARM_FEATURE_SVE_BITS == 512) + 3894 data loads + 3099 data stores +#endif /* __ARM_FEATURE_SVE_BITS */ +#else #if (__ARM_FEATURE_SVE_BITS == 128) 1137 data loads 861 data stores @@ -35,7 +70,8 @@ Thread .* counts: #elif (__ARM_FEATURE_SVE_BITS == 512) 3831 data loads 3063 data stores -#endif +#endif /* __ARM_FEATURE_SVE_BITS */ +#endif /* __ARM_FEATURE_SVE2 */ 0 icache flushes 0 dcache flushes .* scheduling markers diff --git a/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex 
b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex new file mode 100644 index 00000000000..e609dea3c8f --- /dev/null +++ b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex @@ -0,0 +1,9 @@ +.* +#if __ARM_FEATURE_SVE_BITS == 128 +.* +#elif __ARM_FEATURE_SVE_BITS == 256 +.* +#elif __ARM_FEATURE_SVE_BITS == 512 +.* +#endif +.* diff --git a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm index b1105e6a7b2..8e08ceee19b 100644 --- a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm +++ b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm @@ -64,19 +64,19 @@ #define D_MASK_REG p3 // Governing predicate for doubleword-element instructions #define BUFFER_REG x1 -#define Z_BASE_REG z0 // base reg used in vector+immediate instructions +#define Z_BASE_REG z0 // base reg used in vector+immediate/vector+scalar instructions #define S_INDEX_REG z1 // index reg used in scalar+immed instructions with 32-bit elements #define D_INDEX_REG z2 // index reg used in scalar+immed instructions with 64-bit elements -#define X_INDEX_REG x2 // index reg used in scalar+scalar instructions +#define X_INDEX_REG x2 // index reg used in scalar+scalar/vector+scalar instructions /* * Test functions. The commented number after each instruction indicates the number of * elements this instruction accesses with a 128-bit vector length. We can add these * numbers up to determine how many loads/stores we expect to see in the basic_counts * output when all elements are active. To find the number for hardware with a larger - * vector size, multiply by vl_size/16. + * vector size, multiply by vl_bytes/16. 
*/ test_scalar_plus_vector: @@ -292,75 +292,75 @@ test_scalar_plus_scalar: test_scalar_plus_immediate: - ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1h DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1h DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 + ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16 + ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16 + ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1h 
DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1h DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 // Total: 104 - ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 32 - ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 + ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 32 + ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 16 + ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 8 + ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 4 // Total: 60 - ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 48 - ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 24 - ld3w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 12 - ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 6 + ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 48 + ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 24 + ld3w { DEST_REG1.s, DEST_REG2.s, 
DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 12 + ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 6 // Total: 90 - ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 64 - ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 32 - ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 + ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 64 + ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 32 + ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 16 + ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 8 // Total: 120 // Total loads: 104 + 60 + 90 + 120 = 374 - st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 + st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #1, mul vl] // 16 + st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 8 + st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 
8 + st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 // Total: 52 - st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 32 - st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st2w { SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 + st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #2, mul vl] // 32 + st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #2, mul vl] // 16 + st2w { SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #2, mul vl] // 8 + st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #2, mul vl] // 4 // Total: 60 - st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 48 - st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 24 - st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 12 - st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 6 + st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #3, mul vl] // 48 + st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #3, mul vl] // 24 + st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #3, mul vl] // 12 + st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #3, mul vl] // 6 // Total: 90 - st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 64 - st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 32 - st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st4d { 
SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 + st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #4, mul vl] // 64 + st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #4, mul vl] // 32 + st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #4, mul vl] // 16 + st4d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #4, mul vl] // 8 // Total: 120 // Total stores: 52 + 60 + 90 + 120 = 322 @@ -381,6 +381,26 @@ test_replicating_loads: ret +#ifdef __ARM_FEATURE_SVE2 + +test_vector_plus_scalar: + ldnt1b DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1sb DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1h DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1sh DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1w DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1sw DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + ldnt1d DEST_REG1.d, D_MASK_REG/z, [Z_BASE_REG.d, X_INDEX_REG] // 2 + // Total: 14 + + stnt1b {SRC_REG1.d}, D_MASK_REG, [Z_BASE_REG.d, X_INDEX_REG] // 2 + stnt1h {SRC_REG1.d}, D_MASK_REG, [Z_BASE_REG.d, X_INDEX_REG] // 2 + stnt1w {SRC_REG1.d}, D_MASK_REG, [Z_BASE_REG.d, X_INDEX_REG] // 2 + stnt1d {SRC_REG1.d}, D_MASK_REG, [Z_BASE_REG.d, X_INDEX_REG] // 2 + // Total: 8 + ret +#endif // __ARM_FEATURE_SVE2 + _start: #ifdef __APPLE__ adrp BUFFER_REG, buffer@PAGE @@ -424,10 +444,19 @@ _start: // +(322 * vl_bytes/16) stores bl test_replicating_loads // +60 loads // +0 stores +#ifdef __ARM_FEATURE_SVE2 + bl test_vector_plus_scalar // +(14 * vl_bytes/16) loads + // +(8 * vl_bytes/16) stores +#endif // Running total: + // SVE only: // Loads: (136 + 14 + 374 + 374) * vl_bytes/16 + 60 = 898 * vl_bytes/16 + 60 // Stores: (82 + 8 + 322 + 322) * vl_bytes/16 = 734 * vl_bytes/16 + // Including SVE2: + // Loads: ((898 + 14) * 
vl_bytes/16) + 60 = (912 * vl_bytes/16) + 60 + // Stores: (734 + 8) * vl_bytes/16 = 742 * vl_bytes/16 + /* Run all the instructions with no active elements */ pfalse B_MASK_REG.b // Set all elements to inactive in the mask regs @@ -440,11 +469,19 @@ _start: bl test_scalar_plus_scalar // +0 loads, +0 stores bl test_scalar_plus_immediate // +0 loads, +0 stores bl test_replicating_loads // +0 loads, +0 stores +#ifdef __ARM_FEATURE_SVE2 + bl test_vector_plus_scalar // +0 loads, +0 stores +#endif // Running total (unchanged from above): + // SVE only: // Loads: (898 * vl_bytes/16) + 60 // Stores: 734 * vl_bytes/16 + // Including SVE2: + // Loads: (912 * vl_bytes/16) + 60 + // Stores: 742 * vl_bytes/16 + /* Run all instructions with one active element */ ptrue B_MASK_REG.b, VL1 // Set 1 element to active in the mask regs. ptrue H_MASK_REG.h, VL1 // The rest of the elements are inactive. @@ -456,28 +493,46 @@ _start: bl test_scalar_plus_scalar // +56 loads, +46 stores bl test_scalar_plus_immediate // +56 loads, +46 stores bl test_replicating_loads // +8 loads, +0 stores +#ifdef __ARM_FEATURE_SVE2 + bl test_vector_plus_scalar // +7 loads, +4 stores +#endif // Running total: + // SVE only: // Loads: (898 * vl_bytes/16) + 60 + 52 + 7 + 56 + 56 + 8 = (898 * vl_bytes/16) + 239 // Stores: (734 * vl_bytes/16) + 41 + 4 + 46 + 46 = (734 * vl_bytes/16) + 127 + // Including SVE2: + // Loads: (912 * vl_bytes/16) + 239 + 7 = (912 * vl_bytes/16) + 246 + // Stores: (742 * vl_bytes/16) + 127 + 4 = (742 * vl_bytes/16) + 131 + // The functions in this file have the following instructions counts: - // _start 40 + // _start 40 (+3 SVE2) // test_scalar_plus_vector 84 // test_vector_plus_immediate 12 // test_scalar_plus_scalar 55 // test_scalar_plus_immediate 55 // test_replicating_loads 9 + // test_vector_plus_scalar 12 // So there are 40 + 84 + 12 + 55 + 55 + 9 = 255 unique instructions + // (or 255 + 12 + 3 = 270 including SVE2) // We run the test_* functions 3 times each so the total 
instruction executed is // ((84 + 12 + 55 + 55 + 9) * 3) + 40 = (215 * 3) + 37 = 685 + // (or 685 + 3 + (12 * 3) = 724 including SVE2) // Totals: + // SVE only: // Loads: (898 * vl_bytes/16) + 239 // Stores: (734 * vl_bytes/16) + 127 // Instructions: 685 // Unique instructions: 255 + // Including SVE2: + // Loads: (912 * vl_bytes/16) + 246 + // Stores: (742 * vl_bytes/16) + 131 + // Instructions: 724 + // Unique instructions: 270 + // Exit. mov w0, #1 // stdout #ifdef __APPLE__ @@ -502,4 +557,9 @@ helloworld: .ascii "Hello, world!\n" buffer: - .zero 1024 // Maximum size of an SVE Z register * 4. + .zero 2048 // Maximum size of an SVE Z register * 8. + // This gives us enough space to use an offset of + // #1, mul vl for scalar+immediate/vector+immediate + // instructions which lets us check the VL scaling of + // offsets in the IR in + // tool.drcacheoff.allasm-scattergather-vl-view diff --git a/clients/drcachesim/tests/analysis_unit_tests.cpp b/clients/drcachesim/tests/analysis_unit_tests.cpp index c580d27efbb..1329cd217c9 100644 --- a/clients/drcachesim/tests/analysis_unit_tests.cpp +++ b/clients/drcachesim/tests/analysis_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -67,7 +67,7 @@ class mock_analyzer_t : public analyzer_t { scheduler_t::scheduler_options_t sched_ops; if (sched_ops_in != nullptr) { shard_type_ = SHARD_BY_CORE; - sched_ops = *sched_ops_in; + sched_ops = std::move(*sched_ops_in); // XXX: We could refactor init_scheduler_common() to share a couple of // these lines. 
if (sched_ops.quantum_unit == sched_type_t::QUANTUM_TIME) @@ -76,7 +76,8 @@ class mock_analyzer_t : public analyzer_t { sched_ops = scheduler_t::make_scheduler_parallel_options(verbosity_); else sched_ops = scheduler_t::make_scheduler_serial_options(verbosity_); - if (scheduler_.init(sched_inputs, worker_count_, sched_ops) != + sched_mapping_ = sched_ops.mapping; + if (scheduler_.init(sched_inputs, worker_count_, std::move(sched_ops)) != sched_type_t::STATUS_SUCCESS) { assert(false); success_ = false; @@ -366,10 +367,122 @@ test_wait_records() return true; } +bool +test_tool_errors() +{ + // Tool errors can hang the analyzer if it doesn't tell the scheduler + // it's giving up on its input. We test that here. + std::cerr << "\n----------------\nTesting tool errors\n"; + + static constexpr int NUM_INPUTS = 5; + static constexpr int NUM_OUTPUTS = 2; + static constexpr int NUM_INSTRS = 9; + static constexpr memref_tid_t TID_BASE = 100; + std::vector inputs[NUM_INPUTS]; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + inputs[i].push_back(make_thread(tid)); + inputs[i].push_back(make_pid(1)); + for (int j = 0; j < NUM_INSTRS; j++) + inputs[i].push_back(make_instr(42 + j * 4)); + if (i == 4) { + // This one input will trigger an error in our error_tool_t. 
+ inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_CPU_ID, 4)); + } + inputs[i].push_back(make_exit(tid)); + } + + std::vector sched_inputs; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + std::vector readers; + readers.emplace_back(std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), tid); + sched_inputs.emplace_back(std::move(readers)); + } + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_IGNORE, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/1); + + static const char *const TOOL_ERROR_STRING = "cpuid not supported"; + + class error_tool_t : public analysis_tool_t { + public: + bool + process_memref(const memref_t &memref) override + { + assert(false); // Only expect parallel mode. + return false; + } + bool + print_results() override + { + return true; + } + bool + parallel_shard_supported() override + { + return true; + } + void * + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) override + { + auto per_shard = new per_shard_t; + return reinterpret_cast(per_shard); + } + bool + parallel_shard_exit(void *shard_data) override + { + per_shard_t *shard = reinterpret_cast(shard_data); + delete shard; + return true; + } + std::string + parallel_shard_error(void *shard_data) override + { + per_shard_t *shard = reinterpret_cast(shard_data); + return shard->error; + } + bool + parallel_shard_memref(void *shard_data, const memref_t &memref) override + { + per_shard_t *shard = reinterpret_cast(shard_data); + // Return an error in one of the inputs. 
+ if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID) { + shard->error = TOOL_ERROR_STRING; + return false; + } + return true; + } + + private: + struct per_shard_t { + std::string error; + }; + }; + + std::vector tools; + auto test_tool = std::unique_ptr(new error_tool_t); + tools.push_back(test_tool.get()); + mock_analyzer_t analyzer(sched_inputs, &tools[0], (int)tools.size(), + /*parallel=*/true, NUM_OUTPUTS, &sched_ops); + assert(!!analyzer); + // If the analyzer doesn't give up the input in the output stream that + // encounters it, the scheduler will hang waiting for that input, + // so failure in this test would be a CTest timeout. + bool res = analyzer.run(); + assert(!res); + assert(analyzer.get_error_string() == TOOL_ERROR_STRING); + return true; +} + int test_main(int argc, const char *argv[]) { - if (!test_queries() || !test_wait_records()) + if (!test_queries() || !test_wait_records() || !test_tool_errors()) return 1; std::cerr << "All done!\n"; return 0; diff --git a/clients/drcachesim/tests/burst_syscall_inject.cpp b/clients/drcachesim/tests/burst_syscall_inject.cpp index 50ef8c1824a..5fae87068e6 100644 --- a/clients/drcachesim/tests/burst_syscall_inject.cpp +++ b/clients/drcachesim/tests/burst_syscall_inject.cpp @@ -202,6 +202,12 @@ postprocess(void *dr_context, std::string syscall_trace_template_file) std::string error = raw2trace.do_conversion(); if (!error.empty()) FATAL_ERROR("raw2trace failed: %s\n", error.c_str()); + uint64 injected_syscall_count = + raw2trace.get_statistic(RAW2TRACE_STAT_SYSCALL_TRACES_INJECTED); + if (injected_syscall_count != 2) { + std::cerr << "Incorrect injected syscall count (" << injected_syscall_count + << ")\n"; + } std::cerr << "Done post-processing the raw trace\n"; return outdir; } @@ -398,14 +404,14 @@ test_main(int argc, const char *argv[]) void *dr_context = dr_standalone_init(); std::string syscall_trace_template = write_system_call_template(dr_context); 
std::cerr << "Getting basic counts for system call trace template\n"; - basic_counts_t::counters_t counts = get_basic_counts(syscall_trace_template); - if (!(counts.instrs == 2 && counts.encodings == 2 && - counts.syscall_number_markers == 2)) { + basic_counts_t::counters_t template_counts = get_basic_counts(syscall_trace_template); + if (!(template_counts.instrs == 2 && template_counts.encodings == 2 && + template_counts.syscall_number_markers == 2)) { std::cerr << "Unexpected counts in system call trace template: " - << syscall_trace_template << ": #instrs: " << counts.instrs - << ", #encodings: " << counts.encodings - << ", #syscall_number_markers: " << counts.syscall_number_markers - << "\n"; + << syscall_trace_template << ": #instrs: " << template_counts.instrs + << ", #encodings: " << template_counts.encodings + << ", #syscall_number_markers: " + << template_counts.syscall_number_markers << "\n"; return 1; } @@ -414,7 +420,16 @@ test_main(int argc, const char *argv[]) instr_destroy(dr_context, instr_in_getpid); instr_destroy(dr_context, instr_in_gettid); dr_standalone_exit(); - return success ? 
0 : 1; + if (!success) { + return 1; + } + basic_counts_t::counters_t final_trace_counts = get_basic_counts(trace_dir); + if (final_trace_counts.kernel_instrs != 2) { + std::cerr << "Unexpected kernel instr count in the final trace (" + << final_trace_counts.kernel_instrs << ")\n"; + return 1; + } + return 0; } } // namespace drmemtrace diff --git a/clients/drcachesim/tests/core_on_disk.templatex b/clients/drcachesim/tests/core_on_disk.templatex new file mode 100644 index 00000000000..9d45ae880df --- /dev/null +++ b/clients/drcachesim/tests/core_on_disk.templatex @@ -0,0 +1,21 @@ +Basic counts tool results: +Total counts: +.* + 8 total threads +.* +Core [0-3] counts: +.* + 2 threads +.* +Core [0-3] counts: +.* + 2 threads +.* +Core [0-3] counts: +.* + 2 threads +.* +Core [0-3] counts: +.* + 2 threads +.* diff --git a/clients/drcachesim/tests/core_on_disk_schedule.templatex b/clients/drcachesim/tests/core_on_disk_schedule.templatex new file mode 100644 index 00000000000..df62b6f68ef --- /dev/null +++ b/clients/drcachesim/tests/core_on_disk_schedule.templatex @@ -0,0 +1,5 @@ +.* +Core #0 schedule: (FJ_|EK_|GI|CHC_C__) +Core #1 schedule: (FJ_|EK_|GI|CHC_C__) +Core #2 schedule: (FJ_|EK_|GI|CHC_C__) +Core #3 schedule: (FJ_|EK_|GI|CHC_C__) diff --git a/clients/drcachesim/tests/core_serial.templatex b/clients/drcachesim/tests/core_serial.templatex new file mode 100644 index 00000000000..44591102026 --- /dev/null +++ b/clients/drcachesim/tests/core_serial.templatex @@ -0,0 +1,68 @@ +Schedule stats tool results: +Total counts: + 4 cores + 8 threads + 638938 instructions + 6 total context switches + 0.0093906 CSPKI \(context switches per 1000 instructions\) + 106490 instructions per context switch + 6 voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + 161 system calls + 2 maybe-blocking system calls + 0 direct switch requests + 0 waits + 345686 idles + 64.89% cpu busy by record count + [0-9 ]* cpu microseconds + [0-9 
]* idle microseconds + [0-9 ]* idle microseconds at last instr + [0-9\. ]*% cpu busy by time + [0-9\. ]*% cpu busy by time, ignoring idle past last instr +Core #0 counts: +.* +Core #1 counts: +.* +Core #2 counts: +.* +Core #3 counts: +.* +Core #0 schedule: [A-H_]* +Core #1 schedule: [A-H_]* +Core #2 schedule: [A-H_]* +Core #3 schedule: [A-H_]* + +=========================================================================== +Basic counts tool results: +Total counts: + 638938 total \(fetched\) instructions + 5969 total unique \(fetched\) instructions + 546585 total non-fetched instructions + 0 total prefetches + 676963 total data loads + 725896 total data stores + 0 total icache flushes + 0 total dcache flushes + 8 total threads + 1574 total scheduling markers + 168 total transfer markers + 0 total function id markers + 0 total function return address markers + 0 total function argument markers + 0 total function return value markers + 0 total physical address \+ virtual address marker pairs + 0 total physical address unavailable markers + 161 total system call number markers + 2 total blocking system call markers + 40 total other markers + 8569 total encodings +Core [0-9] counts: +.* +Core [0-9] counts: +.* +Core [0-9] counts: +.* +Core [0-9] counts: +.* diff --git a/clients/drcachesim/tests/core_sharded.templatex b/clients/drcachesim/tests/core_sharded.templatex new file mode 100644 index 00000000000..c4b55dc5884 --- /dev/null +++ b/clients/drcachesim/tests/core_sharded.templatex @@ -0,0 +1,56 @@ +Reuse time tool aggregated results: +Total accesses: 1402859 +Total instructions: 638938 +.* +================================================== +Reuse time tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +================================================== +Reuse time tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +================================================== +Reuse time tool results for shard [0-9] \(core [0-9]\): +Total 
accesses: .* +.* +================================================== +Reuse time tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +=========================================================================== +Reuse distance tool aggregated results: +Total accesses: 2041797 +Instruction accesses: 638938 +.* +================================================== +Reuse distance tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +================================================== +Reuse distance tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +================================================== +Reuse distance tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +================================================== +Reuse distance tool results for shard [0-9] \(core [0-9]\): +Total accesses: .* +.* +=========================================================================== +Cache line histogram tool results: +icache: 618 unique cache lines +dcache: 695 unique cache lines +.* +=========================================================================== +Opcode mix tool results: + 1185523 : total executed instructions + 555262 : rep movs +.* +=========================================================================== +Syscall mix tool results: + syscall count : syscall_num + 84 : 15 +.* diff --git a/clients/drcachesim/tests/drcachesim_unit_tests.cpp b/clients/drcachesim/tests/drcachesim_unit_tests.cpp index 750aefc584d..481203222a7 100644 --- a/clients/drcachesim/tests/drcachesim_unit_tests.cpp +++ b/clients/drcachesim/tests/drcachesim_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -31,8 +31,11 @@ */ // Unit tests for drcachesim + #include #include +#include + #undef NDEBUG #include #include "config_reader_unit_test.h" @@ -77,10 +80,11 @@ make_test_knobs() memref_t make_memref(addr_t address, trace_type_t type = TRACE_TYPE_READ, int size = 4) { - memref_t ref; + memref_t ref = {}; ref.data.type = type; ref.data.size = size; ref.data.addr = address; + ref.data.tid = 1; return ref; } @@ -763,6 +767,55 @@ unit_test_cache_accessors() } } +void +unit_test_core_sharded() +{ + { + // Test invalid cpu_scheduling + core-sharded combo. + cache_simulator_knobs_t knobs = make_test_knobs(); + knobs.cpu_scheduling = true; + cache_simulator_t sim(knobs); + std::string error = sim.initialize_shard_type(SHARD_BY_CORE); + assert(!error.empty()); + } + { + // Test cpu to core mapping by passing larger integers as cpus. + cache_simulator_knobs_t knobs = make_test_knobs(); + knobs.num_cores = 2; + cache_simulator_t sim(knobs); + default_memtrace_stream_t stream; + sim.initialize_stream(&stream); + std::string error = sim.initialize_shard_type(SHARD_BY_CORE); + assert(error.empty()); + memref_t ref = make_memref(42); + stream.set_shard_index(0); + stream.set_output_cpuid(123400); + bool res = sim.process_memref(ref); + assert(res); + stream.set_shard_index(1); + stream.set_output_cpuid(567800); + res = sim.process_memref(ref); + assert(res); + // Capture output. + std::stringstream output; + std::streambuf *prev_buf = std::cerr.rdbuf(output.rdbuf()); + res = sim.print_results(); + assert(res); + std::cerr.rdbuf(prev_buf); + // Make sure the large cpuids are mapped to core 0 and core 1. + // XXX: This regex causes a "regex_constants::error_complexity" + // exception on Windows; for now we disable this part of the test there. 
+#ifndef WINDOWS + assert(std::regex_search(output.str(), std::regex(R"DELIM((.|\r?\n)* +Core #0 \(traced CPU\(s\): #123400\) +(.|\r?\n)* +Core #1 \(traced CPU\(s\): #567800\) +(.|\r?\n)* +)DELIM"))); +#endif + } +} + int test_main(int argc, const char *argv[]) { @@ -783,6 +836,7 @@ test_main(int argc, const char *argv[]) unit_test_sim_refs(); unit_test_child_hits(); unit_test_cache_replacement_policy(); + unit_test_core_sharded(); return 0; } diff --git a/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip b/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip index defafa0588f..e4383cdb0c8 100644 Binary files a/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip and b/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip differ diff --git a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10507.6178.trace.gz b/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10507.6178.trace.gz deleted file mode 100644 index d0214e3ef07..00000000000 Binary files a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10507.6178.trace.gz and /dev/null differ diff --git a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10508.1635.trace.gz b/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10508.1635.trace.gz deleted file mode 100644 index 0ec9bf6cb9c..00000000000 Binary files a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10508.1635.trace.gz and /dev/null differ diff --git a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10509.8547.trace.gz b/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10509.8547.trace.gz deleted file mode 100644 index 00203a988b1..00000000000 Binary files 
a/clients/drcachesim/tests/drmemtrace.legacy-for-record-filter.x64.tracedir/drmemtrace.threadsig.10509.8547.trace.gz and /dev/null differ diff --git a/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.0.trace.zip b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.0.trace.zip new file mode 100644 index 00000000000..690c07dca85 Binary files /dev/null and b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.0.trace.zip differ diff --git a/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.1.trace.zip b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.1.trace.zip new file mode 100644 index 00000000000..9291d0be7be Binary files /dev/null and b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.1.trace.zip differ diff --git a/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.2.trace.zip b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.2.trace.zip new file mode 100644 index 00000000000..f4ce5fbbfcf Binary files /dev/null and b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.2.trace.zip differ diff --git a/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.3.trace.zip b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.3.trace.zip new file mode 100644 index 00000000000..9b144143e03 Binary files /dev/null and b/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir/drmemtrace.core.3.trace.zip differ diff --git a/clients/drcachesim/tests/invariant_checker_test.cpp b/clients/drcachesim/tests/invariant_checker_test.cpp index 9a90e2fe614..081e768e030 100644 --- a/clients/drcachesim/tests/invariant_checker_test.cpp +++ 
b/clients/drcachesim/tests/invariant_checker_test.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2021-2023 Google, LLC All rights reserved. + * Copyright (c) 2021-2024 Google, LLC All rights reserved. * **********************************************************/ /* @@ -122,7 +122,12 @@ run_checker(const std::vector &memrefs, bool expect_error, { checker_no_abort_t checker(/*offline=*/true, /*serial=*/true, serial_schedule_file); + default_memtrace_stream_t stream; + checker.initialize_stream(&stream); for (const auto &memref : memrefs) { + int shard_index = static_cast(memref.instr.tid - TID_BASE); + stream.set_tid(memref.instr.tid); + stream.set_shard_index(shard_index); checker.process_memref(memref); } checker.print_results(); @@ -149,22 +154,33 @@ run_checker(const std::vector &memrefs, bool expect_error, } checker_no_abort_t checker(/*offline=*/true, /*serial=*/false, serial_schedule_file); + default_memtrace_stream_t stream; + checker.initialize_stream(&stream); void *shardA = nullptr, *shardB = nullptr, *shardC = nullptr; for (const auto &memref : memrefs) { + int shard_index = static_cast(memref.instr.tid - TID_BASE); + stream.set_tid(memref.instr.tid); + stream.set_shard_index(shard_index); switch (memref.instr.tid) { case TID_A: - if (shardA == nullptr) - shardA = checker.parallel_shard_init(TID_A, NULL); + if (shardA == nullptr) { + shardA = + checker.parallel_shard_init_stream(shard_index, nullptr, &stream); + } checker.parallel_shard_memref(shardA, memref); break; case TID_B: - if (shardB == nullptr) - shardB = checker.parallel_shard_init(TID_B, NULL); + if (shardB == nullptr) { + shardB = + checker.parallel_shard_init_stream(shard_index, nullptr, &stream); + } checker.parallel_shard_memref(shardB, memref); break; case TID_C: - if (shardC == nullptr) - shardC = checker.parallel_shard_init(TID_C, NULL); + if (shardC == nullptr) { + shardC = + checker.parallel_shard_init_stream(shard_index, nullptr, 
&stream); + } checker.parallel_shard_memref(shardC, memref); break; default: std::cerr << "Internal test error: unknown tid\n"; return false; @@ -2925,6 +2941,278 @@ check_exit_found(void) return true; } +bool +check_kernel_syscall_trace(void) +{ + std::cerr << "Testing kernel syscall traces\n"; +#if defined(WINDOWS) && !defined(X64) + // TODO i#5949: For WOW64 instr_is_syscall() always returns false, so our + // checks do not currently work properly there. + return true; +#else + // XXX: Just like raw2trace_unit_tests, we need to create a syscall instruction + // and it turns out there is no simple cross-platform way. +# ifdef X86 + instr_t *sys = INSTR_CREATE_syscall(GLOBAL_DCONTEXT); +# elif defined(AARCHXX) + instr_t *sys = + INSTR_CREATE_svc(GLOBAL_DCONTEXT, opnd_create_immed_int((sbyte)0x0, OPSZ_1)); +# elif defined(RISCV64) + instr_t *sys = INSTR_CREATE_ecall(GLOBAL_DCONTEXT); +# else +# error Unsupported architecture. +# endif + instr_t *move = + XINST_CREATE_move(GLOBAL_DCONTEXT, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *load = XINST_CREATE_load(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + OPND_CREATE_MEMPTR(REG1, /*disp=*/0)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, sys); + instrlist_append(ilist, move); + instrlist_append(ilist, load); + static constexpr addr_t BASE_ADDR = 0x123450; + static constexpr uintptr_t FILE_TYPE = OFFLINE_FILE_TYPE_ENCODINGS | + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY; + bool res = true; + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { 
gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, false)) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_instr(TID_A), load }, + // No data memref for the above load, but it should not be an invariant + // violation because the trace type is + // OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY. + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, false)) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, + OFFLINE_FILE_TYPE_ENCODINGS | OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | + OFFLINE_FILE_TYPE_KERNEL_SYSCALLS), + nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_instr(TID_A), load }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Missing read records", + /*tid=*/TID_A, + /*ref_ordinal=*/10, 
/*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/3 }, + "Failed to catch missing data ref")) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, + OFFLINE_FILE_TYPE_ENCODINGS | OFFLINE_FILE_TYPE_SYSCALL_NUMBERS), + nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Found kernel syscall trace without corresponding file type", + /*tid=*/TID_A, + /*ref_ordinal=*/6, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/1 }, + "Failed to catch mismatching file type")) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 41), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Mismatching syscall num in trace start and syscall marker", + /*tid=*/TID_A, + /*ref_ordinal=*/6, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/1 }, + "Failed to catch mismatching trace start marker value")) + res = false; + } + { + 
std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 41), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Mismatching syscall num in trace end and syscall marker", + /*tid=*/TID_A, + /*ref_ordinal=*/8, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/2 }, + "Failed to catch mismatching trace end marker value")) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Found kernel syscall trace end without start", + /*tid=*/TID_A, + /*ref_ordinal=*/7, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/2 }, + "Failed to catch missing kernel trace start marker")) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys 
}, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CPU_ID, 11), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "System call trace found without prior syscall marker", + /*tid=*/TID_A, + /*ref_ordinal=*/7, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/1 }, + "Failed to catch missing prior sysnum marker")) + res = false; + } + { + std::vector memref_setup = { + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr }, + { gen_instr(TID_A), sys }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr }, + { gen_instr(TID_A), move }, + { gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr }, + { gen_exit(TID_A), nullptr } + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + if (!run_checker(memrefs, true, + { "Nested kernel syscall traces are not expected", + /*tid=*/TID_A, + /*ref_ordinal=*/8, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/1 }, + "Failed to catch nested syscall traces")) + res = false; + } + return res; +#endif +} + +bool +check_has_instructions(void) +{ + std::cerr << "Testing at-least-1-instruction\n"; + // Correct: 1 regular instruction. 
+ { + std::vector memrefs = { + gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), + gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), + gen_instr(TID_A), + gen_exit(TID_A), + }; + if (!run_checker(memrefs, false)) + return false; + } + // Correct: 1 unfetched instruction. + { + std::vector memrefs = { + gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), + gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), + gen_instr_type(TRACE_TYPE_INSTR_NO_FETCH, TID_A, 1), + gen_exit(TID_A), + }; + if (!run_checker(memrefs, false)) + return false; + } + // Incorrect: no instructions. + { + std::vector memrefs = { + gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), + gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), + gen_exit(TID_A), + }; + if (!run_checker(memrefs, true, + { "An unfiltered thread should have at least 1 instruction", + /*tid=*/TID_A, + /*ref_ordinal=*/3, /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/0 }, + "Failed to catch missing instructions")) + return false; + } + return true; +} + int test_main(int argc, const char *argv[]) { @@ -2934,7 +3222,8 @@ test_main(int argc, const char *argv[]) check_rseq_side_exit_discontinuity() && check_schedule_file() && check_branch_decoration() && check_filter_endpoint() && check_timestamps_increase_monotonically() && - check_read_write_records_match_operands() && check_exit_found()) { + check_read_write_records_match_operands() && check_exit_found() && + check_kernel_syscall_trace() && check_has_instructions()) { std::cerr << "invariant_checker_test passed\n"; return 0; } diff --git a/clients/drcachesim/tests/mock_reader.h b/clients/drcachesim/tests/mock_reader.h index e3cc2b4391c..e08d77b7e03 100644 --- a/clients/drcachesim/tests/mock_reader.h +++ b/clients/drcachesim/tests/mock_reader.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. 
All rights reserved. * **********************************************************/ /* @@ -83,6 +83,54 @@ class mock_reader_t : public reader_t { int index_ = -1; }; +// A mock record reader that iterates over a vector of records. +class mock_record_reader_t : public record_reader_t { +public: + mock_record_reader_t() = default; + explicit mock_record_reader_t(const std::vector &trace) + : trace_(trace) + { + verbosity_ = 3; + } + bool + init() override + { + eof_ = false; + ++*this; + return true; + } + bool + read_next_entry() override + { + ++index_; + if (index_ >= static_cast(trace_.size())) { + eof_ = true; + return false; + } + cur_entry_ = trace_[index_]; + return true; + } + std::string + get_stream_name() const override + { + return ""; + } + bool + open_single_file(const std::string &input_path) override + { + return false; + } + bool + open_input_file() override + { + return false; + } + +private: + std::vector trace_; + int index_ = -1; +}; + static inline trace_entry_t make_memref(addr_t addr, trace_type_t type = TRACE_TYPE_READ, unsigned short size = 1) { @@ -112,6 +160,15 @@ make_exit(memref_tid_t tid) return entry; } +static inline trace_entry_t +make_header(int version) +{ + trace_entry_t entry; + entry.type = TRACE_TYPE_HEADER; + entry.addr = version; + return entry; +} + static inline trace_entry_t make_footer() { @@ -168,6 +225,16 @@ make_marker(trace_marker_type_t type, uintptr_t value) return entry; } +static inline trace_entry_t +make_encoding(unsigned short size, addr_t encoding) +{ + trace_entry_t entry; + entry.type = TRACE_TYPE_ENCODING; + entry.size = size; + entry.addr = encoding; + return entry; +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tests/mock_switch_sequences.x64.zip b/clients/drcachesim/tests/mock_switch_sequences.x64.zip new file mode 100644 index 00000000000..dbb2ad0cf9c Binary files /dev/null and b/clients/drcachesim/tests/mock_switch_sequences.x64.zip differ diff --git 
a/clients/drcachesim/tests/offline-allasm-scattergather-basic-counts-aarch64.templatex b/clients/drcachesim/tests/offline-allasm-scattergather-basic-counts-aarch64.templatex index 3cda0549822..23a42ecdb44 100644 --- a/clients/drcachesim/tests/offline-allasm-scattergather-basic-counts-aarch64.templatex +++ b/clients/drcachesim/tests/offline-allasm-scattergather-basic-counts-aarch64.templatex @@ -1,10 +1,27 @@ Hello, world! Basic counts tool results: Total counts: +#ifdef __ARM_FEATURE_SVE2 + 724 total \(fetched\) instructions + 270 total unique \(fetched\) instructions +#else 685 total \(fetched\) instructions 255 total unique \(fetched\) instructions +#endif 0 total non-fetched instructions 0 total prefetches +#ifdef __ARM_FEATURE_SVE2 +#if (__ARM_FEATURE_SVE_BITS == 128) + 1158 total data loads + 873 total data stores +#elif (__ARM_FEATURE_SVE_BITS == 256) + 2070 total data loads + 1615 total data stores +#elif (__ARM_FEATURE_SVE_BITS == 512) + 3894 total data loads + 3099 total data stores +#endif /* __ARM_FEATURE_SVE_BITS */ +#else #if (__ARM_FEATURE_SVE_BITS == 128) 1137 total data loads 861 total data stores @@ -14,17 +31,36 @@ Total counts: #elif (__ARM_FEATURE_SVE_BITS == 512) 3831 total data loads 3063 total data stores -#endif +#endif /* __ARM_FEATURE_SVE_BITS */ +#endif /* __ARM_FEATURE_SVE2 */ 0 total icache flushes 0 total dcache flushes 1 total threads .* total scheduling markers .* Thread .* counts: +#ifdef __ARM_FEATURE_SVE2 + 724 \(fetched\) instructions + 270 unique \(fetched\) instructions +#else 685 \(fetched\) instructions 255 unique \(fetched\) instructions +#endif + 0 non-fetched instructions 0 prefetches +#ifdef __ARM_FEATURE_SVE2 +#if (__ARM_FEATURE_SVE_BITS == 128) + 1158 data loads + 873 data stores +#elif (__ARM_FEATURE_SVE_BITS == 256) + 2070 data loads + 1615 data stores +#elif (__ARM_FEATURE_SVE_BITS == 512) + 3894 data loads + 3099 data stores +#endif /* __ARM_FEATURE_SVE_BITS */ +#else #if (__ARM_FEATURE_SVE_BITS == 128) 1137 data 
loads 861 data stores @@ -34,7 +70,8 @@ Thread .* counts: #elif (__ARM_FEATURE_SVE_BITS == 512) 3831 data loads 3063 data stores -#endif +#endif /* __ARM_FEATURE_SVE_BITS */ +#endif /* __ARM_FEATURE_SVE2 */ 0 icache flushes 0 dcache flushes .* scheduling markers diff --git a/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex new file mode 100644 index 00000000000..7e7d070e6f4 --- /dev/null +++ b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex @@ -0,0 +1,15 @@ +.* +#if __ARM_FEATURE_SVE_BITS == 128 +.* +.*a401a03c ld1b \+0x10\(%x1\)\[1byte\] %p0/z -> %z28\.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x30\(%x1\)\[8byte\] +#elif __ARM_FEATURE_SVE_BITS == 256 +.* +.*a401a03c ld1b \+0x20\(%x1\)\[1byte\] %p0/z -> %z28.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x60\(%x1\)\[8byte\] +#elif __ARM_FEATURE_SVE_BITS == 512 +.* +.*a401a03c ld1b \+0x40\(%x1\)\[1byte\] %p0/z -> %z28.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0xc0\(%x1\)\[8byte\] +#endif +.* diff --git a/clients/drcachesim/tests/offline-burst_syscall_inject.templatex b/clients/drcachesim/tests/offline-burst_syscall_inject.templatex index 338600b4f79..78967a45b6d 100644 --- a/clients/drcachesim/tests/offline-burst_syscall_inject.templatex +++ b/clients/drcachesim/tests/offline-burst_syscall_inject.templatex @@ -10,11 +10,4 @@ Going to post-process raw trace and add system call trace templates to it Done post-processing the raw trace Verifying resulting user\+kernel trace Successfully completed checks -Basic counts tool results: -Total counts: - .* total \(fetched\) instructions - .* total unique \(fetched\) instructions - .* total non-fetched instructions - .* total userspace instructions - 2 total kernel instructions -.* +Trace invariant checks passed diff --git a/clients/drcachesim/tests/offline-interval-instr-count-output.templatex 
b/clients/drcachesim/tests/offline-interval-instr-count-output.templatex new file mode 100644 index 00000000000..e0a1268b7c7 --- /dev/null +++ b/clients/drcachesim/tests/offline-interval-instr-count-output.templatex @@ -0,0 +1,37 @@ +Hello, world! +Basic counts tool results: +Total counts: + [ 0-9]* total \(fetched\) instructions + [ 0-9]* total unique \(fetched\) instructions + [ 0-9]* total non-fetched instructions + [ 0-9]* total prefetches + [ 0-9]* total data loads + [ 0-9]* total data stores + [ 0-9]* total icache flushes + [ 0-9]* total dcache flushes + 1 total threads + [ 0-9]* total scheduling markers +.* +Thread [0-9]* counts: + [ 0-9]* \(fetched\) instructions + [ 0-9]* unique \(fetched\) instructions + [ 0-9]* non-fetched instructions + [ 0-9]* prefetches + [ 0-9]* data loads + [ 0-9]* data stores + [ 0-9]* icache flushes + [ 0-9]* dcache flushes + [ 0-9]* scheduling markers +.* +Printing unmerged per-shard interval results: +Counts per trace interval for TID.* +Interval #1 ending at timestamp [0-9]*: + 10000 interval delta \(fetched\) instructions + [ 0-9]* interval delta non-fetched instructions + [ 0-9]* interval delta prefetches + [ 0-9]* interval delta data loads + [ 0-9]* interval delta data stores + [ 0-9]* interval delta icache flushes + [ 0-9]* interval delta dcache flushes + [ 0-9]* interval delta scheduling markers +.* diff --git a/clients/drcachesim/tests/offline-interval-count-output.templatex b/clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex similarity index 96% rename from clients/drcachesim/tests/offline-interval-count-output.templatex rename to clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex index 08b84128398..bb1b946ce5d 100644 --- a/clients/drcachesim/tests/offline-interval-count-output.templatex +++ b/clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex @@ -23,6 +23,7 @@ Thread [0-9]* counts: [ 0-9]* dcache flushes [ 0-9]* scheduling markers .* 
+Printing whole-trace interval results: Counts per trace interval for whole trace: Interval #1 ending at timestamp [0-9]*: [ 0-9]* interval delta \(fetched\) instructions diff --git a/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex b/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex new file mode 100644 index 00000000000..2071186a068 --- /dev/null +++ b/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex @@ -0,0 +1,12 @@ +Hello, world! + *Opcode mix tool results: + *[0-9]* : total executed instructions + *[1-9][0-9]* : *[a-z][ a-z]* + *[1-9][0-9]* : *[a-z][ a-z]* +.* +Printing unmerged per-shard interval results: +There were [0-9]* intervals created. +ID:1 ending at instruction 10000 has [0-9]* opcodes and [0-9]* categories. + *\[1\] Opcode: [ a-z]* \([0-9]*\) Count=[0-9]* PKI=[0-9\.]* + *\[2\] Opcode: [ a-z]* \([0-9]*\) Count=[0-9]* PKI=[0-9\.]* +.* diff --git a/clients/drcachesim/tests/offline-kernel-invariant-checker.templatex b/clients/drcachesim/tests/offline-kernel-invariant-checker.templatex new file mode 100644 index 00000000000..9bf382c444b --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-invariant-checker.templatex @@ -0,0 +1,2 @@ +Hello, world! +.* diff --git a/clients/drcachesim/tests/offline-opcode_categories.templatex b/clients/drcachesim/tests/offline-opcode_categories.templatex new file mode 100644 index 00000000000..a321ef549b5 --- /dev/null +++ b/clients/drcachesim/tests/offline-opcode_categories.templatex @@ -0,0 +1,32 @@ +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! 
+Opcode mix tool results: + 133 : total executed instructions + 34 : mov + 17 : mov + 17 : syscall + 16 : sub + 16 : cmp + 16 : jnz + 16 : lea + 1 : and + + 4 : sets of categories + 51 : move + 33 : branch + 33 : math + 16 : load diff --git a/clients/drcachesim/tests/offline-skip.expect b/clients/drcachesim/tests/offline-skip.expect index 2b59ef3d908..8e216115a3a 100644 --- a/clients/drcachesim/tests/offline-skip.expect +++ b/clients/drcachesim/tests/offline-skip.expect @@ -1,19 +1,18 @@ Output format: <--record#-> <--instr#->: <---tid---> ------------------------------------------------------------ - 86 62: 296231 - 86 62: 296231 - 87 63: 296231 ifetch 2 byte(s) @ 0x0000000000401026 0f 05 syscall -> %rcx %r11 - 88 63: 296231 - 89 63: 296231 - 90 64: 296231 ifetch 4 byte(s) @ 0x0000000000401028 48 83 eb 01 sub $0x0000000000000001 %rbx -> %rbx - 91 65: 296231 ifetch 4 byte(s) @ 0x000000000040102c 48 83 fb 00 cmp %rbx $0x0000000000000000 - 92 66: 296231 ifetch 2 byte(s) @ 0x0000000000401030 75 d9 jnz $0x000000000040100b - 93 67: 296231 ifetch 7 byte(s) @ 0x000000000040100b 48 c7 c7 01 00 00 00 mov $0x0000000000000001 -> %rdi - 94 68: 296231 ifetch 8 byte(s) @ 0x0000000000401012 48 8d 34 25 00 20 40 lea 0x00402000 -> %rsi - 94 68: 296231 00 + 114 62: 1992554 + 114 62: 1992554 + 115 63: 1992554 ifetch 2 byte(s) @ 0x0000000000401026 0f 05 syscall -> %rcx %r11 + 116 63: 1992554 + 117 63: 1992554 + 118 63: 1992554 + 119 63: 1992554 + 120 63: 1992554 + 121 63: 1992554 + 122 64: 1992554 ifetch 4 byte(s) @ 0x0000000000401028 48 83 eb 01 sub $0x0000000000000001 %rbx -> %rbx View tool results: - 6 : total instructions + 2 : total instructions =========================================================================== Trace invariant checks passed diff --git a/clients/drcachesim/tests/offline-skip2.expect b/clients/drcachesim/tests/offline-skip2.expect index 1c00c985ae0..1d678e224da 100644 --- a/clients/drcachesim/tests/offline-skip2.expect +++ 
b/clients/drcachesim/tests/offline-skip2.expect @@ -1,17 +1,17 @@ Output format: <--record#-> <--instr#->: <---tid---> ------------------------------------------------------------ - 88 63: 296231 - 89 63: 296231 - 90 64: 296231 ifetch 4 byte(s) @ 0x0000000000401028 48 83 eb 01 sub $0x0000000000000001 %rbx -> %rbx - 91 65: 296231 ifetch 4 byte(s) @ 0x000000000040102c 48 83 fb 00 cmp %rbx $0x0000000000000000 - 92 66: 296231 ifetch 2 byte(s) @ 0x0000000000401030 75 d9 jnz $0x000000000040100b - 93 67: 296231 ifetch 7 byte(s) @ 0x000000000040100b 48 c7 c7 01 00 00 00 mov $0x0000000000000001 -> %rdi - 94 68: 296231 ifetch 8 byte(s) @ 0x0000000000401012 48 8d 34 25 00 20 40 lea 0x00402000 -> %rsi - 94 68: 296231 00 - 95 69: 296231 ifetch 7 byte(s) @ 0x000000000040101a 48 c7 c2 0d 00 00 00 mov $0x000000000000000d -> %rdx - 96 70: 296231 ifetch 5 byte(s) @ 0x0000000000401021 b8 01 00 00 00 mov $0x00000001 -> %eax - 97 71: 296231 ifetch 2 byte(s) @ 0x0000000000401026 0f 05 syscall -> %rcx %r11 + 120 63: 1992554 + 121 63: 1992554 + 122 64: 1992554 ifetch 4 byte(s) @ 0x0000000000401028 48 83 eb 01 sub $0x0000000000000001 %rbx -> %rbx + 123 65: 1992554 ifetch 4 byte(s) @ 0x000000000040102c 48 83 fb 00 cmp %rbx $0x0000000000000000 + 124 66: 1992554 ifetch 2 byte(s) @ 0x0000000000401030 75 d9 jnz $0x000000000040100b (taken) + 125 67: 1992554 ifetch 7 byte(s) @ 0x000000000040100b 48 c7 c7 01 00 00 00 mov $0x0000000000000001 -> %rdi + 126 68: 1992554 ifetch 8 byte(s) @ 0x0000000000401012 48 8d 34 25 00 20 40 lea 0x00402000 -> %rsi + 126 68: 1992554 00 + 127 69: 1992554 ifetch 7 byte(s) @ 0x000000000040101a 48 c7 c2 0d 00 00 00 mov $0x000000000000000d -> %rdx + 128 70: 1992554 ifetch 5 byte(s) @ 0x0000000000401021 b8 01 00 00 00 mov $0x00000001 -> %eax + 129 71: 1992554 ifetch 2 byte(s) @ 0x0000000000401026 0f 05 syscall -> %rcx %r11 View tool results: 8 : total instructions diff --git a/clients/drcachesim/tests/offline-trim.templatex 
b/clients/drcachesim/tests/offline-trim.templatex new file mode 100644 index 00000000000..ac6a55cf090 --- /dev/null +++ b/clients/drcachesim/tests/offline-trim.templatex @@ -0,0 +1,29 @@ +Output 288 entries from 329 entries. +Output format: +<--record#-> <--instr#->: <---tid---> +------------------------------------------------------------ + 1 0: 1992554 + 2 0: 1992554 + 3 0: 1992554 + 4 0: 1992554 + 5 0: 1992554 + 6 0: 1992554 +.* + 207 112: 1992554 + 208 112: 1992554 + 209 113: 1992554 ifetch 4 byte\(s\) @ 0x0000000000401028 48 83 eb 01 sub \$0x0000000000000001 %rbx -> %rbx + 210 114: 1992554 ifetch 4 byte\(s\) @ 0x000000000040102c 48 83 fb 00 cmp %rbx \$0x0000000000000000 + 211 115: 1992554 ifetch 2 byte\(s\) @ 0x0000000000401030 75 d9 jnz \$0x000000000040100b \(taken\) + 212 116: 1992554 ifetch 7 byte\(s\) @ 0x000000000040100b 48 c7 c7 01 00 00 00 mov \$0x0000000000000001 -> %rdi + 213 117: 1992554 ifetch 8 byte\(s\) @ 0x0000000000401012 48 8d 34 25 00 20 40 lea 0x00402000 -> %rsi + 213 117: 1992554 00 + 214 118: 1992554 ifetch 7 byte\(s\) @ 0x000000000040101a 48 c7 c2 0d 00 00 00 mov \$0x000000000000000d -> %rdx + 215 119: 1992554 ifetch 5 byte\(s\) @ 0x0000000000401021 b8 01 00 00 00 mov \$0x00000001 -> %eax + 216 120: 1992554 ifetch 2 byte\(s\) @ 0x0000000000401026 0f 05 syscall -> %rcx %r11 + 217 120: 1992554 + 218 120: 1992554 +View tool results: + 120 : total instructions + +=========================================================================== +Trace invariant checks passed diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex index c1f4943ae9d..c2a3ddecf7c 100644 --- a/clients/drcachesim/tests/offline-view.templatex +++ b/clients/drcachesim/tests/offline-view.templatex @@ -7,9 +7,16 @@ Output format: 3 0: +[0-9]+ 4 0: +[0-9]+ 5 0: +[0-9]+ +#ifdef __ARM_FEATURE_SVE + 6 0: +[0-9]+ + 7 0: +[0-9]+ + 8 0: +[0-9]+ + 9 1: +[0-9]+ ifetch .* +#else 6 0: +[0-9]+ 7 0: +[0-9]+ 8 1: +[0-9]+ ifetch .* 
+#endif .* View tool results: *[0-9]* : total instructions diff --git a/clients/drcachesim/tests/raw2trace_unit_tests.cpp b/clients/drcachesim/tests/raw2trace_unit_tests.cpp index 889dce30b7b..6466d90fd1e 100644 --- a/clients/drcachesim/tests/raw2trace_unit_tests.cpp +++ b/clients/drcachesim/tests/raw2trace_unit_tests.cpp @@ -1508,6 +1508,84 @@ test_rseq_rollback_with_signal(void *drcontext) check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); } +/* Tests rseq rollback with taken branch right before signal. */ +bool +test_rseq_rollback_with_signal_after_branch(void *drcontext) +{ + std::cerr << "\n===============\nTesting rseq rollback with taken branch before " + "signal.\n"; + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move1 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *store = + XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(REG2, 0), opnd_create_reg(REG1)); + instr_t *jcc = + XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(store)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + instrlist_append(ilist, nop); + instrlist_append(ilist, jcc); + instrlist_append(ilist, move1); + instrlist_append(ilist, store); + instrlist_append(ilist, move2); + size_t offs_nop = 0; + size_t offs_jcc = offs_nop + instr_length(drcontext, nop); + size_t offs_move1 = offs_jcc + instr_length(drcontext, jcc); + size_t offs_store = offs_move1 + instr_length(drcontext, move1); + size_t offs_move2 = offs_store + instr_length(drcontext, store); + + std::vector raw; + raw.push_back(make_header()); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + raw.push_back(make_timestamp()); + raw.push_back(make_core()); + raw.push_back(make_marker(TRACE_MARKER_TYPE_RSEQ_ENTRY, offs_move2)); + 
raw.push_back(make_block(offs_jcc, 1)); + raw.push_back(make_marker(TRACE_MARKER_TYPE_RSEQ_ABORT, offs_store)); + raw.push_back(make_marker(TRACE_MARKER_TYPE_KERNEL_EVENT, offs_store)); + raw.push_back(make_block(offs_move2, 1)); + raw.push_back(make_exit()); + + std::vector stats; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, &stats)) + return false; + int idx = 0; + return ( + stats[RAW2TRACE_STAT_RSEQ_ABORT] == 1 && + check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RSEQ_ENTRY) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_TAKEN_JUMP, -1, offs_jcc) && + // The move1 and committing store should not be here. + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RSEQ_ABORT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_KERNEL_EVENT) && + // Normally the handler is called with another kernel event. The + // trace here is simplified to test rseq abort followed by kernel event. + // The move2 instr. 
+ check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_move2) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); +} + /* Tests rollback i#5954 where a chunk boundary splits an rseq region. */ bool test_rseq_rollback_with_chunks(void *drcontext) @@ -1984,6 +2062,203 @@ test_rseq_side_exit_inverted_with_timestamp(void *drcontext) check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); } +/* Tests an inverted rseq side exit with delayed branch and synthetic jump (i#6346). */ +bool +test_rseq_side_exit_inverted_with_delayed_branches(void *drcontext) +{ + std::cerr + << "\n===============\nTesting inverted rseq side exit with delayed branches\n"; + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move1 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *move3 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + // Our conditional jumps over the jump which is the exit. 
+ instr_t *jcc = + XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(move1)); + instr_t *jmp = XINST_CREATE_jump(drcontext, opnd_create_instr(move3)); + instr_t *store = + XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(REG2, 0), opnd_create_reg(REG1)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + instrlist_append(ilist, nop); + instrlist_append(ilist, jcc); + instrlist_append(ilist, jmp); + instrlist_append(ilist, move1); + instrlist_append(ilist, store); + instrlist_append(ilist, move2); + instrlist_append(ilist, move3); + size_t offs_nop = 0; + size_t offs_jcc = offs_nop + instr_length(drcontext, nop); + size_t offs_jmp = offs_jcc + instr_length(drcontext, jcc); + size_t offs_move1 = offs_jmp + instr_length(drcontext, jmp); + size_t offs_store = offs_move1 + instr_length(drcontext, move1); + size_t offs_move2 = offs_store + instr_length(drcontext, store); + size_t offs_move3 = offs_move2 + instr_length(drcontext, move2); + + std::vector raw; + raw.push_back(make_header()); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + raw.push_back(make_timestamp()); + raw.push_back(make_core()); + raw.push_back(make_marker(TRACE_MARKER_TYPE_RSEQ_ENTRY, offs_move2)); + // The jcc is taken and we don't see the side exit in instrumented execution. + raw.push_back(make_block(offs_jcc, 1)); + raw.push_back(make_timestamp()); + raw.push_back(make_core()); + // The end of our rseq sequence, ending in a committing store. + raw.push_back(make_block(offs_move1, 2)); + raw.push_back(make_memref(42)); + // A discontinuity as we continue with the side exit target. 
+ raw.push_back(make_block(offs_move3, 1)); + raw.push_back(make_exit()); + + std::vector stats; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, &stats)) + return false; + int idx = 0; + return ( + stats[RAW2TRACE_STAT_RSEQ_SIDE_EXIT] == 1 && + check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RSEQ_ENTRY) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // The jcc instr. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_UNTAKEN_JUMP, -1, offs_jcc) && + // The jmp which raw2trace has to synthesize. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_DIRECT_JUMP, -1, offs_jmp) && + // The move1, move2, and committing store should be gone. + // We should go straight to the move3 instr. 
+ check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_move3) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); +} + +/* Tests rseq side exit with timestamps. */ +bool +test_rseq_side_exit_with_timestamps(void *drcontext) +{ + std::cerr << "\n===============\nTesting rseq side exit with timestamps\n"; + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move1 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + instr_t *move3 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *move4 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *jmp1 = XINST_CREATE_jump(drcontext, opnd_create_instr(move4)); + instr_t *jcc = XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(jmp1)); + instr_t *store = + XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(REG2, 0), opnd_create_reg(REG1)); + instrlist_append(ilist, nop); + instrlist_append(ilist, jcc); + instrlist_append(ilist, move1); + instrlist_append(ilist, store); + instrlist_append(ilist, move2); + instrlist_append(ilist, jmp1); + instrlist_append(ilist, move3); + instrlist_append(ilist, move4); + size_t offs_nop = 0; + size_t offs_jcc = offs_nop + instr_length(drcontext, nop); + size_t offs_move1 = offs_jcc + instr_length(drcontext, jcc); + size_t offs_store = offs_move1 + instr_length(drcontext, move1); + size_t offs_move2 = offs_store + instr_length(drcontext, store); + size_t offs_jmp1 = offs_move2 + instr_length(drcontext, move2); + size_t offs_move3 = offs_jmp1 + instr_length(drcontext, jmp1); + size_t offs_move4 = offs_move3 + instr_length(drcontext, move3); + + 
std::vector raw; + raw.push_back(make_header()); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + raw.push_back(make_timestamp(/*value=*/1)); + raw.push_back(make_core()); + raw.push_back(make_marker(TRACE_MARKER_TYPE_RSEQ_ENTRY, offs_move2)); + raw.push_back(make_block(offs_jcc, /*instr_count=*/1)); + // The jcc jumps to jmp1, and skips move1, store, and move2. + raw.push_back(make_block(offs_move1, /*instr_count=*/2)); + raw.push_back(make_memref(/*addr=*/42)); + raw.push_back(make_block(offs_jmp1, /*instr_count=*/1)); + raw.push_back(make_timestamp(/*value=*/2)); + raw.push_back(make_core()); + raw.push_back(make_timestamp(/*value=*/3)); + raw.push_back(make_core()); + raw.push_back(make_block(offs_move4, /*instr_count=*/1)); + raw.push_back(make_exit()); + + std::vector stats; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, &stats)) + return false; + int idx = 0; + return ( + stats[RAW2TRACE_STAT_RSEQ_SIDE_EXIT] == 1 && + check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RSEQ_ENTRY) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, 
TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // The jcc instr. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_TAKEN_JUMP, -1, offs_jcc) && + // The move1, move2, and committing store should be gone. + // We should go straight to the jmp1 instr. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_DIRECT_JUMP, -1) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_move4) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); +} + /* Tests a trace ending mid-rseq (i#6444). * If at_end is true, tests the endpoint just being reached but not pased; * else tests the endpoint not being reached. 
@@ -2892,12 +3167,15 @@ test_main(int argc, const char *argv[]) !test_rseq_rollback_legacy(drcontext) || !test_rseq_rollback(drcontext) || !test_rseq_rollback_with_timestamps(drcontext) || !test_rseq_rollback_with_signal(drcontext) || + !test_rseq_rollback_with_signal_after_branch(drcontext) || !test_rseq_rollback_with_chunks(drcontext) || !test_rseq_side_exit(drcontext) || !test_rseq_side_exit_signal(drcontext) || !test_rseq_side_exit_inverted(drcontext) || !test_rseq_side_exit_inverted_with_timestamp(drcontext) || - !test_midrseq_end(drcontext) || !test_xfer_modoffs(drcontext) || - !test_xfer_absolute(drcontext) || !test_branch_decoration(drcontext) || + !test_rseq_side_exit_inverted_with_delayed_branches(drcontext) || + !test_rseq_side_exit_with_timestamps(drcontext) || !test_midrseq_end(drcontext) || + !test_xfer_modoffs(drcontext) || !test_xfer_absolute(drcontext) || + !test_branch_decoration(drcontext) || !test_stats_timestamp_instr_count(drcontext) || !test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext)) return 1; diff --git a/clients/drcachesim/tests/record_filter-offline.templatex b/clients/drcachesim/tests/record_filter-offline.templatex new file mode 100644 index 00000000000..3700fba38a9 --- /dev/null +++ b/clients/drcachesim/tests/record_filter-offline.templatex @@ -0,0 +1,10 @@ +#ifdef WINDOWS +Hit delay threshold: enabling tracing. +Exiting process after .* references. +#else +Hello, world! +#endif +Trace invariant checks passed +Output .* entries from .* entries. +Done! +Trace invariant checks passed diff --git a/clients/drcachesim/tests/record_filter_as_traced.templatex b/clients/drcachesim/tests/record_filter_as_traced.templatex new file mode 100644 index 00000000000..e9e8f316122 --- /dev/null +++ b/clients/drcachesim/tests/record_filter_as_traced.templatex @@ -0,0 +1,10 @@ +Output .* entries from .* entries. 
+Schedule stats tool results: +.* +Core #0 schedule: .* +Core #1 schedule: .* +Core #2 schedule: .* +Core #3 schedule: .* +Core #4 schedule: .* +Core #5 schedule: .* +Core #6 schedule: .* diff --git a/clients/drcachesim/tests/record_filter_bycore_multi.templatex b/clients/drcachesim/tests/record_filter_bycore_multi.templatex new file mode 100644 index 00000000000..bd9b2d341f2 --- /dev/null +++ b/clients/drcachesim/tests/record_filter_bycore_multi.templatex @@ -0,0 +1,8 @@ +Estimation of pi is 3.14.* +Trace invariant checks passed +Output .* entries from .* entries. +Schedule stats tool results: +.* +Core #0 schedule: .* +Core #1 schedule: .* +Core #2 schedule: .* diff --git a/clients/drcachesim/tests/record_filter_bycore_uni.templatex b/clients/drcachesim/tests/record_filter_bycore_uni.templatex new file mode 100644 index 00000000000..723ce48676b --- /dev/null +++ b/clients/drcachesim/tests/record_filter_bycore_uni.templatex @@ -0,0 +1,14 @@ +#ifdef WINDOWS +Hit delay threshold: enabling tracing. +Exiting process after .* references. +#else +Hello, world! +#endif +Trace invariant checks passed +Output .* entries from .* entries. +Schedule stats tool results: +.* +Core #0 schedule: .* +Core #1 schedule: .* +Core #2 schedule: .* +Core #3 schedule: .* diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 48e66b35098..22907ae6851 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -33,21 +33,27 @@ /* Unit tests for the record_filter analyzer. 
*/ #include "analyzer.h" +#include "archive_ostream.h" #include "dr_api.h" #include "droption.h" #include "tools/basic_counts.h" #include "tools/filter/null_filter.h" #include "tools/filter/cache_filter.h" #include "tools/filter/record_filter.h" +#include "tools/filter/trim_filter.h" #include "tools/filter/type_filter.h" +#include "zipfile_ostream.h" #include #include +#include #include namespace dynamorio { namespace drmemtrace { +using record_filter_func_t = + ::dynamorio::drmemtrace::record_filter_t::record_filter_func_t; using ::dynamorio::droption::droption_parser_t; using ::dynamorio::droption::DROPTION_SCOPE_ALL; using ::dynamorio::droption::DROPTION_SCOPE_FRONTEND; @@ -81,15 +87,16 @@ static droption_t op_tmp_output_dir( class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t { public: test_record_filter_t(std::vector> filters, - uint64_t last_timestamp) + uint64_t last_timestamp, bool write_archive = false) : record_filter_t("", std::move(filters), last_timestamp, /*verbose=*/0) + , write_archive_(write_archive) { } std::vector get_output_entries() { - return output; + return output_; } protected: @@ -97,17 +104,35 @@ class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t { write_trace_entry(dynamorio::drmemtrace::record_filter_t::per_shard_t *shard, const trace_entry_t &entry) override { - output.push_back(entry); + output_.push_back(entry); + shard->cur_refs += shard->memref_counter.entry_memref_count(&entry); + shard->last_written_record = entry; return true; } - std::unique_ptr + std::string get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream) override { - return std::unique_ptr(new std::ofstream("/dev/null")); + if (write_archive_) { + per_shard->archive_writer = + std::unique_ptr(new zipfile_ostream_t("/dev/null")); + per_shard->writer = per_shard->archive_writer.get(); + return open_new_chunk(per_shard); + } + per_shard->file_writer = + std::unique_ptr(new std::ofstream("/dev/null")); + 
per_shard->writer = per_shard->file_writer.get(); + return ""; + } + std::string + remove_output_file(per_shard_t *per_shard) override + { + output_.clear(); + return ""; } private: - std::vector output; + std::vector output_; + bool write_archive_ = false; }; class local_stream_t : public default_memtrace_stream_t { @@ -122,11 +147,29 @@ class local_stream_t : public default_memtrace_stream_t { { last_timestamp_ = last_timestamp; } + int64_t + get_input_id() const override + { + // Just one input for our tests. + return 0; + } private: uint64_t last_timestamp_; }; +struct test_case_t { + trace_entry_t entry; + // Specifies whether the entry should be processed by the record_filter + // as an input. Some entries are added only to show the expected output + // and shouldn't be used as input to the record_filter. + bool input; + // Specifies whether the entry should be expected in the result of the + // record filter. This is an array of size equal to the number of test + // cases. + std::vector output; +}; + static bool local_create_dir(const char *dir) { @@ -161,27 +204,98 @@ print_entry(trace_entry_t entry) entry.addr); } +bool +process_entries_and_check_result(test_record_filter_t *record_filter, + const std::vector &entries, int index) +{ + auto stream = std::unique_ptr(new local_stream_t()); + void *shard_data = + record_filter->parallel_shard_init_stream(0, nullptr, stream.get()); + if (!*record_filter) { + fprintf(stderr, "Filtering init failed: %s\n", + record_filter->get_error_string().c_str()); + return false; + } + // Process each trace entry. + for (int i = 0; i < static_cast(entries.size()); ++i) { + // We need to emulate the stream for the tool. 
+ if (entries[i].entry.type == TRACE_TYPE_MARKER && + entries[i].entry.size == TRACE_MARKER_TYPE_TIMESTAMP) + stream->set_last_timestamp(entries[i].entry.addr); + if (entries[i].input && + !record_filter->parallel_shard_memref(shard_data, entries[i].entry)) { + fprintf(stderr, "Filtering failed on entry %d: %s\n", i, + record_filter->parallel_shard_error(shard_data).c_str()); + return false; + } + } + if (!record_filter->parallel_shard_exit(shard_data) || !*record_filter) { + fprintf(stderr, "Filtering exit failed\n"); + return false; + } + if (!record_filter->print_results()) { + fprintf(stderr, "Filtering results failed\n"); + return false; + } + + std::vector filtered = record_filter->get_output_entries(); + // Verbose output for easier debugging. + fprintf(stderr, "Input:\n"); + for (int i = 0; i < static_cast(entries.size()); ++i) { + if (!entries[i].input) + continue; + fprintf(stderr, " %d: ", i); + print_entry(entries[i].entry); + fprintf(stderr, "\n"); + } + fprintf(stderr, "Output:\n"); + for (int i = 0; i < static_cast(filtered.size()); ++i) { + fprintf(stderr, " %d: ", i); + print_entry(filtered[i]); + fprintf(stderr, "\n"); + } + // Check filtered output entries. + int j = 0; + for (int i = 0; i < static_cast(entries.size()); ++i) { + if (!entries[i].output[index]) + continue; + if (j >= static_cast(filtered.size())) { + fprintf(stderr, + "Too few entries in filtered output (iter=%d). Expected: ", index); + print_entry(entries[i].entry); + fprintf(stderr, "\n"); + return false; + } + if (memcmp(&filtered[j], &entries[i].entry, sizeof(trace_entry_t)) != 0) { + fprintf(stderr, + "Wrong filter result for iter=%d, at pos=%d. Expected: ", index, i); + print_entry(entries[i].entry); + fprintf(stderr, ", got: "); + print_entry(filtered[j]); + fprintf(stderr, "\n"); + return false; + } + ++j; + } + if (j < static_cast(filtered.size())) { + fprintf(stderr, "Got %d extra entries in filtered output (iter=%d). 
Next one: ", + static_cast(filtered.size()) - j, index); + print_entry(filtered[j]); + fprintf(stderr, "\n"); + return false; + } + return true; +} + static bool test_cache_and_type_filter() { - struct test_case { - trace_entry_t entry; - // Specifies whether the entry should be processed by the record_filter - // as an input. Some entries are added only to show the expected output - // and shouldn't be used as input to the record_filter. - bool input; - // Specifies whether the entry should be expected in the result of the - // record filter. This is an array of size equal to the number of test - // cases. - bool output[2]; - }; - // We test two configurations: // 1. filter data address stream using a cache, and filter function markers // and encoding entries, without any stop timestamp. // 2. filter data and instruction address stream using a cache, with a // stop timestamp. - std::vector entries = { + std::vector entries = { // Trace shard header. { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true, true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, @@ -219,17 +333,17 @@ test_cache_and_type_filter() { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true, true } }, + { { TRACE_TYPE_ENCODING, 4, { 0xf00d } }, true, { false, true } }, { { TRACE_TYPE_INSTR, 4, { 0xaa00 } }, true, { true, true } }, { { TRACE_TYPE_WRITE, 4, { 0xaa80 } }, true, { true, true } }, - // Unit header. For the 1st test, this is skipped, since no entry - // is output from this unit. + // Unit header. { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x9 } }, true, - { false, true } }, + { true, true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xa } }, true, - { false, true } }, + { true, true } }, // Filtered out by cache_filter. { { TRACE_TYPE_WRITE, 4, { 0xaa90 } }, true, { false, false } }, // For the 1st test: filtered out by type_filter. @@ -262,7 +376,7 @@ test_cache_and_type_filter() // cache_filter. 
{ { TRACE_TYPE_ENCODING, 4, { 0x8bad } }, true, { false, false } }, { { TRACE_TYPE_ENCODING, 2, { 0xf00d } }, true, { false, false } }, - { { TRACE_TYPE_INSTR, 4, { 0xaa80 } }, true, { true, false } }, + { { TRACE_TYPE_INSTR, 6, { 0xaa80 } }, true, { true, false } }, // Filtered out by the cache_filter. { { TRACE_TYPE_READ, 4, { 0xaaa0 } }, true, { false, false } }, @@ -289,27 +403,23 @@ test_cache_and_type_filter() // For the 1st test: encoding entries are filtered out by type_filter. { { TRACE_TYPE_ENCODING, 4, { 0xdead } }, true, { false, true } }, { { TRACE_TYPE_ENCODING, 2, { 0xbeef } }, true, { false, true } }, - { { TRACE_TYPE_INSTR, 4, { 0xab80 } }, true, { true, true } }, + { { TRACE_TYPE_INSTR, 6, { 0xab80 } }, true, { true, true } }, // For the 2nd test: Delayed encodings from the previous instance // of the instruction at PC 0xaa80 that was filtered out. { { TRACE_TYPE_ENCODING, 4, { 0x8bad } }, false, { false, true } }, { { TRACE_TYPE_ENCODING, 2, { 0xf00d } }, false, { false, true } }, - { { TRACE_TYPE_INSTR, 4, { 0xaa80 } }, true, { true, true } }, + { { TRACE_TYPE_INSTR, 6, { 0xaa80 } }, true, { true, true } }, // Trace shard footer. { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true, true } } }; for (int k = 0; k < 2; ++k) { - auto stream = std::unique_ptr(new local_stream_t()); // Construct record_filter_func_ts. 
- std::vector< - std::unique_ptr> - filters; - auto cache_filter = - std::unique_ptr( - new dynamorio::drmemtrace::cache_filter_t( - /*cache_associativity=*/1, /*cache_line_size=*/64, /*cache_size=*/128, - /*filter_data=*/true, /*filter_instrs=*/k == 1)); + std::vector> filters; + auto cache_filter = std::unique_ptr( + new dynamorio::drmemtrace::cache_filter_t( + /*cache_associativity=*/1, /*cache_line_size=*/64, /*cache_size=*/128, + /*filter_data=*/true, /*filter_instrs=*/k == 1)); if (cache_filter->get_error_string() != "") { fprintf(stderr, "Couldn't construct a cache_filter %s", cache_filter->get_error_string().c_str()); @@ -318,8 +428,7 @@ test_cache_and_type_filter() filters.push_back(std::move(cache_filter)); if (k == 0) { - auto type_filter = std::unique_ptr< - dynamorio::drmemtrace::record_filter_t::record_filter_func_t>( + auto type_filter = std::unique_ptr( new dynamorio::drmemtrace::type_filter_t({ TRACE_TYPE_ENCODING }, { TRACE_MARKER_TYPE_FUNC_ID, TRACE_MARKER_TYPE_FUNC_RETADDR, @@ -336,68 +445,570 @@ test_cache_and_type_filter() uint64_t stop_timestamp = k == 0 ? 0 : 0xabcdee; auto record_filter = std::unique_ptr( new test_record_filter_t(std::move(filters), stop_timestamp)); - void *shard_data = - record_filter->parallel_shard_init_stream(0, nullptr, stream.get()); - if (!*record_filter) { - fprintf(stderr, "Filtering init failed\n"); + if (!process_entries_and_check_result(record_filter.get(), entries, k)) return false; - } + } + fprintf(stderr, "test_cache_and_type_filter passed\n"); + return true; +} - // Process each trace entry. - for (int i = 0; i < static_cast(entries.size()); ++i) { - // We need to emulate the stream for the tool. 
- if (entries[i].entry.type == TRACE_TYPE_MARKER && - entries[i].entry.size == TRACE_MARKER_TYPE_TIMESTAMP) - stream->set_last_timestamp(entries[i].entry.addr); - if (entries[i].input && - !record_filter->parallel_shard_memref(shard_data, entries[i].entry)) { - fprintf(stderr, "Filtering failed\n"); - return false; - } - } - if (!record_filter->parallel_shard_exit(shard_data) || !*record_filter) { - fprintf(stderr, "Filtering exit failed\n"); +static bool +test_chunk_update() +{ + { + // Test that the ordinal marker is updated on removing records. + // From Chunk 1 we remove 3 visible records (the _FUNC_ ones); the encodings + // are also removed but are not visible in the record count. + std::vector entries = { + // Header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { 0 } }, false, { true } }, + { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 0x2 } }, + true, + { true } }, + // Chunk 1. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { 0xf00d } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { 0x1234 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { 0xf00d } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { 0x1235 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { true } }, + // Chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 9 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { 0xf00d } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { 0x1236 } }, true, { true } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true } }, + }; + std::vector> filters; + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::type_filter_t({ TRACE_TYPE_ENCODING }, + { TRACE_MARKER_TYPE_FUNC_ID, + TRACE_MARKER_TYPE_FUNC_RETADDR, + TRACE_MARKER_TYPE_FUNC_ARG })); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a type_filter %s", + filter->get_error_string().c_str()); return false; } - - // Check filtered output entries. 
- std::vector filtered = record_filter->get_output_entries(); - int j = 0; - for (int i = 0; i < static_cast(entries.size()); ++i) { - if (!entries[i].output[k]) - continue; - if (j >= static_cast(filtered.size())) { - fprintf(stderr, - "Too few entries in filtered output (iter=%d). Expected: ", k); - print_entry(entries[i].entry); - fprintf(stderr, "\n"); - return false; + filters.push_back(std::move(filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + { + // Test that a filtered-out instr doesn't have new-chunk encodings added. + class ordinal_filter_t : public record_filter_t::record_filter_func_t { + public: + ordinal_filter_t(std::set ordinals) + : ordinals_(ordinals) + { } - if (memcmp(&filtered[j], &entries[i].entry, sizeof(trace_entry_t)) != 0) { - fprintf(stderr, - "Wrong filter result for iter=%d, at pos=%d. Expected: ", k, i); - print_entry(entries[i].entry); - fprintf(stderr, ", got: "); - print_entry(filtered[j]); - fprintf(stderr, "\n"); - return false; + void * + parallel_shard_init(memtrace_stream_t *shard_stream, + bool partial_trace_filter) override + { + return nullptr; + } + bool + parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + { + bool res = true; + if (type_is_instr(static_cast(entry.type))) { + if (ordinals_.find(cur_ord_) != ordinals_.end()) + res = false; + ++cur_ord_; + } + return res; + } + bool + parallel_shard_exit(void *shard_data) override + { + return true; } - ++j; + + private: + // Our test class supports only a single small shard. 
+ std::set ordinals_; + int cur_ord_ = 0; + }; + constexpr addr_t PC_A = 0x1234; + constexpr addr_t PC_B = 0x5678; + constexpr addr_t ENCODING_A = 0x4321; + constexpr addr_t ENCODING_B = 0x8765; + // We have the following where "e" means encoding and | divides chunks and + // x means we removed it: + // "eA A A | eB B eA" => "eA A x eB | x eA" + // We check to ensure the 2nd B, now removed, has no encoding added. + std::vector entries = { + // Header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { true } }, + { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 0x3 } }, + true, + { true } }, + // Chunk 1. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_A } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Chunk 2. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x8 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + false, + { true } }, + // New chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x8 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, false, { true } }, + // This is the heart of this test: there should be no inserted new-chunk + // encoding for this filtered-out instruction. + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_A } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { true } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true } }, + }; + std::vector> filters; + auto filter = + std::unique_ptr(new ordinal_filter_t({ 2, 4 })); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a pc_filter %s", + filter->get_error_string().c_str()); + return false; } - if (j < static_cast(filtered.size())) { - fprintf(stderr, - "Got %d extra entries in filtered output (iter=%d). 
Next one: ", - static_cast(filtered.size()) - j, k); - print_entry(filtered[j]); - fprintf(stderr, "\n"); + filters.push_back(std::move(filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + fprintf(stderr, "test_chunk_update passed\n"); + return true; +} + +static bool +test_trim_filter() +{ + constexpr addr_t TID = 5; + constexpr addr_t PC_A = 0x1234; + constexpr addr_t ENCODING_A = 0x4321; + constexpr addr_t PC_B = 0x5678; + constexpr addr_t ENCODING_B = 0x8765; + { + // Test invalid parameters. + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(150, 149)); + if (filter->get_error_string().empty()) { + fprintf(stderr, "Failed to return an error on invalid params"); + return false; + } + auto filter2 = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(150, 150)); + if (filter2->get_error_string().empty()) { + fprintf(stderr, "Failed to return an error on invalid params"); return false; } } - fprintf(stderr, "test_cache_and_type_filter passed\n"); + { + // Test removing from mid-way in the 1st chunk to the very end. + std::vector entries = { + // Header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { true } }, + { { TRACE_TYPE_THREAD, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, + true, + { true } }, + // Chunk 1. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 100 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_A } }, true, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { true } }, + // Removal starts here. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + // These footer records should remain. + { { TRACE_TYPE_THREAD_EXIT, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true } }, + }; + std::vector> filters; + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(50, 150)); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a trim_filter %s", + filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + { + // Test removing from the start to mid-way in the 1st chunk. + // This requires repeating encodings in the new chunks. + std::vector entries = { + // Header. 
+ { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { true } }, + { { TRACE_TYPE_THREAD, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, + true, + { true } }, + // Original chunk 1. + // Removal starts here. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 100 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + // Removal ends here. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, false, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Original chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + // Dup timestamp;cpuid should be removed. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + // This encoding is not repeated b/c this is now in the same chunk as + // the prior instance of this same instr. + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + false, + { true } }, + // New chunk 2. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 9 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, false, { true } }, + // This encoding is added since it is the first instance in a new chunk. + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, false, { true } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Original chunk 3. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, + false, + { true } }, + // New chunk 3. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, false, { true } }, + { { TRACE_TYPE_THREAD_EXIT, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true } }, + }; + std::vector> filters; + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(150, 600)); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a trim_filter %s", + filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + { + // Test removing a zero-instruction thread. + std::vector entries = { + // Header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { false } }, + { { TRACE_TYPE_THREAD, 0, { TID } }, true, { false } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, + true, + { false } }, + // Original chunk 1. + // Removal starts here. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 100 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + // Removal ends here. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_THREAD_EXIT, 0, { TID } }, true, { false } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { false } }, + }; + std::vector> filters; + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(150, 600)); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a trim_filter %s", + filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + { + // Test removing from the start to mid-way in the 1st chunk while also + // removing all encodings. + std::vector entries = { + // Header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ENCODINGS } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { 0 } }, false, { true } }, + { { TRACE_TYPE_THREAD, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, + true, + { true } }, + // Original chunk 1. + // Removal starts here. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 100 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } }, + // Removal ends here. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { true } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, false, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Original chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + // Dup timestamp;cpuid should be removed. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + false, + { true } }, + // New chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 9 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, false, { true } }, + // An encoding would be added here, but we want it removed. + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, false, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, + true, + { false } }, + // Original chunk 3. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } }, + { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, + false, + { true } }, + // New chunk 3. 
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 12 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } }, + false, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, false, { true } }, + { { TRACE_TYPE_THREAD_EXIT, 0, { TID } }, true, { true } }, + { { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, true, { true } }, + }; + std::vector> filters; + auto filter = std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(150, 600)); + if (!filter->get_error_string().empty()) { + fprintf(stderr, "Couldn't construct a trim_filter %s", + filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(filter)); + auto type_filter = std::unique_ptr( + new dynamorio::drmemtrace::type_filter_t({ TRACE_TYPE_ENCODING }, {})); + if (type_filter->get_error_string() != "") { + fprintf(stderr, "Couldn't construct a type_filter %s", + type_filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(type_filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + } + fprintf(stderr, "test_trim_filter passed\n"); return true; } -// Tests I/O for the record_filter. +// Tests I/O for the record_filter on a legacy trace. +// We also have the tool.record_filter test which tests on a freshly generated +// zipfile trace. static bool test_null_filter() { @@ -406,11 +1017,8 @@ test_null_filter() FATAL_ERROR("Failed to create filtered trace output dir %s", output_dir.c_str()); } auto null_filter = - std::unique_ptr( - new dynamorio::drmemtrace::null_filter_t()); - std::vector< - std::unique_ptr> - filter_funcs; + std::unique_ptr(new dynamorio::drmemtrace::null_filter_t()); + std::vector> filter_funcs; filter_funcs.push_back(std::move(null_filter)); // We use a very small stop_timestamp for the record filter. 
This is to verify that // we emit the TRACE_MARKER_TYPE_FILTER_ENDPOINT marker for each thread even if it @@ -454,10 +1062,9 @@ test_main(int argc, const char *argv[]) FATAL_ERROR("Usage error: %s\nUsage:\n%s", parse_err.c_str(), droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); } - if (!test_cache_and_type_filter() || !test_null_filter()) + if (!test_cache_and_type_filter() || !test_chunk_update() || !test_trim_filter() || + !test_null_filter()) return 1; - // TODO i#5675: Add test using a freshly generated trace (during the test) when - // zip support is added. fprintf(stderr, "All done!\n"); return 0; } diff --git a/clients/drcachesim/tests/reuse_distance.templatex b/clients/drcachesim/tests/reuse_distance.templatex index 558e116a016..d60fc21dfd4 100644 --- a/clients/drcachesim/tests/reuse_distance.templatex +++ b/clients/drcachesim/tests/reuse_distance.templatex @@ -2,7 +2,7 @@ Hello, world! ---- ---- Reuse distance tool aggregated results: Total accesses: [1-9][0-9]+ -Instruction accesses: [1-9][0-9]+ +Instruction accesses: [0-9]+ Data accesses: [1-9][0-9]+ Unique accesses: [1-9][0-9]+ Unique cache lines accessed: [0-9]+ diff --git a/clients/drcachesim/tests/reuse_distance_test.cpp b/clients/drcachesim/tests/reuse_distance_test.cpp index d65f69bff6a..778fcae987a 100644 --- a/clients/drcachesim/tests/reuse_distance_test.cpp +++ b/clients/drcachesim/tests/reuse_distance_test.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, LLC All rights reserved. + * Copyright (c) 2015-2024 Google, LLC All rights reserved. * **********************************************************/ /* @@ -64,6 +64,8 @@ class reuse_distance_test_t : public reuse_distance_t { : reuse_distance_t(knobs) { test_verbosity = knobs.verbose; // Set the file verbosity. 
+ stream_ = std::unique_ptr(new default_memtrace_stream_t); + serial_stream_ = stream_.get(); } ~reuse_distance_test_t() @@ -85,11 +87,14 @@ class reuse_distance_test_t : public reuse_distance_t { using reuse_distance_t::get_aggregated_results; using reuse_distance_t::print_histogram; - std::unordered_map & + std::unordered_map & get_shard_map() { return shard_map_; } + +private: + std::unique_ptr stream_; }; // Helper class to return a non-repeating(*) sequence of addresses. diff --git a/clients/drcachesim/tests/scattergather-aarch64.templatex b/clients/drcachesim/tests/scattergather-aarch64.templatex index 6efdb5b4544..1f735049665 100644 --- a/clients/drcachesim/tests/scattergather-aarch64.templatex +++ b/clients/drcachesim/tests/scattergather-aarch64.templatex @@ -277,5 +277,22 @@ st4d scalar\+immediate: PASS st4d scalar\+immediate \(min index\): PASS st4d scalar\+immediate \(max index\): PASS #endif /* __ARM_FEATURE_SVE */ +#ifdef __ARM_FEATURE_SVE2 +ldnt1b vector\+scalar 64bit unscaled offset: PASS +ldnt1sb vector\+scalar 64bit unscaled offset: PASS +ldnt1h vector\+scalar 64bit unscaled offset: PASS +ldnt1sh vector\+scalar 64bit unscaled offset: PASS +ldnt1w vector\+scalar 64bit unscaled offset: PASS +ldnt1sw vector\+scalar 64bit unscaled offset: PASS +ldnt1d vector\+scalar 64bit unscaled offset: PASS +stnt1b vector\+scalar 64bit unscaled offset: PASS +stnt1b vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1h vector\+scalar 64bit unscaled offset: PASS +stnt1h vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1w vector\+scalar 64bit unscaled offset: PASS +stnt1w vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1d vector\+scalar 64bit unscaled offset: PASS +stnt1d vector\+scalar 64bit unscaled offset \(repeated base\): PASS +#endif /* __ARM_FEATURE_SVE2 */ ---- ---- Trace invariant checks passed diff --git a/clients/drcachesim/tests/schedule_stats_test.cpp b/clients/drcachesim/tests/schedule_stats_test.cpp index 
760dc7cab04..7a18fbf0c9e 100644 --- a/clients/drcachesim/tests/schedule_stats_test.cpp +++ b/clients/drcachesim/tests/schedule_stats_test.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2021-2023 Google, LLC All rights reserved. + * Copyright (c) 2021-2024 Google, LLC All rights reserved. * **********************************************************/ /* @@ -63,36 +63,15 @@ using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_SYSCALL; // Bypasses the analyzer and scheduler for a controlled test sequence. // Alternates the per-core memref vectors in lockstep. static schedule_stats_t::counters_t -run_schedule_stats(const std::vector> &memrefs, - const std::unordered_map &tid2ord) +run_schedule_stats(const std::vector> &memrefs) { - schedule_stats_t tool(/*print_every=*/1, /*verbosity=*/2); - // schedule_stats_t uses get_input_id() to identify switches. - class mock_stream_t : public default_memtrace_stream_t { - public: - void - set_input_id(int64_t input_id) - { - input_id_ = input_id; - } - int64_t - get_input_id() const override - { - return input_id_; - } - memtrace_stream_t * - get_input_interface() const override - { - return const_cast(this); - } - - private: - int64_t input_id_ = 0; - }; + // At verbosity 2+ we'd need to subclass default_memtrace_stream_t + // and provide a non-null get_input_interface() (point at "this"). 
+ schedule_stats_t tool(/*print_every=*/1, /*verbosity=*/1); struct per_core_t { void *worker_data; void *shard_data; - mock_stream_t stream; + default_memtrace_stream_t stream; bool finished = false; size_t memref_idx = 0; }; @@ -109,7 +88,7 @@ run_schedule_stats(const std::vector> &memrefs, if (per_core[cpu].finished) continue; memref_t memref = memrefs[cpu][per_core[cpu].memref_idx]; - per_core[cpu].stream.set_input_id(tid2ord.at(memref.instr.tid)); + per_core[cpu].stream.set_tid(memref.instr.tid); bool res = tool.parallel_shard_memref(per_core[cpu].shard_data, memref); assert(res); ++per_core[cpu].memref_idx; @@ -132,10 +111,6 @@ test_basic_stats() static constexpr int64_t TID_A = 42; static constexpr int64_t TID_B = 142; static constexpr int64_t TID_C = 242; - std::unordered_map tid2ord; - tid2ord[TID_A] = 0; - tid2ord[TID_B] = 1; - tid2ord[TID_C] = 2; std::vector> memrefs = { { gen_instr(TID_A), @@ -187,7 +162,7 @@ test_basic_stats() gen_instr(TID_B), }, }; - auto result = run_schedule_stats(memrefs, tid2ord); + auto result = run_schedule_stats(memrefs); assert(result.instrs == 16); assert(result.total_switches == 6); assert(result.voluntary_switches == 2); @@ -210,10 +185,6 @@ test_idle() static constexpr int64_t TID_A = 42; static constexpr int64_t TID_B = 142; static constexpr int64_t TID_C = 242; - std::unordered_map tid2ord; - tid2ord[TID_A] = 0; - tid2ord[TID_B] = 1; - tid2ord[TID_C] = 2; std::vector> memrefs = { { gen_instr(TID_B), @@ -248,7 +219,7 @@ test_idle() gen_instr(TID_A), }, }; - auto result = run_schedule_stats(memrefs, tid2ord); + auto result = run_schedule_stats(memrefs); assert(result.instrs == 13); assert(result.total_switches == 5); assert(result.voluntary_switches == 0); diff --git a/clients/drcachesim/tests/scheduler_launcher.cpp b/clients/drcachesim/tests/scheduler_launcher.cpp index 966f814d7ae..f478bb8b9b8 100644 --- a/clients/drcachesim/tests/scheduler_launcher.cpp +++ b/clients/drcachesim/tests/scheduler_launcher.cpp @@ -1,5 +1,5 
@@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -38,10 +38,6 @@ #ifdef WINDOWS # define UNICODE # define _UNICODE -# define WIN32_LEAN_AND_MEAN -# include -#else -# include #endif #include "droption.h" @@ -49,6 +45,7 @@ #include "scheduler.h" #include "trace_entry.h" #include "test_helpers.h" +#include "utils.h" #ifdef HAS_ZIP # include "zipfile_istream.h" # include "zipfile_ostream.h" @@ -63,6 +60,7 @@ using ::dynamorio::drmemtrace::trace_type_names; using ::dynamorio::drmemtrace::zipfile_istream_t; using ::dynamorio::drmemtrace::zipfile_ostream_t; #endif +using ::dynamorio::drmemtrace::get_microsecond_timestamp; using ::dynamorio::droption::droption_parser_t; using ::dynamorio::droption::DROPTION_SCOPE_ALL; using ::dynamorio::droption::DROPTION_SCOPE_FRONTEND; @@ -127,20 +125,7 @@ droption_t op_print_every(DROPTION_SCOPE_ALL, "print_every", 5000, uint64_t get_current_microseconds() { -#ifdef UNIX - struct timeval time; - if (gettimeofday(&time, nullptr) != 0) - return 0; - return time.tv_sec * 1000000 + time.tv_usec; -#else - SYSTEMTIME sys_time; - GetSystemTime(&sys_time); - FILETIME file_time; - if (!SystemTimeToFileTime(&sys_time, &file_time)) - return 0; - return file_time.dwLowDateTime + - (static_cast(file_time.dwHighDateTime) << 32); -#endif + return get_microsecond_timestamp(); } // Processes the stream of records scheduled on the "ordinal"-th virtual core with @@ -344,7 +329,7 @@ _tmain(int argc, const TCHAR *targv[]) sched_ops.replay_as_traced_istream = cpu_schedule_zip.get(); } #endif - if (scheduler.init(sched_inputs, op_num_cores.get_value(), sched_ops) != + if (scheduler.init(sched_inputs, op_num_cores.get_value(), std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) { FATAL_ERROR("failed to initialize scheduler: %s", 
scheduler.get_error_string().c_str()); diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 0b940ceec06..b217e3ad4ce 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -244,14 +244,19 @@ test_parallel() else assert(tid2stream[memref.instr.tid] == i); // Ensure the ordinals do not accumulate across inputs. - assert( - stream->get_record_ordinal() == - scheduler.get_input_stream_interface(stream->get_input_stream_ordinal()) - ->get_record_ordinal()); + assert(stream->get_record_ordinal() == + scheduler + .get_input_stream_interface(stream->get_input_stream_ordinal()) + ->get_record_ordinal() || + // Relax for early on where the scheduler has read ahead. + stream->get_last_timestamp() == 0); assert( stream->get_instruction_ordinal() == scheduler.get_input_stream_interface(stream->get_input_stream_ordinal()) ->get_instruction_ordinal()); + // Test other queries in parallel mode. + assert(stream->get_tid() == memref.instr.tid); + assert(stream->get_shard_index() == stream->get_input_stream_ordinal()); } } // We expect just 2 records (instr and exit) for each. @@ -779,6 +784,8 @@ test_real_file_queries_and_filters(const char *testdir) assert(stream->get_input_id() == stream->get_input_stream_ordinal()); assert(stream->get_input_interface() == scheduler.get_input_stream_interface(stream->get_input_stream_ordinal())); + assert(stream->get_tid() == memref.instr.tid); + assert(stream->get_shard_index() == stream->get_input_stream_ordinal()); } // Ensure 2 input workloads with 3 streams with proper names. 
assert(max_workload_index == 1); @@ -865,6 +872,8 @@ run_lockstep_simulation(scheduler_t &scheduler, int num_outputs, memref_tid_t ti // fillers to line everything up in time. sched_as_string[i] += NON_INSTR_SYMBOL; } + assert(outputs[i]->get_shard_index() == + outputs[i]->get_output_stream_ordinal()); } } // Ensure we never see the same output on multiple cores in the same timestep. @@ -952,7 +961,7 @@ test_synthetic() sched_ops.quantum_duration = QUANTUM_DURATION; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1005,7 +1014,7 @@ test_synthetic() sched_ops.quantum_duration = QUANTUM_DURATION; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1070,7 +1079,7 @@ test_synthetic_time_quanta() sched_ops.block_time_scale = 10. / (POST_BLOCK_TIME - PRE_BLOCK_TIME); zipfile_ostream_t outfile(record_fname); sched_ops.schedule_record_ostream = &outfile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); auto check_next = [](scheduler_t::stream_t *stream, uint64_t time, @@ -1141,6 +1150,14 @@ test_synthetic_time_quanta() if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } { // Replay. 
std::vector readers; @@ -1158,7 +1175,7 @@ test_synthetic_time_quanta() /*verbosity=*/4); zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1235,7 +1252,7 @@ test_synthetic_with_timestamps() /*verbosity=*/3); sched_ops.quantum_duration = 3; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1321,7 +1338,7 @@ test_synthetic_with_priorities() /*verbosity=*/3); sched_ops.quantum_duration = 3; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1391,7 +1408,7 @@ test_synthetic_with_bindings() /*verbosity=*/3); sched_ops.quantum_duration = 3; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1462,7 +1479,7 @@ test_synthetic_with_bindings_weighted() /*verbosity=*/3); sched_ops.quantum_duration = 3; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1569,7 +1586,7 @@ test_synthetic_with_syscalls_multiple() sched_ops.blocking_switch_threshold = BLOCK_LATENCY; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if 
(scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); // We omit the "." marker chars to keep the strings short enough to be readable. @@ -1665,7 +1682,7 @@ test_synthetic_with_syscalls_single() sched_ops.blocking_switch_threshold = BLOCK_LATENCY; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -1681,7 +1698,8 @@ test_synthetic_with_syscalls_single() static bool check_ref(std::vector &refs, int &idx, memref_tid_t expected_tid, trace_type_t expected_type, - trace_marker_type_t expected_marker = TRACE_MARKER_TYPE_RESERVED_END) + trace_marker_type_t expected_marker = TRACE_MARKER_TYPE_RESERVED_END, + uintptr_t expected_marker_value = 0) { if (expected_tid != refs[idx].instr.tid || expected_type != refs[idx].instr.type) { std::cerr << "Record " << idx << " has tid " << refs[idx].instr.tid @@ -1689,12 +1707,20 @@ check_ref(std::vector &refs, int &idx, memref_tid_t expected_tid, << expected_tid << " and expected type " << expected_type << "\n"; return false; } - if (expected_type == TRACE_TYPE_MARKER && - expected_marker != refs[idx].marker.marker_type) { - std::cerr << "Record " << idx << " has marker type " - << refs[idx].marker.marker_type << " but expected " << expected_marker - << "\n"; - return false; + if (expected_type == TRACE_TYPE_MARKER) { + if (expected_marker != refs[idx].marker.marker_type) { + std::cerr << "Record " << idx << " has marker type " + << refs[idx].marker.marker_type << " but expected " + << expected_marker << "\n"; + return false; + } + if (expected_marker_value != 0 && + expected_marker_value != refs[idx].marker.marker_value) { + std::cerr << "Record " << idx << " has marker value " + << refs[idx].marker.marker_value << " but expected " + << 
expected_marker_value << "\n"; + return false; + } } ++idx; return true; @@ -1750,7 +1776,8 @@ test_synthetic_with_syscalls_precise() scheduler_t::SCHEDULER_DEFAULTS, /*verbosity=*/4); scheduler_t scheduler; - if (scheduler.init(sched_inputs, 1, sched_ops) != scheduler_t::STATUS_SUCCESS) + if (scheduler.init(sched_inputs, 1, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) assert(false); auto *stream = scheduler.get_stream(0); memref_t memref; @@ -1762,7 +1789,6 @@ test_synthetic_with_syscalls_precise() assert(status == scheduler_t::STATUS_OK); refs.push_back(memref); } - std::vector entries; int idx = 0; bool res = true; res = res && @@ -1857,7 +1883,8 @@ test_synthetic_with_syscalls_latencies() sched_ops.blocking_switch_threshold = BLOCK_LATENCY; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, 1, sched_ops) != scheduler_t::STATUS_SUCCESS) + if (scheduler.init(sched_inputs, 1, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) assert(false); auto *stream = scheduler.get_stream(0); memref_t memref; @@ -1871,7 +1898,6 @@ test_synthetic_with_syscalls_latencies() assert(status == scheduler_t::STATUS_OK); refs.push_back(memref); } - std::vector entries; int idx = 0; bool res = true; res = res && @@ -1967,7 +1993,7 @@ test_synthetic_with_syscalls_idle() sched_ops.blocking_switch_threshold = BLOCK_LATENCY; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -2027,7 +2053,7 @@ test_synthetic_multi_threaded(const char *testdir) static constexpr int NUM_OUTPUTS = 4; static constexpr int QUANTUM_DURATION = 2000; sched_ops.quantum_duration = QUANTUM_DURATION; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != 
scheduler_t::STATUS_SUCCESS) assert(false); std::vector threads; @@ -2075,7 +2101,8 @@ test_speculation() sched_ops.flags = static_cast( static_cast(sched_ops.flags) | static_cast(scheduler_t::SCHEDULER_SPECULATE_NOPS)); - if (scheduler.init(sched_inputs, 1, sched_ops) != scheduler_t::STATUS_SUCCESS) + if (scheduler.init(sched_inputs, 1, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) assert(false); int ordinal = 0; auto *stream = scheduler.get_stream(0); @@ -2255,7 +2282,7 @@ test_replay() sched_ops.schedule_record_ostream = &outfile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -2268,6 +2295,14 @@ test_replay() if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } // Now replay the schedule several times to ensure repeatability. 
for (int outer = 0; outer < 5; ++outer) { std::vector sched_inputs; @@ -2287,7 +2322,7 @@ test_replay() sched_ops.schedule_replay_istream = &infile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -2370,7 +2405,7 @@ test_replay_multi_threaded(const char *testdir) sched_ops.schedule_record_ostream = &outfile; static constexpr int QUANTUM_DURATION = 2000; sched_ops.quantum_duration = QUANTUM_DURATION; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector threads; @@ -2385,6 +2420,14 @@ test_replay_multi_threaded(const char *testdir) if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } { // Replay. 
scheduler_t scheduler; @@ -2396,7 +2439,7 @@ test_replay_multi_threaded(const char *testdir) /*verbosity=*/1); zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector> replay_sequence(NUM_OUTPUTS); @@ -2531,7 +2574,7 @@ test_replay_timestamps() zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -2643,7 +2686,7 @@ test_replay_noeof() zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -2711,7 +2754,8 @@ test_replay_skip() /*verbosity=*/4); zipfile_ostream_t outfile(record_fname); sched_ops.schedule_record_ostream = &outfile; - if (scheduler.init(sched_inputs, 1, sched_ops) != scheduler_t::STATUS_SUCCESS) + if (scheduler.init(sched_inputs, 1, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) assert(false); auto *stream = scheduler.get_stream(0); memref_t memref; @@ -2722,6 +2766,14 @@ test_replay_skip() if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } { // Replay. 
std::vector readers; @@ -2738,7 +2790,8 @@ test_replay_skip() /*verbosity=*/4); zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; - if (scheduler.init(sched_inputs, 1, sched_ops) != scheduler_t::STATUS_SUCCESS) + if (scheduler.init(sched_inputs, 1, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) assert(false); int ordinal = 0; auto *stream = scheduler.get_stream(0); @@ -2885,7 +2938,7 @@ test_replay_limit() /*verbosity=*/2); zipfile_ostream_t outfile(record_fname); sched_ops.schedule_record_ostream = &outfile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector threads; @@ -2899,6 +2952,14 @@ test_replay_limit() thread.join(); if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); + } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); for (int i = 0; i < NUM_OUTPUTS; ++i) { std::cerr << "Output #" << i << " schedule: " << record_schedule[i] << "\n"; } @@ -2923,7 +2984,7 @@ test_replay_limit() /*verbosity=*/2); zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector replay_instr_count(NUM_OUTPUTS, 0); @@ -2972,7 +3033,7 @@ test_replay_limit() zipfile_ostream_t outfile(record_fname); sched_ops.schedule_record_ostream = &outfile; sched_ops.quantum_duration = NUM_INSTRS / 10; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector threads; @@ 
-2986,6 +3047,14 @@ test_replay_limit() thread.join(); if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); + } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); int switches = 0; for (int i = 0; i < NUM_OUTPUTS; ++i) { std::cerr << "Output #" << i << " schedule: " << record_schedule[i] << "\n"; @@ -3083,7 +3152,7 @@ test_replay_as_traced() zipfile_istream_t infile(cpu_fname); sched_ops.replay_as_traced_istream = &infile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); // Test that we can find the mappings from as-traced cpuid to output stream, @@ -3095,6 +3164,7 @@ test_replay_as_traced() assert(cpu == CPU0); else assert(cpu == CPU1); + assert(scheduler.get_output_cpuid(i) == cpu); } std::vector sched_as_string = run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE); @@ -3176,7 +3246,7 @@ test_replay_as_traced_i6107_workaround() zipfile_istream_t infile(cpu_fname); sched_ops.replay_as_traced_istream = &infile; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); // Since it initialized we didn't get an invalid schedule order. @@ -3190,6 +3260,156 @@ test_replay_as_traced_i6107_workaround() #endif } +static void +test_replay_as_traced_dup_start() +{ +#ifdef HAS_ZIP + // Test what i#6712 fixes: duplicate start entries. 
+ std::cerr << "\n----------------\nTesting replay as-traced dup starts\n"; + + static constexpr int NUM_INPUTS = 3; + static constexpr int NUM_OUTPUTS = 2; + static constexpr int NUM_INSTRS = 6; + static constexpr memref_tid_t TID_A = 100; + static constexpr memref_tid_t TID_B = TID_A + 1; + static constexpr memref_tid_t TID_C = TID_A + 2; + static constexpr int CPU_0 = 6; + static constexpr int CPU_1 = 7; + static constexpr uint64_t TIMESTAMP_BASE = 100; + + std::vector inputs[NUM_INPUTS]; + for (int input_idx = 0; input_idx < NUM_INPUTS; input_idx++) { + memref_tid_t tid = TID_A + input_idx; + inputs[input_idx].push_back(make_thread(tid)); + inputs[input_idx].push_back(make_pid(1)); + // These timestamps do not line up with the schedule file but + // that does not cause problems and leaving it this way + // simplifies the testdata construction. + inputs[input_idx].push_back(make_timestamp(TIMESTAMP_BASE)); + for (int instr_idx = 0; instr_idx < NUM_INSTRS; ++instr_idx) { + inputs[input_idx].push_back(make_instr(42 + instr_idx)); + } + inputs[input_idx].push_back(make_exit(tid)); + } + + // Synthesize a cpu-schedule file with duplicate starts. + std::string cpu_fname = "tmp_test_cpu_i6712.zip"; + { + zipfile_ostream_t outfile(cpu_fname); + { + std::vector sched; + sched.emplace_back(TID_A, TIMESTAMP_BASE, CPU_0, 0); + sched.emplace_back(TID_B, TIMESTAMP_BASE + 2, CPU_0, 0); + // Simple dup start: non-consecutive but in same output. + sched.emplace_back(TID_A, TIMESTAMP_BASE + 4, CPU_0, 0); + sched.emplace_back(TID_B, TIMESTAMP_BASE + 5, CPU_0, 4); + std::ostringstream cpu_string; + cpu_string << CPU_0; + std::string err = outfile.open_new_component(cpu_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched.data()), + sched.size() * sizeof(sched[0]))) + assert(false); + } + { + std::vector sched; + // More complex dup start across outputs. 
+ sched.emplace_back(TID_B, TIMESTAMP_BASE + 1, CPU_1, 0); + sched.emplace_back(TID_C, TIMESTAMP_BASE + 3, CPU_1, 0); + sched.emplace_back(TID_A, TIMESTAMP_BASE + 6, CPU_1, 4); + std::ostringstream cpu_string; + cpu_string << CPU_1; + std::string err = outfile.open_new_component(cpu_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched.data()), + sched.size() * sizeof(sched[0]))) + assert(false); + } + } + + // Replay the recorded schedule. + std::vector sched_inputs; + for (int input_idx = 0; input_idx < NUM_INPUTS; input_idx++) { + memref_tid_t tid = TID_A + input_idx; + std::vector readers; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[input_idx])), + std::unique_ptr(new mock_reader_t()), tid); + sched_inputs.emplace_back(std::move(readers)); + } + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_RECORDED_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/4); + zipfile_istream_t infile(cpu_fname); + sched_ops.replay_as_traced_istream = &infile; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + auto *stream0 = scheduler.get_stream(0); + auto *stream1 = scheduler.get_stream(1); + auto check_next = [](scheduler_t::stream_t *stream, + scheduler_t::stream_status_t expect_status, + memref_tid_t expect_tid = INVALID_THREAD_ID, + trace_type_t expect_type = TRACE_TYPE_READ) { + memref_t memref; + scheduler_t::stream_status_t status = stream->next_record(memref); + if (status != expect_status) { + std::cerr << "Expected status " << expect_status << " != " << status << "\n"; + assert(false); + } + if (status == scheduler_t::STATUS_OK) { + if (memref.marker.tid != expect_tid) { + std::cerr << "Expected tid " << expect_tid << " != " << memref.marker.tid + << "\n"; + assert(false); + } + if (memref.marker.type != expect_type) { + std::cerr << "Expected type " << 
expect_type + << " != " << memref.marker.type << "\n"; + assert(false); + } + } + }; + // We expect the 1st of the start-at-0 TID_A to be deleted; so we should + // start with TID_B (the 2nd of the start-at-0 TID_B). + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_MARKER); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + // We should have removed the 1st start-at-0 B and start with C + // on cpu 1. + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_MARKER); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_THREAD_EXIT); + // Now cpu 0 should run A. + check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_MARKER); + check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + // Cpu 0 now finishes with B. + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_THREAD_EXIT); + check_next(stream0, scheduler_t::STATUS_IDLE); + // Cpu 1 now finishes with A. 
+ check_next(stream1, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_THREAD_EXIT); + check_next(stream1, scheduler_t::STATUS_EOF); + // Finalize. + check_next(stream0, scheduler_t::STATUS_EOF); +#endif +} + static void test_replay_as_traced_from_file(const char *testdir) { @@ -3216,7 +3436,7 @@ test_replay_as_traced_from_file(const char *testdir) std::cerr << "Reading cpu file " << cpu_file << "\n"; zipfile_istream_t infile(cpu_file); sched_ops.replay_as_traced_istream = &infile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector> replay_sequence(NUM_OUTPUTS); @@ -3294,7 +3514,7 @@ test_inactive() sched_ops.quantum_duration = 2; zipfile_ostream_t outfile(record_fname); sched_ops.schedule_record_ostream = &outfile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); auto *stream0 = scheduler.get_stream(0); @@ -3380,6 +3600,14 @@ test_inactive() if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) assert(false); } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } { // Replay. 
std::vector readers; @@ -3396,7 +3624,7 @@ test_inactive() /*verbosity=*/4); zipfile_istream_t infile(record_fname); sched_ops.schedule_replay_istream = &infile; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -3501,7 +3729,7 @@ test_direct_switch() sched_ops.blocking_switch_threshold = BLOCK_LATENCY; sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; - if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != scheduler_t::STATUS_SUCCESS) assert(false); std::vector sched_as_string = @@ -3512,6 +3740,479 @@ test_direct_switch() assert(sched_as_string[0] == CORE0_SCHED_STRING); } +static void +test_kernel_switch_sequences() +{ + std::cerr << "\n----------------\nTesting kernel switch sequences\n"; + static constexpr memref_tid_t TID_IN_SWITCHES = 1; + static constexpr addr_t PROCESS_SWITCH_PC_START = 0xfeed101; + static constexpr addr_t THREAD_SWITCH_PC_START = 0xcafe101; + static constexpr uint64_t PROCESS_SWITCH_TIMESTAMP = 12345678; + static constexpr uint64_t THREAD_SWITCH_TIMESTAMP = 87654321; + std::vector switch_sequence = { + /* clang-format off */ + make_header(TRACE_ENTRY_VERSION), + make_thread(TID_IN_SWITCHES), + make_pid(TID_IN_SWITCHES), + make_version(TRACE_ENTRY_VERSION), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_PROCESS), + make_timestamp(PROCESS_SWITCH_TIMESTAMP), + make_instr(PROCESS_SWITCH_PC_START), + make_instr(PROCESS_SWITCH_PC_START + 1), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_PROCESS), + make_exit(TID_IN_SWITCHES), + make_footer(), + // Test a complete trace after the first one, which is how we plan to store + // these in an archive file. 
+ make_header(TRACE_ENTRY_VERSION), + make_thread(TID_IN_SWITCHES), + make_pid(TID_IN_SWITCHES), + make_version(TRACE_ENTRY_VERSION), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_THREAD), + make_timestamp(THREAD_SWITCH_TIMESTAMP), + make_instr(THREAD_SWITCH_PC_START), + make_instr(THREAD_SWITCH_PC_START+1), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_THREAD), + make_exit(TID_IN_SWITCHES), + make_footer(), + /* clang-format on */ + }; + auto switch_reader = + std::unique_ptr(new mock_reader_t(switch_sequence)); + auto switch_reader_end = std::unique_ptr(new mock_reader_t()); + static constexpr int NUM_WORKLOADS = 3; + static constexpr int NUM_INPUTS_PER_WORKLOAD = 3; + static constexpr int NUM_OUTPUTS = 2; + static constexpr int NUM_INSTRS = 9; + static constexpr int INSTR_QUANTUM = 3; + static constexpr uint64_t TIMESTAMP = 44226688; + static constexpr memref_tid_t TID_BASE = 100; + std::vector sched_inputs; + for (int workload_idx = 0; workload_idx < NUM_WORKLOADS; workload_idx++) { + std::vector readers; + for (int input_idx = 0; input_idx < NUM_INPUTS_PER_WORKLOAD; input_idx++) { + std::vector inputs; + inputs.push_back(make_header(TRACE_ENTRY_VERSION)); + memref_tid_t tid = + TID_BASE + workload_idx * NUM_INPUTS_PER_WORKLOAD + input_idx; + inputs.push_back(make_thread(tid)); + inputs.push_back(make_pid(1)); + inputs.push_back(make_version(TRACE_ENTRY_VERSION)); + inputs.push_back(make_timestamp(TIMESTAMP)); + for (int instr_idx = 0; instr_idx < NUM_INSTRS; instr_idx++) { + inputs.push_back(make_instr(42 + instr_idx * 4)); + } + inputs.push_back(make_exit(tid)); + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs)), + std::unique_ptr(new mock_reader_t()), tid); + } + sched_inputs.emplace_back(std::move(readers)); + } + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/4); + 
sched_ops.quantum_duration = INSTR_QUANTUM; + sched_ops.kernel_switch_reader = std::move(switch_reader); + sched_ops.kernel_switch_reader_end = std::move(switch_reader_end); + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + + // We have a custom version of run_lockstep_simulation here for more precise + // testing of the markers and instructions and interfaces. + // We record the entire sequence for a detailed check of some records, along with + // a character representation for a higher-level view of the whole sequence. + std::vector outputs(NUM_OUTPUTS, nullptr); + std::vector eof(NUM_OUTPUTS, false); + for (int i = 0; i < NUM_OUTPUTS; i++) + outputs[i] = scheduler.get_stream(i); + int num_eof = 0; + std::vector> refs(NUM_OUTPUTS); + std::vector sched_as_string(NUM_OUTPUTS); + std::vector prev_tid(NUM_OUTPUTS, INVALID_THREAD_ID); + std::vector in_switch(NUM_OUTPUTS, false); + std::vector prev_in_ord(NUM_OUTPUTS, 0); + std::vector prev_out_ord(NUM_OUTPUTS, 0); + while (num_eof < NUM_OUTPUTS) { + for (int i = 0; i < NUM_OUTPUTS; i++) { + if (eof[i]) + continue; + memref_t memref; + scheduler_t::stream_status_t status = outputs[i]->next_record(memref); + if (status == scheduler_t::STATUS_EOF) { + ++num_eof; + eof[i] = true; + continue; + } + if (status == scheduler_t::STATUS_IDLE) { + sched_as_string[i] += '_'; + continue; + } + assert(status == scheduler_t::STATUS_OK); + refs[i].push_back(memref); + if (memref.instr.tid != prev_tid[i]) { + if (!sched_as_string[i].empty()) + sched_as_string[i] += ','; + sched_as_string[i] += + 'A' + static_cast(memref.instr.tid - TID_BASE); + } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) + in_switch[i] = true; + if (in_switch[i]) { + // Test that switch code is marked synthetic. 
+ assert(outputs[i]->is_record_synthetic()); + // Test that switch code doesn't count toward input ordinals, but + // does toward output ordinals. + assert(outputs[i]->get_input_interface()->get_record_ordinal() == + prev_in_ord[i] || + // Won't match if we just switched inputs. + (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == + TRACE_MARKER_TYPE_CONTEXT_SWITCH_START)); + assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]); + } else + assert(!outputs[i]->is_record_synthetic()); + if (type_is_instr(memref.instr.type)) + sched_as_string[i] += 'i'; + else if (memref.marker.type == TRACE_TYPE_MARKER) { + switch (memref.marker.marker_type) { + case TRACE_MARKER_TYPE_VERSION: sched_as_string[i] += 'v'; break; + case TRACE_MARKER_TYPE_TIMESTAMP: sched_as_string[i] += '0'; break; + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END: + in_switch[i] = false; + // Fall-through. + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START: + if (memref.marker.marker_value == scheduler_t::SWITCH_PROCESS) + sched_as_string[i] += 'p'; + else if (memref.marker.marker_value == scheduler_t::SWITCH_THREAD) + sched_as_string[i] += 't'; + else + assert(false && "unknown context switch type"); + break; + default: sched_as_string[i] += '?'; break; + } + } + prev_tid[i] = memref.instr.tid; + prev_in_ord[i] = outputs[i]->get_input_interface()->get_record_ordinal(); + prev_out_ord[i] = outputs[i]->get_record_ordinal(); + } + } + // Check the high-level strings. 
+ for (int i = 0; i < NUM_OUTPUTS; i++) { + std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; + } + assert(sched_as_string[0] == + "Av0iii,Ct0iitv0iii,Ep0iipv0iii,Gp0iipv0iii,It0iitv0iii,Cp0iipiii,Ep0iipiii," + "Gp0iipiii,Ap0iipiii,Bt0iitiii,Dp0iipiii,Ft0iitiii,Hp0iipiii______"); + assert(sched_as_string[1] == + "Bv0iii,Dp0iipv0iii,Ft0iitv0iii,Hp0iipv0iii,Ap0iipiii,Bt0iitiii,Dp0iipiii," + "Ft0iitiii,Hp0iipiii,It0iitiii,Cp0iipiii,Ep0iipiii,Gp0iipiii,It0iitiii"); + + // Zoom in and check the first sequence record by record with value checks. + int idx = 0; + bool res = true; + res = res && + check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, + TIMESTAMP) && + check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) && + // Thread switch. + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_THREAD) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, THREAD_SWITCH_TIMESTAMP) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_THREAD) && + // We now see the headers for this thread. + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_VERSION) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, TIMESTAMP) && + // The 3-instr quantum should not count the 2 switch instrs. 
+ check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) && + // Process switch. + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_PROCESS) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, PROCESS_SWITCH_TIMESTAMP) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_PROCESS) && + // We now see the headers for this thread. + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_VERSION) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, TIMESTAMP) && + // The 3-instr quantum should not count the 2 switch instrs. + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_INSTR) && + check_ref(refs[0], idx, TID_BASE + 4, TRACE_TYPE_INSTR); + + { + // Test a bad input sequence. + std::vector bad_switch_sequence = { + /* clang-format off */ + make_header(TRACE_ENTRY_VERSION), + make_thread(TID_IN_SWITCHES), + make_pid(TID_IN_SWITCHES), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_PROCESS), + make_instr(PROCESS_SWITCH_PC_START), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_PROCESS), + make_footer(), + make_header(TRACE_ENTRY_VERSION), + make_thread(TID_IN_SWITCHES), + make_pid(TID_IN_SWITCHES), + // Error: duplicate type. 
+ make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_START, scheduler_t::SWITCH_PROCESS), + make_instr(PROCESS_SWITCH_PC_START), + make_marker(TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, scheduler_t::SWITCH_PROCESS), + make_footer(), + /* clang-format on */ + }; + auto bad_switch_reader = + std::unique_ptr(new mock_reader_t(bad_switch_sequence)); + auto bad_switch_reader_end = std::unique_ptr(new mock_reader_t()); + std::vector test_sched_inputs; + std::vector readers; + std::vector inputs; + inputs.push_back(make_header(TRACE_ENTRY_VERSION)); + readers.emplace_back(std::unique_ptr(new mock_reader_t(inputs)), + std::unique_ptr(new mock_reader_t()), + TID_BASE); + test_sched_inputs.emplace_back(std::move(readers)); + scheduler_t::scheduler_options_t test_sched_ops( + scheduler_t::MAP_TO_ANY_OUTPUT, scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS); + test_sched_ops.kernel_switch_reader = std::move(bad_switch_reader); + test_sched_ops.kernel_switch_reader_end = std::move(bad_switch_reader_end); + scheduler_t test_scheduler; + if (test_scheduler.init(test_sched_inputs, NUM_OUTPUTS, + std::move(test_sched_ops)) != + scheduler_t::STATUS_ERROR_INVALID_PARAMETER) + assert(false); + } +} + +void +test_random_schedule() +{ + std::cerr << "\n----------------\nTesting random scheduling\n"; + static constexpr int NUM_INPUTS = 7; + static constexpr int NUM_OUTPUTS = 2; + static constexpr int NUM_INSTRS = 9; + static constexpr int QUANTUM_DURATION = 3; + static constexpr int ITERS = 9; + static constexpr memref_tid_t TID_BASE = 100; + std::vector inputs[NUM_INPUTS]; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + inputs[i].push_back(make_thread(tid)); + inputs[i].push_back(make_pid(1)); + inputs[i].push_back(make_version(TRACE_ENTRY_VERSION)); + inputs[i].push_back(make_timestamp(10)); // All the same time priority. 
+ for (int j = 0; j < NUM_INSTRS; j++) { + inputs[i].push_back(make_instr(42 + j * 4)); + } + inputs[i].push_back(make_exit(tid)); + } + std::vector> scheds_by_cpu(NUM_OUTPUTS); + for (int iter = 0; iter < ITERS; ++iter) { + std::vector sched_inputs; + for (int i = 0; i < NUM_INPUTS; i++) { + std::vector readers; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), TID_BASE + i); + sched_inputs.emplace_back(std::move(readers)); + } + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_IGNORE, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + sched_ops.randomize_next_input = true; + sched_ops.quantum_duration = QUANTUM_DURATION; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + std::vector sched_as_string = + run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE); + for (int i = 0; i < NUM_OUTPUTS; i++) { + std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; + scheds_by_cpu[i].insert(sched_as_string[i]); + } + } + // With non-determinism it's hard to have a precise test. + // We assume most runs should be different: at least half of them (probably + // more but let's not make this into a flaky test). + for (int i = 0; i < NUM_OUTPUTS; i++) { + assert(scheds_by_cpu[i].size() >= ITERS / 2); + } +} + +static void +test_record_scheduler() +{ + // Test record_scheduler_t switches, which operate differently: + // they have to deal with encoding records preceding instructions, + // and they have to insert tid,pid records. 
+ std::cerr << "\n----------------\nTesting record_scheduler_t\n"; + static constexpr memref_tid_t TID_A = 42; + static constexpr memref_tid_t TID_B = TID_A + 1; + static constexpr memref_tid_t PID_A = 142; + static constexpr memref_tid_t PID_B = PID_A + 1; + static constexpr int NUM_OUTPUTS = 1; + static constexpr addr_t ENCODING_SIZE = 2; + static constexpr addr_t ENCODING_IGNORE = 0xfeed; + std::vector refs_A = { + /* clang-format off */ + make_thread(TID_A), + make_pid(PID_A), + make_version(TRACE_ENTRY_VERSION), + make_timestamp(10), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + make_instr(10), + make_timestamp(20), + make_marker(TRACE_MARKER_TYPE_SYSCALL, 42), + make_marker(TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0), + make_timestamp(120), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + make_instr(30), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + make_instr(50), + make_exit(TID_A), + /* clang-format on */ + }; + std::vector refs_B = { + /* clang-format off */ + make_thread(TID_B), + make_pid(PID_B), + make_version(TRACE_ENTRY_VERSION), + make_timestamp(20), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + make_instr(20), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + make_instr(40), + make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + // Test a target marker between the encoding and the instr. + make_marker(TRACE_MARKER_TYPE_BRANCH_TARGET, 42), + make_instr(60), + // No encoding for repeated instr. 
+ make_instr(20), + make_exit(TID_B), + /* clang-format on */ + }; + std::vector readers; + readers.emplace_back( + std::unique_ptr(new mock_record_reader_t(refs_A)), + std::unique_ptr(new mock_record_reader_t()), TID_A); + readers.emplace_back( + std::unique_ptr(new mock_record_reader_t(refs_B)), + std::unique_ptr(new mock_record_reader_t()), TID_B); + record_scheduler_t scheduler; + std::vector sched_inputs; + sched_inputs.emplace_back(std::move(readers)); + record_scheduler_t::scheduler_options_t sched_ops( + record_scheduler_t::MAP_TO_ANY_OUTPUT, record_scheduler_t::DEPENDENCY_IGNORE, + record_scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/4); + sched_ops.quantum_duration = 2; + sched_ops.block_time_scale = 0.001; // Do not stay blocked. + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + record_scheduler_t::STATUS_SUCCESS) + assert(false); + auto *stream0 = scheduler.get_stream(0); + auto check_next = [](record_scheduler_t::stream_t *stream, + record_scheduler_t::stream_status_t expect_status, + trace_type_t expect_type = TRACE_TYPE_MARKER, + addr_t expect_addr = 0) { + trace_entry_t record; + record_scheduler_t::stream_status_t status = stream->next_record(record); + assert(status == expect_status); + if (status == record_scheduler_t::STATUS_OK) { + if (record.type != expect_type) { + std::cerr << "Expected type " << expect_type << " != " << record.type + << "\n"; + assert(false); + } + if (expect_addr != 0 && record.addr != expect_addr) { + std::cerr << "Expected addr " << expect_addr << " != " << record.addr + << "\n"; + assert(false); + } + } + }; + // Advance cpu0 on TID_A to its 1st context switch. + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + // Test ordinals. 
+ assert(stream0->get_instruction_ordinal() == 0); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 0); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + // The encoding should have incremented the ordinal. + assert(stream0->get_instruction_ordinal() == 1); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 1); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + // The instr should not have further incremented it. + assert(stream0->get_instruction_ordinal() == 1); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 1); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + // Ensure the context switch is *before* the encoding. + // Advance cpu0 on TID_B to its 1st context switch. + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_B); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_B); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + // Ensure the switch is *before* the encoding and target marker. 
+ assert(stream0->get_input_interface()->get_instruction_ordinal() == 2); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_B); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_B); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + // Should still be at the same count after the encoding, marker, and instr. 
+ assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + assert(stream0->get_instruction_ordinal() == 6); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 4); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD_EXIT); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD_EXIT); + check_next(stream0, record_scheduler_t::STATUS_EOF); +} + int test_main(int argc, const char *argv[]) { @@ -3544,8 +4245,12 @@ test_main(int argc, const char *argv[]) test_replay_as_traced_from_file(argv[1]); test_replay_as_traced(); test_replay_as_traced_i6107_workaround(); + test_replay_as_traced_dup_start(); test_inactive(); test_direct_switch(); + test_kernel_switch_sequences(); + test_random_schedule(); + test_record_scheduler(); dr_standalone_exit(); return 0; diff --git a/clients/drcachesim/tests/simulate_as_traced.templatex b/clients/drcachesim/tests/simulate_as_traced.templatex new file mode 100644 index 00000000000..464b1cb3dcd --- /dev/null +++ b/clients/drcachesim/tests/simulate_as_traced.templatex @@ -0,0 +1,116 @@ +Cache simulation results: +Core #0 \(traced CPU\(s\): #11\) + L1I0 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *88.?342 + Misses: 52 + Compulsory misses: 52 + Invalidations: 0 + Miss rate: 0.06% + L1D0 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *188.?235 + Misses: 33 + Compulsory misses: 61 + Invalidations: 0 + Prefetch hits: 5 + Prefetch misses: 28 + Miss rate: 0.02% +Core #1 \(traced CPU\(s\): #9\) + L1I1 \(size=32768, assoc=8, block=64, LRU\) 
stats: + Hits: *94.?091 + Misses: 68 + Compulsory misses: 68 + Invalidations: 0 + Miss rate: 0.07% + L1D1 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *200.?175 + Misses: 40 + Compulsory misses: 70 + Invalidations: 0 + Prefetch hits: 10 + Prefetch misses: 30 + Miss rate: 0.02% +Core #2 \(traced CPU\(s\): #10\) + L1I2 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *94.?076 + Misses: 66 + Compulsory misses: 66 + Invalidations: 0 + Miss rate: 0.07% + L1D2 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *200.?168 + Misses: 38 + Compulsory misses: 67 + Invalidations: 0 + Prefetch hits: 9 + Prefetch misses: 29 + Miss rate: 0.02% +Core #3 \(traced CPU\(s\): #5\) + L1I3 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *188.?232 + Misses: 68 + Compulsory misses: 68 + Invalidations: 0 + Miss rate: 0.04% + L1D3 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *400.?362 + Misses: 59 + Compulsory misses: 105 + Invalidations: 0 + Prefetch hits: 13 + Prefetch misses: 46 + Miss rate: 0.01% +Core #4 \(traced CPU\(s\): #1\) + L1I4 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *94.?076 + Misses: 66 + Compulsory misses: 66 + Invalidations: 0 + Miss rate: 0.07% + L1D4 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *200.?168 + Misses: 38 + Compulsory misses: 67 + Invalidations: 0 + Prefetch hits: 9 + Prefetch misses: 29 + Miss rate: 0.02% +Core #5 \(traced CPU\(s\): #8\) + L1I5 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *29.?568 + Misses: 587 + Compulsory misses: 575 + Invalidations: 0 + Miss rate: 1.95% + L1D5 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *12.?893 + Misses: 458 + Compulsory misses: 717 + Invalidations: 0 + Prefetch hits: 106 + Prefetch misses: 352 + Miss rate: 3.43% +Core #6 \(traced CPU\(s\): #0\) + L1I6 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: *94.?083 + Misses: 68 + Compulsory misses: 68 + Invalidations: 0 + Miss rate: 0.07% + L1D6 \(size=32768, assoc=8, block=64, LRU\) stats: + Hits: 
*200.?172 + Misses: 40 + Compulsory misses: 70 + Invalidations: 0 + Prefetch hits: 10 + Prefetch misses: 30 + Miss rate: 0.02% +LL \(size=8388608, assoc=16, block=64, LRU\) stats: + Hits: 567 + Misses: *1.?114 + Compulsory misses: *1.?512 + Invalidations: 0 + Prefetch hits: 146 + Prefetch misses: 398 + Local miss rate: 66.27% + Child hits: *2.?084.?803 + Total miss rate: 0.05% diff --git a/clients/drcachesim/tests/switch_insertion.templatex b/clients/drcachesim/tests/switch_insertion.templatex new file mode 100644 index 00000000000..2ebc8c506ef --- /dev/null +++ b/clients/drcachesim/tests/switch_insertion.templatex @@ -0,0 +1,8 @@ +Basic counts tool results: +Total counts: + [1-9][0-9][0-9][0-9][0-9][0-9] total \(fetched\) instructions + 5971 total unique \(fetched\) instructions + [1-9][0-9][0-9][0-9][0-9][0-9] total non-fetched instructions + [1-9][0-9][0-9][0-9][0-9][0-9] total userspace instructions + [1-9][0-9][0-9] total kernel instructions +.* diff --git a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp index 384cd1ae440..5f991cb729a 100644 --- a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp +++ b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -140,8 +140,15 @@ class test_stream_t : public scheduler_t::stream_t { return instr_count_; assert(at_ >= 0 && at_ < refs_.size()); auto it = instr_counts_.find(refs_[at_].instr.tid); - assert(it != instr_counts_.end()); - return it->second; + if (it != instr_counts_.end()) { + return it->second; + } + return 0; + } + int + get_shard_index() const override + { + return get_input_stream_ordinal(); } private: @@ -166,13 +173,15 @@ class test_stream_t : public scheduler_t::stream_t { class test_analyzer_t : public analyzer_t { public: test_analyzer_t(const std::vector &refs, analysis_tool_t **tools, - int num_tools, bool parallel, uint64_t interval_microseconds) + int num_tools, bool parallel, uint64_t interval_microseconds, + uint64_t interval_instr_count) : analyzer_t() { num_tools_ = num_tools; tools_ = tools; parallel_ = parallel; interval_microseconds_ = interval_microseconds; + interval_instr_count_ = interval_instr_count; verbosity_ = 1; worker_count_ = 1; test_stream_ = @@ -200,6 +209,7 @@ class dummy_analysis_tool_t : public analysis_tool_t { analysis_tool_t::interval_state_snapshot_t * generate_interval_snapshot(uint64_t interval_id) override { + saw_serial_generate_snapshot_ = true; ++generate_snapshot_count_; return nullptr; } @@ -232,7 +242,27 @@ class dummy_analysis_tool_t : public analysis_tool_t { generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) override { ++generate_snapshot_count_; - return nullptr; + // We generate a snapshot here, but we clear them all in + // finalize_interval_snapshots to test that scenario. 
+ auto *snapshot = new interval_state_snapshot_t(); + return snapshot; + } + bool + finalize_interval_snapshots( + std::vector &interval_snapshots) + { + if (saw_serial_generate_snapshot_) { + error_string_ = "Did not expect finalize_interval_snapshots call in serial " + "mode which does not generate any snapshot."; + return false; + } + for (auto snapshot : interval_snapshots) { + delete snapshot; + } + // We clear the snapshots here so that there will be no + // combine_interval_snapshots or print_interval_results calls. + interval_snapshots.clear(); + return true; } analysis_tool_t::interval_state_snapshot_t * combine_interval_snapshots( @@ -265,98 +295,112 @@ class dummy_analysis_tool_t : public analysis_tool_t { private: int generate_snapshot_count_; + bool saw_serial_generate_snapshot_ = false; }; -// Test analysis_tool_t that records information about when the -// generate_shard_interval_snapshot and generate_interval_snapshot APIs were invoked. -class test_analysis_tool_t : public analysis_tool_t { -public: - // Describes the point in trace when an interval ends. This is same as the point - // when the generate_*interval_snapshot API is invoked. - struct interval_end_point_t { - memref_tid_t tid; - int seen_memrefs; // For parallel mode, this is the shard-local count. - uint64_t interval_id; +#define SERIAL_TID 0 - bool - operator==(const interval_end_point_t &rhs) const - { - return tid == rhs.tid && seen_memrefs == rhs.seen_memrefs && - interval_id == rhs.interval_id; - } - bool - operator<(const interval_end_point_t &rhs) const - { - if (tid != rhs.tid) - return tid < rhs.tid; - if (seen_memrefs != rhs.seen_memrefs) - return seen_memrefs < rhs.seen_memrefs; - return interval_id < rhs.interval_id; - } - }; +// Describes the point in trace when an interval ends. This is same as the point +// when the generate_*interval_snapshot API is invoked. 
+struct interval_end_point_t { + memref_tid_t tid; + int seen_memrefs; // For parallel mode, this is the shard-local count. + uint64_t interval_id; - // Describes the state recorded by test_analysis_tool_t at the end of each - // interval. - struct recorded_snapshot_t : public analysis_tool_t::interval_state_snapshot_t { - recorded_snapshot_t(uint64_t interval_id, uint64_t interval_end_timestamp, - uint64_t instr_count_cumulative, uint64_t instr_count_delta, - std::vector component_intervals) - // Actual tools do not need to supply values to construct the base - // interval_state_snapshot_t. This is only to make it easier to construct - // the expected snapshot objects in this test. - // Since the final verification happens only for the whole trace intervals, - // we simply use WHOLE_TRACE_SHARD_ID here and for tool_shard_id below. - : interval_state_snapshot_t(WHOLE_TRACE_SHARD_ID, interval_id, - interval_end_timestamp, instr_count_cumulative, - instr_count_delta) - , component_intervals(component_intervals) - , tool_shard_id(WHOLE_TRACE_SHARD_ID) - { - } - recorded_snapshot_t() - { - } + bool + operator==(const interval_end_point_t &rhs) const + { + return tid == rhs.tid && seen_memrefs == rhs.seen_memrefs && + interval_id == rhs.interval_id; + } + bool + operator<(const interval_end_point_t &rhs) const + { + if (tid != rhs.tid) + return tid < rhs.tid; + if (seen_memrefs != rhs.seen_memrefs) + return seen_memrefs < rhs.seen_memrefs; + return interval_id < rhs.interval_id; + } +}; - bool - operator==(const recorded_snapshot_t &rhs) const - { - return shard_id == rhs.shard_id && tool_shard_id == rhs.tool_shard_id && - interval_id == rhs.interval_id && - interval_end_timestamp == rhs.interval_end_timestamp && - instr_count_cumulative == rhs.instr_count_cumulative && - instr_count_delta == rhs.instr_count_delta && - component_intervals == rhs.component_intervals; - } - void - print() const - { - std::cerr << "(shard_id: " << shard_id << ", interval_id: " << 
interval_id - << ", tool_shard_id: " << tool_shard_id - << ", end_timestamp: " << interval_end_timestamp - << ", instr_count_cumulative: " << instr_count_cumulative - << ", instr_count_delta: " << instr_count_delta - << ", component_intervals: "; - for (const auto &s : component_intervals) { - std::cerr << "(tid:" << s.tid << ", seen_memrefs:" << s.seen_memrefs - << ", interval_id:" << s.interval_id << "),"; - } - std::cerr << ")\n"; +// Describes the state recorded by test_analysis_tool_t at the end of each +// interval. +struct recorded_snapshot_t : public analysis_tool_t::interval_state_snapshot_t { + recorded_snapshot_t(uint64_t shard_id, uint64_t interval_id, + uint64_t interval_end_timestamp, uint64_t instr_count_cumulative, + uint64_t instr_count_delta, + std::vector component_intervals) + // Actual tools do not need to supply values to construct the base + // interval_state_snapshot_t. This is only to make it easier to construct + // the expected snapshot objects in this test. + : interval_state_snapshot_t(shard_id, interval_id, interval_end_timestamp, + instr_count_cumulative, instr_count_delta) + , component_intervals(component_intervals) + , tool_shard_id(shard_id) + { + } + recorded_snapshot_t(uint64_t interval_id, uint64_t interval_end_timestamp, + uint64_t instr_count_cumulative, uint64_t instr_count_delta, + std::vector component_intervals) + : recorded_snapshot_t(WHOLE_TRACE_SHARD_ID, interval_id, interval_end_timestamp, + instr_count_cumulative, instr_count_delta, + component_intervals) + { + } + recorded_snapshot_t() + { + } + + bool + operator==(const recorded_snapshot_t &rhs) const + { + return get_shard_id() == rhs.get_shard_id() && + tool_shard_id == rhs.tool_shard_id && + get_interval_id() == rhs.get_interval_id() && + get_interval_end_timestamp() == rhs.get_interval_end_timestamp() && + get_instr_count_cumulative() == rhs.get_instr_count_cumulative() && + get_instr_count_delta() == rhs.get_instr_count_delta() && + component_intervals == 
rhs.component_intervals; + } + void + print() const + { + std::cerr << "(shard_id: " << get_shard_id() + << ", interval_id: " << get_interval_id() + << ", tool_shard_id: " << tool_shard_id + << ", end_timestamp: " << get_interval_end_timestamp() + << ", instr_count_cumulative: " << get_instr_count_cumulative() + << ", instr_count_delta: " << get_instr_count_delta() + << ", component_intervals: "; + for (const auto &s : component_intervals) { + std::cerr << "(tid:" << s.tid << ", seen_memrefs:" << s.seen_memrefs + << ", interval_id:" << s.interval_id << "),"; } + std::cerr << ")\n"; + } - // Stores the list of intervals that were combined to produce this snapshot. - // In the serial case, this contains just a single value. In the parallel case, - // this contains a list of size equal to the count of shard interval snapshots - // that were combined to create this snapshot. - std::vector component_intervals; - // Stores the shard_id recorded by the test tool. Compared with the shard_id - // stored by the framework in the base struct. - int64_t tool_shard_id; - }; + // Stores the list of intervals that were combined to produce this snapshot. + // In the serial case, this contains just a single value. In the parallel case, + // this contains a list of size equal to the count of shard interval snapshots + // that were combined to create this snapshot. + std::vector component_intervals; + // Stores the shard_id recorded by the test tool. Compared with the shard_id + // stored by the framework in the base struct. + int64_t tool_shard_id; + // Stores whether this snapshot was seen by finalize_interval_snapshots. + bool saw_finalize_call = false; +}; +// Test analysis_tool_t that records information about when the +// generate_shard_interval_snapshot and generate_interval_snapshot APIs were invoked. +class test_analysis_tool_t : public analysis_tool_t { +public: // Constructs an analysis_tool_t that expects the given interval state snapshots to be // produced. 
- test_analysis_tool_t(const std::vector &expected_state_snapshots, - bool combine_only_active_shards) + test_analysis_tool_t( + const std::vector> &expected_state_snapshots, + bool combine_only_active_shards) : seen_memrefs_(0) , expected_state_snapshots_(expected_state_snapshots) , outstanding_snapshots_(0) @@ -376,7 +420,7 @@ class test_analysis_tool_t : public analysis_tool_t { snapshot->tool_shard_id = analysis_tool_t::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; snapshot->component_intervals.push_back( - { /*tid=*/0, seen_memrefs_, interval_id }); + { SERIAL_TID, seen_memrefs_, interval_id }); ++outstanding_snapshots_; return snapshot; } @@ -393,6 +437,7 @@ class test_analysis_tool_t : public analysis_tool_t { void * parallel_shard_init(int shard_index, void *worker_data) override { + parallel_mode_ = true; auto per_shard = new per_shard_t; per_shard->magic_num = kMagicNum; per_shard->tid = kInvalidTid; @@ -434,12 +479,44 @@ class test_analysis_tool_t : public analysis_tool_t { ++outstanding_snapshots_; return snapshot; } + bool + finalize_interval_snapshots( + std::vector &interval_snapshots) + { + for (auto snapshot : interval_snapshots) { + if (snapshot == nullptr) { + error_string_ = + "Did not expect a nullptr snapshot in finalize_interval_snapshots"; + return false; + } + auto recorded_snapshot = dynamic_cast(snapshot); + if (recorded_snapshot->saw_finalize_call) { + error_string_ = "interval_state_snapshot_t presented " + "to finalize_interval_snapshots multiple times"; + return false; + } + recorded_snapshot->saw_finalize_call = true; + } + return true; + } analysis_tool_t::interval_state_snapshot_t * combine_interval_snapshots( const std::vector latest_shard_snapshots, uint64_t interval_end_timestamp) override { + // If we expect multiple std::vector of interval snapshots (one for each shard), + // it means we're not merging the snapshots across shards, so there should not + // be any combine_interval_snapshot calls. 
+ if (expected_state_snapshots_.size() != 1) { + error_string_ = "Did not expect any combine_interval_snapshots() calls"; + return nullptr; + } + if (!parallel_mode_) { + error_string_ = + "Did not expect any combine_interval_snapshots() calls in serial mode."; + return nullptr; + } recorded_snapshot_t *result = new recorded_snapshot_t(); result->tool_shard_id = analysis_tool_t::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; @@ -447,14 +524,20 @@ class test_analysis_tool_t : public analysis_tool_t { for (auto snapshot : latest_shard_snapshots) { if (snapshot != nullptr && (!combine_only_active_shards_ || - snapshot->interval_end_timestamp == interval_end_timestamp)) { + snapshot->get_interval_end_timestamp() == interval_end_timestamp)) { auto recorded_snapshot = dynamic_cast(snapshot); - if (recorded_snapshot->tool_shard_id != recorded_snapshot->shard_id) { + if (recorded_snapshot->tool_shard_id != + recorded_snapshot->get_shard_id()) { FATAL_ERROR("shard_id stored by tool (%" PRIi64 ") and framework (%" PRIi64 ") mismatch", recorded_snapshot->tool_shard_id, - recorded_snapshot->shard_id); + recorded_snapshot->get_shard_id()); + return nullptr; + } + if (!recorded_snapshot->saw_finalize_call) { + error_string_ = + "combine_interval_snapshots saw non-finalized snapshot"; return nullptr; } result->component_intervals.insert( @@ -485,21 +568,29 @@ class test_analysis_tool_t : public analysis_tool_t { print_interval_results( const std::vector &snapshots) override { + if (seen_print_interval_results_calls_ >= expected_state_snapshots_.size()) { + error_string_ = "Saw more print_interval_results() calls than expected"; + return false; + } std::vector recorded_state_snapshots; for (const auto &p : snapshots) { recorded_state_snapshots.push_back( reinterpret_cast(p)); } - if (!compare_results(recorded_state_snapshots, expected_state_snapshots_)) { + if (!compare_results( + recorded_state_snapshots, + expected_state_snapshots_[seen_print_interval_results_calls_])) { 
error_string_ = "Unexpected state snapshots"; std::cerr << "Expected:\n"; - for (const auto &snapshot : expected_state_snapshots_) + for (const auto &snapshot : + expected_state_snapshots_[seen_print_interval_results_calls_]) snapshot.print(); std::cerr << "Found:\n"; for (const auto &snapshot : recorded_state_snapshots) snapshot->print(); return false; } + ++seen_print_interval_results_calls_; return true; } bool @@ -515,12 +606,23 @@ class test_analysis_tool_t : public analysis_tool_t { { return outstanding_snapshots_; } + int + get_outstanding_print_interval_results_calls() const + { + return expected_state_snapshots_.size() - seen_print_interval_results_calls_; + } private: int seen_memrefs_; - std::vector expected_state_snapshots_; + // We expect to see one print_interval_results call per shard (we do not merge + // the shard interval snapshots for instr count intervals), or exactly one + // print_interval_results call for the whole-trace (we merge shard interval + // snapshots for timestamp intervals). + std::vector> expected_state_snapshots_; int outstanding_snapshots_; bool combine_only_active_shards_; + int seen_print_interval_results_calls_ = 0; + bool parallel_mode_ = false; // Data tracked per shard. struct per_shard_t { @@ -537,6 +639,7 @@ static bool test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) { constexpr uint64_t kIntervalMicroseconds = 100; + constexpr uint64_t kNoIntervalInstrCount = 0; std::vector refs = { // Trace for a single worker which has two constituent shards. (scheduler_t // does not guarantee that workers will process shards one after the other.) @@ -563,52 +666,51 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) gen_exit(52) // _ | 6 | 7 }; - std::vector expected_state_snapshots; + std::vector> expected_state_snapshots; if (!parallel) { // Each whole trace interval is made up of only one snapshot, the // serial snapshot. 
- expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 0, 3, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, { { 0, 7, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, { { 0, 13, 3 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, { { 0, 15, 5 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, { { 0, 17, 6 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, { { 0, 20, 7 } }), - }; + expected_state_snapshots = { { + // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { SERIAL_TID, 3, 1 } }), + recorded_snapshot_t(2, 200, 3, 2, { { SERIAL_TID, 7, 2 } }), + recorded_snapshot_t(3, 300, 6, 3, { { SERIAL_TID, 13, 3 } }), + recorded_snapshot_t(5, 500, 7, 1, { { SERIAL_TID, 15, 5 } }), + recorded_snapshot_t(6, 600, 7, 0, { { SERIAL_TID, 17, 6 } }), + recorded_snapshot_t(7, 700, 8, 1, { { SERIAL_TID, 20, 7 } }), + } }; } else if (combine_only_active_shards) { // Each whole trace interval is made up of snapshots from each // shard that was active in that interval. 
expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), - // Narration: The whole-trace interval_id=1 with interval_end_timestamp=200 - // is made up of the following two shard-local interval snapshots: - // - from shard_id=51, the interval_id=1 that ends at the local_memref=5 - // - from shard_id=52, the interval_id=0 that ends at the local_memref=2 - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, - { { 51, 5, 2 }, { 52, 2, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, - { { 51, 7, 3 }, { 52, 6, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, { { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, { { 52, 11, 6 } }) + { // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), + // Narration: The whole-trace interval_id=2 with interval_end_timestamp=200 + // is made up of the following two shard-local interval snapshots: + // - from shard_id=51, the interval_id=2 that ends at the local_memref=5 + // - from shard_id=52, the interval_id=1 that ends at the local_memref=2 + recorded_snapshot_t(2, 200, 3, 2, { { 51, 5, 2 }, { 52, 2, 1 } }), + recorded_snapshot_t(3, 300, 6, 3, { { 51, 7, 3 }, { 52, 6, 2 } }), + recorded_snapshot_t(5, 500, 7, 1, { { 52, 8, 4 } }), + recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 } }), + recorded_snapshot_t(7, 700, 8, 1, { { 52, 11, 6 } }) } }; } else { // Each whole trace interval is made up of last snapshots from all trace shards. 
expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, - { { 51, 5, 2 }, { 52, 2, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, - { { 51, 7, 3 }, { 52, 6, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, - { { 51, 7, 3 }, { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, - { { 51, 9, 6 }, { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, - { { 51, 9, 6 }, { 52, 11, 6 } }) + { { // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), + recorded_snapshot_t(2, 200, 3, 2, { { 51, 5, 2 }, { 52, 2, 1 } }), + recorded_snapshot_t(3, 300, 6, 3, { { 51, 7, 3 }, { 52, 6, 2 } }), + recorded_snapshot_t(5, 500, 7, 1, { { 51, 7, 3 }, { 52, 8, 4 } }), + recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 }, { 52, 8, 4 } }), + recorded_snapshot_t(7, 700, 8, 1, { { 51, 9, 6 }, { 52, 11, 6 } }) } } }; } std::vector tools; @@ -619,7 +721,7 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) std::unique_ptr(new dummy_analysis_tool_t()); tools.push_back(dummy_analysis_tool.get()); test_analyzer_t test_analyzer(refs, &tools[0], (int)tools.size(), parallel, - kIntervalMicroseconds); + kIntervalMicroseconds, kNoIntervalInstrCount); if (!test_analyzer) { FATAL_ERROR("failed to initialize test analyzer: %s", test_analyzer.get_error_string().c_str()); @@ -638,6 +740,13 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) << " left\n"; return false; } + if (test_analysis_tool.get()->get_outstanding_print_interval_results_calls() != 0) { + std::cerr + << "Missing " + << test_analysis_tool.get()->get_outstanding_print_interval_results_calls() + << " print_interval_result() calls\n"; + return false; + } int expected_generate_call_count = parallel ? 
8 : 6; if (dummy_analysis_tool.get()->get_generate_snapshot_count() != expected_generate_call_count) { @@ -654,11 +763,120 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) return true; } +static bool +test_non_zero_instr_interval(bool parallel) +{ + constexpr uint64_t kNoIntervalMicroseconds = 0; + constexpr uint64_t kIntervalInstrCount = 2; + std::vector refs = { + // Trace for a single worker which has two constituent shards. (scheduler_t + // does not guarantee that workers will process shards one after the other.) + // Expected active interval_id: tid_51_local | tid_52_local | whole_trace + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 40), // 1 | _ | 1 + gen_instr(51, 10000), // 1 | _ | 1 + gen_data(51, true, 1234, 4), // 1 | _ | 1 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 151), // _ | 1 | 1 + gen_instr(52, 20000), // _ | 1 | 1 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 170), // 1 | _ | 1 + gen_instr(51, 10008), // 1 | _ | 2 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 201), // 1 | _ | 2 + gen_instr(51, 20004), // 2 | _ | 2 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 210), // _ | 1 | 2 + gen_instr(52, 20008), // _ | 1 | 3 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 270), // _ | 1 | 3 + gen_instr(52, 20008), // _ | 2 | 3 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 490), // _ | 2 | 3 + gen_instr(52, 20012), // _ | 2 | 4 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 590), // 2 | _ | 4 + gen_exit(51), // 2 | _ | 4 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 610), // _ | 2 | 4 + gen_instr(52, 20016), // _ | 3 | 4 + gen_exit(52) // _ | 3 | 4 + }; + + std::vector> expected_state_snapshots; + if (!parallel) { + // Each whole trace interval is made up of only one snapshot, the + // serial snapshot. 
+ expected_state_snapshots = { + { // Format: + // > + recorded_snapshot_t(1, 170, 2, 2, { { SERIAL_TID, 6, 1 } }), + recorded_snapshot_t(2, 210, 4, 2, { { SERIAL_TID, 10, 2 } }), + recorded_snapshot_t(3, 490, 6, 2, { { SERIAL_TID, 14, 3 } }), + recorded_snapshot_t(4, 610, 8, 2, { { SERIAL_TID, 20, 4 } }) } + }; + } else { + // For instr count intervals, we do not merge the shard intervals to form the + // whole-trace intervals. Instead, there are multiple print_interval_result + // calls, one for the interval snapshots of each shard. The shard_id is + // included in the provided interval snapshots (see below). + expected_state_snapshots = { + // Format: + // > + { recorded_snapshot_t(51, 1, 201, 2, 2, { { 51, 6, 1 } }), + recorded_snapshot_t(51, 2, 590, 3, 1, { { 51, 9, 2 } }) }, + { recorded_snapshot_t(52, 1, 270, 2, 2, { { 52, 5, 1 } }), + recorded_snapshot_t(52, 2, 610, 4, 2, { { 52, 9, 2 } }), + recorded_snapshot_t(52, 3, 610, 5, 1, { { 52, 11, 3 } }) }, + }; + } + std::vector tools; + constexpr bool kNopCombineOnlyActiveShards = false; + auto test_analysis_tool = std::unique_ptr( + new test_analysis_tool_t(expected_state_snapshots, kNopCombineOnlyActiveShards)); + tools.push_back(test_analysis_tool.get()); + auto dummy_analysis_tool = + std::unique_ptr(new dummy_analysis_tool_t()); + tools.push_back(dummy_analysis_tool.get()); + test_analyzer_t test_analyzer(refs, &tools[0], (int)tools.size(), parallel, + kNoIntervalMicroseconds, kIntervalInstrCount); + if (!test_analyzer) { + FATAL_ERROR("failed to initialize test analyzer: %s", + test_analyzer.get_error_string().c_str()); + } + if (!test_analyzer.run()) { + FATAL_ERROR("failed to run test_analyzer: %s", + test_analyzer.get_error_string().c_str()); + } + if (!test_analyzer.print_stats()) { + FATAL_ERROR("failed to print stats: %s", + test_analyzer.get_error_string().c_str()); + } + if (test_analysis_tool.get()->get_outstanding_snapshot_count() != 0) { + std::cerr << "Failed to release all outstanding 
snapshots: " + << test_analysis_tool.get()->get_outstanding_snapshot_count() + << " left\n"; + return false; + } + if (test_analysis_tool.get()->get_outstanding_print_interval_results_calls() != 0) { + std::cerr + << "Missing " + << test_analysis_tool.get()->get_outstanding_print_interval_results_calls() + << " print_interval_result() calls\n"; + return false; + } + int expected_generate_call_count = parallel ? 5 : 4; + if (dummy_analysis_tool.get()->get_generate_snapshot_count() != + expected_generate_call_count) { + std::cerr << "Dummy analysis tool got " + << dummy_analysis_tool.get()->get_generate_snapshot_count() + << " interval API calls, but expected " << expected_generate_call_count + << "\n"; + return false; + } + fprintf(stderr, "test_non_zero_instr_interval done for parallel=%d\n", parallel); + return true; +} + int test_main(int argc, const char *argv[]) { if (!test_non_zero_interval(false) || !test_non_zero_interval(true, true) || - !test_non_zero_interval(true, false)) + !test_non_zero_interval(true, false) || !test_non_zero_instr_interval(false) || + !test_non_zero_instr_interval(true)) return 1; fprintf(stderr, "All done!\n"); return 0; diff --git a/clients/drcachesim/tests/window_test.cpp b/clients/drcachesim/tests/window_test.cpp index 7ff52c05108..2404cd0e19f 100644 --- a/clients/drcachesim/tests/window_test.cpp +++ b/clients/drcachesim/tests/window_test.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -62,7 +62,8 @@ my_setenv(const char *var, const char *value) #ifdef UNIX return setenv(var, value, 1 /*override*/) == 0; #else - return SetEnvironmentVariable(var, value) == TRUE; + // UNICODE is defined in utils.h so to use ANSI we explicitly name it. 
+ return SetEnvironmentVariableA(var, value) == TRUE; #endif } diff --git a/clients/drcachesim/tools/basic_counts.cpp b/clients/drcachesim/tools/basic_counts.cpp index 0fbf02a47c5..a0b2ab886cf 100644 --- a/clients/drcachesim/tools/basic_counts.cpp +++ b/clients/drcachesim/tools/basic_counts.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -106,6 +106,7 @@ basic_counts_t::parallel_shard_init_stream(int shard_index, void *worker_data, std::lock_guard guard(shard_map_mutex_); per_shard->stream = stream; per_shard->core = stream->get_output_cpuid(); + per_shard->tid = stream->get_tid(); shard_map_[shard_index] = per_shard; return reinterpret_cast(per_shard); } @@ -136,12 +137,10 @@ basic_counts_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } if (type_is_instr(memref.instr.type)) { ++counters->instrs; - if (TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, per_shard->filetype_)) { - if (per_shard->is_kernel) { - ++counters->kernel_instrs; - } else { - ++counters->user_instrs; - } + if (per_shard->is_kernel) { + ++counters->kernel_instrs; + } else { + ++counters->user_instrs; } counters->unique_pc_addrs.insert(memref.instr.addr); // The encoding entries aren't exposed at the memref_t level, but @@ -209,9 +208,13 @@ basic_counts_t::parallel_shard_memref(void *shard_data, const memref_t &memref) ++counters->syscall_blocking_markers; break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_START: + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START: per_shard->is_kernel = true; break; - case TRACE_MARKER_TYPE_SYSCALL_TRACE_END: per_shard->is_kernel = false; break; + case TRACE_MARKER_TYPE_SYSCALL_TRACE_END: + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END: + per_shard->is_kernel = false; + break; case TRACE_MARKER_TYPE_FILETYPE: if (per_shard->filetype_ == -1) { 
per_shard->filetype_ = @@ -226,7 +229,7 @@ basic_counts_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } } } else if (memref.data.type == TRACE_TYPE_THREAD_EXIT) { - per_shard->tid = memref.exit.tid; + assert(shard_type_ != SHARD_BY_THREAD || per_shard->tid == memref.exit.tid); } else if (memref.data.type == TRACE_TYPE_INSTR_FLUSH) { counters->icache_flushes++; } else if (memref.data.type == TRACE_TYPE_DATA_FLUSH) { @@ -239,16 +242,13 @@ bool basic_counts_t::process_memref(const memref_t &memref) { per_shard_t *per_shard; - const auto &lookup = shard_map_.find(memref.data.tid); + int shard_index = serial_stream_->get_shard_index(); + const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { per_shard = new per_shard_t; per_shard->stream = serial_stream_; - // TODO i#5694: Once we have -core_serial we can fully test this serial - // code for SHARD_BY_CORE and update any other tools doing this same - // type of data splitting in serial mode by tid. - int64_t shard_index = shard_type_ == SHARD_BY_THREAD - ? memref.data.tid - : serial_stream_->get_output_cpuid(); + per_shard->core = serial_stream_->get_output_cpuid(); + per_shard->tid = serial_stream_->get_tid(); shard_map_[shard_index] = per_shard; } else per_shard = lookup->second; @@ -335,10 +335,15 @@ basic_counts_t::print_results() total += ctr; } if (!for_kernel_trace && - TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, shard.second->filetype_)) { + TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS | + OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, + shard.second->filetype_)) { for_kernel_trace = true; } } + // Print kernel data if context switches were inserted. + if (total.kernel_instrs > 0) + for_kernel_trace = true; total.shard_count = shard_map_.size(); std::cerr << TOOL_NAME << " results:\n"; std::cerr << "Total counts:\n"; @@ -358,8 +363,8 @@ basic_counts_t::print_results() } // Print the shards sorted by instrs. 
- std::vector> sorted(shard_map_.begin(), - shard_map_.end()); + std::vector> sorted(shard_map_.begin(), + shard_map_.end()); std::sort(sorted.begin(), sorted.end(), cmp_threads); for (const auto &keyvals : sorted) { if (shard_type_ == SHARD_BY_THREAD) @@ -446,37 +451,38 @@ basic_counts_t::print_interval_results( { std::cerr << "Counts per trace interval for "; if (!interval_snapshots.empty() && - interval_snapshots[0]->shard_id != + interval_snapshots[0]->get_shard_id() != interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID) { - std::cerr << "TID " << interval_snapshots[0]->shard_id << ":\n"; + std::cerr << "TID " << interval_snapshots[0]->get_shard_id() << ":\n"; } else { std::cerr << "whole trace:\n"; } counters_t last; for (const auto &snapshot_base : interval_snapshots) { auto *snapshot = dynamic_cast(snapshot_base); - std::cerr << "Interval #" << snapshot->interval_id << " ending at timestamp " - << snapshot->interval_end_timestamp << ":\n"; + std::cerr << "Interval #" << snapshot->get_interval_id() + << " ending at timestamp " << snapshot->get_interval_end_timestamp() + << ":\n"; counters_t diff = snapshot->counters; diff -= last; print_counters(diff, " interval delta"); last = snapshot->counters; if (knob_verbose_ > 0) { - if (snapshot->instr_count_cumulative != + if (snapshot->get_instr_count_cumulative() != static_cast(snapshot->counters.instrs)) { std::stringstream err_stream; err_stream << "Cumulative instr count value provided by framework (" - << snapshot->instr_count_cumulative + << snapshot->get_instr_count_cumulative() << ") not equal to tool value (" << snapshot->counters.instrs << ")\n"; error_string_ = err_stream.str(); return false; } - if (snapshot->instr_count_delta != static_cast(diff.instrs)) { + if (snapshot->get_instr_count_delta() != static_cast(diff.instrs)) { std::stringstream err_stream; err_stream << "Delta instr count value provided by framework (" - << snapshot->instr_count_delta << ") not equal to tool value (" - << diff.instrs << 
")\n"; + << snapshot->get_instr_count_delta() + << ") not equal to tool value (" << diff.instrs << ")\n"; error_string_ = err_stream.str(); return false; } diff --git a/clients/drcachesim/tools/basic_counts.h b/clients/drcachesim/tools/basic_counts.h index a4247abf2ba..aa20e3a6e24 100644 --- a/clients/drcachesim/tools/basic_counts.h +++ b/clients/drcachesim/tools/basic_counts.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -274,8 +274,7 @@ class basic_counts_t : public analysis_tool_t { void compute_shard_interval_result(per_shard_t *shard, uint64_t interval_id); - // The keys here are int for parallel, tid for serial. - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index be5db473757..22da3ae24fc 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -32,8 +32,10 @@ #include "record_filter.h" +#include #include +#include #include #include #include @@ -46,11 +48,20 @@ #ifdef HAS_ZLIB # include "common/gzip_ostream.h" #endif +#ifdef HAS_ZIP +# include "common/zipfile_ostream.h" +#endif #include "memref.h" #include "memtrace_stream.h" +#include "raw2trace_shared.h" #include "trace_entry.h" #include "utils.h" +#include "null_filter.h" +#include "cache_filter.h" +#include "trim_filter.h" +#include "type_filter.h" +#undef VPRINT #ifdef DEBUG # define VPRINT(reader, level, ...) \ do { \ @@ -71,14 +82,73 @@ namespace dynamorio { namespace drmemtrace { namespace { + bool is_any_instr_type(trace_type_t type) { return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH || type == TRACE_TYPE_INSTR_NO_FETCH; } + +template +std::vector +parse_string(const std::string &s, char sep = ',') +{ + size_t pos, at = 0; + if (s.empty()) + return {}; + std::vector vec; + do { + pos = s.find(sep, at); + vec.push_back(static_cast(std::stoi(s.substr(at, pos)))); + at = pos + 1; + } while (pos != std::string::npos); + return vec; +} + } // namespace +record_analysis_tool_t * +record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp, + int cache_filter_size, const std::string &remove_trace_types, + const std::string &remove_marker_types, + uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, + unsigned int verbose) +{ + std::vector< + std::unique_ptr> + filter_funcs; + if (cache_filter_size > 0) { + filter_funcs.emplace_back( + std::unique_ptr( + // XXX: add more command-line options to allow the user to set these + // parameters. 
+ new dynamorio::drmemtrace::cache_filter_t( + /*cache_associativity=*/1, /*cache_line_size=*/64, cache_filter_size, + /*filter_data=*/true, /*filter_instrs=*/false))); + } + if (!remove_trace_types.empty() || !remove_marker_types.empty()) { + std::vector filter_trace_types = + parse_string(remove_trace_types); + std::vector filter_marker_types = + parse_string(remove_marker_types); + filter_funcs.emplace_back( + std::unique_ptr( + new dynamorio::drmemtrace::type_filter_t(filter_trace_types, + filter_marker_types))); + } + if (trim_before_timestamp > 0 || trim_after_timestamp > 0) { + filter_funcs.emplace_back( + std::unique_ptr( + new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp, + trim_after_timestamp))); + } + // TODO i#5675: Add other filters. + + return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), + stop_timestamp, verbose); +} + record_filter_t::record_filter_t( const std::string &output_dir, std::vector> filters, uint64_t stop_timestamp, @@ -105,19 +175,156 @@ record_filter_t::parallel_shard_supported() return true; } -std::unique_ptr +std::string +record_filter_t::initialize_shard_type(shard_type_t shard_type) +{ + shard_type_ = shard_type; + return ""; +} + +std::string +record_filter_t::get_output_basename(memtrace_stream_t *shard_stream) +{ + if (shard_type_ == SHARD_BY_CORE) { + return output_dir_ + DIRSEP + "drmemtrace.core." + + std::to_string(shard_stream->get_shard_index()) + ".trace"; + } else { + return output_dir_ + DIRSEP + shard_stream->get_stream_name(); + } +} + +std::string +record_filter_t::initialize_shard_output(per_shard_t *per_shard, + memtrace_stream_t *shard_stream) +{ + if (shard_type_ == SHARD_BY_CORE) { + // Each output is a mix of inputs so we do not want to reuse the input + // names with tids. + // Since some shards may not have inputs, we need to synchronize determining + // the file extension. + // First, get our path without the extension, so we can add it later. 
+ per_shard->output_path = get_output_basename(shard_stream); + std::string input_name = shard_stream->get_stream_name(); + // Now synchronize determining the extension. + auto lock = std::unique_lock(input_info_mutex_); + if (!output_ext_.empty()) { + per_shard->output_path += output_ext_; + lock.unlock(); + } else if (!input_name.empty()) { + size_t last_dot = input_name.rfind('.'); + if (last_dot == std::string::npos) + return "Failed to determine filename type from extension"; + output_ext_ = input_name.substr(last_dot); + // Set the other key input data. + version_ = shard_stream->get_version(); + filetype_ = add_to_filetype(shard_stream->get_filetype()); + per_shard->output_path += output_ext_; + lock.unlock(); + input_info_cond_var_.notify_all(); + } else { + // We have to wait for another shard with an input to set output_ext_. + input_info_cond_var_.wait(lock, [this] { return !output_ext_.empty(); }); + per_shard->output_path += output_ext_; + lock.unlock(); + } + } else { + per_shard->output_path = get_output_basename(shard_stream); + } + return ""; +} + +std::string record_filter_t::get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream) { - per_shard->output_path = output_dir_ + DIRSEP + shard_stream->get_stream_name(); + if (per_shard->output_path.empty()) + return "Error: output_path is empty"; #ifdef HAS_ZLIB if (ends_with(per_shard->output_path, ".gz")) { VPRINT(this, 3, "Using the gzip writer for %s\n", per_shard->output_path.c_str()); - return std::unique_ptr(new gzip_ostream_t(per_shard->output_path)); + per_shard->file_writer = + std::unique_ptr(new gzip_ostream_t(per_shard->output_path)); + per_shard->writer = per_shard->file_writer.get(); + return ""; + } +#endif +#ifdef HAS_ZIP + if (ends_with(per_shard->output_path, ".zip")) { + VPRINT(this, 3, "Using the zip writer for %s\n", per_shard->output_path.c_str()); + per_shard->archive_writer = std::unique_ptr( + new zipfile_ostream_t(per_shard->output_path)); + per_shard->writer = 
per_shard->archive_writer.get(); + return open_new_chunk(per_shard); } #endif VPRINT(this, 3, "Using the default writer for %s\n", per_shard->output_path.c_str()); - return std::unique_ptr( + per_shard->file_writer = std::unique_ptr( new std::ofstream(per_shard->output_path, std::ofstream::binary)); + per_shard->writer = per_shard->file_writer.get(); + return ""; +} + +std::string +record_filter_t::remove_output_file(per_shard_t *per_shard) +{ + VPRINT(this, 1, "Removing zero-instruction file %s for tid %" PRId64 "\n", + per_shard->output_path.c_str(), per_shard->tid); + if (std::remove(per_shard->output_path.c_str()) != 0) + return "Failed to remove zero-instruction file " + per_shard->output_path; + return ""; +} + +std::string +record_filter_t::emit_marker(per_shard_t *shard, unsigned short marker_type, + uint64_t marker_value) +{ + trace_entry_t marker; + marker.type = TRACE_TYPE_MARKER; + marker.size = marker_type; + marker.addr = static_cast(marker_value); + if (!write_trace_entry(shard, marker)) + return "Failed to write marker"; + return ""; +} + +std::string +record_filter_t::open_new_chunk(per_shard_t *shard) +{ + VPRINT(this, 1, "Opening new chunk #%" PRIu64 "\n", shard->chunk_ordinal); + std::string err; + if (shard->chunk_ordinal > 0) { + err = + emit_marker(shard, TRACE_MARKER_TYPE_CHUNK_FOOTER, shard->chunk_ordinal - 1); + if (!err.empty()) + return err; + } + + std::ostringstream stream; + stream << TRACE_CHUNK_PREFIX << std::setfill('0') << std::setw(4) + << shard->chunk_ordinal; + err = shard->archive_writer->open_new_component(stream.str()); + if (!err.empty()) + return err; + + if (shard->chunk_ordinal > 0) { + // XXX i#6593: This sequence is currently duplicated with + // raw2trace_t::emit_new_chunk_header(). Could we share it? 
+ err = emit_marker(shard, TRACE_MARKER_TYPE_RECORD_ORDINAL, shard->cur_refs); + if (!err.empty()) + return err; + err = emit_marker(shard, TRACE_MARKER_TYPE_TIMESTAMP, shard->last_timestamp); + if (!err.empty()) + return err; + err = emit_marker(shard, TRACE_MARKER_TYPE_CPU_ID, shard->last_cpu_id); + if (!err.empty()) + return err; + // We need to re-emit all encodings. + shard->cur_chunk_pcs.clear(); + } + + ++shard->chunk_ordinal; + shard->cur_chunk_instrs = 0; + + return ""; } void * @@ -125,14 +332,30 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data, memtrace_stream_t *shard_stream) { auto per_shard = new per_shard_t; - per_shard->writer = get_writer(per_shard, shard_stream); + std::string error = initialize_shard_output(per_shard, shard_stream); + if (!error.empty()) { + per_shard->error = "Failure initializing output: " + error; + success_ = false; + return reinterpret_cast(per_shard); + } + error = get_writer(per_shard, shard_stream); + if (!error.empty()) { + per_shard->error = "Failure in opening writer: " + error; + success_ = false; + return reinterpret_cast(per_shard); + } + if (per_shard->writer == nullptr) { + per_shard->error = "Could not open a writer for " + per_shard->output_path; + success_ = false; + return reinterpret_cast(per_shard); + } per_shard->shard_stream = shard_stream; per_shard->enabled = true; per_shard->input_entry_count = 0; per_shard->output_entry_count = 0; - if (!per_shard->writer) { - per_shard->error = "Could not open a writer for " + per_shard->output_path; - success_ = false; + per_shard->tid = shard_stream->get_tid(); + if (shard_type_ == SHARD_BY_CORE) { + per_shard->memref_counter.set_core_sharded(true); } for (auto &f : filters_) { per_shard->filter_shard_data.push_back( @@ -157,10 +380,38 @@ record_filter_t::parallel_shard_exit(void *shard_data) if (!filters_[i]->parallel_shard_exit(per_shard->filter_shard_data[i])) res = false; } + if (per_shard->last_written_record.type != 
TRACE_TYPE_FOOTER) { + // When core-sharded some cores can end in TRACE_TYPE_IDLE. + // i#6703: The scheduler should add this footer for us. + trace_entry_t footer = {}; + footer.type = TRACE_TYPE_FOOTER; + if (!write_trace_entry(per_shard, footer)) { + per_shard->error = "Failed to write footer"; + return false; + } + } // Destroy the writer since we do not need it anymore. This also makes sure // that data is written out to the file; curiously, a simple flush doesn't // do it. - per_shard->writer.reset(nullptr); + per_shard->file_writer.reset(nullptr); + per_shard->archive_writer.reset(nullptr); + per_shard->writer = nullptr; + // If the shard ended up with no instructions, delete it (otherwise the + // invariant checker complains). + VPRINT(this, 2, "shard %s chunk=%" PRIu64 " cur-instrs=%" PRIu64 "\n", + per_shard->output_path.c_str(), per_shard->chunk_ordinal, + per_shard->cur_chunk_instrs); + if (!TESTANY(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED, + per_shard->filetype) && + // chunk_ordinal is 1 after the init-time call for archives; it + // remains 0 for non-archives. + per_shard->chunk_ordinal <= 1 && per_shard->cur_chunk_instrs == 0 && + // Leave a core-sharded completely-idle file. + shard_type_ != SHARD_BY_CORE) { + // Mark for removal. We delay removal in case it involves global + // operations that might race with other workers. + per_shard->now_empty = true; + } return res; } @@ -174,12 +425,52 @@ record_filter_t::parallel_shard_error(void *shard_data) bool record_filter_t::write_trace_entry(per_shard_t *shard, const trace_entry_t &entry) { + if (shard->output_entry_count == 0 && entry.type != TRACE_TYPE_HEADER) { + // When core-sharded with initially-idle cores we can start without a header. + // XXX i#6703: The scheduler should insert these headers for us, as this + // issue can affect other tools as well. + // Our own stream's version + filetype are 0 so we use another shard's. 
+ std::lock_guard guard(input_info_mutex_); + std::vector header; + header.push_back({ TRACE_TYPE_HEADER, 0, { static_cast(version_) } }); + header.push_back({ TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_VERSION, + { static_cast(version_) } }); + header.push_back({ TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { static_cast(filetype_) } }); + // file_reader_t::open_input_file demands tid+pid so we insert sentinel values. + // We can't use INVALID_THREAD_ID as scheduler_t::open_reader() loops until + // record_type_has_tid() which requires record.marker.tid != INVALID_THREAD_ID. + header.push_back({ TRACE_TYPE_THREAD, + sizeof(thread_id_t), + { static_cast(IDLE_THREAD_ID) } }); + header.push_back({ TRACE_TYPE_PID, + sizeof(process_id_t), + { static_cast(INVALID_PID) } }); + // The scheduler itself demands a timestamp,cpuid pair. + // We don't have a good value to use here though: + // XXX i#6703: The scheduler should insert these for us. + // As-is, these can cause confusion with -1 values, but this is our best + // effort support until i#6703. 
+ header.push_back({ TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, + { static_cast(-1) } }); + header.push_back( + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { static_cast(-1) } }); + if (!write_trace_entries(shard, header)) { + shard->error += "Failed to write synthetic header"; + return false; + } + } if (!shard->writer->write((char *)&entry, sizeof(entry))) { shard->error = "Failed to write to output file " + shard->output_path; success_ = false; return false; } + shard->cur_refs += shard->memref_counter.entry_memref_count(&entry); ++shard->output_entry_count; + shard->last_written_record = entry; return true; } @@ -194,6 +485,161 @@ record_filter_t::write_trace_entries(per_shard_t *shard, return true; } +std::string +record_filter_t::process_markers(per_shard_t *per_shard, trace_entry_t &entry, + bool &output) +{ + if (entry.type == TRACE_TYPE_MARKER) { + switch (entry.size) { + case TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT: + per_shard->chunk_size = entry.addr; + break; + case TRACE_MARKER_TYPE_FILETYPE: + entry.addr = static_cast(add_to_filetype(entry.addr)); + per_shard->filetype = entry.addr; + break; + case TRACE_MARKER_TYPE_CHUNK_FOOTER: + // We insert ourselves in open_new_chunk(). + output = false; + break; + case TRACE_MARKER_TYPE_RECORD_ORDINAL: + // We insert ourselves in open_new_chunk(). + per_shard->input_count_at_ordinal = per_shard->input_entry_count; + output = false; + break; + case TRACE_MARKER_TYPE_TIMESTAMP: + if (output) + per_shard->last_timestamp = entry.addr; + // We insert our own start-of-chunk timestamp. + if (per_shard->archive_writer && + per_shard->input_entry_count - per_shard->input_count_at_ordinal == 1) + output = false; + break; + case TRACE_MARKER_TYPE_CPU_ID: + if (output) + per_shard->last_cpu_id = entry.addr; + // We insert our own start-of-chunk cpuid. 
+ if (per_shard->archive_writer && + per_shard->input_entry_count - per_shard->input_count_at_ordinal == 2) + output = false; + break; + case TRACE_MARKER_TYPE_PHYSICAL_ADDRESS: + case TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE: + if (!output && per_shard->archive_writer) { + // TODO i#6654: These markers need to be repeated across chunks. Even + // raw2trace doesn't support this yet: once we add it there we can add it + // here or try to share code. + return "Removing physical address markers from archive output is not yet " + "supported"; + } + break; + } + } + return ""; +} + +std::string +record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t &entry, + bool output) +{ + if (!per_shard->archive_writer || + !is_any_instr_type(static_cast(entry.type))) + return ""; + if (!per_shard->last_encoding.empty()) { + if (per_shard->per_input == nullptr) + return "Invalid input id for instruction"; + std::lock_guard guard(per_shard->per_input->lock); + per_shard->per_input->pc2encoding[entry.addr] = per_shard->last_encoding; + // Disable the just-delayed encoding output in process_delayed_encodings() if + // this is what used to be a new-chunk encoding but is no longer. + if (per_shard->cur_chunk_pcs.find(entry.addr) != per_shard->cur_chunk_pcs.end()) { + VPRINT(this, 3, "clearing new-chunk last encoding @pc=0x%zx\n", entry.addr); + per_shard->last_encoding.clear(); + } + } else if (output) { + // Insert the cached encoding if this is the first instance of this PC + // (without an encoding) in this chunk, unless the user is removing all encodings. + // XXX: What if there is a filter removing all encodings but only + // to the stop point, so a partial remove that does not change + // the filetype? For now we do not support that, and we re-add + // encodings at chunk boundaries regardless. 
+ if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) && + per_shard->cur_chunk_pcs.find(entry.addr) == per_shard->cur_chunk_pcs.end()) { + if (per_shard->per_input == nullptr) + return "Invalid input id for instruction"; + std::lock_guard guard(per_shard->per_input->lock); + if (per_shard->per_input->pc2encoding.find(entry.addr) == + per_shard->per_input->pc2encoding.end()) { + return "Missing encoding for PC " + std::to_string(entry.addr) + + " in shard " + per_shard->shard_stream->get_stream_name() + + " at input entry " + std::to_string(per_shard->input_entry_count); + } + VPRINT(this, 3, + "output new-chunk encoding chunk=%" PRIu64 " ref=%" PRIu64 "\n", + per_shard->chunk_ordinal, per_shard->cur_refs); + if (!write_trace_entries(per_shard, + per_shard->per_input->pc2encoding[entry.addr])) { + return "Failed to write"; + } + // Avoid emitting the encoding twice. + per_shard->delayed_encodings[entry.addr].clear(); + } + } + if (output) + per_shard->cur_chunk_pcs.insert(entry.addr); + return ""; +} + +std::string +record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t &entry, + bool output) +{ + if (!is_any_instr_type(static_cast(entry.type))) + return ""; + if (!output) { + if (!per_shard->last_encoding.empty()) { + // Overwrite in case the encoding for this pc was already recorded. + per_shard->delayed_encodings[entry.addr] = + std::move(per_shard->last_encoding); + } + } else if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype)) { + // Output if we have encodings that haven't yet been output, and + // there is no filter removing all encodings (we don't support + // partial encoding removal). + // We check prev_was_output to rule out filtered-out encodings + // (we record all encodings for new-chunk insertion). + if (!per_shard->last_encoding.empty() && per_shard->prev_was_output) { + // This instruction is accompanied by a preceding encoding. Since + // this instruction is not filtered out, output the encoding now. 
+ VPRINT(this, 3, + "output just-delayed encoding chunk=%" PRIu64 " ref=%" PRIu64 + " pc=0x%zx\n", + per_shard->chunk_ordinal, per_shard->cur_refs, entry.addr); + if (!write_trace_entries(per_shard, per_shard->last_encoding)) { + return "Failed to write"; + } + // Remove previously delayed encoding that doesn't need to be output + // now that we have a more recent version for this instr. + per_shard->delayed_encodings.erase(entry.addr); + } else if (!per_shard->delayed_encodings[entry.addr].empty()) { + // The previous instance of this instruction was filtered out and + // its encoding was saved. Now that we have an instance of the same + // instruction that is not filtered out, we need to output its + // encoding. + VPRINT(this, 3, + "output long-delayed encoding chunk=%" PRIu64 " ref=%" PRIu64 + " pc=0x%zx\n", + per_shard->chunk_ordinal, per_shard->cur_refs, entry.addr); + if (!write_trace_entries(per_shard, + per_shard->delayed_encodings[entry.addr])) { + return "Failed to write"; + } + per_shard->delayed_encodings.erase(entry.addr); + } + } + return ""; +} + bool record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &input_entry) { @@ -201,14 +647,48 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in ++per_shard->input_entry_count; trace_entry_t entry = input_entry; bool output = true; + // XXX: Once we have multi-workload inputs we'll want all our PC keys to become + // pairs . 
+ if (per_shard->shard_stream->get_workload_id() != per_shard->prev_workload_id && + per_shard->shard_stream->get_workload_id() >= 0 && + per_shard->prev_workload_id >= 0) { + per_shard->error = "Multi-workload inputs not yet supported"; + return false; + } + int64_t input_id = per_shard->shard_stream->get_input_id(); + if (per_shard->prev_input_id != input_id) { + VPRINT(this, 3, + "shard %d switch from %" PRId64 " to %" PRId64 " (refs=%" PRIu64 + " instrs=%" PRIu64 ")\n", + per_shard->shard_stream->get_shard_index(), per_shard->prev_input_id, + input_id, + per_shard->shard_stream->get_input_interface() == nullptr + ? 0 + : per_shard->shard_stream->get_input_interface()->get_record_ordinal(), + per_shard->shard_stream->get_input_interface() == nullptr + ? 0 + : per_shard->shard_stream->get_input_interface() + ->get_instruction_ordinal()); + std::lock_guard guard(input2info_mutex_); + auto it = input2info_.find(input_id); + if (it == input2info_.end()) { + input2info_[input_id] = std::unique_ptr(new per_input_t); + it = input2info_.find(input_id); + } + // It would be nice to assert that this pointer is not in use in other shards + // but that is too expensive. 
+ per_shard->per_input = it->second.get(); + } if (per_shard->enabled && stop_timestamp_ != 0 && per_shard->shard_stream->get_last_timestamp() >= stop_timestamp_) { per_shard->enabled = false; trace_entry_t filter_boundary_entry = { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILTER_ENDPOINT, { 0 } }; - if (!write_trace_entry(per_shard, filter_boundary_entry)) + if (!write_trace_entry(per_shard, filter_boundary_entry)) { + per_shard->error = "Failed to write"; return false; + } } if (per_shard->enabled) { for (int i = 0; i < static_cast(filters_.size()); ++i) { @@ -216,83 +696,76 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in per_shard->filter_shard_data[i])) { output = false; } + if (!filters_[i]->get_error_string().empty()) { + per_shard->error = "Filter error: " + filters_[i]->get_error_string(); + return false; + } } } - if (entry.type == TRACE_TYPE_MARKER) { - switch (entry.size) { - case TRACE_MARKER_TYPE_FILETYPE: - if (stop_timestamp_ != 0) { - entry.addr |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP; + if (per_shard->archive_writer) { + // Wait until we reach the next instr or timestamp past the threshold to + // insert the new chunk, to ensure we get all associated records with the + // chunk-final instr. + VPRINT(this, 4, "Cur chunk instr count: %" PRIu64 " vs threshold %" PRIu64 "\n", + per_shard->cur_chunk_instrs, per_shard->chunk_size); + if (per_shard->cur_chunk_instrs >= per_shard->chunk_size && + per_shard->chunk_size > 0 && + (is_any_instr_type(static_cast(entry.type)) || + (entry.type == TRACE_TYPE_MARKER && + entry.size == TRACE_MARKER_TYPE_TIMESTAMP) || + entry.type == TRACE_TYPE_THREAD_EXIT || entry.type == TRACE_TYPE_FOOTER)) { + std::string error = open_new_chunk(per_shard); + if (!error.empty()) { + per_shard->error = error; + return false; } - break; - case TRACE_MARKER_TYPE_TIMESTAMP: - // No need to remember the previous unit's header anymore. We're in the - // next unit now. 
- // XXX: it may happen that we never output a unit header due to this - // optimization. We should ensure that we output it at least once. We - // skip handling this corner case for now. - per_shard->last_delayed_unit_header.clear(); - ANNOTATE_FALLTHROUGH; - case TRACE_MARKER_TYPE_WINDOW_ID: - case TRACE_MARKER_TYPE_CPU_ID: - // Optimize space by outputting the unit header only if we are outputting - // something from that unit. - if (output) - per_shard->last_delayed_unit_header.push_back(entry); - return true; } } - if (!output) { - if (is_any_instr_type(static_cast(entry.type)) && - !per_shard->last_encoding.empty()) { - // Overwrite in case the encoding for this pc was already recorded. - per_shard->delayed_encodings[entry.addr] = - std::move(per_shard->last_encoding); - per_shard->last_encoding = {}; - } - return true; - } + per_shard->error = process_markers(per_shard, entry, output); + if (!per_shard->error.empty()) + return false; + + per_shard->error = process_chunk_encodings(per_shard, entry, output); + if (!per_shard->error.empty()) + return false; + + if (output && type_is_instr(static_cast(entry.type)) && + // Do not count PC-only i-filtered instrs. + entry.size > 0) + ++per_shard->cur_chunk_instrs; + + per_shard->error = process_delayed_encodings(per_shard, entry, output); + if (!per_shard->error.empty()) + return false; + + per_shard->prev_was_output = output; if (entry.type == TRACE_TYPE_ENCODING) { + // Delay output until we know whether its instr will be output. + VPRINT(this, 4, "@%" PRIu64 " remembering last encoding %d %d 0x%zx\n", + per_shard->input_entry_count, entry.type, entry.size, entry.addr); per_shard->last_encoding.push_back(entry); - return true; + output = false; + } else if (is_any_instr_type(static_cast(entry.type))) { + per_shard->last_encoding.clear(); } - // Since we're outputting something from this unit, output its unit header. 
- if (!per_shard->last_delayed_unit_header.empty()) { - if (!write_trace_entries(per_shard, per_shard->last_delayed_unit_header)) - return false; - per_shard->last_delayed_unit_header.clear(); - } + per_shard->prev_input_id = per_shard->shard_stream->get_input_id(); + per_shard->prev_workload_id = per_shard->shard_stream->get_workload_id(); - if (is_any_instr_type(static_cast(entry.type))) { - // Output if we have encodings that haven't yet been output. - if (!per_shard->last_encoding.empty()) { - // This instruction is accompanied by a preceding encoding. Since - // this instruction is not filtered out, output the encoding now. - if (!write_trace_entries(per_shard, per_shard->last_encoding)) - return false; - per_shard->last_encoding.clear(); - // Remove previously delayed encoding that doesn't need to be output - // now that we have a more recent version for this instr. - per_shard->delayed_encodings.erase(entry.addr); - } else if (!per_shard->delayed_encodings[entry.addr].empty()) { - // The previous instance of this instruction was filtered out and - // its encoding was saved. Now that we have an instance of the same - // instruction that is not filtered out, we need to output its - // encoding. - if (!write_trace_entries(per_shard, per_shard->delayed_encodings[entry.addr])) - return false; - per_shard->delayed_encodings.erase(entry.addr); + if (output) { + // XXX i#5675: Currently we support writing to a single output file, but we may + // want to write to multiple in the same run; e.g. splitting a trace. For now, + // we can simply run the tool multiple times, but it can be made more efficient. + if (!write_trace_entry(per_shard, entry)) { + per_shard->error = "Failed to write"; + return false; } } - // XXX i#5675: Currently we support writing to a single output file, but we may - // want to write to multiple in the same run; e.g. splitting a trace. For now, - // we can simply run the tool multiple times, but it can be made more efficient. 
- return write_trace_entry(per_shard, entry); + return true; } bool @@ -306,15 +779,21 @@ record_filter_t::process_memref(const trace_entry_t &memref) bool record_filter_t::print_results() { + bool res = true; uint64_t input_entry_count = 0; uint64_t output_entry_count = 0; for (const auto &shard : shard_map_) { input_entry_count += shard.second->input_entry_count; - output_entry_count += shard.second->output_entry_count; + if (shard.second->now_empty) { + error_string_ = remove_output_file(shard.second); + if (!error_string_.empty()) + res = false; + } else + output_entry_count += shard.second->output_entry_count; } std::cerr << "Output " << output_entry_count << " entries from " << input_entry_count << " entries.\n"; - return true; + return res; } } // namespace drmemtrace diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index a54a5108bce..f5550b39cbd 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -35,6 +35,7 @@ #include +#include #include #include #include @@ -43,8 +44,10 @@ #include #include "analysis_tool.h" +#include "archive_ostream.h" #include "memref.h" #include "memtrace_stream.h" +#include "raw2trace_shared.h" #include "trace_entry.h" namespace dynamorio { @@ -90,6 +93,7 @@ class record_filter_t : public record_analysis_tool_t { * The passed \p entry is not guaranteed to be the original one from * the trace if other filter tools are present, and may include changes * made by other tools. + * An error is indicated by setting error_string_ to a non-empty value. 
*/ virtual bool parallel_shard_filter(trace_entry_t &entry, void *shard_data) = 0; @@ -114,6 +118,7 @@ class record_filter_t : public record_analysis_tool_t { std::string error_string_; }; + // stop_timestamp sets a point beyond which no filtering will occur. record_filter_t(const std::string &output_dir, std::vector> filters, uint64_t stop_timestamp, unsigned int verbose); @@ -124,6 +129,8 @@ class record_filter_t : public record_analysis_tool_t { print_results() override; bool parallel_shard_supported() override; + std::string + initialize_shard_type(shard_type_t shard_type) override; void * parallel_shard_init_stream(int shard_index, void *worker_data, memtrace_stream_t *shard_stream) override; @@ -135,40 +142,133 @@ class record_filter_t : public record_analysis_tool_t { parallel_shard_error(void *shard_data) override; protected: + // For core-sharded we need to remember encodings for an input that were + // seen on a different core, as there is no reader_t remembering them for us. + // XXX i#6635: Is this something the scheduler should help us with? + struct per_input_t { + // There should be no contention on the lock as each input is on + // just one core at a time. + std::mutex lock; + std::unordered_map> pc2encoding; + }; + struct per_shard_t { std::string output_path; - std::unique_ptr writer; + // One and only one of these writers can be valid. + std::unique_ptr file_writer; + std::unique_ptr archive_writer; + // This points to one of the writers. + std::ostream *writer = nullptr; std::string error; std::vector filter_shard_data; - std::vector last_delayed_unit_header; std::unordered_map> delayed_encodings; std::vector last_encoding; uint64_t input_entry_count; uint64_t output_entry_count; memtrace_stream_t *shard_stream; bool enabled; + // For re-chunking archive files. 
+ uint64_t chunk_ordinal = 0; + uint64_t chunk_size = 0; + uint64_t cur_chunk_instrs = 0; + uint64_t cur_refs = 0; + uint64_t input_count_at_ordinal = 0; + memref_counter_t memref_counter; + addr_t last_timestamp = 0; + addr_t last_cpu_id = 0; + std::unordered_set cur_chunk_pcs; + bool prev_was_output = false; + addr_t filetype = 0; + bool now_empty = false; + // For thread-sharded. + memref_tid_t tid = 0; + int64_t prev_workload_id = -1; + // For core-sharded. + int64_t prev_input_id = -1; + trace_entry_t last_written_record; + // Cached value updated on context switches. + per_input_t *per_input = nullptr; }; - // In parallel operation the keys are "shard indices": just ints. - std::unordered_map shard_map_; + + virtual std::string + open_new_chunk(per_shard_t *shard); + + std::string + emit_marker(per_shard_t *shard, unsigned short marker_type, uint64_t marker_value); + + virtual std::string + remove_output_file(per_shard_t *per_shard); + + std::string + process_markers(per_shard_t *per_shard, trace_entry_t &entry, bool &output); + + std::string + process_chunk_encodings(per_shard_t *per_shard, trace_entry_t &entry, bool output); + + std::string + process_delayed_encodings(per_shard_t *per_shard, trace_entry_t &entry, bool output); + + // Computes the output path without the extension output_ext_ which is added + // separately after determining the input path extension. + virtual std::string + get_output_basename(memtrace_stream_t *shard_stream); + + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses // to shard_map (print_results) we are single-threaded. std::mutex shard_map_mutex_; + shard_type_t shard_type_ = SHARD_BY_THREAD; + + // For core-sharded we don't have a 1:1 input:output file mapping. + // Thus, some shards may not have an input stream at init time, and + // need to figure out their file extension and header info from other shards. 
+ std::mutex input_info_mutex_; + std::condition_variable input_info_cond_var_; + // The above locks guard these fields: + std::string output_ext_; + uint64_t version_ = 0; + uint64_t filetype_ = 0; private: virtual bool write_trace_entry(per_shard_t *shard, const trace_entry_t &entry); - virtual std::unique_ptr + // Sets one of file_writer or archive_writer, along with writer, in per_shard. + // Returns "" or an error string. + virtual std::string get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream); + // Sets output_path plus cross-shard output_ext_, version_, filetype_. + virtual std::string + initialize_shard_output(per_shard_t *per_shard, memtrace_stream_t *shard_stream); + bool write_trace_entries(per_shard_t *shard, const std::vector &entries); + inline uint64_t + add_to_filetype(uint64_t filetype) + { + if (stop_timestamp_ != 0) { + filetype |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP; + } + if (shard_type_ == SHARD_BY_CORE) { + filetype |= OFFLINE_FILE_TYPE_CORE_SHARDED; + } + return filetype; + } + std::string output_dir_; std::vector> filters_; uint64_t stop_timestamp_; unsigned int verbosity_; const char *output_prefix_ = "[record_filter]"; + // For core-sharded, but used for thread-sharded to simplify the code. + std::mutex input2info_mutex_; + // We use a pointer so we can safely cache it in per_shard_t to avoid + // input2info_mutex_ on every access. + // XXX: We could use a read-write lock but C++11 doesn't have a ready-made one. + // If we had the input count we could use an array and atomic reads. + std::unordered_map> input2info_; }; } // namespace drmemtrace diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h new file mode 100644 index 00000000000..ae0665f3fef --- /dev/null +++ b/clients/drcachesim/tools/filter/record_filter_create.h @@ -0,0 +1,76 @@ +/* ********************************************************** + * Copyright (c) 2024 Google, Inc. 
All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _RECORD_FILTER_CREATE_H_ +#define _RECORD_FILTER_CREATE_H_ 1 + +#include "analysis_tool.h" + +namespace dynamorio { +namespace drmemtrace { + +/** + * @file drmemtrace/record_filter_create.h + * @brief DrMemtrace record filter trace analysis tool creation. + */ + +/** + * Creates a record analysis tool that filters the #trace_entry_t records of an offline + * trace. 
Streams through each shard independently and in parallel, and writes the + * filtered version to the output directory with the same base name. Serial mode is not + * yet supported. The options specify the filter(s) to employ. + * + * @param[in] output_dir The destination directory for the new filtered trace. + * @param[in] stop_timestamp Disables filtering (outputs everything) once a timestamp + * equal to or greater than this value is seen. + * @param[in] cache_filter_size Enables a data cache filter with the given size in + * bytes with 64-byte lines and a direct mapped LRU cache. + * @param[in] remove_trace_types A comma-separated list of integers of #trace_type_t + * types to remove. + * @param[in] remove_marker_types A comma-separated list of integers of + * #trace_marker_type_t marker types to remove. + * @param[in] trim_before_timestamp Trim records from the trace's initial timestamp + * up to its first timestamp whose value is greater than or equal to this parameter. + * @param[in] trim_after_timestamp Trim records after the trace's first timestamp + * whose value is greater than this parameter. + */ +record_analysis_tool_t * +record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp, + int cache_filter_size, const std::string &remove_trace_types, + const std::string &remove_marker_types, + uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, + unsigned int verbose); + +} // namespace drmemtrace +} // namespace dynamorio + +#endif /* _RECORD_FILTER_CREATE_H_ */ diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h new file mode 100644 index 00000000000..30b8395156f --- /dev/null +++ b/clients/drcachesim/tools/filter/trim_filter.h @@ -0,0 +1,123 @@ +/* ********************************************************** + * Copyright (c) 2022-2024 Google, Inc. All rights reserved.
+ * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _TRIM_FILTER_H_ +#define _TRIM_FILTER_H_ 1 + +#include "record_filter.h" +#include "trace_entry.h" + +#include +#include +#include + +namespace dynamorio { +namespace drmemtrace { + +// A trimming tool to remove records from the start and end of a trace. To ensure +// alignment across threads, we trim by timestamp. 
Since timestamps are inserted +// only at certain points, this necessarily disallows precise trimming at say certain +// instructions, but at the gain of consistent inter-thread trimming. +class trim_filter_t : public record_filter_t::record_filter_func_t { +public: + trim_filter_t(uint64_t trim_before_timestamp, uint64_t trim_after_timestamp) + : trim_before_timestamp_(trim_before_timestamp) + , trim_after_timestamp_(trim_after_timestamp) + { + // Support 0 to make it easier for users to have no trim-after. + if (trim_after_timestamp_ == 0) { + trim_after_timestamp_ = (std::numeric_limits::max)(); + } + if (trim_after_timestamp_ <= trim_before_timestamp_) { + error_string_ = "Invalid parameters: end must be > start"; + } + } + void * + parallel_shard_init(memtrace_stream_t *shard_stream, + bool partial_trace_filter) override + { + per_shard_t *per_shard = new per_shard_t; + return per_shard; + } + bool + parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + { + per_shard_t *per_shard = reinterpret_cast(shard_data); + if (entry.type == TRACE_TYPE_MARKER && + entry.size == TRACE_MARKER_TYPE_TIMESTAMP) { + // While it seems theoretically nice to keep the timestamp,cpuid that + // is over the threshold so we have a timestamp at the end, that results + // in large time gaps if across a blocking syscall. Trying to edit + // that timestamp a la -align_endpoints is not ideal either as it can + // distort syscall durations. The least-bad solution seems to be to + // keep the regular trace content right up to the timestamp and + // throw away the timestamp. + if (entry.addr < trim_before_timestamp_ || entry.addr > trim_after_timestamp_) + per_shard->in_removed_region = true; + else + per_shard->in_removed_region = false; + } + if (entry.type == TRACE_TYPE_THREAD_EXIT || entry.type == TRACE_TYPE_FOOTER) { + // Don't throw the footer away. (The header is always kept because we + // don't start removing until we see a timestamp marker.)
+ // TODO i#6635: For core-sharded there will be multiple thread exits + // so we need to handle that. For thread-sharded we assume just one. + // (We do not support trimming a single-file multi-window trace). + return true; + } + if (entry.type == TRACE_TYPE_MARKER && + entry.size == TRACE_MARKER_TYPE_WINDOW_ID) { + error_string_ = "Trimming WINDOW_ID markers is not supported"; + } + return !per_shard->in_removed_region; + } + bool + parallel_shard_exit(void *shard_data) override + { + per_shard_t *per_shard = reinterpret_cast(shard_data); + delete per_shard; + return true; + } + +private: + struct per_shard_t { + bool in_removed_region = false; + }; + + uint64_t trim_before_timestamp_; + uint64_t trim_after_timestamp_; +}; + +} // namespace drmemtrace +} // namespace dynamorio + +#endif /* _TRIM_FILTER_H_ */ diff --git a/clients/drcachesim/tools/filter/type_filter.h b/clients/drcachesim/tools/filter/type_filter.h index 4aaaeb02e2c..3dda5c0592f 100644 --- a/clients/drcachesim/tools/filter/type_filter.h +++ b/clients/drcachesim/tools/filter/type_filter.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -90,7 +90,7 @@ class type_filter_t : public record_filter_t::record_filter_func_t { { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { - if (TESTANY(entry.addr, OFFLINE_FILE_TYPE_ENCODINGS) && + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, entry.addr) && !per_shard->partial_trace_filter && remove_trace_types_.find(TRACE_TYPE_ENCODING) != remove_trace_types_.end()) { diff --git a/clients/drcachesim/tools/func_view.cpp b/clients/drcachesim/tools/func_view.cpp index d478fc82cf5..8d82f84a6d6 100644 --- a/clients/drcachesim/tools/func_view.cpp +++ b/clients/drcachesim/tools/func_view.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2020-2023 Google, Inc. All rights reserved. + * Copyright (c) 2020-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -79,8 +79,19 @@ func_view_t::func_view_t(const std::string &funclist_file_path, bool full_trace, } std::string -func_view_t::initialize() +func_view_t::initialize_shard_type(shard_type_t shard_type) { + if (shard_type == SHARD_BY_CORE) { + // We track state that is inherently tied to threads. 
+ return "func_view tool does not support sharding by core"; + } + return ""; +} + +std::string +func_view_t::initialize_stream(memtrace_stream_t *serial_stream) +{ + serial_stream_ = serial_stream; std::vector> entries; raw2trace_directory_t directory_; std::string error = @@ -123,10 +134,12 @@ func_view_t::parallel_shard_supported() } void * -func_view_t::parallel_shard_init(int shard_index, void *worker_data) +func_view_t::parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) { auto shard_data = new shard_data_t; std::lock_guard guard(shard_map_mutex_); + shard_data->tid = stream->get_tid(); shard_map_[shard_index] = shard_data; return shard_data; } @@ -181,8 +194,6 @@ bool func_view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { shard_data_t *shard = reinterpret_cast(shard_data); - if (memref.data.type == TRACE_TYPE_THREAD_EXIT) - shard->tid = memref.exit.tid; if (memref.marker.type != TRACE_TYPE_MARKER) return true; process_memref_for_markers(shard, memref); @@ -193,10 +204,11 @@ bool func_view_t::process_memref(const memref_t &memref) { shard_data_t *shard; - const auto &lookup = shard_map_.find(memref.data.tid); + int shard_index = serial_stream_->get_shard_index(); + const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { shard = new shard_data_t; - shard_map_[memref.data.tid] = shard; + shard_map_[shard_index] = shard; } else shard = lookup->second; process_memref_for_markers(shard, memref); diff --git a/clients/drcachesim/tools/func_view.h b/clients/drcachesim/tools/func_view.h index 1b53fc1dba8..daf7fbb5c00 100644 --- a/clients/drcachesim/tools/func_view.h +++ b/clients/drcachesim/tools/func_view.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2020-2023 Google, Inc. All rights reserved. + * Copyright (c) 2020-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -59,7 +59,9 @@ class func_view_t : public analysis_tool_t { unsigned int verbose = 0); virtual ~func_view_t(); std::string - initialize() override; + initialize_shard_type(shard_type_t shard_type) override; + std::string + initialize_stream(memtrace_stream_t *serial_stream) override; bool process_memref(const memref_t &memref) override; bool @@ -67,7 +69,8 @@ class func_view_t : public analysis_tool_t { bool parallel_shard_supported() override; void * - parallel_shard_init(int shard_index, void *worker_data) override; + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) override; bool parallel_shard_exit(void *shard_data) override; bool @@ -89,7 +92,7 @@ class func_view_t : public analysis_tool_t { // TODO i#4083: Record the arg and retval distributions. }; struct shard_data_t { - memref_tid_t tid = 0; + memref_tid_t tid = 0; // We only support SHARD_BY_THREAD. std::unordered_map func_map; std::string error; // We use the function markers to record arguments and return @@ -130,11 +133,11 @@ class func_view_t : public analysis_tool_t { std::string funclist_file_path_; - // The keys here are shard index for parallel, tid for serial. - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; + memtrace_stream_t *serial_stream_ = nullptr; }; } // namespace drmemtrace diff --git a/clients/drcachesim/tools/histogram.h b/clients/drcachesim/tools/histogram.h index 8728e2a6136..98ec03a1d18 100644 --- a/clients/drcachesim/tools/histogram.h +++ b/clients/drcachesim/tools/histogram.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -89,7 +89,7 @@ class histogram_t : public analysis_tool_t { unsigned int knob_report_top_; /* most accessed lines */ size_t line_size_bits_; static const std::string TOOL_NAME; - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index 8786e193030..16eef9e1f27 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -81,6 +81,20 @@ invariant_checker_t::~invariant_checker_t() { } +std::string +invariant_checker_t::initialize_shard_type(shard_type_t shard_type) +{ + if (shard_type == SHARD_BY_CORE) { + // We track state that is inherently tied to threads. + // + // XXX: If we did get kernel pieces stitching together context switches, + // we could try to check PC continuity. We could also try to enable + // certain other checks for core-sharded. + return "invariant_checker tool does not support sharding by core"; + } + return ""; +} + std::string invariant_checker_t::initialize_stream(memtrace_stream_t *serial_stream) { @@ -93,6 +107,20 @@ invariant_checker_t::report_if_false(per_shard_t *shard, bool condition, const std::string &invariant_name) { if (!condition) { + // TODO i#5505: There are some PC discontinuities in the instr traces + // captured using Intel-PT. Since these are not trivial to solve, we + // turn this into a non-fatal check for the test for now. 
+ if (TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, shard->file_type_) && + knob_test_name_ == "kernel_syscall_pt_trace" && + shard->between_kernel_syscall_trace_markers_ && + (invariant_name == "Non-explicit control flow has no marker" || + // Some discontinuities are flagged as the following. This is + // a false positive of our heuristic to find rseq side exit + // discontinuities. + invariant_name == "PC discontinuity due to rseq side exit" || + invariant_name == "Branch does not go to the correct target")) { + return; + } std::cerr << "Trace invariant failure in T" << shard->tid_ << " at ref # " << shard->stream->get_record_ordinal() << " (" << shard->instr_count_since_last_timestamp_ @@ -116,6 +144,7 @@ invariant_checker_t::parallel_shard_init_stream(int shard_index, void *worker_da per_shard->stream = shard_stream; void *res = reinterpret_cast(per_shard.get()); std::lock_guard guard(shard_map_mutex_); + per_shard->tid_ = shard_stream->get_tid(); shard_map_[shard_index] = std::move(per_shard); return res; } @@ -156,12 +185,18 @@ invariant_checker_t::parallel_shard_error(void *shard_data) return shard->error_; } +bool +invariant_checker_t::is_a_unit_test(per_shard_t *shard) +{ + // Look for a mock stream. 
+ return shard->stream == nullptr || shard->stream->get_input_interface() == nullptr; +} + bool invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { per_shard_t *shard = reinterpret_cast(shard_data); - if (shard->tid_ == -1 && memref.data.tid != 0) - shard->tid_ = memref.data.tid; + report_if_false(shard, shard->tid_ == memref.data.tid, "Shard tid != memref tid"); // We check the memtrace_stream_t counts with our own, unless there was an // instr skip from the start where we cannot compare, or we're in a unit // test with no stream interface, or we're in serial mode (since we want @@ -174,10 +209,15 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem } // XXX: We also can't verify counts with a skip invoked from the middle, but // we have no simple way to detect that here. - if (shard->instr_count_ <= 1 && !shard->skipped_instrs_ && shard->stream != nullptr && - shard->stream->get_instruction_ordinal() > 1) + if (shard->instr_count_ <= 1 && !shard->skipped_instrs_ && !is_a_unit_test(shard) && + shard->stream->get_instruction_ordinal() > 1) { shard->skipped_instrs_ = true; - if (!shard->skipped_instrs_ && shard->stream != nullptr && + if (!shard->saw_filetype_) { + shard->file_type_ = + static_cast(shard->stream->get_filetype()); + } + } + if (!shard->skipped_instrs_ && !is_a_unit_test(shard) && (shard->stream != serial_stream_ || shard_map_.size() == 1)) { report_if_false(shard, shard->ref_count_ == shard->stream->get_record_ordinal(), "Stream record ordinal inaccurate"); @@ -185,6 +225,8 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem shard->instr_count_ == shard->stream->get_instruction_ordinal(), "Stream instr ordinal inaccurate"); } + bool prev_was_syscall_marker_saved = shard->prev_was_syscall_marker_; + shard->prev_was_syscall_marker_ = false; #ifdef UNIX if (has_annotations_) { // Check conditions specific to the signal_invariants app, where it @@ -299,8 
+341,9 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { shard->file_type_ = static_cast(memref.marker.marker_value); + shard->saw_filetype_ = true; report_if_false(shard, - shard->stream == nullptr || + is_a_unit_test(shard) || shard->file_type_ == shard->stream->get_filetype(), "Stream interface filetype != trace marker"); } @@ -316,7 +359,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem memref.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) { shard->found_cache_line_size_marker_ = true; report_if_false(shard, - shard->stream == nullptr || + is_a_unit_test(shard) || memref.marker.marker_value == shard->stream->get_cache_line_size(), "Stream interface cache line size != trace marker"); @@ -325,15 +368,37 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem memref.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE) { shard->found_page_size_marker_ = true; report_if_false(shard, - shard->stream == nullptr || + is_a_unit_test(shard) || is_a_unit_test(shard) || memref.marker.marker_value == shard->stream->get_page_size(), "Stream interface page size != trace marker"); } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { +#ifdef AARCH64 + static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit + // Vector length must be a multiple of 16 bytes between 16 and 256. 
+ report_if_false(shard, + memref.marker.marker_value > 0 && + memref.marker.marker_value <= MAX_VL_BYTES && + memref.marker.marker_value % 16 == 0, + "Vector length marker has invalid size"); + + const int new_vl_bits = memref.marker.marker_value * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Changing the vector length can change the IR representation of some SVE + // instructions but it doesn't affect any of the metadata that is stored + // in decode_cache_ so we don't need to flush the cache. + } +#else + report_if_false(shard, false, "Unexpected vector length marker"); +#endif + } if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_VERSION) { shard->trace_version_ = memref.marker.marker_value; report_if_false(shard, - shard->stream == nullptr || + is_a_unit_test(shard) || memref.marker.marker_value == shard->stream->get_version(), "Stream interface version != trace marker"); } @@ -344,6 +409,8 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) { shard->found_syscall_marker_ = true; + shard->prev_was_syscall_marker_ = true; + shard->last_syscall_marker_value_ = static_cast(memref.marker.marker_value); ++shard->syscall_count_; // TODO i#5949: For WOW64 instr_is_syscall() always returns false here as it // tries to check adjacent instrs; we disable this check until that is solved. @@ -369,6 +436,9 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL) { shard->found_blocking_marker_ = true; + // Re-assign the saved value to the shard state to allow this intervening + // maybe_blocking marker.
+ shard->prev_was_syscall_marker_ = prev_was_syscall_marker_saved; report_if_false(shard, shard->prev_entry_.marker.type == TRACE_TYPE_MARKER && shard->prev_entry_.marker.marker_type == @@ -382,7 +452,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem memref.marker.marker_type == TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) { shard->chunk_instr_count_ = memref.marker.marker_value; report_if_false(shard, - shard->stream == nullptr || + is_a_unit_test(shard) || shard->chunk_instr_count_ == shard->stream->get_chunk_instr_count(), "Stream interface chunk instr count != trace marker"); @@ -408,6 +478,58 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem !type_is_prefetch(memref.data.type), "Function marker misplaced between instr and memref"); } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) { + report_if_false(shard, + TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS | + OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, + shard->file_type_), + "Found kernel syscall trace without corresponding file type"); + report_if_false(shard, !shard->between_kernel_syscall_trace_markers_, + "Nested kernel syscall traces are not expected"); + report_if_false(shard, prev_was_syscall_marker_saved, + "System call trace found without prior syscall marker"); + report_if_false(shard, + shard->last_syscall_marker_value_ == + static_cast(memref.marker.marker_value), + "Mismatching syscall num in trace start and syscall marker"); + report_if_false(shard, shard->prev_instr_.decoding.is_syscall, + "prev_instr at syscall trace start is not a syscall"); + shard->pre_syscall_trace_instr_ = shard->prev_instr_; + shard->between_kernel_syscall_trace_markers_ = true; + } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_END) { + report_if_false(shard, shard->between_kernel_syscall_trace_markers_, + "Found kernel syscall 
trace end without start"); + report_if_false(shard, + shard->last_syscall_marker_value_ == + static_cast(memref.marker.marker_value), + "Mismatching syscall num in trace end and syscall marker"); + shard->between_kernel_syscall_trace_markers_ = false; + // For future checks, pretend that the previous instr was the instr just + // before the system call trace start. + if (shard->pre_syscall_trace_instr_.memref.instr.addr > 0) { + // TODO i#5505: Ideally the last instruction in the system call PT trace + // or the system call trace template would be an indirect CTI with a + // TRACE_MARKER_TYPE_BRANCH_TARGET marker pointing to the next user-space + // instr. For PT traces on x86, as also mentioned in the comment in + // ir2trace.cpp, there are noise instructions at the end of the PT syscall + // trace that need to be removed. Also check the kernel-to-user transition + // when that is fixed. + shard->prev_instr_ = shard->pre_syscall_trace_instr_; + shard->pre_syscall_trace_instr_ = {}; + } + } + if (!is_a_unit_test(shard)) { + // XXX: between_kernel_syscall_trace_markers_ does not track the + // TRACE_MARKER_TYPE_CONTEXT_SWITCH_* markers. If the invariant checker is run + // with dynamic injection of context switch sequences this will throw an error. 
+ report_if_false(shard, + shard->between_kernel_syscall_trace_markers_ == + shard->stream->is_record_kernel(), + "Stream is_record_kernel() inaccurate"); + } if (memref.marker.type == TRACE_TYPE_MARKER && marker_type_is_function_marker(memref.marker.marker_type)) { if (memref.marker.marker_type == TRACE_MARKER_TYPE_FUNC_ID) { @@ -463,12 +585,12 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem "Missing instr count markers"); report_if_false(shard, shard->found_cache_line_size_marker_ || - (shard->skipped_instrs_ && shard->stream != nullptr && + (shard->skipped_instrs_ && !is_a_unit_test(shard) && shard->stream->get_cache_line_size() > 0), "Missing cache line marker"); report_if_false(shard, shard->found_page_size_marker_ || - (shard->skipped_instrs_ && shard->stream != nullptr && + (shard->skipped_instrs_ && !is_a_unit_test(shard) && shard->stream->get_page_size() > 0), "Missing page size marker"); report_if_false( @@ -498,8 +620,12 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem } if (!TESTANY(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED, shard->file_type_)) { - report_if_false(shard, type_is_instr(shard->prev_instr_.memref.instr.type), - "An unfiltered thread should have at least 1 instruction"); + report_if_false( + shard, + type_is_instr(shard->prev_instr_.memref.instr.type) || + shard->prev_instr_.memref.instr.type == TRACE_TYPE_PREFETCH_INSTR || + shard->prev_instr_.memref.instr.type == TRACE_TYPE_INSTR_NO_FETCH, + "An unfiltered thread should have at least 1 instruction"); } } if (shard->prev_entry_.marker.type == TRACE_TYPE_MARKER && @@ -584,10 +710,14 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem report_if_false(shard, shard->expected_write_records_ == 0, "Missing write records"); - shard->expected_read_records_ = - cur_instr_info.decoding.num_memory_read_access; - shard->expected_write_records_ = - 
cur_instr_info.decoding.num_memory_write_access; + if (!(shard->between_kernel_syscall_trace_markers_ && + TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, + shard->file_type_))) { + shard->expected_read_records_ = + cur_instr_info.decoding.num_memory_read_access; + shard->expected_write_records_ = + cur_instr_info.decoding.num_memory_write_access; + } } } // We need to assign the memref variable of cur_instr_info here. The memref @@ -921,12 +1051,14 @@ bool invariant_checker_t::process_memref(const memref_t &memref) { per_shard_t *per_shard; - const auto &lookup = shard_map_.find(memref.data.tid); + int shard_index = serial_stream_->get_shard_index(); + const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { auto per_shard_unique = std::unique_ptr(new per_shard_t); per_shard = per_shard_unique.get(); per_shard->stream = serial_stream_; - shard_map_[memref.data.tid] = std::move(per_shard_unique); + per_shard->tid_ = serial_stream_->get_tid(); + shard_map_[shard_index] = std::move(per_shard_unique); } else per_shard = lookup->second.get(); if (!parallel_shard_memref(reinterpret_cast(per_shard), memref)) { @@ -1123,6 +1255,9 @@ invariant_checker_t::check_for_pc_discontinuity( shard->file_type_) || // Regular fall-through. (fall_through_allowed && prev_instr_trace_pc + prev_instr.instr.size == cur_pc) || + // First instr of kernel system call trace. + (shard->between_kernel_syscall_trace_markers_ && + shard->prev_instr_.decoding.is_syscall) || // String loop. (prev_instr_trace_pc == cur_pc && (cur_memref_info.memref.instr.type == TRACE_TYPE_INSTR_NO_FETCH || diff --git a/clients/drcachesim/tools/invariant_checker.h b/clients/drcachesim/tools/invariant_checker.h index e8b9def5f2d..80c75a0af58 100644 --- a/clients/drcachesim/tools/invariant_checker.h +++ b/clients/drcachesim/tools/invariant_checker.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. 
+ * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -89,6 +89,8 @@ class invariant_checker_t : public analysis_tool_t { std::istream *cpu_schedule_file = nullptr); virtual ~invariant_checker_t(); std::string + initialize_shard_type(shard_type_t shard_type) override; + std::string initialize_stream(memtrace_stream_t *serial_stream) override; bool process_memref(const memref_t &memref) override; @@ -183,6 +185,8 @@ class invariant_checker_t : public analysis_tool_t { bool found_instr_count_marker_ = false; bool found_page_size_marker_ = false; bool found_syscall_marker_ = false; + bool prev_was_syscall_marker_ = false; + int last_syscall_marker_value_ = 0; bool found_blocking_marker_ = false; uint64_t syscall_count_ = 0; uint64_t last_instr_count_marker_ = 0; @@ -195,6 +199,7 @@ class invariant_checker_t : public analysis_tool_t { // operation. addr_t app_handler_pc_ = 0; offline_file_type_t file_type_ = OFFLINE_FILE_TYPE_DEFAULT; + bool saw_filetype_ = false; uintptr_t last_window_ = 0; bool window_transition_ = false; uint64_t chunk_instr_count_ = 0; @@ -214,6 +219,8 @@ class invariant_checker_t : public analysis_tool_t { // Counters for expected read and write records. int expected_read_records_ = 0; int expected_write_records_ = 0; + bool between_kernel_syscall_trace_markers_ = false; + instr_info_t pre_syscall_trace_instr_; }; // We provide this for subclasses to run these invariants with custom @@ -226,6 +233,9 @@ class invariant_checker_t : public analysis_tool_t { virtual void check_schedule_data(per_shard_t *global_shard); + virtual bool + is_a_unit_test(per_shard_t *shard); + // Check for invariant violations caused by PC discontinuities. Return an error string // for such violations. 
std::string @@ -235,8 +245,7 @@ class invariant_checker_t : public analysis_tool_t { bool expect_encoding, bool at_kernel_event); void *drcontext_ = dr_standalone_init(); - // The keys here are int for parallel, tid for serial. - std::unordered_map> shard_map_; + std::unordered_map> shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 5389829c5ac..995c810a578 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -164,6 +165,17 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) " but tool built for " + trace_arch_string(build_target_arch_type()); return false; } + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { +#ifdef AARCH64 + const int new_vl_bits = memref.marker.marker_value * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Changing the vector length can change the IR representation of some SVE + // instructions but it will never change the opcode so we don't need to + // flush the opcode cache. + } +#endif } if (!type_is_instr(memref.instr.type) && memref.data.type != TRACE_TYPE_INSTR_NO_FETCH) { @@ -178,7 +190,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) decode_pc = const_cast(memref.instr.encoding); if (memref.instr.encoding_is_new) { // The code may have changed: invalidate the cache. - shard->worker->opcode_cache.erase(trace_pc); + shard->worker->opcode_data_cache.erase(trace_pc); } } else { // Legacy trace support where we need the binaries. 
@@ -210,9 +222,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } } int opcode; - auto cached_opcode = shard->worker->opcode_cache.find(trace_pc); - if (cached_opcode != shard->worker->opcode_cache.end()) { - opcode = cached_opcode->second; + uint category; + auto cached_opcode_category = shard->worker->opcode_data_cache.find(trace_pc); + if (cached_opcode_category != shard->worker->opcode_data_cache.end()) { + opcode = cached_opcode_category->second.opcode; + category = cached_opcode_category->second.category; } else { instr_t instr; instr_init(dcontext_.dcontext, &instr); @@ -225,10 +239,12 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) return false; } opcode = instr_get_opcode(&instr); - shard->worker->opcode_cache[trace_pc] = opcode; + category = instr_get_category(&instr); + shard->worker->opcode_data_cache[trace_pc] = opcode_data_t(opcode, category); instr_free(dcontext_.dcontext, &instr); } ++shard->opcode_counts[opcode]; + ++shard->category_counts[category]; return true; } @@ -252,7 +268,37 @@ opcode_mix_t::process_memref(const memref_t &memref) static bool cmp_val(const std::pair &l, const std::pair &r) { - return (l.second > r.second); + return (l.second > r.second) || (l.second == r.second && l.first < r.first); +} + +std::string +opcode_mix_t::get_category_names(uint category) +{ + std::string category_name; + if (category == DR_INSTR_CATEGORY_UNCATEGORIZED) { + category_name += instr_get_category_name(DR_INSTR_CATEGORY_UNCATEGORIZED); + return category_name; + } + + const uint max_mask = 0x80000000; + for (uint mask = 0x1; mask <= max_mask; mask <<= 1) { + if (TESTANY(mask, category)) { + if (category_name.length() > 0) { + category_name += " "; + } + category_name += + instr_get_category_name(static_cast(mask)); + } + + /* + * Guard against 32 bit overflow. 
+ */ + if (mask == max_mask) { + break; + } + } + + return category_name; } bool @@ -267,6 +313,9 @@ opcode_mix_t::print_results() for (const auto &keyvals : shard.second->opcode_counts) { total.opcode_counts[keyvals.first] += keyvals.second; } + for (const auto &keyvals : shard.second->category_counts) { + total.category_counts[keyvals.first] += keyvals.second; + } } } std::cerr << TOOL_NAME << " results:\n"; @@ -278,6 +327,123 @@ opcode_mix_t::print_results() std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) << decode_opcode_name(keyvals.first) << "\n"; } + std::cerr << "\n"; + std::cerr << std::setw(15) << total.category_counts.size() + << " : sets of categories\n"; + std::vector> sorted_category_counts( + total.category_counts.begin(), total.category_counts.end()); + std::sort(sorted_category_counts.begin(), sorted_category_counts.end(), cmp_val); + for (const auto &keyvals : sorted_category_counts) { + std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + << get_category_names(keyvals.first) << "\n"; + } + + return true; +} + +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::generate_interval_snapshot(uint64_t interval_id) +{ + return generate_shard_interval_snapshot(&serial_shard_, interval_id); +} + +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) +{ + assert(shard_data != nullptr); + auto &shard = *reinterpret_cast(shard_data); + auto *snap = new snapshot_t; + snap->opcode_counts_ = shard.opcode_counts; + snap->category_counts_ = shard.category_counts; + return snap; +} + +bool +opcode_mix_t::finalize_interval_snapshots( + std::vector &interval_snapshots) +{ + // Loop through snapshots in reverse order, subtracting the *earlier* + // snapshot's cumulative values from this snapshot's cumulative values, to get + // deltas. The first snapshot needs no updates, obviously. 
+ for (int i = static_cast(interval_snapshots.size()) - 1; i > 0; --i) { + auto &this_snap = *reinterpret_cast(interval_snapshots[i]); + auto &prior_snap = *reinterpret_cast(interval_snapshots[i - 1]); + for (auto &opc_count : this_snap.opcode_counts_) { + opc_count.second -= prior_snap.opcode_counts_[opc_count.first]; + } + for (auto &cat_count : this_snap.category_counts_) { + cat_count.second -= prior_snap.category_counts_[cat_count.first]; + } + } + return true; +} + +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::combine_interval_snapshots( + const std::vector latest_shard_snapshots, + uint64_t interval_end_timestamp) +{ + snapshot_t *super_snap = new snapshot_t; + for (const interval_state_snapshot_t *base_snap : latest_shard_snapshots) { + const auto *snap = reinterpret_cast(base_snap); + // Skip nullptrs and snapshots from different intervals. + if (snap == nullptr || + snap->get_interval_end_timestamp() != interval_end_timestamp) { + continue; + } + for (const auto opc_count : snap->opcode_counts_) { + super_snap->opcode_counts_[opc_count.first] += opc_count.second; + } + for (const auto cat_count : snap->category_counts_) { + super_snap->category_counts_[cat_count.first] += cat_count.second; + } + } + return super_snap; +} + +bool +opcode_mix_t::print_interval_results( + const std::vector &interval_snapshots) +{ + // Number of opcodes and categories to print per interval. 
+ constexpr int PRINT_TOP_N = 3; + std::cerr << "There were " << interval_snapshots.size() << " intervals created.\n"; + for (auto *base_snap : interval_snapshots) { + const auto *snap = reinterpret_cast(base_snap); + std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " + << snap->get_instr_count_cumulative() << " has " + << snap->opcode_counts_.size() << " opcodes" + << " and " << snap->category_counts_.size() << " categories.\n"; + std::vector> sorted(snap->opcode_counts_.begin(), + snap->opcode_counts_.end()); + std::sort(sorted.begin(), sorted.end(), cmp_val); + for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { + std::cerr << " [" << i + 1 << "]" + << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" + << sorted[i].first << ")" + << " Count=" << sorted[i].second << " PKI=" + << sorted[i].second * 1000.0 / snap->get_instr_count_delta() + << "\n"; + } + std::vector> sorted_cats(snap->category_counts_.begin(), + snap->category_counts_.end()); + std::sort(sorted_cats.begin(), sorted_cats.end(), cmp_val); + for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted_cats.size()); + ++i) { + std::cerr << " [" << i + 1 << "]" + << " Category=" << get_category_names(sorted_cats[i].first) + << " Count=" << sorted_cats[i].second << " PKI=" + << sorted_cats[i].second * 1000.0 / snap->get_instr_count_delta() + << "\n"; + } + } + return true; +} + +bool +opcode_mix_t::release_interval_snapshot(interval_state_snapshot_t *interval_snapshot) +{ + delete interval_snapshot; return true; } diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 062de8e6a5b..526f27cdd43 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2018-2023 Google, Inc. All rights reserved. + * Copyright (c) 2018-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "dr_api.h" // Must be before trace_entry.h from analysis_tool.h. @@ -81,9 +82,61 @@ class opcode_mix_t : public analysis_tool_t { std::string parallel_shard_error(void *shard_data) override; + // Interval support. + interval_state_snapshot_t * + generate_interval_snapshot(uint64_t interval_id) override; + interval_state_snapshot_t * + combine_interval_snapshots( + const std::vector latest_shard_snapshots, + uint64_t interval_end_timestamp) override; + bool + print_interval_results( + const std::vector &interval_snapshots) override; + bool + release_interval_snapshot(interval_state_snapshot_t *interval_snapshot) override; + interval_state_snapshot_t * + generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) override; + + // Convert the captured cumulative snapshots to deltas. + bool + finalize_interval_snapshots( + std::vector &interval_snapshots) override; + protected: + std::string + get_category_names(uint category); + + struct opcode_data_t { + opcode_data_t() + : opcode(OP_INVALID) + , category(DR_INSTR_CATEGORY_UNCATEGORIZED) + { + } + opcode_data_t(int opcode, uint category) + : opcode(opcode) + , category(category) + { + } + int opcode; + /* + * The category field is a uint instead of a dr_instr_category_t because + * multiple category bits can be set when an instruction belongs to more + * than one category. We assume 32 bits (i.e., 32 categories) is enough + * to be future-proof. + */ + uint category; + }; + + class snapshot_t : public interval_state_snapshot_t { + public: + // Snapshot the counts as cumulative stats, and then convert them to deltas in + // finalize_interval_snapshots(). Printed interval results are all deltas.
+ std::unordered_map opcode_counts_; + std::unordered_map category_counts_; + }; + struct worker_data_t { - std::unordered_map opcode_cache; + std::unordered_map opcode_data_cache; }; struct shard_data_t { @@ -103,6 +156,7 @@ class opcode_mix_t : public analysis_tool_t { worker_data_t *worker; int64_t instr_count; std::unordered_map opcode_counts; + std::unordered_map category_counts; std::string error; app_pc last_trace_module_start; size_t last_trace_module_size; @@ -135,7 +189,7 @@ class opcode_mix_t : public analysis_tool_t { // must match ours. raw2trace_directory_t directory_; - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index 44ce31cabc5..746481d79d5 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -32,13 +32,16 @@ /* Standalone record filter tool launcher for file traces. */ +#ifdef WINDOWS +# define NOMINMAX // Avoid windows.h messing up std::max. +# define UNICODE // For Windows headers. +# define _UNICODE // For C headers. 
+#endif + #include "analyzer.h" #include "droption.h" #include "dr_frontend.h" -#include "tools/filter/null_filter.h" -#include "tools/filter/cache_filter.h" -#include "tools/filter/type_filter.h" -#include "tools/filter/record_filter.h" +#include "tools/filter/record_filter_create.h" #include "tests/test_helpers.h" #include @@ -77,8 +80,10 @@ static droption_t op_verbose(DROPTION_SCOPE_ALL, "verbose", 0, 0, "Verbosity level for notifications."); static droption_t + // Wrap max in parens to work around Visual Studio compiler issues with the + // max macro (even despite NOMINMAX defined above). op_stop_timestamp(DROPTION_SCOPE_ALL, "stop_timestamp", 0, 0, - std::numeric_limits::max(), + (std::numeric_limits::max)(), "Timestamp (in us) in the trace when to stop filtering.", "Record filtering will be disabled (everything will be output) " "when the tool sees a TRACE_MARKER_TYPE_TIMESTAMP marker with " @@ -86,37 +91,35 @@ static droption_t static droption_t op_cache_filter_size( DROPTION_SCOPE_FRONTEND, "cache_filter_size", 0, - "[Required] Enable data cache filter with given size (in bytes).", + "Enable data cache filter with given size (in bytes).", "Enable data cache filter with given size (in bytes), with 64 byte " "line size and a direct mapped LRU cache."); -static droption_t op_remove_trace_types( - DROPTION_SCOPE_FRONTEND, "remove_trace_types", "", - "[Required] Comma-separated integers for trace types to remove.", - "Comma-separated integers for trace types to remove. " - "See trace_type_t for the list of trace entry types."); - -static droption_t op_remove_marker_types( - DROPTION_SCOPE_FRONTEND, "remove_marker_types", "", - "[Required] Comma-separated integers for marker types to remove.", - "Comma-separated integers for marker types to remove. 
" - "See trace_marker_type_t for the list of marker types."); - -template -std::vector -parse_string(const std::string &s, char sep = ',') -{ - size_t pos, at = 0; - if (s.empty()) - return {}; - std::vector vec; - do { - pos = s.find(sep, at); - vec.push_back(static_cast(std::stoi(s.substr(at, pos)))); - at = pos + 1; - } while (pos != std::string::npos); - return vec; -} +static droption_t + op_remove_trace_types(DROPTION_SCOPE_FRONTEND, "remove_trace_types", "", + "Comma-separated integers for trace types to remove.", + "Comma-separated integers for trace types to remove. " + "See trace_type_t for the list of trace entry types."); + +static droption_t + op_remove_marker_types(DROPTION_SCOPE_FRONTEND, "remove_marker_types", "", + "Comma-separated integers for marker types to remove.", + "Comma-separated integers for marker types to remove. " + "See trace_marker_type_t for the list of marker types."); + +static droption_t op_trim_before_timestamp( + DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, + (std::numeric_limits::max)(), + "Trim records until this timestamp (in us) in the trace.", + "Removes all records (after headers) before the first TRACE_MARKER_TYPE_TIMESTAMP " + "marker in the trace with timestamp greater than or equal to the specified value."); + +static droption_t op_trim_after_timestamp( + DROPTION_SCOPE_ALL, "trim_after_timestamp", (std::numeric_limits::max)(), 0, + (std::numeric_limits::max)(), + "Trim records after this timestamp (in us) in the trace.", + "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " + "timestamp larger than the specified value."); } // namespace @@ -125,6 +128,10 @@ _tmain(int argc, const TCHAR *targv[]) { disable_popups(); +#if defined(WINDOWS) && !defined(_UNICODE) +# error _UNICODE must be defined +#endif + char **argv; drfront_status_t sc = drfront_convert_args(targv, &argv, argc); if (sc != DRFRONT_SUCCESS) @@ -138,40 +145,18 @@ _tmain(int argc, const TCHAR *targv[]) 
droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); } - std::vector< - std::unique_ptr> - filter_funcs; - if (op_cache_filter_size.specified()) { - filter_funcs.emplace_back( - std::unique_ptr( - // XXX: add more command-line options to allow the user to set these - // parameters. - new dynamorio::drmemtrace::cache_filter_t( - /*cache_associativity=*/1, /*cache_line_size=*/64, - op_cache_filter_size.get_value(), - /*filter_data=*/true, /*filter_instrs=*/false))); - } - if (op_remove_trace_types.specified() || op_remove_marker_types.specified()) { - std::vector filter_trace_types = - parse_string(op_remove_trace_types.get_value()); - std::vector filter_marker_types = - parse_string(op_remove_marker_types.get_value()); - filter_funcs.emplace_back( - std::unique_ptr( - new dynamorio::drmemtrace::type_filter_t(filter_trace_types, - filter_marker_types))); - } - // TODO i#5675: Add other filters. - auto record_filter = std::unique_ptr( - new dynamorio::drmemtrace::record_filter_t( - op_output_dir.get_value(), std::move(filter_funcs), - op_stop_timestamp.get_value(), op_verbose.get_value())); + dynamorio::drmemtrace::record_filter_tool_create( + op_output_dir.get_value(), op_stop_timestamp.get_value(), + op_cache_filter_size.get_value(), op_remove_trace_types.get_value(), + op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(), + op_trim_after_timestamp.get_value(), op_verbose.get_value())); std::vector tools; tools.push_back(record_filter.get()); - record_analyzer_t record_analyzer(op_trace_dir.get_value(), &tools[0], - (int)tools.size()); + record_analyzer_t record_analyzer( + op_trace_dir.get_value(), &tools[0], (int)tools.size(), /*worker_count=*/0, + /*skip_instrs=*/0, /*interval_microseconds=*/0, op_verbose.get_value()); if (!record_analyzer) { FATAL_ERROR("Failed to initialize trace filter: %s", record_analyzer.get_error_string().c_str()); diff --git a/clients/drcachesim/tools/reuse_distance.cpp 
b/clients/drcachesim/tools/reuse_distance.cpp index edb29a5cf1b..7c3cb7627b3 100644 --- a/clients/drcachesim/tools/reuse_distance.cpp +++ b/clients/drcachesim/tools/reuse_distance.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -85,6 +85,20 @@ reuse_distance_t::~reuse_distance_t() } } +std::string +reuse_distance_t::initialize_stream(memtrace_stream_t *serial_stream) +{ + serial_stream_ = serial_stream; + return ""; +} + +std::string +reuse_distance_t::initialize_shard_type(shard_type_t shard_type) +{ + shard_type_ = shard_type; + return ""; +} + reuse_distance_t::shard_data_t::shard_data_t(uint64_t reuse_threshold, uint64_t skip_dist, uint32_t distance_limit, bool verify) : distance_limit(distance_limit) @@ -100,11 +114,14 @@ reuse_distance_t::parallel_shard_supported() } void * -reuse_distance_t::parallel_shard_init(int shard_index, void *worker_data) +reuse_distance_t::parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) { auto shard = new shard_data_t(knobs_.distance_threshold, knobs_.skip_list_distance, knobs_.distance_limit, knobs_.verify_skip); std::lock_guard guard(shard_map_mutex_); + shard->core = stream->get_output_cpuid(); + shard->tid = stream->get_tid(); shard_map_[shard_index] = shard; return reinterpret_cast(shard); } @@ -138,10 +155,6 @@ reuse_distance_t::parallel_shard_memref(void *shard_data, const memref_t &memref } std::cerr << "\n"; }); - if (memref.data.type == TRACE_TYPE_THREAD_EXIT) { - shard->tid = memref.exit.tid; - return true; - } bool is_instr_type = type_is_instr(memref.instr.type); if (is_instr_type || memref.data.type == TRACE_TYPE_READ || memref.data.type == TRACE_TYPE_WRITE || @@ -198,13 +211,15 @@ reuse_distance_t::parallel_shard_memref(void *shard_data, const memref_t 
&memref bool reuse_distance_t::process_memref(const memref_t &memref) { - // For serial operation we index using the tid. shard_data_t *shard; - const auto &lookup = shard_map_.find(memref.data.tid); + int shard_index = serial_stream_->get_shard_index(); + const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { shard = new shard_data_t(knobs_.distance_threshold, knobs_.skip_list_distance, knobs_.distance_limit, knobs_.verify_skip); - shard_map_[memref.data.tid] = shard; + shard->core = serial_stream_->get_output_cpuid(); + shard->tid = serial_stream_->get_tid(); + shard_map_[shard_index] = shard; } else shard = lookup->second; if (!parallel_shard_memref(reinterpret_cast(shard), memref)) { @@ -498,15 +513,19 @@ reuse_distance_t::print_results() } if (shard_map_.size() > 1) { - using keyval_t = std::pair; + using keyval_t = std::pair; std::vector sorted(shard_map_.begin(), shard_map_.end()); std::sort(sorted.begin(), sorted.end(), [](const keyval_t &l, const keyval_t &r) { return l.second->total_refs > r.second->total_refs; }); for (const auto &shard : sorted) { std::cerr << "\n==================================================\n" - << TOOL_NAME << " results for shard " << shard.first << " (thread " - << shard.second->tid << "):\n"; + << TOOL_NAME << " results for shard " << shard.first; + if (shard_type_ == SHARD_BY_THREAD) + std::cerr << " (thread " << shard.second->tid; + else + std::cerr << " (core " << shard.second->core; + std::cerr << "):\n"; print_shard_results(shard.second); } } diff --git a/clients/drcachesim/tools/reuse_distance.h b/clients/drcachesim/tools/reuse_distance.h index 7f1d4892982..0a69426c119 100644 --- a/clients/drcachesim/tools/reuse_distance.h +++ b/clients/drcachesim/tools/reuse_distance.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -79,6 +79,10 @@ class reuse_distance_t : public analysis_tool_t { public: explicit reuse_distance_t(const reuse_distance_knobs_t &knobs); ~reuse_distance_t() override; + std::string + initialize_stream(memtrace_stream_t *serial_stream) override; + std::string + initialize_shard_type(shard_type_t shard_type) override; bool process_memref(const memref_t &memref) override; bool @@ -86,7 +90,8 @@ class reuse_distance_t : public analysis_tool_t { bool parallel_shard_supported() override; void * - parallel_shard_init(int shard_index, void *worker_data) override; + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) override; bool parallel_shard_exit(void *shard_data) override; bool @@ -129,9 +134,8 @@ class reuse_distance_t : public analysis_tool_t { std::unique_ptr ref_list; int64_t total_refs = 0; int64_t data_refs = 0; // Non-instruction reference count. - // Ideally the shard index would be the tid when shard==thread but that's - // not the case today so we store the tid. - memref_tid_t tid; + memref_tid_t tid = 0; // For SHARD_BY_THREAD. + int64_t core = 0; // For SHARD_BY_CORE. std::string error; // Keep a per-shard copy of distance_limit for parallel operation. unsigned int distance_limit = 0; @@ -157,11 +161,12 @@ class reuse_distance_t : public analysis_tool_t { const reuse_distance_knobs_t knobs_; const size_t line_size_bits_; static const std::string TOOL_NAME; - // In parallel operation the keys are "shard indices": just ints. - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. 
std::mutex shard_map_mutex_; + shard_type_t shard_type_ = SHARD_BY_THREAD; + memtrace_stream_t *serial_stream_ = nullptr; }; /* A doubly linked list node for the cache line reference info */ diff --git a/clients/drcachesim/tools/reuse_time.cpp b/clients/drcachesim/tools/reuse_time.cpp index 94fab118861..5a9fcde2ee4 100644 --- a/clients/drcachesim/tools/reuse_time.cpp +++ b/clients/drcachesim/tools/reuse_time.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -80,6 +80,20 @@ reuse_time_t::~reuse_time_t() } } +std::string +reuse_time_t::initialize_stream(memtrace_stream_t *serial_stream) +{ + serial_stream_ = serial_stream; + return ""; +} + +std::string +reuse_time_t::initialize_shard_type(shard_type_t shard_type) +{ + shard_type_ = shard_type; + return ""; +} + bool reuse_time_t::parallel_shard_supported() { @@ -87,10 +101,13 @@ reuse_time_t::parallel_shard_supported() } void * -reuse_time_t::parallel_shard_init(int shard_index, void *worker_data) +reuse_time_t::parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) { auto shard = new shard_data_t(); std::lock_guard guard(shard_map_mutex_); + shard->core = stream->get_output_cpuid(); + shard->tid = stream->get_tid(); shard_map_[shard_index] = shard; return reinterpret_cast(shard); } @@ -126,11 +143,6 @@ reuse_time_t::parallel_shard_memref(void *shard_data, const memref_t &memref) std::cerr << std::endl; } - if (memref.data.type == TRACE_TYPE_THREAD_EXIT) { - shard->tid = memref.exit.tid; - return true; - } - // Only care about data for now. 
if (type_is_instr(memref.instr.type)) { shard->total_instructions++; @@ -159,12 +171,14 @@ reuse_time_t::parallel_shard_memref(void *shard_data, const memref_t &memref) bool reuse_time_t::process_memref(const memref_t &memref) { - // For serial operation we index using the tid. shard_data_t *shard; - const auto &lookup = shard_map_.find(memref.data.tid); + int shard_index = serial_stream_->get_shard_index(); + const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { shard = new shard_data_t(); - shard_map_[memref.data.tid] = shard; + shard->core = serial_stream_->get_output_cpuid(); + shard->tid = serial_stream_->get_tid(); + shard_map_[shard_index] = shard; } else shard = lookup->second; if (!parallel_shard_memref(reinterpret_cast(shard), memref)) { @@ -205,7 +219,15 @@ reuse_time_t::print_shard_results(const shard_data_t *shard) std::partial_sort_copy(shard->reuse_time_histogram.begin(), shard->reuse_time_histogram.end(), sorted.begin(), sorted.end(), cmp_dist_key); + // Limit the output to make it readable and avoid stressing CMake's regex + // in our tests, unless the user requested verbosity. + const int max_print = knob_verbose_ > 0 ? 0 : 10; + int print_count = 0; for (auto it = sorted.begin(); it != sorted.end(); ++it) { + if (max_print > 0 && ++print_count > max_print) { + std::cerr << "... 
(increase verbosity to see entire histogram)\n"; + break; + } double percent = it->second / static_cast(count); cum_percent += percent; std::cerr << std::setw(8) << it->first << std::setw(12) << it->second @@ -241,8 +263,12 @@ reuse_time_t::print_results() }); for (const auto &shard : sorted) { std::cerr << "\n==================================================\n" - << TOOL_NAME << " results for shard " << shard.first << " (thread " - << shard.second->tid << "):\n"; + << TOOL_NAME << " results for shard " << shard.first; + if (shard_type_ == SHARD_BY_THREAD) + std::cerr << " (thread " << shard.second->tid; + else + std::cerr << " (core " << shard.second->core; + std::cerr << "):\n"; print_shard_results(shard.second); } } diff --git a/clients/drcachesim/tools/reuse_time.h b/clients/drcachesim/tools/reuse_time.h index cd522942541..ff64e73b7c3 100644 --- a/clients/drcachesim/tools/reuse_time.h +++ b/clients/drcachesim/tools/reuse_time.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -50,6 +50,10 @@ class reuse_time_t : public analysis_tool_t { public: reuse_time_t(unsigned int line_size, unsigned int verbose); ~reuse_time_t() override; + std::string + initialize_stream(memtrace_stream_t *serial_stream) override; + std::string + initialize_shard_type(shard_type_t shard_type) override; bool process_memref(const memref_t &memref) override; bool @@ -57,7 +61,8 @@ class reuse_time_t : public analysis_tool_t { bool parallel_shard_supported() override; void * - parallel_shard_init(int shard_index, void *worker_data) override; + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) override; bool parallel_shard_exit(void *shard_data) override; bool @@ -73,7 +78,8 @@ class reuse_time_t : public analysis_tool_t { int64_t time_stamp = 0; int64_t total_instructions = 0; std::unordered_map reuse_time_histogram; - memref_tid_t tid; + memref_tid_t tid = 0; // For SHARD_BY_THREAD. + int64_t core = 0; // For SHARD_BY_CORE. std::string error; }; @@ -86,11 +92,12 @@ class reuse_time_t : public analysis_tool_t { static const std::string TOOL_NAME; - // In parallel operation the keys are "shard indices": just ints. - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (process_memref, print_results) we are single-threaded. std::mutex shard_map_mutex_; + shard_type_t shard_type_ = SHARD_BY_THREAD; + memtrace_stream_t *serial_stream_ = nullptr; }; } // namespace drmemtrace diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index b768d85694f..0cc715e1ff1 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. 
+ * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -34,13 +34,6 @@ #include "schedule_stats.h" -#ifdef WINDOWS -# define WIN32_LEAN_AND_MEAN -# include -#else -# include -#endif - #include #include @@ -88,8 +81,7 @@ schedule_stats_t::~schedule_stats_t() std::string schedule_stats_t::initialize_stream(memtrace_stream_t *serial_stream) { - if (serial_stream != nullptr) - return "Only core-sharded operation is supported"; + serial_stream_ = serial_stream; return ""; } @@ -104,8 +96,20 @@ schedule_stats_t::initialize_shard_type(shard_type_t shard_type) bool schedule_stats_t::process_memref(const memref_t &memref) { - error_string_ = "Only core-sharded operation is supported."; - return false; + per_shard_t *per_shard; + const auto &lookup = shard_map_.find(serial_stream_->get_output_cpuid()); + if (lookup == shard_map_.end()) { + per_shard = new per_shard_t; + per_shard->stream = serial_stream_; + per_shard->core = serial_stream_->get_output_cpuid(); + shard_map_[per_shard->core] = per_shard; + } else + per_shard = lookup->second; + if (!parallel_shard_memref(reinterpret_cast(per_shard), memref)) { + error_string_ = per_shard->error; + return false; + } + return true; } bool @@ -122,6 +126,7 @@ schedule_stats_t::parallel_shard_init_stream(int shard_index, void *worker_data, std::lock_guard guard(shard_map_mutex_); per_shard->stream = stream; per_shard->core = stream->get_output_cpuid(); + per_shard->filetype = static_cast(stream->get_filetype()); shard_map_[shard_index] = per_shard; return reinterpret_cast(per_shard); } @@ -151,20 +156,7 @@ schedule_stats_t::parallel_shard_error(void *shard_data) uint64_t schedule_stats_t::get_current_microseconds() { -#ifdef UNIX - struct timeval time; - if (gettimeofday(&time, nullptr) != 0) - return 0; - return time.tv_sec * 1000000 + time.tv_usec; -#else - SYSTEMTIME sys_time; - GetSystemTime(&sys_time); - FILETIME file_time; - if 
(!SystemTimeToFileTime(&sys_time, &file_time)) - return 0; - return file_time.dwLowDateTime + - (static_cast(file_time.dwHighDateTime) << 32); -#endif + return get_microsecond_timestamp(); } bool @@ -175,12 +167,13 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref static constexpr char WAIT_SYMBOL = '-'; static constexpr char IDLE_SYMBOL = '_'; per_shard_t *shard = reinterpret_cast(shard_data); + int64_t input_id = shard->stream->get_input_id(); if (knob_verbose_ >= 4) { std::ostringstream line; line << "Core #" << std::setw(2) << shard->core << " @" << std::setw(9) << shard->stream->get_record_ordinal() << " refs, " << std::setw(9) << shard->stream->get_instruction_ordinal() << " instrs: input " - << std::setw(4) << shard->stream->get_input_id() << " @" << std::setw(9) + << std::setw(4) << input_id << " @" << std::setw(9) << shard->stream->get_input_interface()->get_record_ordinal() << " refs, " << std::setw(9) << shard->stream->get_input_interface()->get_instruction_ordinal() @@ -197,10 +190,12 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref // Cache and reset here to ensure we reset on early return paths. bool was_wait = shard->prev_was_wait; bool was_idle = shard->prev_was_idle; - int64_t prev_input = shard->prev_input; + int64_t prev_workload_id = shard->prev_workload_id; + int64_t prev_tid = shard->prev_tid; shard->prev_was_wait = false; shard->prev_was_idle = false; - shard->prev_input = -1; + shard->prev_workload_id = -1; + shard->prev_tid = -1; if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { ++shard->counters.waits; @@ -237,8 +232,16 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } return true; } - int64_t input = shard->stream->get_input_id(); - if (input != prev_input) { + // We use to detect switches (instead of input_id) to handle + // core-sharded-on-disk. 
However, we still prefer the input_id ordinal + // for the letters. + int64_t workload_id = shard->stream->get_workload_id(); + int64_t tid = shard->stream->get_tid(); + int64_t letter_ord = + (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, shard->filetype) || input_id < 0) + ? tid + : input_id; + if ((workload_id != prev_workload_id || tid != prev_tid) && tid != IDLE_THREAD_ID) { // We convert to letters which only works well for <=26 inputs. if (!shard->thread_sequence.empty()) { ++shard->counters.total_switches; @@ -248,7 +251,7 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref ++shard->counters.direct_switches; } shard->thread_sequence += - THREAD_LETTER_INITIAL_START + static_cast(input % 26); + THREAD_LETTER_INITIAL_START + static_cast(letter_ord % 26); shard->cur_segment_instrs = 0; if (!was_wait && !was_idle && shard->segment_start_microseconds > 0) { shard->counters.cpu_microseconds += @@ -260,7 +263,7 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref line << "Core #" << std::setw(2) << shard->core << " @" << std::setw(9) << shard->stream->get_record_ordinal() << " refs, " << std::setw(9) << shard->stream->get_instruction_ordinal() << " instrs: input " - << std::setw(4) << input << " @" << std::setw(9) + << std::setw(4) << input_id << " @" << std::setw(9) << shard->stream->get_input_interface()->get_record_ordinal() << " refs, " << std::setw(9) << shard->stream->get_input_interface()->get_instruction_ordinal() @@ -273,14 +276,15 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref std::cerr << line.str(); } } - shard->prev_input = input; + shard->prev_workload_id = workload_id; + shard->prev_tid = tid; if (type_is_instr(memref.instr.type)) { ++shard->counters.instrs; ++shard->cur_segment_instrs; shard->counters.idle_micros_at_last_instr = shard->counters.idle_microseconds; if (shard->cur_segment_instrs == knob_print_every_) { shard->thread_sequence += - 
THREAD_LETTER_SUBSEQUENT_START + static_cast(input % 26); + THREAD_LETTER_SUBSEQUENT_START + static_cast(letter_ord % 26); shard->cur_segment_instrs = 0; } shard->direct_switch_target = INVALID_THREAD_ID; @@ -300,6 +304,8 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH) { ++shard->counters.direct_switch_requests; shard->direct_switch_target = memref.marker.marker_value; + } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { + shard->filetype = static_cast(memref.marker.marker_value); } } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) shard->saw_exit = true; diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h index f3cff0708c3..1a08b7670fc 100644 --- a/clients/drcachesim/tools/schedule_stats.h +++ b/clients/drcachesim/tools/schedule_stats.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -120,7 +120,8 @@ class schedule_stats_t : public analysis_tool_t { memtrace_stream_t *stream = nullptr; int64_t core = 0; // We target core-sharded. counters_t counters; - int64_t prev_input = -1; + int64_t prev_workload_id = -1; + int64_t prev_tid = INVALID_THREAD_ID; // These are cleared when an instruction is seen. bool saw_syscall = false; memref_tid_t direct_switch_target = INVALID_THREAD_ID; @@ -132,6 +133,7 @@ class schedule_stats_t : public analysis_tool_t { bool prev_was_idle = false; // Computing %-idle. uint64_t segment_start_microseconds = 0; + intptr_t filetype = 0; }; void @@ -147,7 +149,7 @@ class schedule_stats_t : public analysis_tool_t { unsigned int knob_verbose_ = 0; // We use an ordered map to get our output in order. 
This table is not // used on the hot path so its performance does not matter. - std::map shard_map_; + std::map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map (in print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/syscall_mix.h b/clients/drcachesim/tools/syscall_mix.h index 04cfb449f81..e0707fcdea7 100644 --- a/clients/drcachesim/tools/syscall_mix.h +++ b/clients/drcachesim/tools/syscall_mix.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -75,7 +75,7 @@ class syscall_mix_t : public analysis_tool_t { std::string error; }; - std::unordered_map shard_map_; + std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to // shard_map_ (print_results) we are single-threaded. std::mutex shard_map_mutex_; diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 3a07e944671..2d7391dcee6 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -425,6 +425,14 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) std::cerr << "\n"; break; + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START: + std::cerr << "\n"; + break; + case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END: + std::cerr << "\n"; + break; case TRACE_MARKER_TYPE_BRANCH_TARGET: // These are not expected to be visible (since the reader adds them // to memref.instr.indirect_branch_target) but we handle nonetheless. 
@@ -435,6 +443,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) std::cerr << "\n"; break; case TRACE_MARKER_TYPE_CORE_IDLE: std::cerr << "\n"; break; + case TRACE_MARKER_TYPE_VECTOR_LENGTH: + std::cerr << "\n"; + break; default: std::cerr << "\n"; diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 96ff48a21f2..bf3e63564e5 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -397,6 +397,15 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); +#if defined(AARCH64) + // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the + // initial vector length for the thread, but the marker can also be emitted again + // later if the app changes the vector length. + if (proc_has_feature(FEATURE_SVE)) { + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, + proc_get_vector_length_bytes()); + } +#endif return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index c2d78050f8d..58b234f915f 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -179,6 +179,15 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); +#if defined(AARCH64) + // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the + // initial vector length for the thread, but the marker can also be emitted again + // later if the app changes the vector length. 
+ if (proc_has_feature(FEATURE_SVE)) { + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, + proc_get_vector_length_bytes()); + } +#endif return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/kcore_copy.cpp b/clients/drcachesim/tracer/kcore_copy.cpp index 962ab73f700..0b6f7a08543 100644 --- a/clients/drcachesim/tracer/kcore_copy.cpp +++ b/clients/drcachesim/tracer/kcore_copy.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2023 Google, Inc. All rights reserved. + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -38,6 +38,7 @@ #include #include +#include #include #include "dr_api.h" @@ -374,6 +375,14 @@ kcore_copy_t::read_modules() return true; } +static bool +is_function_symbol(char type) +{ + // From man nm, "t"/"T" are symbols from the code section, + // and "w"/"W" are weak symbols. + return toupper(type) == 'T' || toupper(type) == 'W'; +} + bool kcore_copy_t::read_kallsyms() { @@ -384,12 +393,32 @@ kcore_copy_t::read_kallsyms() } proc_module_t *kernel_module = nullptr; std::string line; + + /* i#6486: Kernel JIT code like eBPF is not included in /proc/modules, but they have + * entries in /proc/kallsyms if /proc/sys/net/core/bpf_jit_harden and + * /proc/sys/net/core/bpf_jit_kallsyms are set appropriately (see + * docs.kernel.org/admin-guide/sysctl/net.html#proc-sys-net-core-network-core-options + * for more details). + * Perf's kcore copy logic does not copy JIT code but somehow includes JIT encodings + * and symbols in perf.data/data itself (not sure how yet). However, we use a + * different approach and copy the BPF JIT code to our kcore dump. If we find that + * the kernel executes other JIT code (indicated by "no memory mapped at this + * address" errors during libipt decoding), we would need to extend this logic to + * somehow identify those other /proc/kcore JIT regions. 
+ */ + std::set bpf_jit_symbols; +#define BPF_JIT_MODULE_NAME "[bpf]" + while (std::getline(f, line)) { char name[KERNEL_SYMBOL_MAX_LEN]; + char module[KERNEL_SYMBOL_MAX_LEN]; + char type; uint64_t addr; - if (dr_sscanf(line.c_str(), HEX64_FORMAT_STRING " %*1c %299s [%*99s", &addr, - name) < 2) + int n_read = dr_sscanf(line.c_str(), HEX64_FORMAT_STRING " %c %299s %299s", &addr, + &type, name, module); + if (n_read < 3) continue; + bool has_module = n_read > 3; if (strcmp(name, "_stext") == 0) { if (kernel_module != nullptr) { ASSERT(false, "multiple kernel modules found"); @@ -409,9 +438,49 @@ kcore_copy_t::read_kallsyms() kcore_code_segments_num_++; modules_ = kernel_module; kernel_module = nullptr; + } else if (has_module && strcmp(module, BPF_JIT_MODULE_NAME) == 0 && + is_function_symbol(type)) { + bpf_jit_symbols.insert(addr); } } ASSERT(kernel_module == nullptr, "failed to find kernel module"); + + if (!bpf_jit_symbols.empty()) { + /* We copy a page size worth of contents after each bpf-related function symbol + * in an effort to make sure that the complete function is copied. This is + * similar to perf adding page size to the highest kernel symbol in its own + * kcore copy logic. + */ + size_t page_size = dr_page_size(); + proc_module_t *bpf_module = nullptr; + for (auto it = bpf_jit_symbols.begin(); it != bpf_jit_symbols.end();) { + uint64_t addr = *it; + if (bpf_module == nullptr) { + bpf_module = (proc_module_t *)dr_global_alloc(sizeof(proc_module_t)); + bpf_module->start = ALIGN_BACKWARD(addr, page_size); + bpf_module->end = ALIGN_FORWARD(addr + page_size, page_size); + ++it; + continue; + } + if (bpf_module->end >= addr) { + /* Just extend the last module region if the new addr falls within + * the last recorded range. 
+ */ + bpf_module->end = ALIGN_FORWARD(addr + page_size, page_size); + ++it; + } else { + bpf_module->next = modules_; + kcore_code_segments_num_++; + modules_ = bpf_module; + /* Create a new module region for `addr` in the next iteration. */ + bpf_module = nullptr; + } + } + ASSERT(bpf_module != nullptr, "Did not expect nullptr"); + bpf_module->next = modules_; + kcore_code_segments_num_++; + modules_ = bpf_module; + } f.close(); return true; } diff --git a/clients/drcachesim/tracer/output.cpp b/clients/drcachesim/tracer/output.cpp index df2c4f973ad..64d4d120fd0 100644 --- a/clients/drcachesim/tracer/output.cpp +++ b/clients/drcachesim/tracer/output.cpp @@ -218,9 +218,13 @@ get_file_type() static_cast(file_type | OFFLINE_FILE_TYPE_ENCODINGS); } #ifdef BUILD_PT_TRACER + // TODO i#5505: When ir2trace starts adding synthesized read/write memrefs for + // the kernel trace, change the trace file type from + // OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY to + // OFFLINE_FILE_TYPE_KERNEL_SYSCALLS. if (op_enable_kernel_tracing.get_value()) { - file_type = static_cast(file_type | - OFFLINE_FILE_TYPE_KERNEL_SYSCALLS); + file_type = static_cast( + file_type | OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY); } #endif file_type = static_cast( diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 52da3f4976b..92af0ea38e3 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -686,6 +686,7 @@ raw2trace_t::write_syscall_template(raw2trace_thread_data_t *tdata, byte *&buf_i return false; } buf_in = reinterpret_cast(buf_base); + accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_INJECTED, 1); log(2, "Inserted %d instrs from system call trace template for sysnum %d\n", inserted_instr_count, syscall_num); return true; @@ -840,6 +841,8 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, log(4, "Rseq abort %d\n", tdata->rseq_past_end_); if (!adjust_and_emit_rseq_buffer(tdata, marker_val, marker_val)) return false; + if (!append_delayed_branch(tdata, reinterpret_cast(marker_val))) + return false; } else if (marker_type == TRACE_MARKER_TYPE_RSEQ_ENTRY) { if (tdata->rseq_want_rollback_) { if (tdata->rseq_buffering_enabled_) { @@ -880,6 +883,23 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, log(2, "Maybe-blocking syscall %zu\n", marker_val); buf += trace_metadata_writer_t::write_marker( buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); + } else if (marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { +#ifdef AARCH64 + log(4, + "Setting SVE vector length for thread " INT64_FORMAT_STRING " to %zu bytes\n", + tdata->tid, marker_val); + + const int new_vl_bits = marker_val * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Some SVE load/store instructions have an offset which is scaled by a value + // that depends on the vector length. These instructions will need to be + // re-decoded after the vector length changes. 
+ *flush_decode_cache = true; + } +#else + log(2, "Ignoring unexpected dynamic vector length marker\n"); +#endif } return true; } @@ -929,6 +949,7 @@ raw2trace_t::read_header(raw2trace_thread_data_t *tdata, header->cache_line_size = proc_get_cache_line_size(); unread_last_entry(tdata); } + return true; } @@ -966,8 +987,10 @@ raw2trace_t::process_header(raw2trace_thread_data_t *tdata) thread_id_t tid = header.tid; tdata->tid = tid; #ifdef BUILD_PT_POST_PROCESSOR - if (TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, tdata->file_type)) { - if (syscall_template_file_reader_ == nullptr) { + if (TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS | + OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, + tdata->file_type)) { + if (syscall_template_file_reader_ != nullptr) { tdata->error = "System call trace template injection not supported for " "traces already with kernel parts."; return false; @@ -1085,7 +1108,7 @@ raw2trace_t::get_next_kernel_entry(raw2trace_thread_data_t *tdata, bool raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall_idx) { - DR_ASSERT(TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, tdata->file_type)); + DR_ASSERT(TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, tdata->file_type)); std::unique_ptr pt_metadata; std::unique_ptr pt_data = get_next_kernel_entry(tdata, pt_metadata, syscall_idx); @@ -1165,6 +1188,10 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_START, .addr = sysnum }; entries.push_back(start_entry); + // TODO i#5505: When ir2trace starts adding synthesized read/write memrefs for + // the kernel trace, change the trace file type from + // OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY to + // OFFLINE_FILE_TYPE_KERNEL_SYSCALLS. 
ir2trace_convert_status_t ir2trace_convert_status = ir2trace_t::convert(tdata->pt_decode_state_.get(), entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { @@ -1204,7 +1231,8 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall "Unknown pc after ir2trace: did ir2trace insert new instr?"; return false; } - if (!append_encoding(tdata, saved_decode_pc, entry.size, buf, + if (record_encoding_emitted(tdata, saved_decode_pc) && + !append_encoding(tdata, saved_decode_pc, entry.size, buf, entries_with_encodings)) return false; } @@ -1513,6 +1541,7 @@ raw2trace_t::do_conversion() final_trace_instr_count_ += thread_data_[i]->final_trace_instr_count; kernel_instr_count_ += thread_data_[i]->kernel_instr_count; syscall_traces_decoded_ += thread_data_[i]->syscall_traces_decoded; + syscall_traces_injected_ += thread_data_[i]->syscall_traces_injected; } } else { // The files can be converted concurrently. @@ -1540,6 +1569,7 @@ raw2trace_t::do_conversion() final_trace_instr_count_ += tdata->final_trace_instr_count; kernel_instr_count_ += tdata->kernel_instr_count; syscall_traces_decoded_ += tdata->syscall_traces_decoded; + syscall_traces_injected_ += tdata->syscall_traces_injected; } } error = aggregate_and_write_schedule_files(); @@ -1561,6 +1591,8 @@ raw2trace_t::do_conversion() VPRINT(1, "Kernel instr count " UINT64_FORMAT_STRING "\n", kernel_instr_count_); VPRINT(1, "System call PT traces decoded " UINT64_FORMAT_STRING "\n", syscall_traces_decoded_); + VPRINT(1, "System call traces injected from template " UINT64_FORMAT_STRING "\n", + syscall_traces_injected_); VPRINT(1, "Successfully converted %zu thread files\n", thread_data_.size()); return ""; } @@ -1944,6 +1976,11 @@ raw2trace_t::append_bb_entries(raw2trace_thread_data_t *tdata, if (tdata->rseq_past_end_) { if (!adjust_and_emit_rseq_buffer(tdata, instr_pc)) return false; + if (!instr->is_cti()) { + // Write out delayed branches now that we have a target. 
+ if (!append_delayed_branch(tdata, orig_pc)) + return false; + } } else if (instr_pc < tdata->rseq_start_pc_ || instr_pc >= tdata->rseq_end_pc_) { log(4, "Hit exit to 0x%zx during instrumented rseq run\n", orig_pc); @@ -2198,6 +2235,9 @@ raw2trace_t::handle_kernel_interrupt_and_markers(raw2trace_thread_data_t *tdata, if (!adjust_and_emit_rseq_buffer(tdata, static_cast(cur_pc), rseq_abort_pc)) return false; + if (!append_delayed_branch(tdata, + reinterpret_cast(marker_val))) + return false; } append = true; *interrupted = true; @@ -2561,6 +2601,34 @@ raw2trace_t::rollback_rseq_buffer(raw2trace_thread_data_t *tdata, if (type_is_instr(static_cast(tdata->rseq_buffer_[i].type))) ++decode_end; } + // If the last instruction after the rollback is a branch, delay the branch. + if (remove_start > 0 && + type_is_instr_branch( + static_cast(tdata->rseq_buffer_[remove_start - 1].type))) { + const int last_buffer_idx = remove_start - 1; + // Walk backwards to find all the related encodings. + int first_encoding_idx; + for (first_encoding_idx = last_buffer_idx - 1; first_encoding_idx >= 0; + --first_encoding_idx) { + if (tdata->rseq_buffer_[first_encoding_idx].type != TRACE_TYPE_ENCODING) + break; + } + const app_pc instr_addr = + reinterpret_cast(tdata->rseq_buffer_[last_buffer_idx].addr); + app_pc target_pc = 0; + for (const auto &branch : tdata->rseq_branch_targets_) { + if (branch.pc == instr_addr) { + target_pc = branch.target_pc; + break; + } + } + if (!write_delayed_branches(tdata, &tdata->rseq_buffer_[first_encoding_idx + 1], + &tdata->rseq_buffer_[last_buffer_idx] + 1, + tdata->rseq_decode_pcs_[decode_start - 1], target_pc)) + return false; + remove_start -= last_buffer_idx - first_encoding_idx; + decode_start--; + } log(4, "Rolling back %d entries for rseq: %d-%d\n", remove_end - remove_start, remove_start, remove_end); tdata->rseq_buffer_.erase(tdata->rseq_buffer_.begin() + remove_start, @@ -2719,11 +2787,18 @@ 
raw2trace_t::adjust_and_emit_rseq_buffer(raw2trace_thread_data_t *tdata, addr_t // encoding out so it will change back if we see this PC again. rollback_last_encoding(tdata); } - for (trace_entry_t *e = toadd; e < buf; e++) - tdata->rseq_buffer_.push_back(*e); - tdata->rseq_buffer_.push_back(jump); - tdata->rseq_decode_pcs_.push_back(encoding); - log(4, "Appended synthetic jump 0x%zx -> 0x%zx\n", jump.addr, next_pc); + if (delayed_branches_exist(tdata)) { + write_delayed_branches(tdata, toadd, buf, nullptr, nullptr); + write_delayed_branches(tdata, &jump, &jump + 1, encoding, + reinterpret_cast(next_pc)); + log(4, "Delayed synthetic jump 0x%zx -> 0x%zx\n", jump.addr, next_pc); + } else { + for (trace_entry_t *e = toadd; e < buf; e++) + tdata->rseq_buffer_.push_back(*e); + tdata->rseq_buffer_.push_back(jump); + tdata->rseq_decode_pcs_.push_back(encoding); + log(4, "Appended synthetic jump 0x%zx -> 0x%zx\n", jump.addr, next_pc); + } } } @@ -3271,7 +3346,7 @@ raw2trace_t::open_new_chunk(raw2trace_thread_data_t *tdata) tdata->encoding_emitted.clear(); tdata->last_encoding_emitted = nullptr; - // TODO i#5538: Add a virtual-to-physical cache and clear it here. + // TODO i#6654,i#5538: Add a virtual-to-physical cache and clear it here. // We'll need to add a routine for trace_converter_t to call to query our cache -- // or we can put the cache in trace_converter_t and have it clear the cache via // a new new-chunk return value from write() and append_delayed_branch(). 
@@ -3759,6 +3834,9 @@ raw2trace_t::accumulate_to_statistic(raw2trace_thread_data_t *tdata, case RAW2TRACE_STAT_SYSCALL_TRACES_DECODED: tdata->syscall_traces_decoded += value; break; + case RAW2TRACE_STAT_SYSCALL_TRACES_INJECTED: + tdata->syscall_traces_injected += value; + break; case RAW2TRACE_STAT_MAX: default: DR_ASSERT(false); } @@ -3778,6 +3856,7 @@ raw2trace_t::get_statistic(raw2trace_statistic_t stat) case RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT: return final_trace_instr_count_; case RAW2TRACE_STAT_KERNEL_INSTR_COUNT: return kernel_instr_count_; case RAW2TRACE_STAT_SYSCALL_TRACES_DECODED: return syscall_traces_decoded_; + case RAW2TRACE_STAT_SYSCALL_TRACES_INJECTED: return syscall_traces_injected_; case RAW2TRACE_STAT_MAX: default: DR_ASSERT(false); return 0; } diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index 8faa968b637..01cca6465f0 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -100,8 +100,6 @@ namespace drmemtrace { #define TRACE_SUFFIX "trace" -#define TRACE_CHUNK_PREFIX "chunk." - typedef enum { RAW2TRACE_STAT_COUNT_ELIDED, RAW2TRACE_STAT_DUPLICATE_SYSCALL, @@ -113,6 +111,7 @@ typedef enum { RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, + RAW2TRACE_STAT_SYSCALL_TRACES_INJECTED, // We add a MAX member so that we can iterate over all stats in unit tests. RAW2TRACE_STAT_MAX, } raw2trace_statistic_t; @@ -777,71 +776,6 @@ template class bitset_hash_table_t { } }; -// We need to determine the memref_t record count for inserting a marker with -// that count at the start of each chunk. 
-class memref_counter_t : public reader_t { -public: - bool - init() override - { - return true; - } - trace_entry_t * - read_next_entry() override - { - return nullptr; - }; - std::string - get_stream_name() const override - { - return ""; - } - int - entry_memref_count(const trace_entry_t *entry) - { - // Mirror file_reader_t::open_input_file(). - // In particular, we need to skip TRACE_TYPE_HEADER and to pass the - // tid and pid to the reader before the 2 markers in front of them. - if (!saw_pid_) { - if (entry->type == TRACE_TYPE_HEADER) - return 0; - else if (entry->type == TRACE_TYPE_THREAD) { - list_.push_front(*entry); - return 0; - } else if (entry->type != TRACE_TYPE_PID) { - list_.push_back(*entry); - return 0; - } - saw_pid_ = true; - auto it = list_.begin(); - ++it; - list_.insert(it, *entry); - int count = 0; - for (auto &next : list_) { - input_entry_ = &next; - if (process_input_entry()) - ++count; - } - return count; - } - if (entry->type == TRACE_TYPE_FOOTER) - return 0; - input_entry_ = const_cast(entry); - return process_input_entry() ? 1 : 0; - } - unsigned char * - get_decode_pc(addr_t orig_pc) - { - if (encodings_.find(orig_pc) == encodings_.end()) - return nullptr; - return encodings_[orig_pc].bits; - } - -private: - bool saw_pid_ = false; - std::list list_; -}; - /** * The raw2trace class converts the raw offline trace format to the format * expected by analysis tools. 
It requires access to the binary files for the @@ -1104,6 +1038,7 @@ class raw2trace_t { uint64 final_trace_instr_count = 0; uint64 kernel_instr_count = 0; uint64 syscall_traces_decoded = 0; + uint64 syscall_traces_injected = 0; uint64 cur_chunk_instr_count = 0; uint64 cur_chunk_ref_count = 0; @@ -1348,6 +1283,7 @@ class raw2trace_t { uint64 final_trace_instr_count_ = 0; uint64 kernel_instr_count_ = 0; uint64 syscall_traces_decoded_ = 0; + uint64 syscall_traces_injected_ = 0; std::unique_ptr module_mapper_; diff --git a/clients/drcachesim/tracer/raw2trace_shared.h b/clients/drcachesim/tracer/raw2trace_shared.h index 76785743616..1fa67a1e3f1 100644 --- a/clients/drcachesim/tracer/raw2trace_shared.h +++ b/clients/drcachesim/tracer/raw2trace_shared.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -41,8 +41,11 @@ * @brief DrMemtrace routines and structs shared between raw2trace and tracer. */ +#include + #include "dr_api.h" #include "drmemtrace.h" +#include "reader.h" #include "trace_entry.h" namespace dynamorio { @@ -67,6 +70,7 @@ namespace drmemtrace { #define WINDOW_SUBDIR_FORMAT "window.%04zd" /* ptr_int_t is the window number type. */ #define WINDOW_SUBDIR_FIRST "window.0000" #define TRACE_SUBDIR "trace" +#define TRACE_CHUNK_PREFIX "chunk." /** * Functions for decoding and verifying raw memtrace data headers. @@ -80,6 +84,76 @@ struct trace_metadata_reader_t { check_entry_thread_start(const offline_entry_t *entry); }; +// We need to determine the memref_t record count for inserting a marker with +// that count at the start of each chunk. 
+class memref_counter_t : public reader_t { +public: + bool + init() override + { + return true; + } + trace_entry_t * + read_next_entry() override + { + return nullptr; + }; + std::string + get_stream_name() const override + { + return ""; + } + int + entry_memref_count(const trace_entry_t *entry) + { + // Mirror file_reader_t::open_input_file(). + // In particular, we need to skip TRACE_TYPE_HEADER and to pass the + // tid and pid to the reader before the 2 markers in front of them. + if (!saw_pid_) { + if (entry->type == TRACE_TYPE_HEADER) + return 0; + else if (entry->type == TRACE_TYPE_THREAD) { + list_.push_front(*entry); + return 0; + } else if (entry->type != TRACE_TYPE_PID) { + list_.push_back(*entry); + return 0; + } + saw_pid_ = true; + auto it = list_.begin(); + ++it; + list_.insert(it, *entry); + int count = 0; + for (auto &next : list_) { + input_entry_ = &next; + if (process_input_entry()) + ++count; + } + return count; + } + if (entry->type == TRACE_TYPE_FOOTER) + return 0; + input_entry_ = const_cast(entry); + return process_input_entry() ? 
1 : 0; + } + unsigned char * + get_decode_pc(addr_t orig_pc) + { + if (encodings_.find(orig_pc) == encodings_.end()) + return nullptr; + return encodings_[orig_pc].bits; + } + void + set_core_sharded(bool core_sharded) + { + core_sharded_ = core_sharded; + } + +private: + bool saw_pid_ = false; + std::list list_; +}; + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 130166baf78..7cf412dca46 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -2336,7 +2336,16 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[]) DR_ASSERT(false); #ifdef UNIX /* we want an isolated fd so we don't use ipc_pipe.open_for_write() */ - int fd = dr_open_file(ipc_pipe.get_pipe_path().c_str(), DR_FILE_WRITE_ONLY); + const char *pipe_path = ipc_pipe.get_pipe_path().c_str(); + if (!dr_file_exists(pipe_path)) { + NOTIFY(0, + "drmemtrace WARNING: attempting to open write end of pipe at %s " + "for online analysis but pipe does not exist. Use \"-offline\" " + "mode if you are using drmemtrace without a reader.\n", + pipe_path); + } + + int fd = dr_open_file(pipe_path, DR_FILE_WRITE_ONLY); DR_ASSERT(fd != INVALID_FILE); if (!ipc_pipe.set_fd(fd)) DR_ASSERT(false); diff --git a/core/arch/aarch64/aarch64.asm b/core/arch/aarch64/aarch64.asm index 1fbd09c0406..bc8ec7af5f4 100644 --- a/core/arch/aarch64/aarch64.asm +++ b/core/arch/aarch64/aarch64.asm @@ -104,19 +104,6 @@ DECL_EXTERN(initstack_mutex) DECL_EXTERN(icache_op_struct) DECL_EXTERN(linkstub_selfmod) -/* For debugging: report an error if the function called by call_switch_stack() - * unexpectedly returns. Also used elsewhere. - */ - DECLARE_FUNC(unexpected_return) -GLOBAL_LABEL(unexpected_return:) - CALLC3(GLOBAL_REF(d_r_internal_error), HEX(0), HEX(0), HEX(0)) - /* d_r_internal_error normally never returns */ - /* Infinite loop is intentional. Can we do better in release build? 
- * XXX: why not a debug instr? - */ - JUMP GLOBAL_REF(unexpected_return) - END_FUNC(unexpected_return) - /* bool mrs_id_reg_supported(void) * Checks for kernel support of the MRS instr when reading system registers * above exception level EL0, by attempting to read Instruction Set Attribute @@ -158,7 +145,9 @@ call_dispatch_alt_stack_no_free: /* Switch stack back. */ mov sp, x19 /* Test return_on_return. */ - cbz w20, GLOBAL_REF(unexpected_return) + cbnz w20, call_dispatch_alt_stack_ok_return + bl GLOBAL_REF(unexpected_return) +call_dispatch_alt_stack_ok_return: /* Restore and return. */ ldr x19, [sp, #16] ldp x20, x30, [sp], #32 @@ -532,23 +521,6 @@ GLOBAL_LABEL(_dynamorio_runtime_resolve:) #endif /* UNIX */ #ifdef LINUX -/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, - * void *ctid, void (*func)(void)) - */ - DECLARE_FUNC(dynamorio_clone) -GLOBAL_LABEL(dynamorio_clone:) - stp ARG6, x0, [ARG2, #-16]! /* func is now on TOS of newsp */ - /* All args are already in syscall registers. */ - mov SYSNUM_REG, #SYS_clone - svc #0 - cbnz x0, dynamorio_clone_parent - ldp x0, x1, [sp], #16 - blr x0 - bl GLOBAL_REF(unexpected_return) -dynamorio_clone_parent: - ret - END_FUNC(dynamorio_clone) - DECLARE_FUNC(dynamorio_sigreturn) GLOBAL_LABEL(dynamorio_sigreturn:) mov SYSNUM_REG, #SYS_rt_sigreturn diff --git a/core/arch/aarch64/proc.c b/core/arch/aarch64/proc.c index 813e28257b4..989d6bf856d 100644 --- a/core/arch/aarch64/proc.c +++ b/core/arch/aarch64/proc.c @@ -114,7 +114,6 @@ get_processor_specific_info(void) */ # if !defined(DR_HOST_NOT_TARGET) if (proc_has_feature(FEATURE_SVE)) { -# if !defined(BUILD_TESTS) uint64 vl; /* This RDVL instruction is inserted as raw hex because we don't build * with SVE enabled: i.e. 
not -march=armv8-a+sve, so that we can run a @@ -129,10 +128,6 @@ get_processor_specific_info(void) : "x0"); cpu_info.sve_vector_length_bytes = vl; dr_set_sve_vector_length(vl * 8); -# else - cpu_info.sve_vector_length_bytes = 32; - dr_set_sve_vector_length(256); -# endif } else { cpu_info.sve_vector_length_bytes = 32; dr_set_sve_vector_length(256); @@ -223,6 +218,7 @@ proc_init_arch(void) LOG(GLOBAL, LOG_TOP, 1, "Processor features:\n ID_AA64PFR1_EL1 = 0x%016lx\n", cpu_info.features.flags_aa64pfr1); LOG_FEATURE(FEATURE_MTE); + LOG_FEATURE(FEATURE_BTI); }); # endif #endif @@ -296,7 +292,7 @@ enable_all_test_cpu_features() FEATURE_BF16, FEATURE_I8MM, FEATURE_F64MM, FEATURE_FlagM, FEATURE_JSCVT, FEATURE_DPB, FEATURE_DPB2, FEATURE_SVE2, FEATURE_SVEAES, FEATURE_SVEBitPerm, FEATURE_SVESHA3, FEATURE_SVESM4, - FEATURE_MTE + FEATURE_MTE, FEATURE_BTI }; for (int i = 0; i < BUFFER_SIZE_ELEMENTS(features); ++i) { proc_set_feature(features[i], true); diff --git a/core/arch/arch.h b/core/arch/arch.h index 77796b1c12d..2de3d2915c9 100644 --- a/core/arch/arch.h +++ b/core/arch/arch.h @@ -184,6 +184,7 @@ mixed_mode_enabled(void) # define SCRATCH_REG4_OFFS REG4_OFFSET # define SCRATCH_REG5_OFFS REG5_OFFSET # define REG_OFFSET(reg) (X0_OFFSET + ((reg)-DR_REG_X0) * sizeof(reg_t)) +# define FREG_OFFSET(reg) (F0_OFFSET + ((reg)-DR_REG_F0) * sizeof(reg_t)) # define CALL_SCRATCH_REG DR_REG_T6 # define MC_IBL_REG a2 # define MC_RETVAL_REG a0 diff --git a/core/arch/arch_exports.h b/core/arch/arch_exports.h index 8eab49dc6ec..56605fef734 100644 --- a/core/arch/arch_exports.h +++ b/core/arch/arch_exports.h @@ -708,9 +708,6 @@ dynamorio_condvar_wake_and_jmp(KSYNCH_TYPE *ksynch /*in xax/r0*/, void dynamorio_nonrt_sigreturn(void); # endif -thread_id_t -dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, void *ctid, - void (*func)(void)); void xfer_to_new_libdr(app_pc entry, void **init_sp, byte *cur_dr_map, size_t cur_dr_size); # endif diff --git a/core/arch/arm/arm.asm 
b/core/arch/arm/arm.asm index f69000b1239..f58073eca67 100644 --- a/core/arch/arm/arm.asm +++ b/core/arch/arm/arm.asm @@ -114,7 +114,9 @@ call_dispatch_alt_stack_no_free: /* after call, so we can use REG_R3 as the scratch register */ ldr REG_R3, [sp, #8/* r4, lr */] /* ARG5 */ cmp REG_R3, #0 - beq GLOBAL_REF(unexpected_return) + bne call_dispatch_alt_stack_ok_return + bl GLOBAL_REF(unexpected_return) +call_dispatch_alt_stack_ok_return: /* restore and return */ pop {REG_R4, pc} END_FUNC(call_switch_stack) @@ -489,30 +491,6 @@ GLOBAL_LABEL(_dynamorio_runtime_resolve:) #endif /* UNIX */ #ifdef LINUX -/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, - * void *ctid, void (*func)(void)) - */ - DECLARE_FUNC(dynamorio_clone) -GLOBAL_LABEL(dynamorio_clone:) - /* Save callee-saved regs we clobber in the parent. */ - push {r4, r5, r7} - ldr r4, [sp, #12] /* ARG5 minus the pushes above */ - ldr r5, [sp, #16] /* ARG6 minus the pushes above */ - /* All args are now in syscall registers. */ - /* Push func on the new stack. 
*/ - stmdb ARG2!, {r5} - mov r7, #SYS_clone - svc 0 - cmp r0, #0 - bne dynamorio_clone_parent - ldmia sp!, {r0} - blx r0 - bl GLOBAL_REF(unexpected_return) -dynamorio_clone_parent: - pop {r4, r5, r7} - bx lr - END_FUNC(dynamorio_clone) - DECLARE_FUNC(dynamorio_sigreturn) GLOBAL_LABEL(dynamorio_sigreturn:) mov r7, #SYS_rt_sigreturn diff --git a/core/arch/proc_api.h b/core/arch/proc_api.h index 048dbb83544..92ed8384ffa 100644 --- a/core/arch/proc_api.h +++ b/core/arch/proc_api.h @@ -361,6 +361,7 @@ typedef enum { FEATURE_SVESM4 = DEF_FEAT(AA64ZFR0, 10, 1, 0), /**< SVE2 + SM4(AArch64) */ FEATURE_SVEBitPerm = DEF_FEAT(AA64ZFR0, 4, 1, 0), /**< SVE2 + BitPerm(AArch64) */ FEATURE_MTE = DEF_FEAT(AA64PFR1, 2, 1, 0), /**< Memory Tagging Extension */ + FEATURE_BTI = DEF_FEAT(AA64PFR1, 0, 1, 0), /**< Branch Target Identification*/ } feature_bit_t; #endif #ifdef RISCV64 diff --git a/core/arch/riscv64/emit_utils.c b/core/arch/riscv64/emit_utils.c index 1809c0ff251..0b423bdfedc 100644 --- a/core/arch/riscv64/emit_utils.c +++ b/core/arch/riscv64/emit_utils.c @@ -53,6 +53,13 @@ /* Number of the fcsr register. */ #define FCSR 0x003 +/* Instruction fixed bits constants. */ + +/* ld zero, 0(zero) */ +#define RAW_LD_INST_MATCH 0x3003 +/* sd zero, 0(zero) */ +#define RAW_SD_INST_MATCH 0x3023 + /***************************************************************************/ /* EXIT STUB */ /***************************************************************************/ @@ -285,6 +292,9 @@ void patch_stub(fragment_t *f, cache_pc stub_pc, cache_pc target_pc, cache_pc target_prefix_pc, bool hot_patch) { + /* If stub_pc is not aligned to 4 bytes, the first instruction will be c.nop, see + * insert_exit_stub_other_flags(). */ + stub_pc = ALIGNED(stub_pc, 4) ? stub_pc : stub_pc + 2; ptr_int_t off = (ptr_int_t)target_pc - (ptr_int_t)stub_pc; if (off < 0x100000 && off > (ptr_int_t)0xFFFFFFFFFFF00000L) { /* target_pc is a near fragment. 
We can get there with a J (OP_jal, 21-bit signed @@ -358,8 +368,9 @@ unpatch_stub(dcontext_t *dcontext, fragment_t *f, cache_pc stub_pc, bool hot_pat */ ASSERT(TLS_REG0_SLOT <= (2 << 11) - 1); *(uint *)vmcode_get_writable_addr(stub_pc) = - (0x3023 | TLS_REG0_SLOT >> 5 << 25 | (DR_REG_A0 - DR_REG_ZERO) << 20 | - (dr_reg_stolen - DR_REG_ZERO) << 15 | (TLS_REG0_SLOT & 0x1f) << 7); + (RAW_SD_INST_MATCH | TLS_REG0_SLOT >> 5 << 25 | + (DR_REG_A0 - DR_REG_ZERO) << 20 | (dr_reg_stolen - DR_REG_ZERO) << 15 | + (TLS_REG0_SLOT & 0x1f) << 7); if (hot_patch) machine_cache_sync(stub_pc, stub_pc + 4, /*flush_icache=*/true); } else if (stub_is_patched_for_far_fragment_link(dcontext, f, stub_pc)) { @@ -475,37 +486,26 @@ get_stub_branch(uint *val) return (uint *)pc; } -void -link_indirect_exit_arch(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, - bool hot_patch, app_pc target_tag) +static void +set_indirect_exit_target(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, + cache_pc target, cache_pc exit_target, bool hot_patch) { byte *stub_pc = (byte *)EXIT_STUB_PC(dcontext, f, l); - uint *pc; - cache_pc exit_target; - ibl_type_t ibl_type = { 0 }; - DEBUG_DECLARE(bool is_ibl =) - get_ibl_routine_type_ex(dcontext, target_tag, &ibl_type); - ASSERT(is_ibl); - if (IS_IBL_LINKED(ibl_type.link_state)) - exit_target = target_tag; - else - exit_target = get_linked_entry(dcontext, target_tag); - /* Set pc to the last instruction in the stub. * See insert_exit_stub_other_flags(), the last instruction in indirect exit stub will * always be a c.nop. 
*/ - pc = (uint *)(stub_pc + exit_stub_size(dcontext, target_tag, f->flags) - - RISCV64_INSTR_COMPRESSED_SIZE); + uint *pc = (uint *)(stub_pc + exit_stub_size(dcontext, target, f->flags) - + RISCV64_INSTR_COMPRESSED_SIZE); pc = get_stub_branch(pc) - 1; - ASSERT(get_ibl_entry_tls_offs(dcontext, exit_target) <= (2 << 11) - 1); + ASSERT(get_ibl_entry_tls_offs(dcontext, exit_target) <= (1 << 11) - 1); /* Format of the ld instruction: | imm[11:0] | rs1 |011| rd |0000011| ^ 31-20 ^ 19-15 ^ ^ 11-7 ^ */ /* ld a1, offs(reg_stolen) */ - *(uint *)vmcode_get_writable_addr((byte *)pc) = 0x3003 | + *(uint *)vmcode_get_writable_addr((byte *)pc) = RAW_LD_INST_MATCH | get_ibl_entry_tls_offs(dcontext, exit_target) << 20 | (dr_reg_stolen - DR_REG_ZERO) << 15 | (DR_REG_A1 - DR_REG_ZERO) << 7; @@ -513,6 +513,40 @@ link_indirect_exit_arch(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, machine_cache_sync(pc, pc + 1, true); } +void +link_indirect_exit_arch(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, + bool hot_patch, app_pc target_tag) +{ + cache_pc exit_target; + ibl_type_t ibl_type = { 0 }; + DEBUG_DECLARE(bool is_ibl =) + get_ibl_routine_type_ex(dcontext, target_tag, &ibl_type); + ASSERT(is_ibl); + if (IS_IBL_LINKED(ibl_type.link_state)) + exit_target = target_tag; + else + exit_target = get_linked_entry(dcontext, target_tag); + + set_indirect_exit_target(dcontext, f, l, target_tag, exit_target, hot_patch); +} + +void +unlink_indirect_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) +{ + cache_pc exit_target; + ibl_code_t *ibl_code = NULL; + ASSERT(linkstub_owned_by_fragment(dcontext, f, l)); + ASSERT(LINKSTUB_INDIRECT(l->flags)); + /* Target is always the same, so if it's already unlinked, this is a nop. 
*/ + if (!TEST(LINK_LINKED, l->flags)) + return; + ibl_code = get_ibl_routine_code(dcontext, extract_branchtype(l->flags), f->flags); + exit_target = ibl_code->unlinked_ibl_entry; + + set_indirect_exit_target(dcontext, f, l, ibl_code->indirect_branch_lookup_routine, + exit_target, true); +} + cache_pc indirect_linkstub_stub_pc(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) { @@ -537,13 +571,6 @@ cbr_fallthrough_exit_cti(cache_pc prev_cti_pc) return NULL; } -void -unlink_indirect_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) -{ - /* FIXME i#3544: Not implemented */ - ASSERT_NOT_IMPLEMENTED(false); -} - /******************************************************************************* * COARSE-GRAIN FRAGMENT SUPPORT */ @@ -628,7 +655,16 @@ append_restore_xflags(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) void append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) { - /* No-op. */ + opnd_t memopnd; + + /* Floating-point register is not SIMD registers in RISC-V, but to be consistent with + * other architectures, we handle them here. + */ + for (int reg = DR_REG_F0; reg <= DR_REG_F31; reg++) { + memopnd = opnd_create_dcontext_field_via_reg_sz( + dcontext, REG_NULL, REG_OFFSET(reg), reg_get_size(reg)); + APP(ilist, INSTR_CREATE_fld(dcontext, opnd_create_reg(reg), memopnd)); + } } /* Append instructions to restore gpr on fcache enter, to be executed @@ -718,7 +754,16 @@ append_save_gpr(dcontext_t *dcontext, instrlist_t *ilist, bool ibl_end, bool abs void append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) { - /* No-op. */ + opnd_t memopnd; + + /* Floating-point register is not SIMD registers in RISC-V, but to be consistent with + * other architectures, we handle them here. 
+ */ + for (int reg = DR_REG_F0; reg <= DR_REG_F31; reg++) { + memopnd = opnd_create_dcontext_field_via_reg_sz( + dcontext, REG_NULL, REG_OFFSET(reg), reg_get_size(reg)); + APP(ilist, INSTR_CREATE_fsd(dcontext, memopnd, opnd_create_reg(reg))); + } } /* Scratch reg0 is holding exit stub. */ diff --git a/core/arch/riscv64/mangle.c b/core/arch/riscv64/mangle.c index 5ae5b572c72..44e8df4dc26 100644 --- a/core/arch/riscv64/mangle.c +++ b/core/arch/riscv64/mangle.c @@ -72,6 +72,7 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, opnd_t push_pc, reg_id_t scratch) { uint dstack_offs = 0; + int dstack_middle_offs; int max_offs; if (cci == NULL) @@ -96,11 +97,12 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, if (cci->reg_skip[i]) continue; + /* Uses c.sdsp to save space, see -max_bb_instrs option, same below. */ PRE(ilist, instr, - INSTR_CREATE_sd(dcontext, - opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, - dstack_offs + i * XSP_SZ, OPSZ_8), - opnd_create_reg(DR_REG_START_GPR + i))); + INSTR_CREATE_c_sdsp(dcontext, + opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, + dstack_offs + i * XSP_SZ, OPSZ_8), + opnd_create_reg(DR_REG_START_GPR + i))); } dstack_offs += DR_NUM_GPR_REGS * XSP_SZ; @@ -109,25 +111,34 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, PRE(ilist, instr, XINST_CREATE_load_int(dcontext, opnd_create_reg(DR_REG_A0), push_pc)); PRE(ilist, instr, - INSTR_CREATE_sd(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), - opnd_create_reg(DR_REG_A0))); + INSTR_CREATE_c_sdsp(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), + opnd_create_reg(DR_REG_A0))); } else { ASSERT(opnd_is_reg(push_pc)); /* push_pc is still holding the PC value. 
*/ PRE(ilist, instr, - INSTR_CREATE_sd(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), - push_pc)); + INSTR_CREATE_c_sdsp(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), + push_pc)); } dstack_offs += XSP_SZ; + /* XXX: c.sdsp/c.fsdsp has a zero-extended 9-bit offset, which is not enough for our + * usage. We use dstack_middle_offs to mitigate this issue. + */ + dstack_middle_offs = dstack_offs; + dstack_offs = 0; + PRE(ilist, instr, + INSTR_CREATE_addi(dcontext, opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_SP), + opnd_create_immed_int(dstack_middle_offs, OPSZ_12b))); /* Push FPRs. */ for (int i = 0; i < DR_NUM_FPR_REGS; i++) { PRE(ilist, instr, - INSTR_CREATE_fsd(dcontext, - opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, - dstack_offs + i * XSP_SZ, OPSZ_8), - opnd_create_reg(DR_REG_F0 + i))); + INSTR_CREATE_c_fsdsp(dcontext, + opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, + dstack_offs + i * XSP_SZ, OPSZ_8), + opnd_create_reg(DR_REG_F0 + i))); } dstack_offs += DR_NUM_FPR_REGS * XSP_SZ; @@ -140,8 +151,8 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, opnd_create_immed_int(FCSR, OPSZ_12b))); PRE(ilist, instr, - INSTR_CREATE_sd(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), - opnd_create_reg(DR_REG_A0))); + INSTR_CREATE_c_sdsp(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), + opnd_create_reg(DR_REG_A0))); dstack_offs += XSP_SZ; @@ -149,12 +160,18 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, * shape. */ dstack_offs += (proc_num_simd_registers() * sizeof(dr_simd_t)); + /* Restore sp. */ + PRE(ilist, instr, + INSTR_CREATE_addi(dcontext, opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_SP), + opnd_create_immed_int(-dstack_middle_offs, OPSZ_12b))); + /* Restore the registers we used. 
*/ PRE(ilist, instr, - INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A0), - OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(DR_REG_A0)))); + INSTR_CREATE_c_ldsp(dcontext, opnd_create_reg(DR_REG_A0), + OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(DR_REG_A0)))); - return dstack_offs; + return dstack_offs + dstack_middle_offs; } void @@ -170,10 +187,21 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist /* sp is the stack pointer, which should not be poped. */ cci->reg_skip[DR_REG_SP - DR_REG_START_GPR] = true; + /* XXX: c.sdsp/c.fsdsp has a zero-extended 9-bit offset, which is not enough for our + * usage. + */ + ASSERT(current_offs >= DR_NUM_FPR_REGS * XSP_SZ); + PRE(ilist, instr, + INSTR_CREATE_addi(dcontext, opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_SP), + opnd_create_immed_int(DR_NUM_FPR_REGS * XSP_SZ, OPSZ_12b))); + current_offs -= XSP_SZ; + /* Uses c.ldsp to save space, see -max_bb_instrs option, same below. */ PRE(ilist, instr, - INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A0), - OPND_CREATE_MEM64(DR_REG_SP, current_offs))); + INSTR_CREATE_c_ldsp( + dcontext, opnd_create_reg(DR_REG_A0), + OPND_CREATE_MEM64(DR_REG_SP, current_offs - DR_NUM_FPR_REGS * XSP_SZ))); /* csrw a0, fcsr */ PRE(ilist, instr, INSTR_CREATE_csrrw(dcontext, opnd_create_reg(DR_REG_X0), @@ -185,11 +213,19 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist /* Pop FPRs. */ for (int i = 0; i < DR_NUM_FPR_REGS; i++) { PRE(ilist, instr, - INSTR_CREATE_fld(dcontext, opnd_create_reg(DR_REG_F0 + i), - opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, - current_offs + i * XSP_SZ, OPSZ_8))); + INSTR_CREATE_c_fldsp(dcontext, opnd_create_reg(DR_REG_F0 + i), + opnd_create_base_disp( + DR_REG_SP, DR_REG_NULL, 0, + current_offs - DR_NUM_FPR_REGS * XSP_SZ + i * XSP_SZ, + OPSZ_8))); } + /* Restore sp. 
*/ + PRE(ilist, instr, + INSTR_CREATE_addi(dcontext, opnd_create_reg(DR_REG_SP), + opnd_create_reg(DR_REG_SP), + opnd_create_immed_int(-DR_NUM_FPR_REGS * XSP_SZ, OPSZ_12b))); + /* Skip pc field. */ current_offs -= XSP_SZ; @@ -201,9 +237,10 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist continue; PRE(ilist, instr, - INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_START_GPR + i), - opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, - current_offs + i * XSP_SZ, OPSZ_8))); + INSTR_CREATE_c_ldsp(dcontext, opnd_create_reg(DR_REG_START_GPR + i), + opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, + current_offs + i * XSP_SZ, + OPSZ_8))); } } @@ -724,19 +761,18 @@ mangle_exclusive_load(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, src0 = instr_get_src(instr, 0); opcode = instr_get_opcode(instr) == OP_lr_d ? OP_ld : OP_lw; opsz = opcode == OP_ld ? OPSZ_8 : OPSZ_4; - ASSERT(opnd_is_reg(dst) && opnd_is_reg(src0)); + ASSERT(opnd_is_reg(dst) && opnd_is_base_disp(src0)); if (opnd_get_reg(dst) == dr_reg_stolen) { opnd_replace_reg(&dst, dr_reg_stolen, scratch_reg2); } - if (opnd_get_reg(src0) == dr_reg_stolen) { + if (opnd_get_base(src0) == dr_reg_stolen) { opnd_replace_reg(&src0, dr_reg_stolen, scratch_reg2); } instr_reset(dcontext, instr); instr_set_opcode(instr, opcode); instr_set_num_opnds(dcontext, instr, 1, 1); instr_set_dst(instr, 0, dst); - instr_set_src(instr, 0, - opnd_create_base_disp(opnd_get_reg(src0), DR_REG_NULL, 0, 0, opsz)); + instr_set_src(instr, 0, src0); instr_set_translation(instr, instrlist_get_translation_target(ilist)); /* Keep the acquire semantics if needed. */ @@ -751,7 +787,7 @@ mangle_exclusive_load(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, /* Save address, value and size to TLS slot. 
*/ PRE(ilist, next_instr, - instr_create_save_to_tls(dcontext, opnd_get_reg(src0), TLS_LRSC_ADDR_SLOT)); + instr_create_save_to_tls(dcontext, opnd_get_base(src0), TLS_LRSC_ADDR_SLOT)); PRE(ilist, next_instr, instr_create_save_to_tls(dcontext, opnd_get_reg(dst), TLS_LRSC_VALUE_SLOT)); PRE(ilist, next_instr, @@ -779,19 +815,20 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, reg_id_t scratch_reg1, scratch_reg2; ushort slot1, slot2; int opcode; - opnd_t dst, src0; + opnd_t dst0, dst1; opnd_size_t opsz; instr_t *fail = INSTR_CREATE_label(dcontext), *final = INSTR_CREATE_label(dcontext), *loop = INSTR_CREATE_label(dcontext); ASSERT(instr_is_exclusive_store(instr)); - ASSERT(instr_num_dsts(instr) == 1 && instr_num_srcs(instr) == 3); + ASSERT(instr_num_dsts(instr) == 2 && instr_num_srcs(instr) == 2); /* TODO i#3544: Not implemented. */ ASSERT_NOT_IMPLEMENTED(!instr_uses_reg(instr, dr_reg_stolen)); ASSERT_NOT_IMPLEMENTED(!instr_uses_reg(instr, DR_REG_TP)); - dst = instr_get_dst(instr, 0); - src0 = instr_get_src(instr, 0); + dst0 = instr_get_dst(instr, 0); + dst1 = instr_get_dst(instr, 1); + ASSERT(opnd_is_base_disp(dst0)); opsz = instr_get_opcode(instr) == OP_sc_d ? OPSZ_8 : OPSZ_4; scratch_reg1 = pick_scratch_reg(dcontext, instr, DR_REG_NULL, &slot1); scratch_reg2 = pick_scratch_reg(dcontext, instr, scratch_reg1, &slot2); @@ -800,12 +837,12 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, PRE(ilist, instr, instr_create_save_to_tls(dcontext, scratch_reg1, slot1)); PRE(ilist, instr, instr_create_save_to_tls(dcontext, scratch_reg2, slot2)); - /* Restore address saved by exclusive load and check if it's equal to src0. */ + /* Restore address saved by exclusive load and check if it's equal to dst0. 
*/ PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch_reg1, TLS_LRSC_ADDR_SLOT)); PRE(ilist, instr, INSTR_CREATE_bne(dcontext, opnd_create_instr(fail), opnd_create_reg(scratch_reg1), - src0)); + opnd_create_reg(opnd_get_base(dst0)))); /* Restore size saved by exclusive load and check if it's equal to current size. */ PRE(ilist, instr, @@ -826,7 +863,7 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, */ opcode = instr_get_opcode(instr) == OP_sc_d ? OP_lr_d : OP_lr_w; PRE(ilist, instr, - instr_create_1dst_2src(dcontext, opcode, opnd_create_reg(scratch_reg2), src0, + instr_create_1dst_2src(dcontext, opcode, opnd_create_reg(scratch_reg2), dst0, opnd_create_immed_int(0b11, OPSZ_2b))); PRE(ilist, instr, INSTR_CREATE_bne(dcontext, opnd_create_instr(final), @@ -835,7 +872,7 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, /* instr is here. */ PRE(ilist, next_instr, - INSTR_CREATE_bne(dcontext, opnd_create_instr(loop), dst, + INSTR_CREATE_bne(dcontext, opnd_create_instr(loop), dst1, opnd_create_reg(DR_REG_ZERO))); /* End of the LR/SC sequence. */ @@ -844,7 +881,7 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, /* Write a non-zero value to dst on fail. */ PRE(ilist, next_instr, fail); PRE(ilist, next_instr, - XINST_CREATE_load_int(dcontext, dst, opnd_create_immed_int(1, OPSZ_12b))); + XINST_CREATE_load_int(dcontext, dst1, opnd_create_immed_int(1, OPSZ_12b))); PRE(ilist, next_instr, final); @@ -880,7 +917,7 @@ mangle_exclusive_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, * stores in between application instructions, it is in danger of breaking every monitor * in the application." * - * On a Unmatched RISC-V SBC, without this mangling, any application linked with libc + * On an Unmatched RISC-V SBC, without this mangling, any application linked with libc * would hang on startup. 
* * So for the LR/SC sequence, a similar approach to AArch64's exclusive monitors is diff --git a/core/arch/riscv64/riscv64.asm b/core/arch/riscv64/riscv64.asm index 90b658f7eea..d64059bd1f8 100644 --- a/core/arch/riscv64/riscv64.asm +++ b/core/arch/riscv64/riscv64.asm @@ -95,7 +95,9 @@ call_dispatch_alt_stack_no_free: jalr ARG3 /* Switch stack back. */ mv sp, s0 - beqz s1, GLOBAL_LABEL(unexpected_return) + bnez s1, call_dispatch_alt_stack_ok_return + jal GLOBAL_LABEL(unexpected_return) +call_dispatch_alt_stack_ok_return: /* Restore the stack. */ ld s1, 0 (sp) ld s0, 8 (sp) @@ -523,25 +525,6 @@ GLOBAL_LABEL(_dynamorio_runtime_resolve:) #endif /* UNIX */ #ifdef LINUX -/* - * thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, - * void *ctid, void (*func)(void)) - */ - DECLARE_FUNC(dynamorio_clone) -GLOBAL_LABEL(dynamorio_clone:) - addi ARG2, ARG2, -16 /* Description: newsp = newsp - 16. */ - sd ARG6, 0 (ARG2) /* The func is now on TOS of newsp. */ - li SYSNUM_REG, SYS_clone /* All args are already in syscall registers.*/ - ecall - bnez ARG1, dynamorio_clone_parent - ld ARG1, 0 (sp) - addi sp, sp, 16 - jalr ARG1 - jal GLOBAL_REF(unexpected_return) -dynamorio_clone_parent: - ret - END_FUNC(dynamorio_clone) - DECLARE_FUNC(dynamorio_sigreturn) GLOBAL_LABEL(dynamorio_sigreturn:) li SYSNUM_REG, SYS_rt_sigreturn diff --git a/core/arch/x86/x86.asm b/core/arch/x86/x86.asm index c256f03a1ed..8fbdf124dbe 100644 --- a/core/arch/x86/x86.asm +++ b/core/arch/x86/x86.asm @@ -1556,60 +1556,6 @@ no_swap: END_FUNC(main_signal_handler) #endif /* !HAVE_SIGALTSTACK */ -#ifdef LINUX -/* SYS_clone swaps the stack so we need asm support to call it. - * signature: - * thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, - * void *ctid, void (*func)(void)) - */ - DECLARE_FUNC(dynamorio_clone) -GLOBAL_LABEL(dynamorio_clone:) - /* save func for use post-syscall on the newsp. - * when using clone_record_t we have 4 slots we can clobber. 
- */ -# ifdef X64 - sub ARG2, ARG_SZ - mov [ARG2], ARG6 /* func is now on TOS of newsp */ - /* all args are already in syscall registers */ - mov r10, rcx - mov REG_XAX, SYS_clone - syscall -# else - mov REG_XAX, ARG6 - mov REG_XCX, ARG2 - sub REG_XCX, ARG_SZ - mov [REG_XCX], REG_XAX /* func is now on TOS of newsp */ - mov REG_XDX, ARG3 - /* preserve callee-saved regs */ - push REG_XBX - push REG_XSI - push REG_XDI - /* now can't use ARG* since xsp modified by pushes */ - mov REG_XBX, DWORD [4*ARG_SZ + REG_XSP] /* ARG1 + 3 pushes */ - mov REG_XSI, DWORD [7*ARG_SZ + REG_XSP] /* ARG4 + 3 pushes */ - mov REG_XDI, DWORD [8*ARG_SZ + REG_XSP] /* ARG5 + 3 pushes */ - mov REG_XAX, SYS_clone - /* PR 254280: we assume int$80 is ok even for LOL64 */ - int HEX(80) -# endif - cmp REG_XAX, 0 - jne dynamorio_clone_parent - pop REG_XCX - call REG_XCX - /* shouldn't return */ - jmp GLOBAL_REF(unexpected_return) -dynamorio_clone_parent: -# ifndef X64 - /* restore callee-saved regs */ - pop REG_XDI - pop REG_XSI - pop REG_XBX -# endif - /* return val is in eax still */ - ret - END_FUNC(dynamorio_clone) -#endif /* LINUX */ - #endif /* UNIX */ diff --git a/core/drlibc/drlibc.h b/core/drlibc/drlibc.h index fdc26c2ae68..09ec0c61a5b 100644 --- a/core/drlibc/drlibc.h +++ b/core/drlibc/drlibc.h @@ -72,6 +72,14 @@ dynamorio_mach_syscall(uint sysnum, uint num_args, ...); # else ptr_int_t dynamorio_syscall(uint sysnum, uint num_args, ...); +/* Wrapper for clone(). + * N.B. func must not return. + * On x86 (32 and 64-bit) this supports passing NULL for newsp which is just + * shorthand for the child using the same value as the parent. 
+ */ +thread_id_t +dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, void *ctid, + void (*func)(void)); # endif #endif diff --git a/core/drlibc/drlibc_aarch64.asm b/core/drlibc/drlibc_aarch64.asm index b56ed04781f..611b79384e0 100644 --- a/core/drlibc/drlibc_aarch64.asm +++ b/core/drlibc/drlibc_aarch64.asm @@ -37,6 +37,12 @@ #include "../asm_defines.asm" START_FILE +#ifdef LINUX +#include "include/syscall.h" +#endif + +DECL_EXTERN(unexpected_return) + DECLARE_FUNC(dynamorio_syscall) GLOBAL_LABEL(dynamorio_syscall:) #ifdef LINUX @@ -125,4 +131,24 @@ GLOBAL_LABEL(dynamorio_mach_syscall:) END_FUNC(dynamorio_mach_syscall) #endif +#ifdef LINUX +/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, + * void *ctid, void (*func)(void)) + * TODO i#6514: Add support for passing NULL for newsp. + */ + DECLARE_FUNC(dynamorio_clone) +GLOBAL_LABEL(dynamorio_clone:) + stp ARG6, x0, [ARG2, #-16]! /* func is now on TOS of newsp */ + /* All args are already in syscall registers. */ + mov SYSNUM_REG, #SYS_clone + svc #0 + cbnz x0, dynamorio_clone_parent + ldp x0, x1, [sp], #16 + blr x0 + bl GLOBAL_REF(unexpected_return) +dynamorio_clone_parent: + ret + END_FUNC(dynamorio_clone) +#endif + END_FILE diff --git a/core/drlibc/drlibc_arm.asm b/core/drlibc/drlibc_arm.asm index e6eda4f6dd3..c9bbdabae98 100644 --- a/core/drlibc/drlibc_arm.asm +++ b/core/drlibc/drlibc_arm.asm @@ -37,6 +37,12 @@ #include "../asm_defines.asm" START_FILE +#ifdef LINUX +#include "include/syscall.h" +#endif + +DECL_EXTERN(unexpected_return) + /* we share dynamorio_syscall w/ preload */ /* To avoid libc wrappers we roll our own syscall here. 
* Hardcoded to use svc/swi for 32-bit -- FIXME: use something like do_syscall @@ -95,4 +101,31 @@ GLOBAL_LABEL(FUNCNAME:) END_FUNC(FUNCNAME) #undef FUNCNAME +#ifdef LINUX +/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, + * void *ctid, void (*func)(void)) + * TODO i#6514: Add support for passing NULL for newsp. + */ + DECLARE_FUNC(dynamorio_clone) +GLOBAL_LABEL(dynamorio_clone:) + /* Save callee-saved regs we clobber in the parent. */ + push {r4, r5, r7} + ldr r4, [sp, #12] /* ARG5 minus the pushes above */ + ldr r5, [sp, #16] /* ARG6 minus the pushes above */ + /* All args are now in syscall registers. */ + /* Push func on the new stack. */ + stmdb ARG2!, {r5} + mov r7, #SYS_clone + svc 0 + cmp r0, #0 + bne dynamorio_clone_parent + ldmia sp!, {r0} + blx r0 + bl GLOBAL_REF(unexpected_return) +dynamorio_clone_parent: + pop {r4, r5, r7} + bx lr + END_FUNC(dynamorio_clone) +#endif + END_FILE diff --git a/core/drlibc/drlibc_riscv64.asm b/core/drlibc/drlibc_riscv64.asm index a1ae7343127..45f706741fd 100644 --- a/core/drlibc/drlibc_riscv64.asm +++ b/core/drlibc/drlibc_riscv64.asm @@ -37,6 +37,14 @@ #include "../asm_defines.asm" START_FILE +#ifndef LINUX +# error Non-Linux is not supported +#endif + +#include "include/syscall.h" + +DECL_EXTERN(unexpected_return) + /* * ptr_int_t dynamorio_syscall(uint sysnum, uint num_args, ...); * @@ -65,4 +73,24 @@ GLOBAL_LABEL(FUNCNAME:) END_FUNC(FUNCNAME) #undef FUNCNAME +/* + * thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, + * void *ctid, void (*func)(void)) + * TODO i#6514: Add support for passing NULL for newsp. + */ + DECLARE_FUNC(dynamorio_clone) +GLOBAL_LABEL(dynamorio_clone:) + addi ARG2, ARG2, -16 /* Description: newsp = newsp - 16. */ + sd ARG6, 0 (ARG2) /* The func is now on TOS of newsp. 
*/ + li SYSNUM_REG, SYS_clone /* All args are already in syscall registers.*/ + ecall + bnez ARG1, dynamorio_clone_parent + ld ARG1, 0 (sp) + addi sp, sp, 16 + jalr ARG1 + jal GLOBAL_REF(unexpected_return) +dynamorio_clone_parent: + ret + END_FUNC(dynamorio_clone) + END_FILE diff --git a/core/drlibc/drlibc_unix.c b/core/drlibc/drlibc_unix.c index c583d5e761c..087c0a4693f 100644 --- a/core/drlibc/drlibc_unix.c +++ b/core/drlibc/drlibc_unix.c @@ -315,30 +315,34 @@ os_open(const char *fname, int os_open_flags) { int res; int flags = 0; + int mode = 0; + const int create_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + if (TEST(OS_OPEN_ALLOW_LARGE, os_open_flags)) flags |= O_LARGEFILE; - if (TEST(OS_OPEN_WRITE_ONLY, os_open_flags)) - res = open_syscall(fname, flags | O_WRONLY, 0); - else if (!TEST(OS_OPEN_WRITE, os_open_flags)) - res = open_syscall(fname, flags | O_RDONLY, 0); + + if (TEST(OS_OPEN_WRITE_ONLY, os_open_flags)) { + flags |= O_WRONLY | O_CREAT; + mode = create_mode; + } else if (!TEST(OS_OPEN_WRITE, os_open_flags)) + flags |= O_RDONLY; else { - res = open_syscall( - fname, - flags | O_RDWR | O_CREAT | - (TEST(OS_OPEN_APPEND, os_open_flags) - ? - /* Currently we only support either appending - * or truncating, just like Windows and the client - * interface. If we end up w/ a use case that wants - * neither it could open append and then seek; if we do - * add OS_TRUNCATE or sthg we'll need to add it to - * any current writers who don't set OS_OPEN_REQUIRE_NEW. - */ - O_APPEND - : O_TRUNC) | - (TEST(OS_OPEN_REQUIRE_NEW, os_open_flags) ? O_EXCL : 0), - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + flags |= O_RDWR | O_CREAT | + /* Currently we only support either appending + * or truncating, just like Windows and the client + * interface. If we end up w/ a use case that wants + * neither it could open append and then seek; if we do + * add OS_TRUNCATE or sthg we'll need to add it to + * any current writers who don't set OS_OPEN_REQUIRE_NEW. 
+ */ + (TEST(OS_OPEN_APPEND, os_open_flags) ? O_APPEND : O_TRUNC) | + (TEST(OS_OPEN_REQUIRE_NEW, os_open_flags) ? O_EXCL : 0); + + mode = create_mode; } + + res = open_syscall(fname, flags, mode); + if (res < 0) return INVALID_FILE; diff --git a/core/drlibc/drlibc_x86.asm b/core/drlibc/drlibc_x86.asm index 430126c319b..45b273a9a0f 100644 --- a/core/drlibc/drlibc_x86.asm +++ b/core/drlibc/drlibc_x86.asm @@ -42,6 +42,9 @@ #include "../arch/asm_defines.asm" #include "../arch/x86/x86_asm_defines.asm" /* PUSHGPR, POPGPR, etc. */ +#ifdef LINUX +# include "include/syscall.h" +#endif #ifdef MACOS # include "include/syscall_mach.h" /* SYSCALL_NUM_MARKER_* */ #endif @@ -754,4 +757,81 @@ GLOBAL_LABEL(load_dynamo_failure:) #endif /* WINDOWS */ +#ifdef LINUX +/* SYS_clone swaps the stack so we need asm support to call it. + * signature: + * thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, + * void *ctid, void (*func)(void)) + * i#6514: If newsp is NULL then that tells the kernel to give the child the + * same value for SP as the parent. + */ + DECLARE_FUNC(dynamorio_clone) +GLOBAL_LABEL(dynamorio_clone:) + /* Save func for use post-syscall on the newsp. + * This is tricky because we have to handle the case of newsp == NULL. + */ +# ifdef X64 + /* The syscall preserves all registers except rax, rcx, r11. */ + push r15 + mov r15, ARG6 /* Func is now in r15. */ + and ARG2, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */ + /* All args are already in syscall registers, except for rcx. */ + mov r10, rcx + mov REG_XAX, SYS_clone + syscall +# else + /* Fetch some args we need before we modify XSP and ARGn is no + * longer usable. + */ + mov REG_XCX, ARG2 /* newsp */ + mov REG_XDX, ARG3 /* ptid */ + mov REG_XAX, ARG6 /* func */ + /* Preserve callee-saved regs. */ + push REG_XBX + push REG_XSI + push REG_XDI + /* Now can't use ARG* since xsp modified by pushes. 
*/ + mov REG_XBX, DWORD [4*ARG_SZ + REG_XSP] /* ARG1 + 3 pushes */ + mov REG_XSI, DWORD [7*ARG_SZ + REG_XSP] /* ARG4 + 3 pushes */ + mov REG_XDI, DWORD [8*ARG_SZ + REG_XSP] /* ARG5 + 3 pushes */ + /* i#6514: Save func on the child's stack. Remember that if newsp is + * NULL then the child's stack is our stack. When the syscall returns + * it's cumbersome to know whether newsp was NULL. To keep things simple + * for the parent always push func on our stack. + */ + push REG_XAX /* Xsp is misaligned at this point but kernel doesn't care. */ + and REG_XCX, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */ + jz newsp_is_null + sub REG_XCX, ARG_SZ + mov [REG_XCX], REG_XAX /* Func is now on TOS of newsp. */ +newsp_is_null: + mov REG_XAX, SYS_clone + /* PR 254280: we assume int$80 is ok even for LOL64 */ + int HEX(80) +# endif + cmp REG_XAX, 0 + jne dynamorio_clone_parent +# ifdef X64 + call r15 +# else + pop REG_XCX + call REG_XCX +# endif + /* Shouldn't return. */ + jmp GLOBAL_REF(unexpected_return) +dynamorio_clone_parent: +# ifdef X64 + pop r15 +# else + /* Restore callee-saved regs. */ + add REG_XSP, ARG_SZ /* Discard func. */ + pop REG_XDI + pop REG_XSI + pop REG_XBX +# endif + /* Return val is in eax still. */ + ret + END_FUNC(dynamorio_clone) +#endif /* LINUX */ + END_FILE diff --git a/core/drlibc/drlibc_xarch.asm b/core/drlibc/drlibc_xarch.asm index 4c0370153fb..44c871ea41b 100644 --- a/core/drlibc/drlibc_xarch.asm +++ b/core/drlibc/drlibc_xarch.asm @@ -37,15 +37,17 @@ #include "../arch/asm_defines.asm" START_FILE -/* For AArch64, drlibc has no references to unexpected_return, and in fact we - * have a relocation reachability error if we include it here (i#4304), so - * we limit its use in drlibc to x86 or arm. - */ -#ifndef AARCH64 DECL_EXTERN(d_r_internal_error) /* For debugging: report an error if the function called by call_switch_stack() * unexpectedly returns. Also used elsewhere. 
+ * i#4304: When calling this function do not use short branch instructions, + * e.g., conditional branch instructions on aarch64. They don't have enough + * bits of offset to reach this. Instead, use unconditional branch or + * call instructions (e.g., b, bl on aarch64) that have a sufficient number + * of bits. On x86, and other architectures where a call instruction requires + * the stack, only use branch instructions: This can get called when there + * is insufficient stack. */ DECLARE_FUNC(unexpected_return) GLOBAL_LABEL(unexpected_return:) @@ -56,6 +58,5 @@ GLOBAL_LABEL(unexpected_return:) */ JUMP GLOBAL_REF(unexpected_return) END_FUNC(unexpected_return) -#endif END_FILE diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c index 7e997cc3dc8..bd3330f9fcf 100644 --- a/core/ir/aarch64/codec.c +++ b/core/ir/aarch64/codec.c @@ -1,6 +1,6 @@ /* ********************************************************** * Copyright (c) 2017-2023 Google, Inc. All rights reserved. - * Copyright (c) 2016-2023 ARM Limited. All rights reserved. + * Copyright (c) 2016-2024 ARM Limited. All rights reserved. * **********************************************************/ /* @@ -1598,6 +1598,11 @@ decode_single_sized(reg_id_t min_reg, reg_id_t max_reg, uint pos_start, uint bit reg_id = reg_id + min_reg - max_reg - 1; *opnd = opnd_create_reg_element_vector(reg_id, size); + + if (offset > 0) { + opnd->aux.flags |= DR_OPND_IMPLICIT; + } + return true; } @@ -1635,6 +1640,11 @@ encode_sized_base(uint pos_start, uint size_start, uint min_size, uint max_size, if (size < min_size) return false; + /* DR_OPND_IMPLICIT should be set if using an offset */ + if ((offset > 0) != ((opnd.aux.flags & DR_OPND_IMPLICIT) ? 
true : false)) { + return false; + } + uint reg_number; if (!is_vreg(&vec_size, ®_number, opnd)) return false; @@ -2396,7 +2406,8 @@ static inline bool encode_float_const_pair(uint pos, float first, float second, opnd_t opnd, OUT uint *enc_out) { - IF_RETURN_FALSE(!opnd_is_immed_float(opnd)) + if (!opnd_is_immed_float(opnd)) + return false; const float value = opnd_get_immed_float(opnd); IF_RETURN_FALSE((value != first) && (value != second)) @@ -2450,6 +2461,20 @@ encode_opnd_fpimm1_half_two_5(uint enc, int opcode, byte *pc, opnd_t opnd, return encode_float_const_pair(5, 0.5f, 2.0f, opnd, enc_out); } +/* imm2_6: 2-bit immediate from bits 6-7 */ + +static inline bool +decode_opnd_imm2_6(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + return decode_opnd_int(6, 2, false, 0, OPSZ_3b, 0, enc, opnd); +} + +static inline bool +encode_opnd_imm2_6(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) +{ + return encode_opnd_int(6, 2, false, 0, 0, opnd, enc_out); +} + /* op2: 3-bit immediate from bits 5-7 */ static inline bool @@ -4476,20 +4501,6 @@ encode_opnd_q16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) return encode_opnd_vector_reg(16, 4, opnd, enc_out); } -/* z16: Z register at bit position 16. */ - -static inline bool -decode_opnd_z16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - return decode_opnd_vector_reg(16, Z_REG, enc, opnd); -} - -static inline bool -encode_opnd_z16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) -{ - return encode_opnd_vector_reg(16, Z_REG, opnd, enc_out); -} - /* z_b_16: Z register with b size elements. 
*/ static inline bool @@ -4600,15 +4611,6 @@ encode_opnd_s16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) return encode_opnd_vector_reg(16, 2, opnd, enc_out); } -static inline opnd_size_t -calculate_mem_transfer(uint bytes_per_element, aarch64_reg_offset element_size) -{ - ASSERT(element_size >= BYTE_REG && element_size <= DOUBLE_REG); - - const uint elements = get_elements_in_sve_vector(element_size); - return opnd_size_from_bytes(bytes_per_element * elements); -} - static inline bool svemem_gprs_per_element_decode(opnd_size_t mem_transfer, uint shift_amount, uint enc, int opcode, byte *pc, OUT opnd_t *opnd) @@ -4624,6 +4626,23 @@ static inline bool svemem_gprs_per_element_encode(opnd_size_t mem_transfer, uint shift_amount, uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) { + const uint index_reg = opnd_get_index(opnd); + + /* Only the first-fault loads allow Xm to be WZR. */ + switch (opcode) { + case OP_ldff1b: + case OP_ldff1h: + case OP_ldff1w: + case OP_ldff1d: + case OP_ldff1sb: + case OP_ldff1sh: + case OP_ldff1sw: break; + default: + if (index_reg == DR_REG_WZR) + return false; + break; + } + if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != mem_transfer || opnd_get_disp(opnd) != 0) return false; @@ -4645,20 +4664,6 @@ svemem_gprs_per_element_encode(opnd_size_t mem_transfer, uint shift_amount, uint return true; } -static inline bool -decode_opnd_svemem_gprs_b1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - return svemem_gprs_per_element_decode(calculate_mem_transfer(1, BYTE_REG), 0, enc, - opcode, pc, opnd); -} - -static inline bool -encode_opnd_svemem_gprs_b1(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) -{ - return svemem_gprs_per_element_encode(calculate_mem_transfer(1, BYTE_REG), 0, enc, - opcode, pc, opnd, enc_out); -} - /* imm8_10: 8 bit imm at pos 10, split across 20:16 and 12:10. 
*/ static inline bool @@ -4695,10 +4700,8 @@ decode_svemem_gpr_vec(uint enc, aarch64_reg_offset element_size, dr_extend_type_ const reg_id_t zm = decode_vreg(Z_REG, extract_uint(enc, 16, 5)); ASSERT(reg_is_z(zm)); - const uint num_elements = get_elements_in_sve_vector(element_size); - const opnd_size_t mem_size = is_prefetch - ? OPSZ_0 - : opnd_size_from_bytes((1 << memory_access_size) * num_elements); + const opnd_size_t mem_size = + is_prefetch ? OPSZ_0 : opnd_size_from_bytes(1 << memory_access_size); *opnd = opnd_create_vector_base_disp_aarch64( xn, zm, get_opnd_size_from_offset(element_size), mod, scaled, 0, 0, mem_size, @@ -5487,67 +5490,6 @@ encode_svemem_gpr_simm4(uint enc, opnd_size_t transfer_size, int scale, opnd_t o return true; } -static inline bool -decode_ssz(uint enc, OUT opnd_size_t *transfer_size) -{ - switch (BITS(enc, 22, 21)) { - case 0b00: *transfer_size = OPSZ_16; return true; - case 0b01: *transfer_size = OPSZ_32; return true; - default: break; - } - return false; -} - -/* svemem_gpr_simm4: SVE memory operand [{, #}] */ - -static inline bool -decode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - opnd_size_t transfer_size; - return decode_ssz(enc, &transfer_size) && - decode_svemem_gpr_simm4(enc, transfer_size, 16, opnd); -} - -static inline bool -encode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) -{ - opnd_size_t transfer_size; - return decode_ssz(enc, &transfer_size) && - encode_svemem_gpr_simm4(enc, OPSZ_16, 16, opnd, enc_out); -} - -/* SVE memory operand [{, #, MUL VL}] multiple dest registers or nt */ - -static inline bool -decode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - const uint register_count = BITS(enc, 22, 21) + 1; - const uint transfer_bytes = (register_count * dr_get_sve_vector_length()) / 8; - /* The offset is scaled by the size of the vector in memory. - * This is the same as the transfer size. 
- */ - const uint scale = transfer_bytes; - - return decode_svemem_gpr_simm4(enc, opnd_size_from_bytes(transfer_bytes), scale, - opnd); -} - -static inline bool -encode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) -{ - const uint register_count = BITS(enc, 22, 21) + 1; - const uint transfer_bytes = (register_count * dr_get_sve_vector_length()) / 8; - /* The offset is scaled by the size of the vector in memory. - * This is the same as the transfer size. - */ - const uint scale = transfer_bytes; - - return encode_svemem_gpr_simm4(enc, opnd_size_from_bytes(transfer_bytes), scale, opnd, - enc_out); -} - /* hsd_immh_sz: The element size of a vector mediated by immh with possible values h, s * and d */ @@ -7321,6 +7263,19 @@ encode_opnd_z_size_bhsd_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint return encode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, 0, 0, opnd, enc_out); } +static inline bool +decode_opnd_z_size_bhsd_5p1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + return decode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, 0, 1, enc, pc, opnd); +} + +static inline bool +encode_opnd_z_size_bhsd_5p1(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + return encode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, 0, 1, opnd, enc_out); +} + static inline bool decode_opnd_z_size_bhs_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { @@ -7735,9 +7690,7 @@ decode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefet const aarch64_reg_offset msz = BITS(enc, 24, 23); const uint scale = 1 << msz; - const opnd_size_t mem_transfer = is_prefetch - ? OPSZ_0 - : opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size)); + const opnd_size_t mem_transfer = is_prefetch ? 
OPSZ_0 : opnd_size_from_bytes(scale); const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5)); ASSERT(reg_is_z(zn)); @@ -7781,9 +7734,7 @@ encode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefet const aarch64_reg_offset msz = BITS(enc, 24, 23); const uint scale = 1 << msz; - const opnd_size_t mem_transfer = is_prefetch - ? OPSZ_0 - : opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size)); + const opnd_size_t mem_transfer = is_prefetch ? OPSZ_0 : opnd_size_from_bytes(scale); if (opnd_get_size(opnd) != mem_transfer) return false; @@ -7863,6 +7814,46 @@ encode_opnd_sveprf_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint enc_out); } +/* sveprf_gpr_shf: SVE memory address [, {, LSL #x}] */ +static inline bool +decode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + const aarch64_reg_offset insz = BITS(enc, 24, 23); + + return svemem_gprs_per_element_decode(opnd_size_from_bytes(1 << insz), insz, enc, + opcode, pc, opnd); +} + +static inline bool +encode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) +{ + const aarch64_reg_offset insz = BITS(enc, 24, 23); + + return svemem_gprs_per_element_encode(opnd_size_from_bytes(1 << insz), insz, enc, + opcode, pc, opnd, enc_out); +} + +/* sveprf_gpr_shf_signed: SVE memory address [, {, LSL #x}] for signed load + * operations */ +static inline bool +encode_opnd_svemem_gpr_shf_signed(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + const aarch64_reg_offset insz = BITS(~enc, 24, 23); + + return svemem_gprs_per_element_encode(opnd_size_from_bytes(1 << insz), insz, enc, + opcode, pc, opnd, enc_out); +} + +static inline bool +decode_opnd_svemem_gpr_shf_signed(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + const aarch64_reg_offset insz = BITS(~enc, 24, 23); + + return svemem_gprs_per_element_decode(opnd_size_from_bytes(1 << insz), insz, enc, + opcode, pc, opnd); +} + /* SVE memory 
address (64-bit offset) [, .D{, }] */ static inline bool decode_opnd_svemem_gpr_vec64(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) @@ -7942,16 +7933,6 @@ sizes_from_dtype(const uint enc, aarch64_reg_offset *insz, aarch64_reg_offset *e *elsz = BITS(dtype, 1, 0); } -static inline opnd_size_t -memory_transfer_size_from_dtype(uint enc) -{ - aarch64_reg_offset insz, elsz; - sizes_from_dtype(enc, &insz, &elsz, true); - - const uint elements = get_elements_in_sve_vector(elsz); - return opnd_size_from_bytes((1 << insz) * elements); -} - static inline bool decode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc, OUT opnd_t *opnd) @@ -7967,8 +7948,7 @@ decode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc, const aarch64_reg_offset element_size = BITS(enc, size_bit, size_bit) == single_bit_value ? SINGLE_REG : DOUBLE_REG; - const opnd_size_t mem_transfer = - opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size)); + const opnd_size_t mem_transfer = opnd_size_from_bytes(scale); const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5)); ASSERT(reg_is_z(zn)); @@ -8014,8 +7994,7 @@ encode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc, opnd_t const aarch64_reg_offset msz = BITS(enc, 24, 23); const uint scale = 1 << msz; - const opnd_size_t mem_transfer = - opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size)); + const opnd_size_t mem_transfer = opnd_size_from_bytes(scale); IF_RETURN_FALSE(opnd_get_size(opnd) != mem_transfer) uint xreg_number; @@ -8050,151 +8029,87 @@ encode_opnd_svemem_vec_22sd_gpr16(uint enc, int opcode, byte *pc, opnd_t opnd, static inline bool decode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { - const opnd_size_t transfer_size = memory_transfer_size_from_dtype(enc); - return decode_svemem_gpr_simm4(enc, transfer_size, opnd_size_in_bytes(transfer_size), - opnd); -} - -static inline bool 
-encode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) -{ - const opnd_size_t transfer_size = memory_transfer_size_from_dtype(enc); - return encode_svemem_gpr_simm4(enc, transfer_size, opnd_size_in_bytes(transfer_size), - opnd, enc_out); -} - -/* SVE memory operand [, LSL #x], mem transfer size based on ssz */ - -static inline bool -decode_opnd_svemem_ssz_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - opnd_size_t mem_transfer; - if (!decode_ssz(enc, &mem_transfer)) - return false; + aarch64_reg_offset insz, elsz; + sizes_from_dtype(enc, &insz, &elsz, true); - const uint shift_amount = BITS(enc, 24, 23); + const uint elements = get_elements_in_sve_vector(elsz); + const uint scale = (1 << insz) * elements; - return svemem_gprs_per_element_decode(mem_transfer, shift_amount, enc, opcode, pc, - opnd); + const opnd_size_t transfer_size = opnd_size_from_bytes(1 << insz); + return decode_svemem_gpr_simm4(enc, transfer_size, scale, opnd); } static inline bool -encode_opnd_svemem_ssz_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) +encode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) { - opnd_size_t mem_transfer; - if (!decode_ssz(enc, &mem_transfer)) - return false; + aarch64_reg_offset insz, elsz; + sizes_from_dtype(enc, &insz, &elsz, true); - const uint shift_amount = BITS(enc, 24, 23); + const uint elements = get_elements_in_sve_vector(elsz); + const uint scale = (1 << insz) * elements; - return svemem_gprs_per_element_encode(mem_transfer, shift_amount, enc, opcode, pc, - opnd, enc_out); + const opnd_size_t transfer_size = opnd_size_from_bytes(1 << insz); + return encode_svemem_gpr_simm4(enc, transfer_size, scale, opnd, enc_out); } -static inline bool -decode_opnd_svemem_msz_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - aarch64_reg_offset elsz, dests; - sizes_from_dtype(enc, &elsz, &dests, false); - - const uint 
shift_amount = elsz; - - return svemem_gprs_per_element_decode( - calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, - opcode, pc, opnd); -} +/* SVE memory operand [{, #, MUL VL}] multiple dest registers or nt */ static inline bool -encode_opnd_svemem_msz_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) +decode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { - aarch64_reg_offset elsz, dests; - sizes_from_dtype(enc, &elsz, &dests, false); + const opnd_size_t element_size = get_opnd_size_from_offset(BITS(enc, 24, 23)); - const uint shift_amount = elsz; + /* The offset is scaled by the size of the vector in memory.*/ + const uint register_count = BITS(enc, 22, 21) + 1; + const uint scale = (register_count * dr_get_sve_vector_length()) / 8; - return svemem_gprs_per_element_encode( - calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, - opcode, pc, opnd, enc_out); + return decode_svemem_gpr_simm4(enc, element_size, scale, opnd); } static inline bool -decode_opnd_svemem_msz_stgpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +encode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) { - aarch64_reg_offset elsz, dests; - sizes_from_dtype(enc, &elsz, &dests, false); - if (BITS(enc, 20, 16) == 0b11111) - return false; + const opnd_size_t element_size = get_opnd_size_from_offset(BITS(enc, 24, 23)); - const uint shift_amount = elsz; + /* The offset is scaled by the size of the vector in memory.*/ + const uint register_count = BITS(enc, 22, 21) + 1; + const uint scale = (register_count * dr_get_sve_vector_length()) / 8; - return svemem_gprs_per_element_decode( - calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, - opcode, pc, opnd); + return encode_svemem_gpr_simm4(enc, element_size, scale, opnd, enc_out); } static inline bool -encode_opnd_svemem_msz_stgpr_shf(uint enc, int opcode, byte *pc, 
opnd_t opnd, - OUT uint *enc_out) +decode_ssz(uint enc, OUT uint *scale) { - aarch64_reg_offset elsz, dests; - sizes_from_dtype(enc, &elsz, &dests, false); - - const uint shift_amount = elsz; - - bool success = svemem_gprs_per_element_encode( - calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, - opcode, pc, opnd, enc_out); - - if (BITS(enc, 20, 16) == 0b11111) - return false; - return success; -} -static inline bool -decode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - aarch64_reg_offset insz, elsz; - sizes_from_dtype(enc, &insz, &elsz, true); - - const uint shift_amount = opnd_size_to_shift_amount(get_opnd_size_from_offset(insz)); - - return svemem_gprs_per_element_decode(calculate_mem_transfer(1 << insz, elsz), - shift_amount, enc, opcode, pc, opnd); + switch (BITS(enc, 22, 21)) { + case 0b00: *scale = 16; return true; + case 0b01: *scale = 32; return true; + default: break; + } + return false; } -static inline bool -encode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) -{ - aarch64_reg_offset insz, elsz; - sizes_from_dtype(enc, &insz, &elsz, true); - - const uint shift_amount = opnd_size_to_shift_amount(get_opnd_size_from_offset(insz)); - - return svemem_gprs_per_element_encode(calculate_mem_transfer(1 << insz, elsz), - shift_amount, enc, opcode, pc, opnd, enc_out); -} +/* svemem_gpr_simm4: SVE memory operand [{, #}] */ static inline bool -decode_opnd_svemem_gprs_bhsdx(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +decode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { - aarch64_reg_offset insz, elsz; - sizes_from_dtype(enc, &elsz, &insz, true); - - return svemem_gprs_per_element_decode(calculate_mem_transfer(insz + 1, elsz), 0, enc, - opcode, pc, opnd); + uint scale; + const opnd_size_t transfer_size = opnd_size_from_bytes(1 << BITS(enc, 24, 23)); + return decode_ssz(enc, &scale) && + decode_svemem_gpr_simm4(enc, transfer_size, scale, 
opnd); } static inline bool -encode_opnd_svemem_gprs_bhsdx(uint enc, int opcode, byte *pc, opnd_t opnd, - OUT uint *enc_out) +encode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) { - aarch64_reg_offset insz, elsz; - sizes_from_dtype(enc, &elsz, &insz, true); - - return svemem_gprs_per_element_encode(calculate_mem_transfer(insz + 1, elsz), 0, enc, - opcode, pc, opnd, enc_out); + uint scale; + const opnd_size_t transfer_size = opnd_size_from_bytes(1 << BITS(enc, 24, 23)); + return decode_ssz(enc, &scale) && + encode_svemem_gpr_simm4(enc, transfer_size, scale, opnd, enc_out); } static inline bool @@ -9781,6 +9696,7 @@ decode_category(uint encoding, instr_t *instr) #include "decode_gen_sve2.h" #include "decode_gen_sve.h" #include "decode_gen_v86.h" +#include "decode_gen_v85.h" #include "decode_gen_v84.h" #include "decode_gen_v83.h" #include "decode_gen_v82.h" @@ -9789,6 +9705,7 @@ decode_category(uint encoding, instr_t *instr) #include "encode_gen_sve2.h" #include "encode_gen_sve.h" #include "encode_gen_v86.h" +#include "encode_gen_v85.h" #include "encode_gen_v84.h" #include "encode_gen_v83.h" #include "encode_gen_v82.h" diff --git a/core/ir/aarch64/codec.h b/core/ir/aarch64/codec.h index 4fe2eaa54c8..81de59b069f 100644 --- a/core/ir/aarch64/codec.h +++ b/core/ir/aarch64/codec.h @@ -57,7 +57,7 @@ encode_common(byte *pc, instr_t *i, decode_info_t *di); #define BITS(_enc, bitmax, bitmin) \ ((((uint32)(_enc)) >> (bitmin)) & (uint32)MASK((bitmax) - (bitmin) + 1)) -#if !defined(DR_HOST_NOT_TARGET) && !defined(STANDALONE_DECODER) +#if !defined(DR_HOST_NOT_TARGET) && !defined(STANDALONE_DECODER) && !defined(BUILD_TESTS) # define OPSZ_SVE_VL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes()) # define OPSZ_SVE_PL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes() / 8) #else diff --git a/core/ir/aarch64/codec.py b/core/ir/aarch64/codec.py index a645044f7d0..81d8fb8afdf 100755 --- a/core/ir/aarch64/codec.py +++ 
b/core/ir/aarch64/codec.py @@ -49,10 +49,6 @@ N = 32 # bits in an instruction word ONES = (1 << N) - 1 -# Stores instances of FallthroughDecode objects for resolution of overlapping -# encodings. -FALLTHROUGH = dict() - opnd_header = '/* This file was generated by codec.py from opnd_defs.txt and the codec_.txt files. */\n\n' opcode_header = '/* This file was generated by codec.py from codec_.txt files. */\n\n' @@ -68,6 +64,14 @@ def __init__(self, gen, used, must_be_set): self.used = used self.non_zero = must_be_set +def opnd_stem(opnd_name): + """Strip off all flags from the opnd""" + return opnd_name.split(".")[0] # pytype: disable=attribute-error + +def opnd_flags(opnd_name): + """Return the opnd's flags""" + return opnd_name.split(".")[1:] + class Opndset: def __init__(self, fixed, dsts, srcs, enc_order): for (ds, i, ot) in enc_order: @@ -79,13 +83,6 @@ def __init__(self, fixed, dsts, srcs, enc_order): self.srcs = srcs self.enc_order = enc_order -class FallthroughDecode: - def __init__(self, opcode, opndset='', decode_clause='', decode_function=''): - self.flag_name = opcode + '_fallthrough_flag' - self.opndset = opndset - self.decode_clause = decode_clause - self.decode_function = decode_function - class Pattern: def __init__(self, pattern, opcode_bits, opnd_bits, high_soft_bits, opcode, opndset, enum, feat): self.pattern = pattern @@ -98,6 +95,7 @@ def __init__(self, pattern, opcode_bits, opnd_bits, high_soft_bits, opcode, opnd self.opndset = opndset self.enum = enum self.feat = feat + self._generated_name = None def __iter__(self): for field in (self.opcode_bits, self.opnd_bits, self.opcode, self.opndset): @@ -116,16 +114,25 @@ def ignored_bit_mask(self): def set_bits(self): return self.opcode_bits | self.high_soft_bits + def set_generated_name(self, name): + """ + Generated names need to take account of all possible patterns, + so can't be simply derived from a single pattern + """ + self._generated_name = name + + @property + def generated_name(self): + if 
self._generated_name is None: + raise Exception("Attempted to use generated name before it was set") + return self._generated_name + + def codec_header(isa_version): return '/* This file was generated by codec.py from codec_%s.txt. */\n\n' % isa_version -def fallthrough_instr_id(opcode, opcode_bits, opnd_bits): - return '%s_%08x_%08x' % (opcode, opcode_bits, opnd_bits) - def generate_opndset_decoders(opndsettab, opndtab): c = [] - c += ['bool {} = false;'.format(opcode.flag_name) for opcode in - FALLTHROUGH.values()] c += ['\n'] for name in sorted(opndsettab): opnd_set = opndsettab[name] @@ -134,15 +141,15 @@ def generate_opndset_decoders(opndsettab, opndtab): c += ['static bool', 'decode_opnds%s(uint enc, dcontext_t *dcontext, byte *pc, ' 'instr_t *instr, int opcode)' % name, '{'] - if dsts + srcs != []: + if dsts or srcs: vars = (['dst%d' % i for i in range(len(dsts))] + ['src%d' % i for i in range(len(srcs))]) tests = (['!decode_opnd_%s(enc & 0x%08x, opcode, pc, &dst%d)' % - (dsts[i], opndtab[dsts[i]].gen | opndtab[dsts[i]].used, i) + (opnd_stem(dsts[i]), opndtab[dsts[i]].gen | opndtab[dsts[i]].used, i) for i in range(len(dsts))] + ['!decode_opnd_%s(enc & 0x%08x, opcode, pc, &src%d)' % - (srcs[i], opndtab[srcs[i]].gen | opndtab[srcs[i]].used, i) + (opnd_stem(srcs[i]), opndtab[srcs[i]].gen | opndtab[srcs[i]].used, i) for i in range(len(srcs))]) c += [' opnd_t ' + ', '.join(vars) + ';'] c += [' if (' + ' ||\n '.join(tests) + ')'] @@ -150,6 +157,11 @@ def generate_opndset_decoders(opndsettab, opndtab): c.append(' instr_set_opcode(instr, opcode);') c.append(' instr_set_num_opnds(dcontext, instr, %d, %d);' % (len(dsts), len(srcs))) + if any("gov" in opnd_flags(op) for op in srcs): + c += [' instr_set_has_register_predication(instr);'] + for i, op in enumerate(srcs): + if "gov" in opnd_flags(op): + c += [' src{0} = opnd_add_flags(src{0}, DR_OPND_IS_GOVERNING);'.format(i)] for i in range(len(dsts)): c.append(' instr_set_dst(instr, %d, dst%d);' % (i, i)) for i in 
range(len(srcs)): @@ -176,7 +188,7 @@ def indent_append(text): not_zero_mask = 0 try: - opnd_set = opndsettab[pattern.opndset] + opnd_set = opndsettab[pattern.generated_name] for mask in (opndtab[o].non_zero for o in opnd_set.dsts + opnd_set.srcs): not_zero_mask |= mask except KeyError: @@ -207,20 +219,8 @@ def indent_append(text): 'manually handled in codec.c\'s decode_common().' % pattern.opcode) else: indent_append(' ASSERT(0);') - enc_key = fallthrough_instr_id( - pattern.opcode, pattern.opcode_bits, pattern.opnd_bits) - if enc_key in FALLTHROUGH and pattern.opndset == FALLTHROUGH[enc_key].opndset: - indent_append(' %s = true;' % FALLTHROUGH[enc_key].flag_name) - FALLTHROUGH[enc_key].decode_clause = \ - 'if ((enc & 0x%08x) == 0x%08x && %s == true)' % \ - (((1 << N) - 1) & pattern.ignored_bit_mask(), pattern.opcode_bits, \ - FALLTHROUGH[enc_key].flag_name) - FALLTHROUGH[enc_key].decode_function = \ - 'return decode_opnds%s(enc, dc, pc, instr, OP_%s);' % \ - (pattern.opndset, pattern.opcode) - else: - indent_append(' return decode_opnds%s(enc, dc, pc, ' - 'instr, OP_%s);' % (pattern.opndset, pattern.opcode)) + indent_append(' return decode_opnds%s(enc, dc, pc, ' + 'instr, OP_%s);' % (pattern.generated_name, pattern.opcode)) if opc_props[pattern.opcode].nzcv_rw != 'n': indent_append('}') return @@ -263,9 +263,6 @@ def indent_append(text): 'decoder_' + curr_isa + '(uint enc, dcontext_t *dc, byte *pc, instr_t *instr)', '{'] gen(c, patterns, 1) - for opcode in FALLTHROUGH.values(): - c += [' %s' % opcode.decode_clause] - c += [' %s' % opcode.decode_function] # Call the next version of the decoder if defined. 
if next_isa != '': c.append(' return decoder_' + next_isa + '(enc, dc, pc, instr);') @@ -284,7 +281,7 @@ def find_required(fixed, reordered, i, opndtab): if opndtab[reordered[j][2]].gen & used & ~known != 0: req = req + ['%s%d' % (reordered[j][0], reordered[j][1])] known = known | opndtab[reordered[j][2]].gen - return 'enc' if req == [] else '(enc | %s)' % ' | '.join(req) + return 'enc' if not req else '(%s)' % ' | '.join(['enc'] + req) def make_enc(n, reordered, f, opndtab): (ds, i, ot) = reordered[n] @@ -293,7 +290,7 @@ def make_enc(n, reordered, f, opndtab): 'instr_get_%s(instr, %d), ' + instr_arg_if_required + '&%s%d)') ret_str = (encode_method_format_str % - (ot, ('0' if opndtab[ot].used == 0 else + (opnd_stem(ot), ('0' if opndtab[ot].used == 0 else 'enc & 0x%08x' % opndtab[ot].used if opndtab[ot].used & ~f == 0 else '%s & 0x%08x' % (find_required(f, reordered, n, opndtab), @@ -319,22 +316,26 @@ def generate_opndset_encoders(opndsettab, opndtab): c += [' int opcode = instr->opcode;'] # The initial values are only required to silence a bad compiler warning: c += [' uint ' + ' = 0, '.join(vars) + ' = 0;'] - tests = (['instr_num_dsts(instr) == %d && instr_num_srcs(instr) == %d' % - (len(dsts), len(srcs))] + - [make_enc(i, enc_order, fixed, opndtab) - for i in range(len(enc_order))]) - - tests2 = (['dst%d == (enc & 0x%08x)' % (i, opndtab[dsts[i]].gen) - for i in range(len(dsts))] + - ['src%d == (enc & 0x%08x)' % (i, opndtab[srcs[i]].gen) - for i in range(len(srcs))]) + tests = ['instr_num_dsts(instr) == %d && instr_num_srcs(instr) == %d' % (len(dsts), len(srcs))] + + has_governing = any("gov" in opnd_flags(op) for op in srcs) + if has_governing: + tests += ['instr_has_register_predication(instr)'] + + tests += [make_enc(i, enc_order, fixed, opndtab) for i in range(len(enc_order))] + + opnd_tests = (['dst%d == (enc & 0x%08x)' % (i, opndtab[dst].gen) + for i, dst in enumerate(dsts)] + + ['src%d == (enc & 0x%08x)' % (i, opndtab[src].gen) + for i, src in 
enumerate(srcs)]) c += [' if (' + ' &&\n '.join(tests) + ') {'] c += [' ASSERT((dst%d & 0x%08x) == 0);' % - (i, ONES & ~opndtab[dsts[i]].gen) for i in range(len(dsts))] + (i, ONES & ~opndtab[dst].gen) for i, dst in enumerate(dsts)] c += [' ASSERT((src%d & 0x%08x) == 0);' % - (i, ONES & ~opndtab[srcs[i]].gen) for i in range(len(srcs))] + (i, ONES & ~opndtab[src].gen) for i, src in enumerate(srcs)] c += [' enc |= ' + ' | '.join(vars) + ';'] - c += [' if (' + ' &&\n '.join(tests2) + ')'] + c += [' if (' + ' &&\n '.join(opnd_tests) + ')'] + c += [' return enc;'] c += [' }'] c += [' return ENCFAIL;'] @@ -345,11 +346,11 @@ def generate_opndset_encoders(opndsettab, opndtab): def generate_encoder(patterns, opndsettab, opndtab, opc_props, curr_isa, next_isa): c = [] case = dict() - for p in patterns: - (opcode_bits, opnd_bits, opcode, opndset) = p - if opcode not in case: - case[opcode] = [] - case[opcode].append(p) + for pattern in patterns: + try: + case[pattern.opcode].append(pattern) + except KeyError: + case[pattern.opcode] = [pattern] c += ['static uint', 'encoder_' + curr_isa + '(byte *pc, instr_t *instr, decode_info_t *di)', @@ -358,9 +359,8 @@ def generate_encoder(patterns, opndsettab, opndtab, opc_props, curr_isa, next_is ' (void)enc;', ' switch (instr->opcode) {'] - def reorder_key(t): - opcode_bits, opnd_bits, opcode, opndset = t - return (opcode, opndset, opcode_bits, opnd_bits) + def pattern_sort_key(p): + return (p.opcode, p.generated_name, p.opcode_bits, p.opnd_bits) for opcode in sorted(case): c.append(' case OP_%s:' % opcode) @@ -369,26 +369,26 @@ def reorder_key(t): c.append(' if (!proc_has_feature(FEATURE_%s))' % opc_props[opcode].feat) c.append(' return ENCFAIL;') c.append('# endif') - patterns = sorted(case[opcode], key=reorder_key) + patterns = sorted(case[opcode], key=pattern_sort_key) last_pattern = patterns.pop() for pattern in patterns: c.append(' enc = encode_opnds%s(pc, instr, 0x%08x, di);' % ( - pattern.opndset, pattern.set_bits())) + 
opnd_stem(pattern.generated_name), pattern.set_bits())) c.append(' if (enc != ENCFAIL)') c.append(' return enc;') # Fallthrough to call the next version of the encoder if defined. if next_isa != '': c.append(' enc = encode_opnds%s(pc, instr, 0x%08x, di);' % ( - last_pattern.opndset, last_pattern.set_bits())) + last_pattern.generated_name, last_pattern.set_bits())) c.append(' if (enc != ENCFAIL)') c.append(' return enc;') c += [' break;'] else: c.append(' return encode_opnds%s(pc, instr, 0x%08x, di);' % ( - last_pattern.opndset, last_pattern.set_bits())) + last_pattern.generated_name, last_pattern.set_bits())) c += [' }'] # Call the next version of the encoder if defined. - if next_isa != '': + if next_isa: c += [' return encoder_' + next_isa + '(pc, instr, di);'] else: c += [' return ENCFAIL;'] @@ -514,7 +514,19 @@ def write_if_changed(file, data): open(file, 'w').write(data) def read_opnd_defs_file(path): - opndtab = dict() + class OpndtabDict(dict): + """ + We want additional hints to be present when parsing opnds so that + the generator can make differing choices about them but we always + want to pass down to the same decode function. This object + functions just like a dictionary but ignores everything after + the first . when using a key to access the value. 
+ """ + def __getitem__(self, key): + return super().__getitem__(key.split(".")[0]) + + opndtab = OpndtabDict() + file_msg = 'operand definitions file' try: @@ -522,15 +534,15 @@ def read_opnd_defs_file(path): for line in (l.split('#')[0].strip() for l in file): if not line: continue - if not re.match('^[x\?\-\+]{32} +[a-zA-Z_0-9]+$', line): + if not re.match(r'^[x\?\-\+]{32} +[a-zA-Z_0-9]+$', line): raise Exception('Cannot parse line: %s in %s' % (line, file_msg)) # Syntax: mask opndtype mask, opndtype = line.split() if opndtype in opndtab: raise Exception('Repeated definition of opndtype %s in %s' % (opndtype, file_msg)) - opndtab[opndtype] = Opnd(int(re.sub('[x\+]', '1', re.sub('[^x^\+]', '0', mask)), 2), - int(re.sub('\?', '1', re.sub('[^\?]', '0', mask)), 2), - int(re.sub('\+', '1', re.sub('[^\+]', '0', mask)), 2)) + opndtab[opndtype] = Opnd(int(re.sub(r'[x\+]', '1', re.sub(r'[^x^\+]', '0', mask)), 2), + int(re.sub(r'\?', '1', re.sub(r'[^\?]', '0', mask)), 2), + int(re.sub(r'\+', '1', re.sub(r'[^\+]', '0', mask)), 2)) except IOError as e: raise Exception('Unable to read operand definitions file, {}: {}'.format(path, e.strerror)) @@ -545,7 +557,7 @@ def read_codec_file(path): for line in (l.split('#')[0].strip() for l in file): if not line: continue - if re.match('^[01x\^]{32} +[n|r|w|rw|wr|er|ew]+ +[0-9]+ +[a-zA-Z0-9]* +[a-zA-Z_0-9][a-zA-Z_0-9 ]*:[a-zA-Z_0-9 ]*$', line): + if re.match(r'^[01x\^]{32} +[n|r|w|rw|wr|er|ew]+ +[0-9]+ +[a-zA-Z0-9]* +[a-zA-Z_0-9][a-zA-Z_0-9 \.]*:[a-zA-Z_0-9 \.]*$', line): # Syntax: pattern opcode opndtype* : opndtype* pattern, nzcv_rw_flag, enum, feat, opcode, args = line.split(None, 5) dsts, srcs = [a.split() for a in args.split(':')] @@ -555,7 +567,7 @@ def read_codec_file(path): patterns.append(Pattern(pattern, opcode_bits, opnd_bits, high_soft_bits, opcode, (dsts, srcs), enum, feat)) opc_props[opcode] = Opcode(opcode, nzcv_rw_flag, feat) continue - if re.match('^[01x\^]{32} +[n|r|w|rw|wr|er|ew]+ +[0-9]+ +[a-zA-Z0-9]* 
+[a-zA-Z_0-9]+ +[a-zA-Z_0-9]+', line): + if re.match(r'^[01x\^]{32} +[n|r|w|rw|wr|er|ew]+ +[0-9]+ +[a-zA-Z0-9]* +[a-zA-Z_0-9]+ +[a-zA-Z_0-9]+', line): # Syntax: pattern opcode opndset pattern, nzcv_rw_flag, enum, feat, opcode, opndset = line.split() opcode_bits = int(re.sub('x', '0', pattern), 2) @@ -589,47 +601,14 @@ def consistency_check(patterns, opndtab): for ot in dsts + srcs: try: unhandled_bits &= ~opndtab[ot].gen - except KeyError: - raise Exception('Undefined opndtype %s in:\n%s' % - (ot, pattern_to_str(*p))) + except KeyError as e : + raise Exception( + 'Undefined opndtype %s in:\n%s' % + (opnd_stem(ot), pattern_to_str(*p))) from e if unhandled_bits: raise Exception('Unhandled bits:\n%32s in:\n%s' % (re.sub('1', 'x', re.sub('0', ' ', bin(unhandled_bits)[2:])), pattern_to_str(*p))) - # Detect and mark overlapping patterns for special handling. Named as - # 'fallthrough' because the special handling is done at the end of the - # decoder's main if/then/else clauses block. - for i, pattern_a in enumerate(patterns): - for pattern_b in patterns[:i]: - non_zero_bits_a = 0 - non_zero_bits_b = 0 - try: - for opnd in (opndtab[op] for op in pattern_a.all_opnds()): - non_zero_bits_a &= opnd.non_zero - except KeyError: - pass - try: - for opnd in (opndtab[op] for op in pattern_b.all_opnds()): - non_zero_bits_b &= opnd.non_zero - except KeyError: - pass - - zero_overlap = ( - non_zero_bits_a & pattern_b.opnd_bits == 0 or - non_zero_bits_b & pattern_b.opnd_bits == 0) - - if ((pattern_b.opcode_bits ^ pattern_a.opcode_bits) & - ~pattern_b.opnd_bits & ~pattern_a.opnd_bits == 0 and - not zero_overlap): - print('Overlap found between:\n%s\nand\n%s' % - (pattern_to_str(*pattern_b), - pattern_to_str(*pattern_a))) - enc_key = fallthrough_instr_id(pattern_a.opcode, pattern_a.opcode_bits, pattern_a.opnd_bits) - if enc_key in FALLTHROUGH: - raise Exception('Error: multiple overlaps detected for ' - '%s. 
Unable to resolve.\n' % enc_key) - print('Resolving overlap.') - FALLTHROUGH[enc_key] = FallthroughDecode(enc_key) # This function reorders the operands for encoding so that no operand encoder # requires bits that are generated by an operand encoder that has not yet @@ -668,18 +647,15 @@ def opndset_naming(patterns, opndtab): opndsettab = dict() # maps generated name to original opndsets for p in patterns: if type(p.opndset) is str: - new_opndset = '_' + p.opndset + function_name = '_' + p.opndset else: (dsts, srcs) = p.opndset h = (' '.join(dsts), ' '.join(srcs), p.opnd_bits) - new_opndset = 'gen_%08x_%08x' % (opndsets[h], p.opnd_bits) + function_name = 'gen_%08x_%08x' % (opndsets[h], p.opnd_bits) reordered = reorder_opnds(ONES & ~p.opnd_bits, dsts, srcs, opndtab) - if not new_opndset in opndsettab: - opndsettab[new_opndset] = reordered - p.opndset = new_opndset - enc_key = fallthrough_instr_id(p.opcode, p.opcode_bits, p.opnd_bits) - if enc_key in FALLTHROUGH: - FALLTHROUGH[enc_key].opndset = new_opndset + if not function_name in opndsettab: + opndsettab[function_name] = reordered + p.set_generated_name(function_name) return (patterns, opndsettab) def main(): @@ -694,7 +670,7 @@ def main(): # and SVE2 are partially supported. The null terminator element at the end # is required by some generator functions to correctly generate links # between each version's decode/encode logic. - isa_versions = ['v80', 'v81', 'v82', 'v83', 'v84', 'v86', 'sve', 'sve2', ''] + isa_versions = ['v80', 'v81', 'v82', 'v83', 'v84', 'v85', 'v86', 'sve', 'sve2', ''] # Read the instruction operand definitions. Used by the codec when # generating code to decode and encode instructions. 
diff --git a/core/ir/aarch64/codec_sve.txt b/core/ir/aarch64/codec_sve.txt index 20a2d54155b..31919113223 100644 --- a/core/ir/aarch64/codec_sve.txt +++ b/core/ir/aarch64/codec_sve.txt @@ -40,34 +40,33 @@ # Instruction definitions: -00000100xx010110101xxxxxxxxxxxxx n 6 SVE abs z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 +00000100xx010110101xxxxxxxxxxxxx n 6 SVE abs z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 00000100xx1xxxxx000000xxxxxxxxxx n 9 SVE add z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -00000100xx000000000xxxxxxxxxxxxx n 9 SVE add z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx000000000xxxxxxxxxxxxx n 9 SVE add z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx10000011xxxxxxxxxxxxxx n 9 SVE add z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 00000100011xxxxx01010xxxxxxxxxxx n 934 SVE addpl x0sp : x16sp simm6_5 00000100001xxxxx01010xxxxxxxxxxx n 935 SVE addvl x0sp : x16sp simm6_5 00000100001xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_d_0 : svemem_vec_vec_idx 00000100011xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_d_0 : svemem_vec_vec_idx 000001001x1xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_sz_sd : svemem_vec_vec_idx -00000100xx011010000xxxxxxxxxxxxx n 21 SVE and z0 : p10_lo z0 z5 bhsd_sz 00000101100000xxxxxxxxxxxxxxxxxx n 21 SVE and z_imm13_bhsd_0 : z_imm13_bhsd_0 imm13_const -001001010000xxxx01xxxx0xxxx0xxxx n 21 SVE and p_b_0 : p10_zer p_b_5 p_b_16 +001001010000xxxx01xxxx0xxxx0xxxx n 21 SVE and p_b_0 : p10_zer.gov p_b_5 p_b_16 00000100001xxxxx001100xxxxxxxxxx n 21 SVE and z_d_0 : z_d_5 z_d_16 -00000100xx011010000xxxxxxxxxxxxx n 21 SVE and z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -001001010100xxxx01xxxx0xxxx0xxxx w 22 SVE ands p_b_0 : p10_zer p_b_5 p_b_16 -0000010000011010001xxxxxxxxxxxxx n 915 SVE andv b0 : p10_lo z_size_bhsd_5 -0000010001011010001xxxxxxxxxxxxx n 915 SVE andv h0 : p10_lo z_size_bhsd_5 -0000010010011010001xxxxxxxxxxxxx n 915 SVE andv s0 : p10_lo z_size_bhsd_5 -0000010011011010001xxxxxxxxxxxxx n 915 SVE andv 
d0 : p10_lo z_size_bhsd_5 -00000100xx000000100xxxxxxxxxxxxx n 899 SVE asr z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5p1 -00000100xx011000100xxxxxxxxxxxxx n 899 SVE asr z_size_bhs_0 : p10_mrg_lo z_size_bhs_0 z_d_5 -00000100xx010000100xxxxxxxxxxxxx n 899 SVE asr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx011010000xxxxxxxxxxxxx n 21 SVE and z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +001001010100xxxx01xxxx0xxxx0xxxx w 22 SVE ands p_b_0 : p10_zer.gov p_b_5 p_b_16 +0000010000011010001xxxxxxxxxxxxx n 915 SVE andv b0 : p10_lo.gov z_size_bhsd_5 +0000010001011010001xxxxxxxxxxxxx n 915 SVE andv h0 : p10_lo.gov z_size_bhsd_5 +0000010010011010001xxxxxxxxxxxxx n 915 SVE andv s0 : p10_lo.gov z_size_bhsd_5 +0000010011011010001xxxxxxxxxxxxx n 915 SVE andv d0 : p10_lo.gov z_size_bhsd_5 +00000100xx000000100xxxxxxxxxxxxx n 899 SVE asr z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5p1 +00000100xx011000100xxxxxxxxxxxxx n 899 SVE asr z_size_bhs_0 : p10_mrg_lo.gov z_size_bhs_0 z_d_5 +00000100xx010000100xxxxxxxxxxxxx n 899 SVE asr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00000100xx1xxxxx100100xxxxxxxxxx n 899 SVE asr z_tszl19_bhsd_0 : z_tszl19_bhsd_5 tszl19_imm3_16p1 00000100xx1xxxxx100000xxxxxxxxxx n 899 SVE asr z_size_bhs_0 : z_size_bhs_5 z_d_16 -00000100xx000100100xxxxxxxxxxxxx n 900 SVE asrd z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5p1 -00000100xx010100100xxxxxxxxxxxxx n 901 SVE asrr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -0110010110001010101xxxxxxxxxxxxx n 953 BF16 bfcvt z_h_0 : p10_mrg_lo z_s_5 -0110010010001010101xxxxxxxxxxxxx n 1064 BF16 bfcvtnt z_msz_bhsd_0 : z_msz_bhsd_0 p10_mrg_lo z_s_5 +00000100xx000100100xxxxxxxxxxxxx n 900 SVE asrd z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5p1 +00000100xx010100100xxxxxxxxxxxxx n 901 SVE asrr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +0110010110001010101xxxxxxxxxxxxx n 953 BF16 bfcvt z_h_0 : 
p10_mrg_lo.gov z_s_5 +0110010010001010101xxxxxxxxxxxxx n 1064 BF16 bfcvtnt z_msz_bhsd_0 : z_msz_bhsd_0 p10_mrg_lo.gov z_s_5 01100100011xxxxx100000xxxxxxxxxx n 954 BF16 bfdot z_s_0 : z_s_0 z_h_5 z_h_16 01100100011xxxxx010000xxxxxxxxxx n 954 BF16 bfdot z_s_0 : z_s_0 z_h_5 z3_h_16 i2_index_19 01100100111xxxxx100000xxxxxxxxxx n 955 BF16 bfmlalb z_s_0 : z_s_0 z_h_5 z_h_16 @@ -75,69 +74,68 @@ 01100100111xxxxx100001xxxxxxxxxx n 956 BF16 bfmlalt z_s_0 : z_s_0 z_h_5 z_h_16 01100100111xxxxx0100x1xxxxxxxxxx n 956 BF16 bfmlalt z_s_0 : z_s_0 z_h_5 z3_h_16 i3_index_11 01100100011xxxxx111001xxxxxxxxxx n 957 BF16 bfmmla z_s_0 : z_s_0 z_h_5 z_h_16 -00000100xx011011000xxxxxxxxxxxxx n 29 SVE bic z0 : p10_lo z0 z5 bhsd_sz -001001010000xxxx01xxxx0xxxx1xxxx n 29 SVE bic p_b_0 : p10_zer p_b_5 p_b_16 +001001010000xxxx01xxxx0xxxx1xxxx n 29 SVE bic p_b_0 : p10_zer.gov p_b_5 p_b_16 00000100111xxxxx001100xxxxxxxxxx n 29 SVE bic z_d_0 : z_d_5 z_d_16 -00000100xx011011000xxxxxxxxxxxxx n 29 SVE bic z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -001001010100xxxx01xxxx0xxxx1xxxx w 30 SVE bics p_b_0 : p10_zer p_b_5 p_b_16 -001001010001000001xxxx0xxxx0xxxx n 867 SVE brka p_b_0 : p10_zer p_b_5 -001001010001000001xxxx0xxxx1xxxx n 867 SVE brka p_b_0 : p10_mrg p_b_5 -001001010101000001xxxx0xxxx0xxxx w 868 SVE brkas p_b_0 : p10_zer p_b_5 -001001011001000001xxxx0xxxx0xxxx n 869 SVE brkb p_b_0 : p10_zer p_b_5 -001001011001000001xxxx0xxxx1xxxx n 869 SVE brkb p_b_0 : p10_mrg p_b_5 -001001011101000001xxxx0xxxx0xxxx w 870 SVE brkbs p_b_0 : p10_zer p_b_5 -001001010001100001xxxx0xxxx0xxxx n 871 SVE brkn p_b_0 : p10_zer p_b_5 p_b_0 -001001010101100001xxxx0xxxx0xxxx w 872 SVE brkns p_b_0 : p10_zer p_b_5 p_b_0 -001001010000xxxx11xxxx0xxxx0xxxx n 873 SVE brkpa p_b_0 : p10_zer p_b_5 p_b_16 -001001010100xxxx11xxxx0xxxx0xxxx w 874 SVE brkpas p_b_0 : p10_zer p_b_5 p_b_16 -001001010000xxxx11xxxx0xxxx1xxxx n 875 SVE brkpb p_b_0 : p10_zer p_b_5 p_b_16 -001001010100xxxx11xxxx0xxxx1xxxx w 876 SVE brkpbs p_b_0 : 
p10_zer p_b_5 p_b_16 -00000101xx110000101xxxxxxxxxxxxx n 835 SVE clasta wx_size_0_zr : p10_lo wx_size_0_zr z_size_bhsd_5 -00000101xx101010100xxxxxxxxxxxxx n 835 SVE clasta bhsd_size_reg0 : p10_lo bhsd_size_reg0 z_size_bhsd_5 -00000101xx101000100xxxxxxxxxxxxx n 835 SVE clasta z_size_bhsd_0 : p10_lo z_size_bhsd_0 z_size_bhsd_5 -00000101xx110001101xxxxxxxxxxxxx n 836 SVE clastb wx_size_0_zr : p10_lo wx_size_0_zr z_size_bhsd_5 -00000101xx101011100xxxxxxxxxxxxx n 836 SVE clastb bhsd_size_reg0 : p10_lo bhsd_size_reg0 z_size_bhsd_5 -00000101xx101001100xxxxxxxxxxxxx n 836 SVE clastb z_size_bhsd_0 : p10_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx011000101xxxxxxxxxxxxx n 59 SVE cls z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -00000100xx011001101xxxxxxxxxxxxx n 60 SVE clz z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -00100101xx0xxxxx100xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx001xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx101xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 z_size_bhsd_16 -00100101xx0xxxxx000xxxxxxxx0xxxx w 808 SVE cmpge p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx010xxxxxxxx0xxxx w 808 SVE cmpge p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx100xxxxxxxx0xxxx w 808 SVE cmpge p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 z_size_bhsd_16 -00100101xx0xxxxx000xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx010xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx100xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 z_size_bhsd_16 -00100100xx1xxxxxxx0xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 imm7 -00100100xx0xxxxx110xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx000xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 
z_size_bhsd_16 -00100100xx1xxxxxxx0xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 imm7 -00100100xx0xxxxx110xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx000xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 z_size_bhsd_16 -00100101xx0xxxxx001xxxxxxxx1xxxx w 812 SVE cmple p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx011xxxxxxxx1xxxx w 812 SVE cmple p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx1xxxxxxx1xxxxxxxx0xxxx w 813 SVE cmplo p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 imm7 -00100100xx0xxxxx111xxxxxxxx0xxxx w 813 SVE cmplo p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx1xxxxxxx1xxxxxxxx1xxxx w 814 SVE cmpls p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 imm7 -00100100xx0xxxxx111xxxxxxxx1xxxx w 814 SVE cmpls p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100101xx0xxxxx001xxxxxxxx0xxxx w 815 SVE cmplt p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx011xxxxxxxx0xxxx w 815 SVE cmplt p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100101xx0xxxxx100xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 simm5 -00100100xx0xxxxx001xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhs_0 : p10_zer_lo z_size_bhs_5 z_d_16 -00100100xx0xxxxx101xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 z_size_bhsd_16 -00000100xx011011101xxxxxxxxxxxxx n 793 SVE cnot z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -00000100xx011010101xxxxxxxxxxxxx n 69 SVE cnt z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 +00000100xx011011000xxxxxxxxxxxxx n 29 SVE bic z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +001001010100xxxx01xxxx0xxxx1xxxx w 30 SVE bics p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001010001000001xxxx0xxxx0xxxx n 867 SVE brka p_b_0 : p10_zer.gov p_b_5 +001001010001000001xxxx0xxxx1xxxx n 867 SVE brka p_b_0 : p10_mrg.gov p_b_5 +001001010101000001xxxx0xxxx0xxxx w 868 SVE brkas p_b_0 : p10_zer.gov p_b_5 
+001001011001000001xxxx0xxxx0xxxx n 869 SVE brkb p_b_0 : p10_zer.gov p_b_5 +001001011001000001xxxx0xxxx1xxxx n 869 SVE brkb p_b_0 : p10_mrg.gov p_b_5 +001001011101000001xxxx0xxxx0xxxx w 870 SVE brkbs p_b_0 : p10_zer.gov p_b_5 +001001010001100001xxxx0xxxx0xxxx n 871 SVE brkn p_b_0 : p10_zer.gov p_b_5 p_b_0 +001001010101100001xxxx0xxxx0xxxx w 872 SVE brkns p_b_0 : p10_zer.gov p_b_5 p_b_0 +001001010000xxxx11xxxx0xxxx0xxxx n 873 SVE brkpa p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001010100xxxx11xxxx0xxxx0xxxx w 874 SVE brkpas p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001010000xxxx11xxxx0xxxx1xxxx n 875 SVE brkpb p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001010100xxxx11xxxx0xxxx1xxxx w 876 SVE brkpbs p_b_0 : p10_zer.gov p_b_5 p_b_16 +00000101xx110000101xxxxxxxxxxxxx n 835 SVE clasta wx_size_0_zr : p10_lo.gov wx_size_0_zr z_size_bhsd_5 +00000101xx101010100xxxxxxxxxxxxx n 835 SVE clasta bhsd_size_reg0 : p10_lo.gov bhsd_size_reg0 z_size_bhsd_5 +00000101xx101000100xxxxxxxxxxxxx n 835 SVE clasta z_size_bhsd_0 : p10_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000101xx110001101xxxxxxxxxxxxx n 836 SVE clastb wx_size_0_zr : p10_lo.gov wx_size_0_zr z_size_bhsd_5 +00000101xx101011100xxxxxxxxxxxxx n 836 SVE clastb bhsd_size_reg0 : p10_lo.gov bhsd_size_reg0 z_size_bhsd_5 +00000101xx101001100xxxxxxxxxxxxx n 836 SVE clastb z_size_bhsd_0 : p10_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx011000101xxxxxxxxxxxxx n 59 SVE cls z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +00000100xx011001101xxxxxxxxxxxxx n 60 SVE clz z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +00100101xx0xxxxx100xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 +00100100xx0xxxxx001xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx101xxxxxxxx0xxxx w 807 SVE cmpeq p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00100101xx0xxxxx000xxxxxxxx0xxxx w 808 SVE cmpge p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 +00100100xx0xxxxx010xxxxxxxx0xxxx w 808 SVE 
cmpge p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx100xxxxxxxx0xxxx w 808 SVE cmpge p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00100101xx0xxxxx000xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 +00100100xx0xxxxx010xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx100xxxxxxxx1xxxx w 809 SVE cmpgt p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00100100xx1xxxxxxx0xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 imm7 +00100100xx0xxxxx110xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx000xxxxxxxx1xxxx w 810 SVE cmphi p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00100100xx1xxxxxxx0xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 imm7 +00100100xx0xxxxx110xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx000xxxxxxxx0xxxx w 811 SVE cmphs p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00100101xx0xxxxx001xxxxxxxx1xxxx w 812 SVE cmple p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 +00100100xx0xxxxx011xxxxxxxx1xxxx w 812 SVE cmple p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx1xxxxxxx1xxxxxxxx0xxxx w 813 SVE cmplo p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 imm7 +00100100xx0xxxxx111xxxxxxxx0xxxx w 813 SVE cmplo p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx1xxxxxxx1xxxxxxxx1xxxx w 814 SVE cmpls p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 imm7 +00100100xx0xxxxx111xxxxxxxx1xxxx w 814 SVE cmpls p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100101xx0xxxxx001xxxxxxxx0xxxx w 815 SVE cmplt p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 +00100100xx0xxxxx011xxxxxxxx0xxxx w 815 SVE cmplt p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100101xx0xxxxx100xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 simm5 
+00100100xx0xxxxx001xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhs_0 : p10_zer_lo.gov z_size_bhs_5 z_d_16 +00100100xx0xxxxx101xxxxxxxx1xxxx w 816 SVE cmpne p_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00000100xx011011101xxxxxxxxxxxxx n 793 SVE cnot z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +00000100xx011010101xxxxxxxxxxxxx n 69 SVE cnt z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 000001000010xxxx111000xxxxxxxxxx n 839 SVE cntb x0 : pred_constr mul imm4_16p1 000001001110xxxx111000xxxxxxxxxx n 844 SVE cntd x0 : pred_constr mul imm4_16p1 000001000110xxxx111000xxxxxxxxxx n 845 SVE cnth x0 : pred_constr mul imm4_16p1 -00100101xx10000010xxxx0xxxxxxxxx n 821 SVE cntp x0 : p10 p_size_bhsd_5 +00100101xx10000010xxxx0xxxxxxxxx n 821 SVE cntp x0 : p10.gov p_size_bhsd_5 000001001010xxxx111000xxxxxxxxxx n 846 SVE cntw x0 : pred_constr mul imm4_16p1 -00000101xx100001100xxxxxxxxxxxxx n 886 SVE compact z_size_sd_0 : p10_lo z_size_sd_5 -00000101xx01xxxx00xxxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p16_zer simm8_5 lsl shift1 -00000101xx01xxxx01xxxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p16_mrg simm8_5 lsl shift1 -00000101xx101000101xxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p10_mrg_lo wx_size_5_sp -00000101xx100000100xxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p10_mrg_lo bhsd_size_reg5 +00000101xx100001100xxxxxxxxxxxxx n 886 SVE compact z_size_sd_0 : p10_lo.gov z_size_sd_5 +00000101xx01xxxx00xxxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p16_zer.gov simm8_5 lsl shift1 +00000101xx01xxxx01xxxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p16_mrg.gov simm8_5 lsl shift1 +00000101xx101000101xxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p10_mrg_lo.gov wx_size_5_sp +00000101xx100000100xxxxxxxxxxxxx n 785 SVE cpy z_size_bhsd_0 : p10_mrg_lo.gov bhsd_size_reg5 001001011x1xxxxx001000xxxxx00000 rw 923 SVE ctermeq : wx_sz_5 wx_sz_16 001001011x1xxxxx001000xxxxx10000 rw 924 SVE ctermne : wx_sz_5 wx_sz_16 000001000011xxxx111001xxxxxxxxxx n 840 SVE decb x0 : x0 pred_constr mul imm4_16p1 @@ 
-153,131 +151,130 @@ 00000101xx1xxxxx001000xxxxxxxxxx n 88 SVE dup z_tsz_bhsdq_0 : z_tsz_bhsdq_5 imm2_tsz_index 00000101xx100000001110xxxxxxxxxx n 88 SVE dup z_size_bhsd_0 : wx_size_5_sp 00000101110000xxxxxxxxxxxxxxxxxx n 893 SVE dupm z_imm13_bhsd_0 : imm13_const -00000100xx011001000xxxxxxxxxxxxx n 90 SVE eor z0 : p10_lo z0 z5 bhsd_sz 00000101010000xxxxxxxxxxxxxxxxxx n 90 SVE eor z_imm13_bhsd_0 : z_imm13_bhsd_0 imm13_const -001001010000xxxx01xxxx1xxxx0xxxx n 90 SVE eor p_b_0 : p10_zer p_b_5 p_b_16 +001001010000xxxx01xxxx1xxxx0xxxx n 90 SVE eor p_b_0 : p10_zer.gov p_b_5 p_b_16 00000100101xxxxx001100xxxxxxxxxx n 90 SVE eor z_d_0 : z_d_5 z_d_16 -00000100xx011001000xxxxxxxxxxxxx n 90 SVE eor z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -001001010100xxxx01xxxx1xxxx0xxxx w 828 SVE eors p_b_0 : p10_zer p_b_5 p_b_16 -0000010000011001001xxxxxxxxxxxxx n 916 SVE eorv b0 : p10_lo z_size_bhsd_5 -0000010001011001001xxxxxxxxxxxxx n 916 SVE eorv h0 : p10_lo z_size_bhsd_5 -0000010010011001001xxxxxxxxxxxxx n 916 SVE eorv s0 : p10_lo z_size_bhsd_5 -0000010011011001001xxxxxxxxxxxxx n 916 SVE eorv d0 : p10_lo z_size_bhsd_5 +00000100xx011001000xxxxxxxxxxxxx n 90 SVE eor z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +001001010100xxxx01xxxx1xxxx0xxxx w 828 SVE eors p_b_0 : p10_zer.gov p_b_5 p_b_16 +0000010000011001001xxxxxxxxxxxxx n 916 SVE eorv b0 : p10_lo.gov z_size_bhsd_5 +0000010001011001001xxxxxxxxxxxxx n 916 SVE eorv h0 : p10_lo.gov z_size_bhsd_5 +0000010010011001001xxxxxxxxxxxxx n 916 SVE eorv s0 : p10_lo.gov z_size_bhsd_5 +0000010011011001001xxxxxxxxxxxxx n 916 SVE eorv d0 : p10_lo.gov z_size_bhsd_5 00000101001xxxxx000xxxxxxxxxxxxx n 92 SVE ext z_b_0 : z_b_0 z_b_5 imm8_10 -01100101xx001000100xxxxxxxxxxxxx n 94 SVE fabd z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -00000100xx011100101xxxxxxxxxxxxx n 95 SVE fabs z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx0xxxxx110xxxxxxxx1xxxx n 96 SVE facge p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 
-01100101xx0xxxxx111xxxxxxxx1xxxx n 97 SVE facgt p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 -01100101xx011000100xxx0000xxxxxx n 98 SVE fadd z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_half_one_5 -01100101xx000000100xxxxxxxxxxxxx n 98 SVE fadd z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100101xx001000100xxxxxxxxxxxxx n 94 SVE fabd z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +00000100xx011100101xxxxxxxxxxxxx n 95 SVE fabs z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx0xxxxx110xxxxxxxx1xxxx n 96 SVE facge p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx0xxxxx111xxxxxxxx1xxxx n 97 SVE facgt p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx011000100xxx0000xxxxxx n 98 SVE fadd z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_half_one_5 +01100101xx000000100xxxxxxxxxxxxx n 98 SVE fadd z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 01100101xx0xxxxx000000xxxxxxxxxx n 98 SVE fadd z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 -0110010101011000001xxxxxxxxxxxxx n 917 SVE fadda h0 : p10_lo h0 z_size_hsd_5 -0110010110011000001xxxxxxxxxxxxx n 917 SVE fadda s0 : p10_lo s0 z_size_hsd_5 -0110010111011000001xxxxxxxxxxxxx n 917 SVE fadda d0 : p10_lo d0 z_size_hsd_5 -0110010101000000001xxxxxxxxxxxxx n 918 SVE faddv h0 : p10_lo z_size_hsd_5 -0110010110000000001xxxxxxxxxxxxx n 918 SVE faddv s0 : p10_lo z_size_hsd_5 -0110010111000000001xxxxxxxxxxxxx n 918 SVE faddv d0 : p10_lo z_size_hsd_5 -01100100xx00000x100xxxxxxxxxxxxx n 944 SVE fcadd z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 imm1_ew_16 -01100101xx010010001xxxxxxxx0xxxx n 102 SVE fcmeq p_size_hsd_0 : p10_zer_lo z_size_hsd_5 zero_fp_const -01100101xx0xxxxx011xxxxxxxx0xxxx n 102 SVE fcmeq p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 -01100101xx010000001xxxxxxxx0xxxx n 103 SVE fcmge p_size_hsd_0 : p10_zer_lo z_size_hsd_5 zero_fp_const -01100101xx0xxxxx010xxxxxxxx0xxxx n 103 SVE fcmge p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 
-01100101xx010000001xxxxxxxx1xxxx n 104 SVE fcmgt p_size_hsd_0 : p10_zer_lo z_size_hsd_5 zero_fp_const -01100101xx0xxxxx010xxxxxxxx1xxxx n 104 SVE fcmgt p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 -01100100xx0xxxxx0xxxxxxxxxxxxxxx n 945 SVE fcmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 imm2_nesw_13 +0110010101011000001xxxxxxxxxxxxx n 917 SVE fadda h0 : p10_lo.gov h0 z_size_hsd_5 +0110010110011000001xxxxxxxxxxxxx n 917 SVE fadda s0 : p10_lo.gov s0 z_size_hsd_5 +0110010111011000001xxxxxxxxxxxxx n 917 SVE fadda d0 : p10_lo.gov d0 z_size_hsd_5 +0110010101000000001xxxxxxxxxxxxx n 918 SVE faddv h0 : p10_lo.gov z_size_hsd_5 +0110010110000000001xxxxxxxxxxxxx n 918 SVE faddv s0 : p10_lo.gov z_size_hsd_5 +0110010111000000001xxxxxxxxxxxxx n 918 SVE faddv d0 : p10_lo.gov z_size_hsd_5 +01100100xx00000x100xxxxxxxxxxxxx n 944 SVE fcadd z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 imm1_ew_16 +01100101xx010010001xxxxxxxx0xxxx n 102 SVE fcmeq p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx0xxxxx011xxxxxxxx0xxxx n 102 SVE fcmeq p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx010000001xxxxxxxx0xxxx n 103 SVE fcmge p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx0xxxxx010xxxxxxxx0xxxx n 103 SVE fcmge p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx010000001xxxxxxxx1xxxx n 104 SVE fcmgt p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx0xxxxx010xxxxxxxx1xxxx n 104 SVE fcmgt p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100100xx0xxxxx0xxxxxxxxxxxxxxx n 945 SVE fcmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 imm2_nesw_13 01100100101xxxxx0001xxxxxxxxxxxx n 945 SVE fcmla z_h_0 : z_h_0 z_h_5 z3_h_16 i2_index_19 imm2_nesw_10 01100100111xxxxx0001xxxxxxxxxxxx n 945 SVE fcmla z_s_0 : z_s_0 z_s_5 z4_s_16 i1_index_20 imm2_nesw_10 -01100101xx010001001xxxxxxxx1xxxx n 105 SVE fcmle p_size_hsd_0 : p10_zer_lo 
z_size_hsd_5 zero_fp_const -01100101xx010001001xxxxxxxx0xxxx n 106 SVE fcmlt p_size_hsd_0 : p10_zer_lo z_size_hsd_5 zero_fp_const -01100101xx010011001xxxxxxxx0xxxx n 805 SVE fcmne p_size_hsd_0 : p10_zer_lo z_size_hsd_5 zero_fp_const -01100101xx0xxxxx011xxxxxxxx1xxxx n 805 SVE fcmne p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 -01100101xx0xxxxx110xxxxxxxx0xxxx n 806 SVE fcmuo p_size_hsd_0 : p10_zer_lo z_size_hsd_5 z_size_hsd_16 -00000101xx01xxxx110xxxxxxxxxxxxx n 906 SVE fcpy z_size_hsd_0 : p16_mrg fpimm8_5 -0110010111001000101xxxxxxxxxxxxx n 110 SVE fcvt z_h_0 : p10_mrg_lo z_d_5 -0110010111001010101xxxxxxxxxxxxx n 110 SVE fcvt z_s_0 : p10_mrg_lo z_d_5 -0110010111001001101xxxxxxxxxxxxx n 110 SVE fcvt z_d_0 : p10_mrg_lo z_h_5 -0110010110001001101xxxxxxxxxxxxx n 110 SVE fcvt z_s_0 : p10_mrg_lo z_h_5 -0110010111001011101xxxxxxxxxxxxx n 110 SVE fcvt z_d_0 : p10_mrg_lo z_s_5 -0110010110001000101xxxxxxxxxxxxx n 110 SVE fcvt z_h_0 : p10_mrg_lo z_s_5 -0110010111011000101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo z_d_5 -0110010111011110101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo z_d_5 -0110010101011010101xxxxxxxxxxxxx n 125 SVE fcvtzs z_h_0 : p10_mrg_lo z_h_5 -0110010101011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo z_h_5 -0110010101011110101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo z_h_5 -0110010110011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo z_s_5 -0110010111011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo z_s_5 -0110010111011001101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo z_d_5 -0110010111011111101xxxxxxxxxxxxx n 126 SVE fcvtzu z_d_0 : p10_mrg_lo z_d_5 -0110010101011011101xxxxxxxxxxxxx n 126 SVE fcvtzu z_h_0 : p10_mrg_lo z_h_5 -0110010101011101101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo z_h_5 -0110010101011111101xxxxxxxxxxxxx n 126 SVE fcvtzu z_d_0 : p10_mrg_lo z_h_5 -0110010110011101101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo z_s_5 -0110010111011101101xxxxxxxxxxxxx n 126 SVE 
fcvtzu z_d_0 : p10_mrg_lo z_s_5 -01100101xx001101100xxxxxxxxxxxxx n 127 SVE fdiv z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100101xx001100100xxxxxxxxxxxxx n 926 SVE fdivr z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100101xx010001001xxxxxxxx1xxxx n 105 SVE fcmle p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx010001001xxxxxxxx0xxxx n 106 SVE fcmlt p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx010011001xxxxxxxx0xxxx n 805 SVE fcmne p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 zero_fp_const +01100101xx0xxxxx011xxxxxxxx1xxxx n 805 SVE fcmne p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx0xxxxx110xxxxxxxx0xxxx n 806 SVE fcmuo p_size_hsd_0 : p10_zer_lo.gov z_size_hsd_5 z_size_hsd_16 +00000101xx01xxxx110xxxxxxxxxxxxx n 906 SVE fcpy z_size_hsd_0 : p16_mrg.gov fpimm8_5 +0110010111001000101xxxxxxxxxxxxx n 110 SVE fcvt z_h_0 : p10_mrg_lo.gov z_d_5 +0110010111001010101xxxxxxxxxxxxx n 110 SVE fcvt z_s_0 : p10_mrg_lo.gov z_d_5 +0110010111001001101xxxxxxxxxxxxx n 110 SVE fcvt z_d_0 : p10_mrg_lo.gov z_h_5 +0110010110001001101xxxxxxxxxxxxx n 110 SVE fcvt z_s_0 : p10_mrg_lo.gov z_h_5 +0110010111001011101xxxxxxxxxxxxx n 110 SVE fcvt z_d_0 : p10_mrg_lo.gov z_s_5 +0110010110001000101xxxxxxxxxxxxx n 110 SVE fcvt z_h_0 : p10_mrg_lo.gov z_s_5 +0110010111011000101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo.gov z_d_5 +0110010111011110101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo.gov z_d_5 +0110010101011010101xxxxxxxxxxxxx n 125 SVE fcvtzs z_h_0 : p10_mrg_lo.gov z_h_5 +0110010101011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo.gov z_h_5 +0110010101011110101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo.gov z_h_5 +0110010110011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_s_0 : p10_mrg_lo.gov z_s_5 +0110010111011100101xxxxxxxxxxxxx n 125 SVE fcvtzs z_d_0 : p10_mrg_lo.gov z_s_5 +0110010111011001101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo.gov z_d_5 +0110010111011111101xxxxxxxxxxxxx n 
126 SVE fcvtzu z_d_0 : p10_mrg_lo.gov z_d_5 +0110010101011011101xxxxxxxxxxxxx n 126 SVE fcvtzu z_h_0 : p10_mrg_lo.gov z_h_5 +0110010101011101101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo.gov z_h_5 +0110010101011111101xxxxxxxxxxxxx n 126 SVE fcvtzu z_d_0 : p10_mrg_lo.gov z_h_5 +0110010110011101101xxxxxxxxxxxxx n 126 SVE fcvtzu z_s_0 : p10_mrg_lo.gov z_s_5 +0110010111011101101xxxxxxxxxxxxx n 126 SVE fcvtzu z_d_0 : p10_mrg_lo.gov z_s_5 +01100101xx001101100xxxxxxxxxxxxx n 127 SVE fdiv z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100101xx001100100xxxxxxxxxxxxx n 926 SVE fdivr z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 00100101xx111001110xxxxxxxxxxxxx n 907 SVE fdup z_size_hsd_0 : fpimm8_5 00000100xx100000101110xxxxxxxxxx n 789 SVE fexpa z_size_hsd_0 : z_size_hsd_5 -01100101xx1xxxxx100xxxxxxxxxxxxx n 927 SVE fmad z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 -01100101xx011110100xxx0000xxxxxx n 129 SVE fmax z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_zero_one_5 -01100101xx000110100xxxxxxxxxxxxx n 129 SVE fmax z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100101xx011100100xxx0000xxxxxx n 130 SVE fmaxnm z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_zero_one_5 -01100101xx000100100xxxxxxxxxxxxx n 130 SVE fmaxnm z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -0110010101000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv h0 : p10_lo z_size_hsd_5 -0110010110000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv s0 : p10_lo z_size_hsd_5 -0110010111000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv d0 : p10_lo z_size_hsd_5 -0110010101000110001xxxxxxxxxxxxx n 134 SVE fmaxv h0 : p10_lo z_size_hsd_5 -0110010110000110001xxxxxxxxxxxxx n 134 SVE fmaxv s0 : p10_lo z_size_hsd_5 -0110010111000110001xxxxxxxxxxxxx n 134 SVE fmaxv d0 : p10_lo z_size_hsd_5 -01100101xx011111100xxx0000xxxxxx n 135 SVE fmin z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_zero_one_5 -01100101xx000111100xxxxxxxxxxxxx n 135 SVE fmin z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 
-01100101xx011101100xxx0000xxxxxx n 136 SVE fminnm z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_zero_one_5 -01100101xx000101100xxxxxxxxxxxxx n 136 SVE fminnm z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -0110010101000101001xxxxxxxxxxxxx n 138 SVE fminnmv h0 : p10_lo z_size_hsd_5 -0110010110000101001xxxxxxxxxxxxx n 138 SVE fminnmv s0 : p10_lo z_size_hsd_5 -0110010111000101001xxxxxxxxxxxxx n 138 SVE fminnmv d0 : p10_lo z_size_hsd_5 -0110010101000111001xxxxxxxxxxxxx n 140 SVE fminv h0 : p10_lo z_size_hsd_5 -0110010110000111001xxxxxxxxxxxxx n 140 SVE fminv s0 : p10_lo z_size_hsd_5 -0110010111000111001xxxxxxxxxxxxx n 140 SVE fminv d0 : p10_lo z_size_hsd_5 -01100101xx1xxxxx000xxxxxxxxxxxxx n 141 SVE fmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 +01100101xx1xxxxx100xxxxxxxxxxxxx n 927 SVE fmad z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx011110100xxx0000xxxxxx n 129 SVE fmax z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_zero_one_5 +01100101xx000110100xxxxxxxxxxxxx n 129 SVE fmax z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100101xx011100100xxx0000xxxxxx n 130 SVE fmaxnm z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_zero_one_5 +01100101xx000100100xxxxxxxxxxxxx n 130 SVE fmaxnm z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +0110010101000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv h0 : p10_lo.gov z_size_hsd_5 +0110010110000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv s0 : p10_lo.gov z_size_hsd_5 +0110010111000100001xxxxxxxxxxxxx n 132 SVE fmaxnmv d0 : p10_lo.gov z_size_hsd_5 +0110010101000110001xxxxxxxxxxxxx n 134 SVE fmaxv h0 : p10_lo.gov z_size_hsd_5 +0110010110000110001xxxxxxxxxxxxx n 134 SVE fmaxv s0 : p10_lo.gov z_size_hsd_5 +0110010111000110001xxxxxxxxxxxxx n 134 SVE fmaxv d0 : p10_lo.gov z_size_hsd_5 +01100101xx011111100xxx0000xxxxxx n 135 SVE fmin z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_zero_one_5 +01100101xx000111100xxxxxxxxxxxxx n 135 SVE fmin z_size_hsd_0 : 
p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100101xx011101100xxx0000xxxxxx n 136 SVE fminnm z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_zero_one_5 +01100101xx000101100xxxxxxxxxxxxx n 136 SVE fminnm z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +0110010101000101001xxxxxxxxxxxxx n 138 SVE fminnmv h0 : p10_lo.gov z_size_hsd_5 +0110010110000101001xxxxxxxxxxxxx n 138 SVE fminnmv s0 : p10_lo.gov z_size_hsd_5 +0110010111000101001xxxxxxxxxxxxx n 138 SVE fminnmv d0 : p10_lo.gov z_size_hsd_5 +0110010101000111001xxxxxxxxxxxxx n 140 SVE fminv h0 : p10_lo.gov z_size_hsd_5 +0110010110000111001xxxxxxxxxxxxx n 140 SVE fminv s0 : p10_lo.gov z_size_hsd_5 +0110010111000111001xxxxxxxxxxxxx n 140 SVE fminv d0 : p10_lo.gov z_size_hsd_5 +01100101xx1xxxxx000xxxxxxxxxxxxx n 141 SVE fmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 011001000x1xxxxx000000xxxxxxxxxx n 141 SVE fmla z_h_0 : z_h_0 z_h_5 z3_h_16 i3_index_19 01100100101xxxxx000000xxxxxxxxxx n 141 SVE fmla z_s_0 : z_s_0 z_s_5 z3_s_16 i2_index_19 01100100111xxxxx000000xxxxxxxxxx n 141 SVE fmla z_d_0 : z_d_0 z_d_5 z4_d_16 i1_index_20 -01100101xx1xxxxx001xxxxxxxxxxxxx n 144 SVE fmls z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 +01100101xx1xxxxx001xxxxxxxxxxxxx n 144 SVE fmls z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 011001000x1xxxxx000001xxxxxxxxxx n 144 SVE fmls z_h_0 : z_h_0 z_h_5 z3_h_16 i3_index_19 01100100101xxxxx000001xxxxxxxxxx n 144 SVE fmls z_s_0 : z_s_0 z_s_5 z3_s_16 i2_index_19 01100100111xxxxx000001xxxxxxxxxx n 144 SVE fmls z_d_0 : z_d_0 z_d_5 z4_d_16 i1_index_20 -01100101xx1xxxxx101xxxxxxxxxxxxx n 933 SVE fmsb z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 -01100101xx011010100xxx0000xxxxxx n 149 SVE fmul z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_half_two_5 -01100101xx000010100xxxxxxxxxxxxx n 149 SVE fmul z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100101xx1xxxxx101xxxxxxxxxxxxx n 933 SVE fmsb 
z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx011010100xxx0000xxxxxx n 149 SVE fmul z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_half_two_5 +01100101xx000010100xxxxxxxxxxxxx n 149 SVE fmul z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 01100101xx0xxxxx000010xxxxxxxxxx n 149 SVE fmul z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 011001000x1xxxxx001000xxxxxxxxxx n 149 SVE fmul z_h_0 : z_h_5 z3_h_16 i3_index_19 01100100101xxxxx001000xxxxxxxxxx n 149 SVE fmul z_s_0 : z_s_5 z3_s_16 i2_index_19 01100100111xxxxx001000xxxxxxxxxx n 149 SVE fmul z_d_0 : z_d_5 z4_d_16 i1_index_20 -01100101xx001010100xxxxxxxxxxxxx n 150 SVE fmulx z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -00000100xx011101101xxxxxxxxxxxxx n 151 SVE fneg z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx1xxxxx110xxxxxxxxxxxxx n 928 SVE fnmad z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 -01100101xx1xxxxx010xxxxxxxxxxxxx n 929 SVE fnmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 -01100101xx1xxxxx011xxxxxxxxxxxxx n 930 SVE fnmls z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 -01100101xx1xxxxx111xxxxxxxxxxxxx n 1063 SVE fnmsb z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_size_hsd_5 z_size_hsd_16 +01100101xx001010100xxxxxxxxxxxxx n 150 SVE fmulx z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +00000100xx011101101xxxxxxxxxxxxx n 151 SVE fneg z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx1xxxxx110xxxxxxxxxxxxx n 928 SVE fnmad z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx1xxxxx010xxxxxxxxxxxxx n 929 SVE fnmla z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx1xxxxx011xxxxxxxxxxxxx n 930 SVE fnmls z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 +01100101xx1xxxxx111xxxxxxxxxxxxx n 1063 SVE fnmsb z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_size_hsd_5 z_size_hsd_16 01100101xx001110001100xxxxxxxxxx n 155 SVE 
frecpe z_size_hsd_0 : z_size_hsd_5 01100101xx0xxxxx000110xxxxxxxxxx n 156 SVE frecps z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 -01100101xx001100101xxxxxxxxxxxxx n 157 SVE frecpx z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000100101xxxxxxxxxxxxx n 158 SVE frinta z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000111101xxxxxxxxxxxxx n 159 SVE frinti z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000010101xxxxxxxxxxxxx n 160 SVE frintm z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000000101xxxxxxxxxxxxx n 161 SVE frintn z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000001101xxxxxxxxxxxxx n 162 SVE frintp z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000110101xxxxxxxxxxxxx n 163 SVE frintx z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx000011101xxxxxxxxxxxxx n 164 SVE frintz z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 +01100101xx001100101xxxxxxxxxxxxx n 157 SVE frecpx z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000100101xxxxxxxxxxxxx n 158 SVE frinta z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000111101xxxxxxxxxxxxx n 159 SVE frinti z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000010101xxxxxxxxxxxxx n 160 SVE frintm z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000000101xxxxxxxxxxxxx n 161 SVE frintn z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000001101xxxxxxxxxxxxx n 162 SVE frintp z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000110101xxxxxxxxxxxxx n 163 SVE frintx z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx000011101xxxxxxxxxxxxx n 164 SVE frintz z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 01100101xx001111001100xxxxxxxxxx n 165 SVE frsqrte z_size_hsd_0 : z_size_hsd_5 01100101xx0xxxxx000111xxxxxxxxxx n 166 SVE frsqrts z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 -01100101xx001001100xxxxxxxxxxxxx n 931 SVE fscale z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100101xx001101101xxxxxxxxxxxxx n 167 SVE fsqrt z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -01100101xx011001100xxx0000xxxxxx n 
168 SVE fsub z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_half_one_5 -01100101xx000001100xxxxxxxxxxxxx n 168 SVE fsub z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100101xx001001100xxxxxxxxxxxxx n 931 SVE fscale z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100101xx001101101xxxxxxxxxxxxx n 167 SVE fsqrt z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +01100101xx011001100xxx0000xxxxxx n 168 SVE fsub z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_half_one_5 +01100101xx000001100xxxxxxxxxxxxx n 168 SVE fsub z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 01100101xx0xxxxx000001xxxxxxxxxx n 168 SVE fsub z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 -01100101xx011011100xxx0000xxxxxx n 932 SVE fsubr z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 fpimm1_half_one_5 -01100101xx000011100xxxxxxxxxxxxx n 932 SVE fsubr z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100101xx011011100xxx0000xxxxxx n 932 SVE fsubr z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 fpimm1_half_one_5 +01100101xx000011100xxxxxxxxxxxxx n 932 SVE fsubr z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 01100101xx010xxx100000xxxxxxxxxx n 790 SVE ftmad z_size_hsd_0 : z_size_hsd_0 z_size_hsd_5 imm3 01100101xx0xxxxx000011xxxxxxxxxx n 791 SVE ftsmul z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 00000100xx1xxxxx101100xxxxxxxxxx n 792 SVE ftssel z_size_hsd_0 : z_size_hsd_5 z_size_hsd_16 @@ -296,345 +293,343 @@ 00000100xx1xxxxx010011xxxxxxxxxx n 922 SVE index z_size_bhsd_0 : wx_size_5_zr wx_size_16_zr 00000101xx100100001110xxxxxxxxxx n 881 SVE insr z_size_bhsd_0 : z_size_bhsd_0 wx_size_5_zr 00000101xx110100001110xxxxxxxxxx n 881 SVE insr z_size_bhsd_0 : z_size_bhsd_0 bhsd_size_reg5 -00000101xx100000101xxxxxxxxxxxxx n 837 SVE lasta wx_size_0_zr : p10_lo z_size_bhsd_5 -00000101xx100010100xxxxxxxxxxxxx n 837 SVE lasta bhsd_size_reg0 : p10_lo z_size_bhsd_5 -00000101xx100001101xxxxxxxxxxxxx n 838 SVE lastb wx_size_0_zr : p10_lo z_size_bhsd_5 -00000101xx100011100xxxxxxxxxxxxx n 838 SVE lastb 
bhsd_size_reg0 : p10_lo z_size_bhsd_5 -10100100001xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_h_0 : svemem_gpr_shf p10_zer_lo -10100100010xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_shf p10_zer_lo -10100100011xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_shf p10_zer_lo -10100100000xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_b_0 : svemem_gpr_shf p10_zer_lo -11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo -10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo -10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100010xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001000x0xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001000x0xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -101001000010xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000100xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000110xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000000xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_b_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -11000101101xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101111xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101110xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001011x1xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001011x0xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101111xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo -101001011110xxxx101xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -10000100101xxxxx110xxxxxxxxxxxxx n 
976 SVE ld1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100111xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000100110xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001001x1xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001001x0xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x1xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x0xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100100101xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo -10100100110xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_shf p10_zer_lo -10100100111xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_shf p10_zer_lo -101001001010xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001100xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001110xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -1000010001xxxxxx101xxxxxxxxxxxxx n 908 SVE ld1rb z_h_0 : svememx6_b_5 p10_zer_lo -1000010001xxxxxx110xxxxxxxxxxxxx n 908 SVE ld1rb z_s_0 : svememx6_b_5 p10_zer_lo -1000010001xxxxxx111xxxxxxxxxxxxx n 908 SVE ld1rb z_d_0 : svememx6_b_5 p10_zer_lo -1000010001xxxxxx100xxxxxxxxxxxxx n 908 SVE ld1rb z_b_0 : svememx6_b_5 p10_zer_lo -1000010111xxxxxx111xxxxxxxxxxxxx n 909 SVE ld1rd z_d_0 : svememx6_d_5 p10_zer_lo -1000010011xxxxxx101xxxxxxxxxxxxx n 910 SVE ld1rh z_h_0 : svememx6_h_5 p10_zer_lo -1000010011xxxxxx110xxxxxxxxxxxxx n 910 SVE ld1rh z_s_0 : svememx6_h_5 p10_zer_lo -1000010011xxxxxx111xxxxxxxxxxxxx n 910 SVE ld1rh z_d_0 : svememx6_h_5 p10_zer_lo -10100100001xxxxx000xxxxxxxxxxxxx n 947 SVE ld1rob z_b_0 : svemem_ssz_gpr_shf p10_zer_lo -101001000000xxxx001xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : 
svemem_ssz_gpr_simm4 p10_zer_lo -10100100000xxxxx000xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_ssz_gpr_shf p10_zer_lo -101001011000xxxx001xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_ssz_gpr_simm4 p10_zer_lo -10100101100xxxxx000xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_ssz_gpr_shf p10_zer_lo -101001001000xxxx001xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_ssz_gpr_simm4 p10_zer_lo -10100100100xxxxx000xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_ssz_gpr_shf p10_zer_lo -101001010000xxxx001xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_ssz_gpr_simm4 p10_zer_lo -10100101000xxxxx000xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_ssz_gpr_shf p10_zer_lo -1000010111xxxxxx110xxxxxxxxxxxxx n 911 SVE ld1rsb z_h_0 : svememx6_b_5 p10_zer_lo -1000010111xxxxxx101xxxxxxxxxxxxx n 911 SVE ld1rsb z_s_0 : svememx6_b_5 p10_zer_lo -1000010111xxxxxx100xxxxxxxxxxxxx n 911 SVE ld1rsb z_d_0 : svememx6_b_5 p10_zer_lo -1000010101xxxxxx101xxxxxxxxxxxxx n 912 SVE ld1rsh z_s_0 : svememx6_h_5 p10_zer_lo -1000010101xxxxxx100xxxxxxxxxxxxx n 912 SVE ld1rsh z_d_0 : svememx6_h_5 p10_zer_lo -1000010011xxxxxx100xxxxxxxxxxxxx n 913 SVE ld1rsw z_d_0 : svememx6_s_5 p10_zer_lo -1000010101xxxxxx110xxxxxxxxxxxxx n 914 SVE ld1rw z_s_0 : svememx6_s_5 p10_zer_lo -1000010101xxxxxx111xxxxxxxxxxxxx n 914 SVE ld1rw z_d_0 : svememx6_s_5 p10_zer_lo -10100101110xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_h_0 : svemem_gpr_shf p10_zer_lo -10100101101xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_shf p10_zer_lo -10100101100xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_shf p10_zer_lo -10000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100010xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001000x0xxxxx000xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001000x0xxxxx000xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : 
svemem_gpr_vec32_ld p10_zer_lo -101001011100xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011010xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011000xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -10000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100111xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000100110xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001001x1xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001001x0xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x1xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x0xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101001xxxxx010xxxxxxxxxxxxx n 977 SVE ld1sh z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo -10100101000xxxxx010xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_shf p10_zer_lo -101001010010xxxx101xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010000xxxx101xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101011xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101010xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001010x1xxxxx000xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001010x0xxxxx000xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo -10100100100xxxxx010xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_shf p10_zer_lo 
-101001001000xxxx101xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -10000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101011xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101010xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001010x1xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001010x0xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001010x1xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001010x0xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101010xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo -10100101011xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_shf p10_zer_lo -101001010100xxxx101xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010110xxxx101xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -10100100001xxxxx110xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 z_msz_bhsd_0p1 : svemem_gprs_bhsdx p10_zer_lo -101001000010xxxx111xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101101xxxxx110xxxxxxxxxxxxx n 983 SVE ld2d z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_msz_gpr_shf p10_zer_lo -101001011010xxxx111xxxxxxxxxxxxx n 983 SVE ld2d z_d_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100101xxxxx110xxxxxxxxxxxxx n 984 SVE ld2h z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_msz_gpr_shf p10_zer_lo -101001001010xxxx111xxxxxxxxxxxxx n 984 SVE ld2h z_h_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101001xxxxx110xxxxxxxxxxxxx n 985 SVE ld2w z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_msz_gpr_shf p10_zer_lo -101001010010xxxx111xxxxxxxxxxxxx n 985 SVE ld2w z_s_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg 
p10_zer_lo -10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gprs_bhsdx p10_zer_lo -10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gprs_bhsdx p10_zer_lo -101001000100xxxx111xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101110xxxxx110xxxxxxxxxxxxx n 986 SVE ld3d z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_msz_gpr_shf p10_zer_lo -101001011100xxxx111xxxxxxxxxxxxx n 986 SVE ld3d z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100110xxxxx110xxxxxxxxxxxxx n 987 SVE ld3h z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_msz_gpr_shf p10_zer_lo -101001001100xxxx111xxxxxxxxxxxxx n 987 SVE ld3h z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101010xxxxx110xxxxxxxxxxxxx n 988 SVE ld3w z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_msz_gpr_shf p10_zer_lo -101001010100xxxx111xxxxxxxxxxxxx n 988 SVE ld3w z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100011xxxxx110xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gprs_bhsdx p10_zer_lo -10100100011xxxxx110xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gprs_bhsdx p10_zer_lo -101001000110xxxx111xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101111xxxxx110xxxxxxxxxxxxx n 989 SVE ld4d z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_msz_gpr_shf p10_zer_lo -101001011110xxxx111xxxxxxxxxxxxx n 989 SVE ld4d z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100111xxxxx110xxxxxxxxxxxxx n 990 SVE ld4h z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_msz_gpr_shf p10_zer_lo -101001001110xxxx111xxxxxxxxxxxxx n 990 SVE ld4h z_h_0 z_msz_bhsd_0p1 
z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101011xxxxx110xxxxxxxxxxxxx n 991 SVE ld4w z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_msz_gpr_shf p10_zer_lo -101001010110xxxx111xxxxxxxxxxxxx n 991 SVE ld4w z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100001xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_h_0 : svemem_gpr_shf p10_zer_lo -10100100010xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_shf p10_zer_lo -10100100011xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_shf p10_zer_lo -10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_shf p10_zer_lo -10000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100010xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001000x0xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001000x0xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_shf p10_zer_lo -11000101101xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101111xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101110xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001011x1xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001011x0xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_shf p10_zer_lo -10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_shf p10_zer_lo -10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_shf p10_zer_lo -10000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo 
-11000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100111xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000100110xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001001x1xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001001x0xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x1xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x0xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_shf p10_zer_lo -10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_shf p10_zer_lo -10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_shf p10_zer_lo -10000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100010xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001000x0xxxxx001xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001000x0xxxxx001xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_shf p10_zer_lo -10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_shf p10_zer_lo -10000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000100111xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000100110xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001001x1xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001001x0xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh 
z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x1xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001001x0xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_shf p10_zer_lo -11000101001xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101011xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101010xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001010x1xxxxx001xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001010x0xxxxx001xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_shf p10_zer_lo -10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_shf p10_zer_lo -10000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo -11000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo -11000101011xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo -11000101010xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo -110001010x1xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -110001010x0xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo -100001010x1xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -100001010x0xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo -101001000011xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000101xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000111xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001000001xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_b_0 : 
svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011111xxxx101xxxxxxxxxxxxx n 1008 SVE ldnf1d z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001011xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001101xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001111xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011101xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011011xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001011001xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010011xxxx101xxxxxxxxxxxxx n 1011 SVE ldnf1sh z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010001xxxx101xxxxxxxxxxxxx n 1011 SVE ldnf1sh z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001001001xxxx101xxxxxxxxxxxxx n 1012 SVE ldnf1sw z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010101xxxx101xxxxxxxxxxxxx n 1013 SVE ldnf1w z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -101001010111xxxx101xxxxxxxxxxxxx n 1013 SVE ldnf1w z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo -10100100000xxxxx110xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gprs_b1 p10_zer_lo -101001000000xxxx111xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101100xxxxx110xxxxxxxxxxxxx n 992 SVE ldnt1d z_msz_bhsd_0 : svemem_msz_gpr_shf p10_zer_lo -101001011000xxxx111xxxxxxxxxxxxx n 992 SVE ldnt1d z_d_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100100100xxxxx110xxxxxxxxxxxxx n 993 SVE ldnt1h z_msz_bhsd_0 : svemem_msz_gpr_shf p10_zer_lo -101001001000xxxx111xxxxxxxxxxxxx n 993 SVE ldnt1h z_h_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo -10100101000xxxxx110xxxxxxxxxxxxx n 994 SVE ldnt1w z_msz_bhsd_0 : svemem_msz_gpr_shf p10_zer_lo -101001010000xxxx111xxxxxxxxxxxxx n 994 SVE ldnt1w z_s_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo +00000101xx100000101xxxxxxxxxxxxx n 837 SVE lasta 
wx_size_0_zr : p10_lo.gov z_size_bhsd_5 +00000101xx100010100xxxxxxxxxxxxx n 837 SVE lasta bhsd_size_reg0 : p10_lo.gov z_size_bhsd_5 +00000101xx100001101xxxxxxxxxxxxx n 838 SVE lastb wx_size_0_zr : p10_lo.gov z_size_bhsd_5 +00000101xx100011100xxxxxxxxxxxxx n 838 SVE lastb bhsd_size_reg0 : p10_lo.gov z_size_bhsd_5 +10100100001xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_h_0 : svemem_gpr_shf p10_zer_lo.gov +10100100010xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_shf p10_zer_lo.gov +10100100011xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_shf p10_zer_lo.gov +10100100000xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_b_0 : svemem_gpr_shf p10_zer_lo.gov +11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100010xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001000x0xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001000x0xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +101001000010xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000100xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000110xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000000xxxx101xxxxxxxxxxxxx n 946 SVE ld1b z_b_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +11000101101xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101111xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101110xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001011x1xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d 
z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001011x0xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101111xxxxx010xxxxxxxxxxxxx n 975 SVE ld1d z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +101001011110xxxx101xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +10000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100111xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000100110xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001001x1xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001001x0xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x1xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x0xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100100101xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +10100100110xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_shf p10_zer_lo.gov +10100100111xxxxx010xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_shf p10_zer_lo.gov +101001001010xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001001100xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001001110xxxx101xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +1000010001xxxxxx101xxxxxxxxxxxxx n 908 SVE ld1rb z_h_0 : svememx6_b_5 p10_zer_lo.gov +1000010001xxxxxx110xxxxxxxxxxxxx n 908 SVE ld1rb z_s_0 : svememx6_b_5 p10_zer_lo.gov +1000010001xxxxxx111xxxxxxxxxxxxx n 908 SVE ld1rb z_d_0 : svememx6_b_5 p10_zer_lo.gov +1000010001xxxxxx100xxxxxxxxxxxxx n 908 SVE ld1rb z_b_0 : svememx6_b_5 p10_zer_lo.gov +1000010111xxxxxx111xxxxxxxxxxxxx n 909 SVE 
ld1rd z_d_0 : svememx6_d_5 p10_zer_lo.gov +1000010011xxxxxx101xxxxxxxxxxxxx n 910 SVE ld1rh z_h_0 : svememx6_h_5 p10_zer_lo.gov +1000010011xxxxxx110xxxxxxxxxxxxx n 910 SVE ld1rh z_s_0 : svememx6_h_5 p10_zer_lo.gov +1000010011xxxxxx111xxxxxxxxxxxxx n 910 SVE ld1rh z_d_0 : svememx6_h_5 p10_zer_lo.gov +10100100001xxxxx000xxxxxxxxxxxxx n 947 SVE ld1rob z_b_0 : svemem_gpr_shf p10_zer_lo.gov +101001000000xxxx001xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_ssz_gpr_simm4 p10_zer_lo.gov +10100100000xxxxx000xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_gpr_shf p10_zer_lo.gov +101001011000xxxx001xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_ssz_gpr_simm4 p10_zer_lo.gov +10100101100xxxxx000xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_gpr_shf p10_zer_lo.gov +101001001000xxxx001xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_ssz_gpr_simm4 p10_zer_lo.gov +10100100100xxxxx000xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_gpr_shf p10_zer_lo.gov +101001010000xxxx001xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_ssz_gpr_simm4 p10_zer_lo.gov +10100101000xxxxx000xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_gpr_shf p10_zer_lo.gov +1000010111xxxxxx110xxxxxxxxxxxxx n 911 SVE ld1rsb z_h_0 : svememx6_b_5 p10_zer_lo.gov +1000010111xxxxxx101xxxxxxxxxxxxx n 911 SVE ld1rsb z_s_0 : svememx6_b_5 p10_zer_lo.gov +1000010111xxxxxx100xxxxxxxxxxxxx n 911 SVE ld1rsb z_d_0 : svememx6_b_5 p10_zer_lo.gov +1000010101xxxxxx101xxxxxxxxxxxxx n 912 SVE ld1rsh z_s_0 : svememx6_h_5 p10_zer_lo.gov +1000010101xxxxxx100xxxxxxxxxxxxx n 912 SVE ld1rsh z_d_0 : svememx6_h_5 p10_zer_lo.gov +1000010011xxxxxx100xxxxxxxxxxxxx n 913 SVE ld1rsw z_d_0 : svememx6_s_5 p10_zer_lo.gov +1000010101xxxxxx110xxxxxxxxxxxxx n 914 SVE ld1rw z_s_0 : svememx6_s_5 p10_zer_lo.gov +1000010101xxxxxx111xxxxxxxxxxxxx n 914 SVE ld1rw z_d_0 : svememx6_s_5 p10_zer_lo.gov +10100101110xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_h_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10100101101xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_shf_signed 
p10_zer_lo.gov +10100101100xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100010xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001000x0xxxxx000xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001000x0xxxxx000xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +101001011100xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011010xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011000xxxx101xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +10000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100111xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000100110xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001001x1xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001001x0xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x1xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x0xxxxx000xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101001xxxxx010xxxxxxxxxxxxx n 977 SVE ld1sh z_msz_bhsd_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10100101000xxxxx010xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +101001010010xxxx101xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010000xxxx101xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov 
+11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101011xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101010xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001010x1xxxxx000xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001010x0xxxxx000xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +10100100100xxxxx010xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +101001001000xxxx101xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +10000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101011xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101010xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001010x1xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001010x0xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001010x1xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001010x0xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101010xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +10100101011xxxxx010xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_shf p10_zer_lo.gov +101001010100xxxx101xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010110xxxx101xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +10100100001xxxxx110xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 z_msz_bhsd_0p1 : svemem_gpr_shf p10_zer_lo.gov +101001000010xxxx111xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 
z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101101xxxxx110xxxxxxxxxxxxx n 983 SVE ld2d z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_gpr_shf p10_zer_lo.gov +101001011010xxxx111xxxxxxxxxxxxx n 983 SVE ld2d z_d_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100101xxxxx110xxxxxxxxxxxxx n 984 SVE ld2h z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_gpr_shf p10_zer_lo.gov +101001001010xxxx111xxxxxxxxxxxxx n 984 SVE ld2h z_h_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101001xxxxx110xxxxxxxxxxxxx n 985 SVE ld2w z_msz_bhsd_0 z_msz_bhsd_0p1 : svemem_gpr_shf p10_zer_lo.gov +101001010010xxxx111xxxxxxxxxxxxx n 985 SVE ld2w z_s_0 z_msz_bhsd_0p1 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_shf p10_zer_lo.gov +10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_shf p10_zer_lo.gov +101001000100xxxx111xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101110xxxxx110xxxxxxxxxxxxx n 986 SVE ld3d z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_shf p10_zer_lo.gov +101001011100xxxx111xxxxxxxxxxxxx n 986 SVE ld3d z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100110xxxxx110xxxxxxxxxxxxx n 987 SVE ld3h z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_shf p10_zer_lo.gov +101001001100xxxx111xxxxxxxxxxxxx n 987 SVE ld3h z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101010xxxxx110xxxxxxxxxxxxx n 988 SVE ld3w z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_shf p10_zer_lo.gov +101001010100xxxx111xxxxxxxxxxxxx n 988 SVE ld3w z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100011xxxxx110xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_shf p10_zer_lo.gov +10100100011xxxxx110xxxxxxxxxxxxx 
n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_shf p10_zer_lo.gov +101001000110xxxx111xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101111xxxxx110xxxxxxxxxxxxx n 989 SVE ld4d z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_shf p10_zer_lo.gov +101001011110xxxx111xxxxxxxxxxxxx n 989 SVE ld4d z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100111xxxxx110xxxxxxxxxxxxx n 990 SVE ld4h z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_shf p10_zer_lo.gov +101001001110xxxx111xxxxxxxxxxxxx n 990 SVE ld4h z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101011xxxxx110xxxxxxxxxxxxx n 991 SVE ld4w z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_shf p10_zer_lo.gov +101001010110xxxx111xxxxxxxxxxxxx n 991 SVE ld4w z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100001xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_h_0 : svemem_gpr_shf p10_zer_lo.gov +10100100010xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_shf p10_zer_lo.gov +10100100011xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_shf p10_zer_lo.gov +10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_shf p10_zer_lo.gov +10000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100010xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001000x0xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001000x0xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_shf p10_zer_lo.gov 
+11000101101xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101111xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101110xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001011x1xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001011x0xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_shf p10_zer_lo.gov +10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_shf p10_zer_lo.gov +10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_shf p10_zer_lo.gov +10000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100111xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000100110xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001001x1xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001001x0xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x1xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x0xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100010xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb 
z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001000x0xxxxx001xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001000x0xxxxx001xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +10000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo.gov +11000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000100111xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000100110xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001001x1xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001001x0xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x1xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001001x0xxxxx001xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_shf_signed p10_zer_lo.gov +11000101001xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101011xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101010xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001010x1xxxxx001xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001010x0xxxxx001xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_shf p10_zer_lo.gov +10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_shf p10_zer_lo.gov +10000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_vec_s_imm5 
p10_zer_lo.gov +11000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo.gov +11000101011xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +11000101010xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo.gov +110001010x1xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +110001010x0xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001010x1xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +100001010x0xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_vec32_ld p10_zer_lo.gov +101001000011xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000101xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000111xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001000001xxxx101xxxxxxxxxxxxx n 1007 SVE ldnf1b z_b_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011111xxxx101xxxxxxxxxxxxx n 1008 SVE ldnf1d z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001001011xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001001101xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001001111xxxx101xxxxxxxxxxxxx n 1009 SVE ldnf1h z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011101xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_h_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011011xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001011001xxxx101xxxxxxxxxxxxx n 1010 SVE ldnf1sb z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010011xxxx101xxxxxxxxxxxxx n 1011 SVE ldnf1sh z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010001xxxx101xxxxxxxxxxxxx n 1011 SVE ldnf1sh z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov 
+101001001001xxxx101xxxxxxxxxxxxx n 1012 SVE ldnf1sw z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010101xxxx101xxxxxxxxxxxxx n 1013 SVE ldnf1w z_s_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +101001010111xxxx101xxxxxxxxxxxxx n 1013 SVE ldnf1w z_d_0 : svemem_gpr_simm4_vl_1reg p10_zer_lo.gov +10100100000xxxxx110xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gpr_shf p10_zer_lo.gov +101001000000xxxx111xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101100xxxxx110xxxxxxxxxxxxx n 992 SVE ldnt1d z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +101001011000xxxx111xxxxxxxxxxxxx n 992 SVE ldnt1d z_d_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100100100xxxxx110xxxxxxxxxxxxx n 993 SVE ldnt1h z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +101001001000xxxx111xxxxxxxxxxxxx n 993 SVE ldnt1h z_h_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov +10100101000xxxxx110xxxxxxxxxxxxx n 994 SVE ldnt1w z_msz_bhsd_0 : svemem_gpr_shf p10_zer_lo.gov +101001010000xxxx111xxxxxxxxxxxxx n 994 SVE ldnt1w z_s_0 : svemem_gpr_simm4_vl_xreg p10_zer_lo.gov 1000010110xxxxxx000xxxxxxxx0xxxx n 227 SVE ldr p0 : svemem_gpr_simm9_vl 1000010110xxxxxx010xxxxxxxxxxxxx n 227 SVE ldr z0 : svemem_gpr_simm9_vl -00000100xx000011100xxxxxxxxxxxxx n 902 SVE lsl z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5 -00000100xx011011100xxxxxxxxxxxxx n 902 SVE lsl z_size_bhs_0 : p10_mrg_lo z_size_bhs_0 z_d_5 -00000100xx010011100xxxxxxxxxxxxx n 902 SVE lsl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx000011100xxxxxxxxxxxxx n 902 SVE lsl z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5 +00000100xx011011100xxxxxxxxxxxxx n 902 SVE lsl z_size_bhs_0 : p10_mrg_lo.gov z_size_bhs_0 z_d_5 +00000100xx010011100xxxxxxxxxxxxx n 902 SVE lsl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00000100xx1xxxxx100111xxxxxxxxxx n 902 SVE lsl z_tszl19_bhsd_0 : z_tszl19_bhsd_5 tszl19_imm3_16 00000100xx1xxxxx100011xxxxxxxxxx n 902 SVE lsl z_size_bhs_0 : 
z_size_bhs_5 z_d_16 -00000100xx010111100xxxxxxxxxxxxx n 903 SVE lslr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx000001100xxxxxxxxxxxxx n 904 SVE lsr z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5p1 -00000100xx011001100xxxxxxxxxxxxx n 904 SVE lsr z_size_bhs_0 : p10_mrg_lo z_size_bhs_0 z_d_5 -00000100xx010001100xxxxxxxxxxxxx n 904 SVE lsr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx010111100xxxxxxxxxxxxx n 903 SVE lslr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx000001100xxxxxxxxxxxxx n 904 SVE lsr z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5p1 +00000100xx011001100xxxxxxxxxxxxx n 904 SVE lsr z_size_bhs_0 : p10_mrg_lo.gov z_size_bhs_0 z_d_5 +00000100xx010001100xxxxxxxxxxxxx n 904 SVE lsr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00000100xx1xxxxx100101xxxxxxxxxx n 904 SVE lsr z_tszl19_bhsd_0 : z_tszl19_bhsd_5 tszl19_imm3_16p1 00000100xx1xxxxx100001xxxxxxxxxx n 904 SVE lsr z_size_bhs_0 : z_size_bhs_5 z_d_16 -00000100xx010101100xxxxxxxxxxxxx n 905 SVE lsrr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx0xxxxx110xxxxxxxxxxxxx n 787 SVE mad z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo z_size_bhsd_16 z_size_bhsd_5 -00000100xx0xxxxx010xxxxxxxxxxxxx n 312 SVE mla z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo z_size_bhsd_5 z_size_bhsd_16 -00000100xx0xxxxx011xxxxxxxxxxxxx n 313 SVE mls z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo z_size_bhsd_5 z_size_bhsd_16 +00000100xx010101100xxxxxxxxxxxxx n 905 SVE lsrr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx0xxxxx110xxxxxxxxxxxxx n 787 SVE mad z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo.gov z_size_bhsd_16 z_size_bhsd_5 +00000100xx0xxxxx010xxxxxxxxxxxxx n 312 SVE mla z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo.gov z_size_bhsd_5 z_size_bhsd_16 +00000100xx0xxxxx011xxxxxxxxxxxxx n 313 SVE mls z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo.gov z_size_bhsd_5 z_size_bhsd_16 0000010000100000101111xxxxxxxxxx n 783 
SVE movprfx z0 : z5 -00000100xx010000001xxxxxxxxxxxxx n 783 SVE movprfx z_size_bhsd_0 : p10_zer_lo z_size_bhsd_5 -00000100xx010001001xxxxxxxxxxxxx n 783 SVE movprfx z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -00000100xx0xxxxx111xxxxxxxxxxxxx n 788 SVE msb z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo z_size_bhsd_16 z_size_bhsd_5 -00000100xx010000000xxxxxxxxxxxxx n 321 SVE mul z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx010000001xxxxxxxxxxxxx n 783 SVE movprfx z_size_bhsd_0 : p10_zer_lo.gov z_size_bhsd_5 +00000100xx010001001xxxxxxxxxxxxx n 783 SVE movprfx z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +00000100xx0xxxxx111xxxxxxxxxxxxx n 788 SVE msb z_size_bhsd_0 : z_size_bhsd_0 p10_mrg_lo.gov z_size_bhsd_16 z_size_bhsd_5 +00000100xx010000000xxxxxxxxxxxxx n 321 SVE mul z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx110000110xxxxxxxxxxxxx n 321 SVE mul z_size_bhsd_0 : z_size_bhsd_0 simm8_5 -001001011000xxxx01xxxx1xxxx1xxxx n 829 SVE nand p_b_0 : p10_zer p_b_5 p_b_16 -001001011100xxxx01xxxx1xxxx1xxxx w 830 SVE nands p_b_0 : p10_zer p_b_5 p_b_16 -00000100xx010111101xxxxxxxxxxxxx n 323 SVE neg z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -001001011000xxxx01xxxx1xxxx0xxxx n 831 SVE nor p_b_0 : p10_zer p_b_5 p_b_16 -001001011100xxxx01xxxx1xxxx0xxxx w 832 SVE nors p_b_0 : p10_zer p_b_5 p_b_16 -00000100xx011110101xxxxxxxxxxxxx n 325 SVE not z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 -001001011000xxxx01xxxx0xxxx1xxxx n 326 SVE orn p_b_0 : p10_zer p_b_5 p_b_16 -001001011100xxxx01xxxx0xxxx1xxxx w 833 SVE orns p_b_0 : p10_zer p_b_5 p_b_16 -00000100xx011000000xxxxxxxxxxxxx n 327 SVE orr z0 : p10_lo z0 z5 bhsd_sz +001001011000xxxx01xxxx1xxxx1xxxx n 829 SVE nand p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001011100xxxx01xxxx1xxxx1xxxx w 830 SVE nands p_b_0 : p10_zer.gov p_b_5 p_b_16 +00000100xx010111101xxxxxxxxxxxxx n 323 SVE neg z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +001001011000xxxx01xxxx1xxxx0xxxx n 831 SVE nor p_b_0 : p10_zer.gov p_b_5 p_b_16 
+001001011100xxxx01xxxx1xxxx0xxxx w 832 SVE nors p_b_0 : p10_zer.gov p_b_5 p_b_16 +00000100xx011110101xxxxxxxxxxxxx n 325 SVE not z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 +001001011000xxxx01xxxx0xxxx1xxxx n 326 SVE orn p_b_0 : p10_zer.gov p_b_5 p_b_16 +001001011100xxxx01xxxx0xxxx1xxxx w 833 SVE orns p_b_0 : p10_zer.gov p_b_5 p_b_16 00000101000000xxxxxxxxxxxxxxxxxx n 327 SVE orr z_imm13_bhsd_0 : z_imm13_bhsd_0 imm13_const -001001011000xxxx01xxxx0xxxx0xxxx n 327 SVE orr p_b_0 : p10_zer p_b_5 p_b_16 +001001011000xxxx01xxxx0xxxx0xxxx n 327 SVE orr p_b_0 : p10_zer.gov p_b_5 p_b_16 00000100011xxxxx001100xxxxxxxxxx n 327 SVE orr z_d_0 : z_d_5 z_d_16 -00000100xx011000000xxxxxxxxxxxxx n 327 SVE orr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -001001011100xxxx01xxxx0xxxx0xxxx w 834 SVE orrs p_b_0 : p10_zer p_b_5 p_b_16 -0000010000011000001xxxxxxxxxxxxx n 919 SVE orv b0 : p10_lo z_size_bhsd_5 -0000010001011000001xxxxxxxxxxxxx n 919 SVE orv h0 : p10_lo z_size_bhsd_5 -0000010010011000001xxxxxxxxxxxxx n 919 SVE orv s0 : p10_lo z_size_bhsd_5 -0000010011011000001xxxxxxxxxxxxx n 919 SVE orv d0 : p10_lo z_size_bhsd_5 +00000100xx011000000xxxxxxxxxxxxx n 327 SVE orr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +001001011100xxxx01xxxx0xxxx0xxxx w 834 SVE orrs p_b_0 : p10_zer.gov p_b_5 p_b_16 +0000010000011000001xxxxxxxxxxxxx n 919 SVE orv b0 : p10_lo.gov z_size_bhsd_5 +0000010001011000001xxxxxxxxxxxxx n 919 SVE orv h0 : p10_lo.gov z_size_bhsd_5 +0000010010011000001xxxxxxxxxxxxx n 919 SVE orv s0 : p10_lo.gov z_size_bhsd_5 +0000010011011000001xxxxxxxxxxxxx n 919 SVE orv d0 : p10_lo.gov z_size_bhsd_5 0010010100011000111001000000xxxx n 894 SVE pfalse p_b_0 : -00100101010110001100000xxxx0xxxx w 895 SVE pfirst p_b_0 : p5 p_b_0 -00100101xx0110011100010xxxx0xxxx w 925 SVE pnext p_size_bhsd_0 : p5 p_size_bhsd_0 -1000010111xxxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_gpr_simm6_vl -10000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo 
svemem_vec_s_imm5 -11000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_vec_d_imm5 -11000100011xxxxx100xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo sveprf_gpr_vec64 -110001000x1xxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo sveprf_gpr_vec32 -100001000x1xxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo sveprf_gpr_vec32 -10000100000xxxxx110xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo sveprf_gpr_shf -1000010111xxxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_gpr_simm6_vl -10000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_s_imm5 -11000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_d_imm5 -11000100011xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo sveprf_gpr_vec64 -110001000x1xxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo sveprf_gpr_vec32 -100001000x1xxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo sveprf_gpr_vec32 -10000101100xxxxx110xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo sveprf_gpr_shf -1000010111xxxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_gpr_simm6_vl -10000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_s_imm5 -11000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_d_imm5 -11000100011xxxxx101xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo sveprf_gpr_vec64 -110001000x1xxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo sveprf_gpr_vec32 -100001000x1xxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo sveprf_gpr_vec32 -10000100100xxxxx110xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo sveprf_gpr_shf -1000010111xxxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_gpr_simm6_vl -10000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_s_imm5 -11000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_d_imm5 -11000100011xxxxx110xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo sveprf_gpr_vec64 -110001000x1xxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo sveprf_gpr_vec32 
-100001000x1xxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo sveprf_gpr_vec32 -10000101000xxxxx110xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo sveprf_gpr_shf -001001010101000011xxxx0xxxx00000 w 786 SVE ptest : p10 p_b_5 +00100101010110001100000xxxx0xxxx w 895 SVE pfirst p_b_0 : p5.gov p_b_0 +00100101xx0110011100010xxxx0xxxx w 925 SVE pnext p_size_bhsd_0 : p5.gov p_size_bhsd_0 +1000010111xxxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov svemem_gpr_simm6_vl +10000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov svemem_vec_s_imm5 +11000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov svemem_vec_d_imm5 +11000100011xxxxx100xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov sveprf_gpr_vec64 +110001000x1xxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov sveprf_gpr_vec32 +100001000x1xxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov sveprf_gpr_vec32 +10000100000xxxxx110xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo.gov sveprf_gpr_shf +1000010111xxxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov svemem_gpr_simm6_vl +10000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov svemem_vec_s_imm5 +11000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov svemem_vec_d_imm5 +11000100011xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov sveprf_gpr_vec64 +110001000x1xxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov sveprf_gpr_vec32 +100001000x1xxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov sveprf_gpr_vec32 +10000101100xxxxx110xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo.gov sveprf_gpr_shf +1000010111xxxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov svemem_gpr_simm6_vl +10000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov svemem_vec_s_imm5 +11000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov svemem_vec_d_imm5 +11000100011xxxxx101xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov sveprf_gpr_vec64 +110001000x1xxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 
p10_lo.gov sveprf_gpr_vec32 +100001000x1xxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov sveprf_gpr_vec32 +10000100100xxxxx110xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo.gov sveprf_gpr_shf +1000010111xxxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov svemem_gpr_simm6_vl +10000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov svemem_vec_s_imm5 +11000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov svemem_vec_d_imm5 +11000100011xxxxx110xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov sveprf_gpr_vec64 +110001000x1xxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov sveprf_gpr_vec32 +100001000x1xxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov sveprf_gpr_vec32 +10000101000xxxxx110xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo.gov sveprf_gpr_shf +001001010101000011xxxx0xxxx00000 w 786 SVE ptest : p10.gov p_b_5 00100101xx011000111000xxxxx0xxxx n 897 SVE ptrue p_size_bhsd_0 : pred_constr 00100101xx011001111000xxxxx0xxxx w 898 SVE ptrues p_size_bhsd_0 : pred_constr 00000101001100010100000xxxx0xxxx n 887 SVE punpkhi p_h_0 : p_b_5 00000101001100000100000xxxx0xxxx n 888 SVE punpklo p_h_0 : p_b_5 -00000101xx100111100xxxxxxxxxxxxx n 335 SVE rbit z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 +00000101xx100111100xxxxxxxxxxxxx n 335 SVE rbit z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 0010010100011001111100000000xxxx n 817 SVE rdffr p_b_0 : -00100101000110001111000xxxx0xxxx n 817 SVE rdffr p_b_0 : p5_zer -00100101010110001111000xxxx0xxxx w 818 SVE rdffrs p_b_0 : p5_zer +00100101000110001111000xxxx0xxxx n 817 SVE rdffr p_b_0 : p5_zer.gov +00100101010110001111000xxxx0xxxx w 818 SVE rdffrs p_b_0 : p5_zer.gov 000001001011111101010xxxxxxxxxxx n 936 SVE rdvl x0 : simm6_5 00000101xx1101000100000xxxx0xxxx n 337 SVE rev p_size_bhsd_0 : p_size_bhsd_5 00000101xx111000001110xxxxxxxxxx n 337 SVE rev z_size_bhsd_0 : z_size_bhsd_5 -00000101xx100100100xxxxxxxxxxxxx n 883 SVE revb z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 
-00000101xx100101100xxxxxxxxxxxxx n 884 SVE revh z_size_sd_0 : p10_mrg_lo z_size_sd_5 -0000010111100110100xxxxxxxxxxxxx n 885 SVE revw z_d_0 : p10_mrg_lo z_d_5 -00000100xx001100000xxxxxxxxxxxxx n 349 SVE sabd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx000000001xxxxxxxxxxxxx n 920 SVE saddv d0 : p10_lo z_size_bhs_5 -0110010101010010101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo z_h_5 -0110010111010000101xxxxxxxxxxxxx n 362 SVE scvtf z_d_0 : p10_mrg_lo z_s_5 -0110010101010100101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo z_s_5 -0110010110010100101xxxxxxxxxxxxx n 362 SVE scvtf z_s_0 : p10_mrg_lo z_s_5 -0110010111010110101xxxxxxxxxxxxx n 362 SVE scvtf z_d_0 : p10_mrg_lo z_d_5 -0110010101010110101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo z_d_5 -0110010111010100101xxxxxxxxxxxxx n 362 SVE scvtf z_s_0 : p10_mrg_lo z_d_5 -00000100xx010100000xxxxxxxxxxxxx n 363 SVE sdiv z_size_sd_0 : p10_mrg_lo z_size_sd_0 z_size_sd_5 -00000100xx010110000xxxxxxxxxxxxx n 794 SVE sdivr z_size_sd_0 : p10_mrg_lo z_size_sd_0 z_size_sd_5 +00000101xx100100100xxxxxxxxxxxxx n 883 SVE revb z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +00000101xx100101100xxxxxxxxxxxxx n 884 SVE revh z_size_sd_0 : p10_mrg_lo.gov z_size_sd_5 +0000010111100110100xxxxxxxxxxxxx n 885 SVE revw z_d_0 : p10_mrg_lo.gov z_d_5 +00000100xx001100000xxxxxxxxxxxxx n 349 SVE sabd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx000000001xxxxxxxxxxxxx n 920 SVE saddv d0 : p10_lo.gov z_size_bhs_5 +0110010101010010101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo.gov z_h_5 +0110010111010000101xxxxxxxxxxxxx n 362 SVE scvtf z_d_0 : p10_mrg_lo.gov z_s_5 +0110010101010100101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo.gov z_s_5 +0110010110010100101xxxxxxxxxxxxx n 362 SVE scvtf z_s_0 : p10_mrg_lo.gov z_s_5 +0110010111010110101xxxxxxxxxxxxx n 362 SVE scvtf z_d_0 : p10_mrg_lo.gov z_d_5 +0110010101010110101xxxxxxxxxxxxx n 362 SVE scvtf z_h_0 : p10_mrg_lo.gov z_d_5 
+0110010111010100101xxxxxxxxxxxxx n 362 SVE scvtf z_s_0 : p10_mrg_lo.gov z_d_5 +00000100xx010100000xxxxxxxxxxxxx n 363 SVE sdiv z_size_sd_0 : p10_mrg_lo.gov z_size_sd_0 z_size_sd_5 +00000100xx010110000xxxxxxxxxxxxx n 794 SVE sdivr z_size_sd_0 : p10_mrg_lo.gov z_size_sd_0 z_size_sd_5 01000100110xxxxx000000xxxxxxxxxx n 364 SVE sdot z_d_0 : z_d_0 z_h_5 z_h_16 01000100100xxxxx000000xxxxxxxxxx n 364 SVE sdot z_s_0 : z_s_0 z_b_5 z_b_16 01000100111xxxxx000000xxxxxxxxxx n 364 SVE sdot z_d_0 : z_d_0 z_h_5 z4_h_16 i1_index_20 01000100101xxxxx000000xxxxxxxxxx n 364 SVE sdot z_s_0 : z_s_0 z_b_5 z3_b_16 i2_index_19 -001001010000xxxx01xxxx1xxxx1xxxx n 896 SVE sel p_b_0 : p10 p_b_5 p_b_16 -00000101xx1xxxxx11xxxxxxxxxxxxxx n 896 SVE sel z_size_bhsd_0 : p10 z_size_bhsd_5 z_size_bhsd_16 +001001010000xxxx01xxxx1xxxx1xxxx n 896 SVE sel p_b_0 : p10.gov p_b_5 p_b_16 +00000101xx1xxxxx11xxxxxxxxxxxxxx n 896 SVE sel z_size_bhsd_0 : p10.gov z_size_bhsd_5 z_size_bhsd_16 00100101001011001001000000000000 n 819 SVE setffr : -00000100xx001000000xxxxxxxxxxxxx n 386 SVE smax z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx001000000xxxxxxxxxxxxx n 386 SVE smax z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx101000110xxxxxxxxxxxxx n 386 SVE smax z_size_bhsd_0 : z_size_bhsd_0 simm8_5 -0000010000001000001xxxxxxxxxxxxx n 388 SVE smaxv b0 : p10_lo z_size_bhsd_5 -0000010001001000001xxxxxxxxxxxxx n 388 SVE smaxv h0 : p10_lo z_size_bhsd_5 -0000010010001000001xxxxxxxxxxxxx n 388 SVE smaxv s0 : p10_lo z_size_bhsd_5 -0000010011001000001xxxxxxxxxxxxx n 388 SVE smaxv d0 : p10_lo z_size_bhsd_5 -00000100xx001010000xxxxxxxxxxxxx n 390 SVE smin z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +0000010000001000001xxxxxxxxxxxxx n 388 SVE smaxv b0 : p10_lo.gov z_size_bhsd_5 +0000010001001000001xxxxxxxxxxxxx n 388 SVE smaxv h0 : p10_lo.gov z_size_bhsd_5 +0000010010001000001xxxxxxxxxxxxx n 388 SVE smaxv s0 : p10_lo.gov z_size_bhsd_5 +0000010011001000001xxxxxxxxxxxxx n 388 SVE 
smaxv d0 : p10_lo.gov z_size_bhsd_5 +00000100xx001010000xxxxxxxxxxxxx n 390 SVE smin z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx101010110xxxxxxxxxxxxx n 390 SVE smin z_size_bhsd_0 : z_size_bhsd_0 simm8_5 -0000010000001010001xxxxxxxxxxxxx n 392 SVE sminv b0 : p10_lo z_size_bhsd_5 -0000010001001010001xxxxxxxxxxxxx n 392 SVE sminv h0 : p10_lo z_size_bhsd_5 -0000010010001010001xxxxxxxxxxxxx n 392 SVE sminv s0 : p10_lo z_size_bhsd_5 -0000010011001010001xxxxxxxxxxxxx n 392 SVE sminv d0 : p10_lo z_size_bhsd_5 +0000010000001010001xxxxxxxxxxxxx n 392 SVE sminv b0 : p10_lo.gov z_size_bhsd_5 +0000010001001010001xxxxxxxxxxxxx n 392 SVE sminv h0 : p10_lo.gov z_size_bhsd_5 +0000010010001010001xxxxxxxxxxxxx n 392 SVE sminv s0 : p10_lo.gov z_size_bhsd_5 +0000010011001010001xxxxxxxxxxxxx n 392 SVE sminv d0 : p10_lo.gov z_size_bhsd_5 01000101000xxxxx100110xxxxxxxxxx n 958 I8MM smmla z_s_0 : z_s_0 z_b_5 z_b_16 -00000100xx010010000xxxxxxxxxxxxx n 399 SVE smulh z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000101xx101100100xxxxxxxxxxxxx n 882 SVE splice z_size_bhsd_0 : p10_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx1xxxxx000100xxxxxxxxxx n 403 SVE sqadd z0 : z5 z16 bhsd_sz +00000100xx010010000xxxxxxxxxxxxx n 399 SVE smulh z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000101xx101100100xxxxxxxxxxxxx n 882 SVE splice z_size_bhsd_0 : p10_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx10010011xxxxxxxxxxxxxx n 403 SVE sqadd z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 00000100xx1xxxxx000100xxxxxxxxxx n 403 SVE sqadd z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 000001000010xxxx111110xxxxxxxxxx n 852 SVE sqdecb x0 : w0 pred_constr mul imm4_16p1 @@ -665,96 +660,93 @@ 000001001010xxxx111100xxxxxxxxxx n 858 SVE sqincw x0 : w0 pred_constr mul imm4_16p1 000001001011xxxx111100xxxxxxxxxx n 858 SVE sqincw x0 : x0 pred_constr mul imm4_16p1 000001001010xxxx110000xxxxxxxxxx n 858 SVE sqincw z_s_0 : z_s_0 pred_constr mul imm4_16p1 
-00000100xx1xxxxx000110xxxxxxxxxx n 425 SVE sqsub z0 : z5 z16 bhsd_sz 00100101xx10011011xxxxxxxxxxxxxx n 425 SVE sqsub z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 00000100xx1xxxxx000110xxxxxxxxxx n 425 SVE sqsub z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -11100100000xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_b_0 p10_lo -11100100001xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_h_0 p10_lo -11100100010xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_s_0 p10_lo -11100100011xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_d_0 p10_lo -11100100011xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_s_imm5 : z_s_0 p10_lo -11100100010xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_d_imm5 : z_d_0 p10_lo -11100100000xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec64 : z_d_0 p10_lo -11100100000xxxxx1x0xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec32_st : z_d_0 p10_lo -11100100010xxxxx1x0xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec32_st : z_s_0 p10_lo -111001000xx0xxxx111xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_simm4_vl_1reg : z_size21_bhsd_0 p10_lo -11100101110xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_vec_d_imm5 : z_d_0 p10_lo -11100101101xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo -11100101100xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo -11100101101xxxxx1x0xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec32_st : z_d_0 p10_lo -11100101100xxxxx1x0xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec32_st : z_d_0 p10_lo -11100101111xxxxx010xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_shf : z_msz_bhsd_0 p10_lo -111001011110xxxx111xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_simm4_vl_1reg : z_d_0 p10_lo -11100100111xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_s_imm5 : z_s_0 p10_lo -11100100110xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_d_imm5 : z_d_0 p10_lo -11100100101xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 p10_lo -11100100100xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 
p10_lo -11100100101xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_d_0 p10_lo -11100100100xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_d_0 p10_lo -11100100111xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_s_0 p10_lo -11100100110xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_s_0 p10_lo -111001001xxxxxxx010xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_shf : z_size21_hsd_0 p10_lo -111001001xx0xxxx111xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_simm4_vl_1reg : z_size21_hsd_0 p10_lo -11100101011xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_s_imm5 : z_s_0 p10_lo -11100101010xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_d_imm5 : z_d_0 p10_lo -11100101001xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec64 : z_d_0 p10_lo -11100101000xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec64 : z_d_0 p10_lo -11100101001xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_d_0 p10_lo -11100101000xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_d_0 p10_lo -11100101011xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_s_0 p10_lo -11100101010xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_s_0 p10_lo -11100101010xxxxx010xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_shf : z_s_0 p10_lo -11100101011xxxxx010xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_shf : z_d_0 p10_lo -1110010101x0xxxx111xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_simm4_vl_1reg : z_sz21_sd_0 p10_lo -11100100001xxxxx011xxxxxxxxxxxxx n 970 SVE st2b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 p10_lo -111001000011xxxx111xxxxxxxxxxxxx n 970 SVE st2b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 p10_lo -11100101101xxxxx011xxxxxxxxxxxxx n 995 SVE st2d svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo -111001011011xxxx111xxxxxxxxxxxxx n 995 SVE st2d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 p10_lo -11100100101xxxxx011xxxxxxxxxxxxx n 996 SVE st2h svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo -111001001011xxxx111xxxxxxxxxxxxx n 
996 SVE st2h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 p10_lo -11100101001xxxxx011xxxxxxxxxxxxx n 997 SVE st2w svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo -111001010011xxxx111xxxxxxxxxxxxx n 997 SVE st2w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 p10_lo -11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -111001000101xxxx111xxxxxxxxxxxxx n 971 SVE st3b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -11100101110xxxxx011xxxxxxxxxxxxx n 998 SVE st3d svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -111001011101xxxx111xxxxxxxxxxxxx n 998 SVE st3d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -11100100110xxxxx011xxxxxxxxxxxxx n 999 SVE st3h svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -111001001101xxxx111xxxxxxxxxxxxx n 999 SVE st3h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -11100101010xxxxx011xxxxxxxxxxxxx n 1000 SVE st3w svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -111001010101xxxx111xxxxxxxxxxxxx n 1000 SVE st3w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo -11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -111001000111xxxx111xxxxxxxxxxxxx n 972 SVE st4b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -11100101111xxxxx011xxxxxxxxxxxxx n 1001 SVE st4d svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -111001011111xxxx111xxxxxxxxxxxxx n 1001 SVE st4d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 
z_msz_bhsd_0p3 p10_lo -11100100111xxxxx011xxxxxxxxxxxxx n 1002 SVE st4h svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -111001001111xxxx111xxxxxxxxxxxxx n 1002 SVE st4h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -11100101011xxxxx011xxxxxxxxxxxxx n 1003 SVE st4w svemem_msz_stgpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -111001010111xxxx111xxxxxxxxxxxxx n 1003 SVE st4w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo -11100100000xxxxx011xxxxxxxxxxxxx n 952 SVE stnt1b svemem_gprs_b1 : z_b_0 p10_lo -11100100000xxxxx011xxxxxxxxxxxxx n 952 SVE stnt1b svemem_gprs_b1 : z_b_0 p10_lo -111001000001xxxx111xxxxxxxxxxxxx n 952 SVE stnt1b svemem_gpr_simm4_vl_xreg : z_b_0 p10_lo -11100101100xxxxx011xxxxxxxxxxxxx n 1004 SVE stnt1d svemem_msz_stgpr_shf : z_msz_bhsd_0 p10_lo -111001011001xxxx111xxxxxxxxxxxxx n 1004 SVE stnt1d svemem_gpr_simm4_vl_xreg : z_d_0 p10_lo -11100100100xxxxx011xxxxxxxxxxxxx n 1005 SVE stnt1h svemem_msz_stgpr_shf : z_msz_bhsd_0 p10_lo -111001001001xxxx111xxxxxxxxxxxxx n 1005 SVE stnt1h svemem_gpr_simm4_vl_xreg : z_h_0 p10_lo -11100101000xxxxx011xxxxxxxxxxxxx n 1006 SVE stnt1w svemem_msz_stgpr_shf : z_msz_bhsd_0 p10_lo -111001010001xxxx111xxxxxxxxxxxxx n 1006 SVE stnt1w svemem_gpr_simm4_vl_xreg : z_s_0 p10_lo +11100100000xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_b_0 p10_lo.gov +11100100001xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_h_0 p10_lo.gov +11100100010xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_s_0 p10_lo.gov +11100100011xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_d_0 p10_lo.gov +11100100011xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_s_imm5 : z_s_0 p10_lo.gov +11100100010xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_d_imm5 : z_d_0 p10_lo.gov +11100100000xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec64 : z_d_0 p10_lo.gov 
+11100100000xxxxx1x0xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100100010xxxxx1x0xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec32_st : z_s_0 p10_lo.gov +111001000xx0xxxx111xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_simm4_vl_1reg : z_size21_bhsd_0 p10_lo.gov +11100101110xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_vec_d_imm5 : z_d_0 p10_lo.gov +11100101101xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100101100xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100101101xxxxx1x0xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100101100xxxxx1x0xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100101111xxxxx010xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_shf : z_msz_bhsd_0 p10_lo.gov +111001011110xxxx111xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_simm4_vl_1reg : z_d_0 p10_lo.gov +11100100111xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_s_imm5 : z_s_0 p10_lo.gov +11100100110xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_d_imm5 : z_d_0 p10_lo.gov +11100100101xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100100100xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100100101xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100100100xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100100111xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_s_0 p10_lo.gov +11100100110xxxxx1x0xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec32_st : z_s_0 p10_lo.gov +111001001xxxxxxx010xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_shf : z_size21_hsd_0 p10_lo.gov +111001001xx0xxxx111xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_simm4_vl_1reg : z_size21_hsd_0 p10_lo.gov +11100101011xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_s_imm5 : z_s_0 p10_lo.gov +11100101010xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_d_imm5 : z_d_0 p10_lo.gov +11100101001xxxxx101xxxxxxxxxxxxx n 982 SVE 
st1w svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100101000xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec64 : z_d_0 p10_lo.gov +11100101001xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100101000xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_d_0 p10_lo.gov +11100101011xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_s_0 p10_lo.gov +11100101010xxxxx1x0xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec32_st : z_s_0 p10_lo.gov +11100101010xxxxx010xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_shf : z_s_0 p10_lo.gov +11100101011xxxxx010xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_shf : z_d_0 p10_lo.gov +1110010101x0xxxx111xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_simm4_vl_1reg : z_sz21_sd_0 p10_lo.gov +11100100001xxxxx011xxxxxxxxxxxxx n 970 SVE st2b svemem_gpr_shf : z_b_0 z_msz_bhsd_0p1 p10_lo.gov +111001000011xxxx111xxxxxxxxxxxxx n 970 SVE st2b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 p10_lo.gov +11100101101xxxxx011xxxxxxxxxxxxx n 995 SVE st2d svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo.gov +111001011011xxxx111xxxxxxxxxxxxx n 995 SVE st2d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 p10_lo.gov +11100100101xxxxx011xxxxxxxxxxxxx n 996 SVE st2h svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo.gov +111001001011xxxx111xxxxxxxxxxxxx n 996 SVE st2h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 p10_lo.gov +11100101001xxxxx011xxxxxxxxxxxxx n 997 SVE st2w svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 p10_lo.gov +111001010011xxxx111xxxxxxxxxxxxx n 997 SVE st2w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 p10_lo.gov +11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gpr_shf : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gpr_shf : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +111001000101xxxx111xxxxxxxxxxxxx n 971 SVE st3b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +11100101110xxxxx011xxxxxxxxxxxxx n 998 SVE st3d 
svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +111001011101xxxx111xxxxxxxxxxxxx n 998 SVE st3d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +11100100110xxxxx011xxxxxxxxxxxxx n 999 SVE st3h svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +111001001101xxxx111xxxxxxxxxxxxx n 999 SVE st3h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +11100101010xxxxx011xxxxxxxxxxxxx n 1000 SVE st3w svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +111001010101xxxx111xxxxxxxxxxxxx n 1000 SVE st3w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo.gov +11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gpr_shf : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gpr_shf : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +111001000111xxxx111xxxxxxxxxxxxx n 972 SVE st4b svemem_gpr_simm4_vl_xreg : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +11100101111xxxxx011xxxxxxxxxxxxx n 1001 SVE st4d svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +111001011111xxxx111xxxxxxxxxxxxx n 1001 SVE st4d svemem_gpr_simm4_vl_xreg : z_d_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +11100100111xxxxx011xxxxxxxxxxxxx n 1002 SVE st4h svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +111001001111xxxx111xxxxxxxxxxxxx n 1002 SVE st4h svemem_gpr_simm4_vl_xreg : z_h_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +11100101011xxxxx011xxxxxxxxxxxxx n 1003 SVE st4w svemem_gpr_shf : z_msz_bhsd_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +111001010111xxxx111xxxxxxxxxxxxx n 1003 SVE st4w svemem_gpr_simm4_vl_xreg : z_s_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo.gov +11100100000xxxxx011xxxxxxxxxxxxx n 952 SVE stnt1b svemem_gpr_shf : z_b_0 p10_lo.gov 
+111001000001xxxx111xxxxxxxxxxxxx n 952 SVE stnt1b svemem_gpr_simm4_vl_xreg : z_b_0 p10_lo.gov +11100101100xxxxx011xxxxxxxxxxxxx n 1004 SVE stnt1d svemem_gpr_shf : z_msz_bhsd_0 p10_lo.gov +111001011001xxxx111xxxxxxxxxxxxx n 1004 SVE stnt1d svemem_gpr_simm4_vl_xreg : z_d_0 p10_lo.gov +11100100100xxxxx011xxxxxxxxxxxxx n 1005 SVE stnt1h svemem_gpr_shf : z_msz_bhsd_0 p10_lo.gov +111001001001xxxx111xxxxxxxxxxxxx n 1005 SVE stnt1h svemem_gpr_simm4_vl_xreg : z_h_0 p10_lo.gov +11100101000xxxxx011xxxxxxxxxxxxx n 1006 SVE stnt1w svemem_gpr_shf : z_msz_bhsd_0 p10_lo.gov +111001010001xxxx111xxxxxxxxxxxxx n 1006 SVE stnt1w svemem_gpr_simm4_vl_xreg : z_s_0 p10_lo.gov 1110010110xxxxxx000xxxxxxxx0xxxx n 457 SVE str svemem_gpr_simm9_vl : p0 1110010110xxxxxx010xxxxxxxxxxxxx n 457 SVE str svemem_gpr_simm9_vl : z0 -00000100xx1xxxxx000001xxxxxxxxxx n 470 SVE sub z0 : z5 z16 bhsd_sz -00000100xx000001000xxxxxxxxxxxxx n 470 SVE sub z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx000001000xxxxxxxxxxxxx n 470 SVE sub z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx10000111xxxxxxxxxxxxxx n 470 SVE sub z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 00000100xx1xxxxx000001xxxxxxxxxx n 470 SVE sub z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -00000100xx000011000xxxxxxxxxxxxx n 784 SVE subr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx000011000xxxxxxxxxxxxx n 784 SVE subr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx10001111xxxxxxxxxxxxxx n 784 SVE subr z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 01000100101xxxxx000111xxxxxxxxxx n 959 I8MM sudot z_s_0 : z_s_0 z_b_5 z3_b_16 i2_index_19 00000101xx110001001110xxxxxxxxxx n 889 SVE sunpkhi z_size_hsd_0 : z_tb_bhs_5 00000101xx110000001110xxxxxxxxxx n 890 SVE sunpklo z_size_hsd_0 : z_tb_bhs_5 -00000100xx010000101xxxxxxxxxxxxx n 799 SVE sxtb z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -00000100xx010010101xxxxxxxxxxxxx n 800 SVE sxth z_size_sd_0 : p10_mrg_lo z_size_sd_5 
-0000010011010100101xxxxxxxxxxxxx n 801 SVE sxtw z_d_0 : p10_mrg_lo z_d_5 +00000100xx010000101xxxxxxxxxxxxx n 799 SVE sxtb z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +00000100xx010010101xxxxxxxxxxxxx n 800 SVE sxth z_size_sd_0 : p10_mrg_lo.gov z_size_sd_5 +0000010011010100101xxxxxxxxxxxxx n 801 SVE sxtw z_d_0 : p10_mrg_lo.gov z_d_5 00000101xx1xxxxx001100xxxxxxxxxx n 490 SVE tbl z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 00000101xx10xxxx0101000xxxx0xxxx n 494 SVE trn1 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 00000101xx1xxxxx011100xxxxxxxxxx n 494 SVE trn1 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 @@ -762,36 +754,35 @@ 00000101xx10xxxx0101010xxxx0xxxx n 495 SVE trn2 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 00000101xx1xxxxx011101xxxxxxxxxx n 495 SVE trn2 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 00000101101xxxxx000111xxxxxxxxxx n 495 F64MM trn2 z_q_0 : z_q_5 z_q_16 -00000100xx001101000xxxxxxxxxxxxx n 499 SVE uabd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx000001001xxxxxxxxxxxxx n 921 SVE uaddv d0 : p10_lo z_size_bhsd_5 -0110010101010011101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo z_h_5 -0110010111010001101xxxxxxxxxxxxx n 510 SVE ucvtf z_d_0 : p10_mrg_lo z_s_5 -0110010101010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo z_s_5 -0110010110010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_s_0 : p10_mrg_lo z_s_5 -0110010111010111101xxxxxxxxxxxxx n 510 SVE ucvtf z_d_0 : p10_mrg_lo z_d_5 -0110010101010111101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo z_d_5 -0110010111010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_s_0 : p10_mrg_lo z_d_5 -00000100xx010101000xxxxxxxxxxxxx n 511 SVE udiv z_size_sd_0 : p10_mrg_lo z_size_sd_0 z_size_sd_5 -00000100xx010111000xxxxxxxxxxxxx n 795 SVE udivr z_size_sd_0 : p10_mrg_lo z_size_sd_0 z_size_sd_5 +00000100xx001101000xxxxxxxxxxxxx n 499 SVE uabd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx000001001xxxxxxxxxxxxx n 921 SVE uaddv d0 : p10_lo.gov z_size_bhsd_5 
+0110010101010011101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo.gov z_h_5 +0110010111010001101xxxxxxxxxxxxx n 510 SVE ucvtf z_d_0 : p10_mrg_lo.gov z_s_5 +0110010101010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo.gov z_s_5 +0110010110010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_s_0 : p10_mrg_lo.gov z_s_5 +0110010111010111101xxxxxxxxxxxxx n 510 SVE ucvtf z_d_0 : p10_mrg_lo.gov z_d_5 +0110010101010111101xxxxxxxxxxxxx n 510 SVE ucvtf z_h_0 : p10_mrg_lo.gov z_d_5 +0110010111010101101xxxxxxxxxxxxx n 510 SVE ucvtf z_s_0 : p10_mrg_lo.gov z_d_5 +00000100xx010101000xxxxxxxxxxxxx n 511 SVE udiv z_size_sd_0 : p10_mrg_lo.gov z_size_sd_0 z_size_sd_5 +00000100xx010111000xxxxxxxxxxxxx n 795 SVE udivr z_size_sd_0 : p10_mrg_lo.gov z_size_sd_0 z_size_sd_5 01000100110xxxxx000001xxxxxxxxxx n 512 SVE udot z_d_0 : z_d_0 z_h_5 z_h_16 01000100100xxxxx000001xxxxxxxxxx n 512 SVE udot z_s_0 : z_s_0 z_b_5 z_b_16 01000100111xxxxx000001xxxxxxxxxx n 512 SVE udot z_d_0 : z_d_0 z_h_5 z4_h_16 i1_index_20 01000100101xxxxx000001xxxxxxxxxx n 512 SVE udot z_s_0 : z_s_0 z_b_5 z3_b_16 i2_index_19 -00000100xx001001000xxxxxxxxxxxxx n 516 SVE umax z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +00000100xx001001000xxxxxxxxxxxxx n 516 SVE umax z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx101001110xxxxxxxxxxxxx n 516 SVE umax z_size_bhsd_0 : z_size_bhsd_0 imm8_5 -0000010000001001001xxxxxxxxxxxxx n 518 SVE umaxv b0 : p10_lo z_size_bhsd_5 -0000010001001001001xxxxxxxxxxxxx n 518 SVE umaxv h0 : p10_lo z_size_bhsd_5 -0000010010001001001xxxxxxxxxxxxx n 518 SVE umaxv s0 : p10_lo z_size_bhsd_5 -0000010011001001001xxxxxxxxxxxxx n 518 SVE umaxv d0 : p10_lo z_size_bhsd_5 -00000100xx001011000xxxxxxxxxxxxx n 519 SVE umin z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +0000010000001001001xxxxxxxxxxxxx n 518 SVE umaxv b0 : p10_lo.gov z_size_bhsd_5 +0000010001001001001xxxxxxxxxxxxx n 518 SVE umaxv h0 : p10_lo.gov z_size_bhsd_5 +0000010010001001001xxxxxxxxxxxxx n 518 SVE umaxv 
s0 : p10_lo.gov z_size_bhsd_5 +0000010011001001001xxxxxxxxxxxxx n 518 SVE umaxv d0 : p10_lo.gov z_size_bhsd_5 +00000100xx001011000xxxxxxxxxxxxx n 519 SVE umin z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx101011110xxxxxxxxxxxxx n 519 SVE umin z_size_bhsd_0 : z_size_bhsd_0 imm8_5 -0000010000001011001xxxxxxxxxxxxx n 521 SVE uminv b0 : p10_lo z_size_bhsd_5 -0000010001001011001xxxxxxxxxxxxx n 521 SVE uminv h0 : p10_lo z_size_bhsd_5 -0000010010001011001xxxxxxxxxxxxx n 521 SVE uminv s0 : p10_lo z_size_bhsd_5 -0000010011001011001xxxxxxxxxxxxx n 521 SVE uminv d0 : p10_lo z_size_bhsd_5 +0000010000001011001xxxxxxxxxxxxx n 521 SVE uminv b0 : p10_lo.gov z_size_bhsd_5 +0000010001001011001xxxxxxxxxxxxx n 521 SVE uminv h0 : p10_lo.gov z_size_bhsd_5 +0000010010001011001xxxxxxxxxxxxx n 521 SVE uminv s0 : p10_lo.gov z_size_bhsd_5 +0000010011001011001xxxxxxxxxxxxx n 521 SVE uminv d0 : p10_lo.gov z_size_bhsd_5 01000101110xxxxx100110xxxxxxxxxx n 960 I8MM ummla z_s_0 : z_s_0 z_b_5 z_b_16 -00000100xx010011000xxxxxxxxxxxxx n 528 SVE umulh z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx1xxxxx000101xxxxxxxxxx n 531 SVE uqadd z0 : z5 z16 bhsd_sz +00000100xx010011000xxxxxxxxxxxxx n 528 SVE umulh z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 00100101xx10010111xxxxxxxxxxxxxx n 531 SVE uqadd z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1 00000100xx1xxxxx000101xxxxxxxxxx n 531 SVE uqadd z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 000001000010xxxx111111xxxxxxxxxx n 859 SVE uqdecb w0 : w0 pred_constr mul imm4_16p1 @@ -822,7 +813,6 @@ 000001001010xxxx111101xxxxxxxxxx n 866 SVE uqincw w0 : w0 pred_constr mul imm4_16p1 000001001011xxxx111101xxxxxxxxxx n 866 SVE uqincw x0 : x0 pred_constr mul imm4_16p1 000001001010xxxx110001xxxxxxxxxx n 866 SVE uqincw z_s_0 : z_s_0 pred_constr mul imm4_16p1 -00000100xx1xxxxx000111xxxxxxxxxx n 538 SVE uqsub z0 : z5 z16 bhsd_sz 00100101xx10011111xxxxxxxxxxxxxx n 538 SVE uqsub z_size_bhsd_0 : z_size_bhsd_0 imm8_5 
lsl shift1 00000100xx1xxxxx000111xxxxxxxxxx n 538 SVE uqsub z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 01000100100xxxxx011110xxxxxxxxxx n 961 I8MM usdot z_s_0 : z_s_0 z_b_5 z_b_16 @@ -830,15 +820,15 @@ 01000101100xxxxx100110xxxxxxxxxx n 962 I8MM usmmla z_s_0 : z_s_0 z_b_5 z_b_16 00000101xx110011001110xxxxxxxxxx n 891 SVE uunpkhi z_size_hsd_0 : z_tb_bhs_5 00000101xx110010001110xxxxxxxxxx n 892 SVE uunpklo z_size_hsd_0 : z_tb_bhs_5 -00000100xx010001101xxxxxxxxxxxxx n 802 SVE uxtb z_size_hsd_0 : p10_mrg_lo z_size_hsd_5 -00000100xx010011101xxxxxxxxxxxxx n 803 SVE uxth z_size_sd_0 : p10_mrg_lo z_size_sd_5 -0000010011010101101xxxxxxxxxxxxx n 804 SVE uxtw z_d_0 : p10_mrg_lo z_d_5 +00000100xx010001101xxxxxxxxxxxxx n 802 SVE uxtb z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_5 +00000100xx010011101xxxxxxxxxxxxx n 803 SVE uxth z_size_sd_0 : p10_mrg_lo.gov z_size_sd_5 +0000010011010101101xxxxxxxxxxxxx n 804 SVE uxtw z_d_0 : p10_mrg_lo.gov z_d_5 00000101xx10xxxx0100100xxxx0xxxx n 557 SVE uzp1 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 00000101xx1xxxxx011010xxxxxxxxxx n 557 SVE uzp1 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -00000101101xxxxx000010xxxxxxxxxx n 557 F64MM uzp1 z_q_0 : z_q_5 z_q_16 +00000101101xxxxx000010xxxxxxxxxx n 557 F64MM uzp1 z_q_0 : z_q_5 z_q_16 00000101xx10xxxx0100110xxxx0xxxx n 558 SVE uzp2 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 00000101xx1xxxxx011011xxxxxxxxxx n 558 SVE uzp2 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -00000101101xxxxx000011xxxxxxxxxx n 558 F64MM uzp2 z_q_0 : z_q_5 z_q_16 +00000101101xxxxx000011xxxxxxxxxx n 558 F64MM uzp2 z_q_0 : z_q_5 z_q_16 00100101xx1xxxxx000001xxxxx1xxxx w 877 SVE whilele p_size_bhsd_0 : w5 w16 00100101xx1xxxxx000101xxxxx1xxxx w 877 SVE whilele p_size_bhsd_0 : x5 x16 00100101xx1xxxxx000011xxxxx0xxxx w 878 SVE whilelo p_size_bhsd_0 : w5 w16 @@ -850,7 +840,7 @@ 00100101001010001001000xxxx00000 n 820 SVE wrffr : p_b_5 00000101xx10xxxx0100000xxxx0xxxx n 565 SVE zip1 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 
00000101xx1xxxxx011000xxxxxxxxxx n 565 SVE zip1 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 -00000101101xxxxx000000xxxxxxxxxx n 565 F64MM zip1 z_q_0 : z_q_5 z_q_16 +00000101101xxxxx000000xxxxxxxxxx n 565 F64MM zip1 z_q_0 : z_q_5 z_q_16 00000101101xxxxx000001xxxxxxxxxx n 566 SVE zip2 z_q_0 : z_q_5 z_q_16 00000101xx10xxxx0100010xxxx0xxxx n 566 SVE zip2 p_size_bhsd_0 : p_size_bhsd_5 p_size_bhsd_16 00000101xx1xxxxx011001xxxxxxxxxx n 566 SVE zip2 z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 diff --git a/core/ir/aarch64/codec_sve2.txt b/core/ir/aarch64/codec_sve2.txt index 093e99cf4f0..4b3dda25493 100644 --- a/core/ir/aarch64/codec_sve2.txt +++ b/core/ir/aarch64/codec_sve2.txt @@ -42,7 +42,7 @@ 01000101010xxxxx110101xxxxxxxxxx n 1074 SVE2 adclt z_d_0 : z_d_0 z_d_5 z_d_16 01000101xx1xxxxx011000xxxxxxxxxx n 1082 SVE2 addhnb z_sizep1_bhs_0 : z_size_hsd_5 z_size_hsd_16 01000101xx1xxxxx011001xxxxxxxxxx n 1083 SVE2 addhnt z_sizep1_bhs_0 : z_sizep1_bhs_0 z_size_hsd_5 z_size_hsd_16 -01000100xx010001101xxxxxxxxxxxxx n 12 SVE2 addp z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010001101xxxxxxxxxxxxx n 12 SVE2 addp z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 0100010100100010111001xxxxxxxxxx n 17 SVEAES aesd z_b_0 : z_b_0 z_b_5 0100010100100010111000xxxxxxxxxx n 18 SVEAES aese z_b_0 : z_b_0 z_b_5 010001010010000011100100000xxxxx n 19 SVEAES aesimc z_b_0 : z_b_0 @@ -64,18 +64,18 @@ 00000100001xxxxx001110xxxxxxxxxx n 600 SVE2 eor3 z_d_0 : z_d_0 z_d_16 z_d_5 01000101xx0xxxxx100100xxxxxxxxxx n 1078 SVE2 eorbt z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 z_size_bhsd_16 01000101xx0xxxxx100101xxxxxxxxxx n 1079 SVE2 eortb z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 z_size_bhsd_16 -01100100xx010000100xxxxxxxxxxxxx n 99 SVE2 faddp z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -0110010010001001101xxxxxxxxxxxxx n 1156 SVE2 fcvtlt z_s_0 : p10_mrg_lo z_msz_bhsd_5 -0110010011001011101xxxxxxxxxxxxx n 1156 SVE2 fcvtlt z_d_0 : p10_mrg_lo z_s_5 
-0110010011001010101xxxxxxxxxxxxx n 1157 SVE2 fcvtnt z_s_0 : z_s_0 p10_mrg_lo z_d_5 -0110010010001000101xxxxxxxxxxxxx n 1157 SVE2 fcvtnt z_msz_bhsd_0 : z_msz_bhsd_0 p10_mrg_lo z_s_5 -0110010100001010101xxxxxxxxxxxxx n 1158 SVE2 fcvtx z_msz_bhsd_0 : p10_mrg_lo z_d_5 -0110010000001010101xxxxxxxxxxxxx n 1159 SVE2 fcvtxnt z_s_0 : z_s_0 p10_mrg_lo z_d_5 -0110010100011xx0101xxxxxxxxxxxxx n 1160 SVE2 flogb z_size17_hsd_0 : p10_mrg_lo z_size17_hsd_5 -01100100xx010100100xxxxxxxxxxxxx n 131 SVE2 fmaxnmp z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100100xx010110100xxxxxxxxxxxxx n 133 SVE2 fmaxp z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100100xx010101100xxxxxxxxxxxxx n 137 SVE2 fminnmp z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 -01100100xx010111100xxxxxxxxxxxxx n 139 SVE2 fminp z_size_hsd_0 : p10_mrg_lo z_size_hsd_0 z_size_hsd_5 +01100100xx010000100xxxxxxxxxxxxx n 99 SVE2 faddp z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +0110010010001001101xxxxxxxxxxxxx n 1156 SVE2 fcvtlt z_s_0 : p10_mrg_lo.gov z_msz_bhsd_5 +0110010011001011101xxxxxxxxxxxxx n 1156 SVE2 fcvtlt z_d_0 : p10_mrg_lo.gov z_s_5 +0110010011001010101xxxxxxxxxxxxx n 1157 SVE2 fcvtnt z_s_0 : z_s_0 p10_mrg_lo.gov z_d_5 +0110010010001000101xxxxxxxxxxxxx n 1157 SVE2 fcvtnt z_msz_bhsd_0 : z_msz_bhsd_0 p10_mrg_lo.gov z_s_5 +0110010100001010101xxxxxxxxxxxxx n 1158 SVE2 fcvtx z_msz_bhsd_0 : p10_mrg_lo.gov z_d_5 +0110010000001010101xxxxxxxxxxxxx n 1159 SVE2 fcvtxnt z_s_0 : z_s_0 p10_mrg_lo.gov z_d_5 +0110010100011xx0101xxxxxxxxxxxxx n 1160 SVE2 flogb z_size17_hsd_0 : p10_mrg_lo.gov z_size17_hsd_5 +01100100xx010100100xxxxxxxxxxxxx n 131 SVE2 fmaxnmp z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100100xx010110100xxxxxxxxxxxxx n 133 SVE2 fmaxp z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100100xx010101100xxxxxxxxxxxxx n 137 SVE2 fminnmp z_size_hsd_0 : p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 +01100100xx010111100xxxxxxxxxxxxx n 139 SVE2 fminp z_size_hsd_0 : 
p10_mrg_lo.gov z_size_hsd_0 z_size_hsd_5 01100100101xxxxx100000xxxxxxxxxx n 1067 SVE2 fmlalb z_s_0 : z_s_0 z_msz_bhsd_5 z_msz_bhsd_16 01100100101xxxxx0100x0xxxxxxxxxx n 1067 SVE2 fmlalb z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 01100100101xxxxx100001xxxxxxxxxx n 1068 SVE2 fmlalt z_s_0 : z_s_0 z_msz_bhsd_5 z_msz_bhsd_16 @@ -84,27 +84,27 @@ 01100100101xxxxx0110x0xxxxxxxxxx n 1069 SVE2 fmlslb z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 01100100101xxxxx101001xxxxxxxxxx n 1070 SVE2 fmlslt z_s_0 : z_s_0 z_msz_bhsd_5 z_msz_bhsd_16 01100100101xxxxx0110x1xxxxxxxxxx n 1070 SVE2 fmlslt z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 -01000101xx1xxxxx110xxxxxxxxxxxxx n 1145 SVE2 histcnt z_size_sd_0 : p10_zer_lo z_size_sd_5 z_size_sd_16 +01000101xx1xxxxx110xxxxxxxxxxxxx n 1145 SVE2 histcnt z_size_sd_0 : p10_zer_lo.gov z_size_sd_5 z_size_sd_16 01000101001xxxxx101000xxxxxxxxxx n 1071 SVE2 histseg z_b_0 : z_b_5 z_b_16 -11000100000xxxxx110xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -10000100000xxxxx101xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000101100xxxxx110xxxxxxxxxxxxx n 992 SVE2 ldnt1d z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000100100xxxxx110xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -10000100100xxxxx101xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -10000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -10000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000101000xxxxx100xxxxxxxxxxxxx n 1188 SVE2 ldnt1sw z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000101000xxxxx110xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo 
-10000101000xxxxx101xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -01000101xx1xxxxx100xxxxxxxx0xxxx w 1189 SVE2 match p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16 +11000100000xxxxx110xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +10000100000xxxxx101xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000101100xxxxx110xxxxxxxxxxxxx n 992 SVE2 ldnt1d z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000100100xxxxx110xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +10000100100xxxxx101xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +10000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +10000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000101000xxxxx100xxxxxxxxxxxxx n 1188 SVE2 ldnt1sw z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +11000101000xxxxx110xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +10000101000xxxxx101xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo.gov +01000101xx1xxxxx100xxxxxxxx0xxxx w 1189 SVE2 match p_size_bh_0 : p10_zer_lo.gov z_size_bh_5 z_size_bh_16 00000100xx1xxxxx011000xxxxxxxxxx n 321 SVE2 mul z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 01000100111xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_d_0 : z_d_5 z4_d_16 i1_index_20 010001000x1xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_h_0 : z_h_5 z3_h_16 i3_index_19 01000100101xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_s_0 : z_s_5 z3_s_16 i2_index_19 00000100111xxxxx001111xxxxxxxxxx n 1072 SVE2 nbsl z_d_0 : z_d_0 z_d_16 z_d_5 -01000101xx1xxxxx100xxxxxxxx1xxxx w 1190 SVE2 nmatch p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16 
+01000101xx1xxxxx100xxxxxxxx1xxxx w 1190 SVE2 nmatch p_size_bh_0 : p10_zer_lo.gov z_size_bh_5 z_size_bh_16 00000100001xxxxx011001xxxxxxxxxx n 328 SVE2 pmul z_msz_bhsd_0 : z_msz_bhsd_5 z_msz_bhsd_16 01000101xx0xxxxx011010xxxxxxxxxx n 1084 SVE2 pmullb z_size_hd_0 : z_sizep1_bs_5 z_sizep1_bs_16 01000101xx0xxxxx011011xxxxxxxxxx n 1085 SVE2 pmullt z_size_hd_0 : z_sizep1_bs_5 z_sizep1_bs_16 @@ -120,7 +120,7 @@ 01000101xx0xxxxx110001xxxxxxxxxx n 1091 SVE2 sabalt z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx001100xxxxxxxxxx n 1092 SVE2 sabdlb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx001101xxxxxxxxxx n 1093 SVE2 sabdlt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 -01000100xx000100101xxxxxxxxxxxxx n 352 SVE2 sadalp z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_sizep1_bhs_5 +01000100xx000100101xxxxxxxxxxxxx n 352 SVE2 sadalp z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_sizep1_bhs_5 01000101xx0xxxxx000000xxxxxxxxxx n 1094 SVE2 saddlb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx100000xxxxxxxxxx n 1095 SVE2 saddlbt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx000001xxxxxxxxxx n 1096 SVE2 saddlt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 @@ -130,16 +130,16 @@ 01000101110xxxxx110100xxxxxxxxxx n 1080 SVE2 sbclb z_d_0 : z_d_0 z_d_5 z_d_16 01000101100xxxxx110101xxxxxxxxxx n 1081 SVE2 sbclt z_s_0 : z_s_0 z_s_5 z_s_16 01000101110xxxxx110101xxxxxxxxxx n 1081 SVE2 sbclt z_d_0 : z_d_0 z_d_5 z_d_16 -01000100xx010000100xxxxxxxxxxxxx n 377 SVE2 shadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010000100xxxxxxxxxxxxx n 377 SVE2 shadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xxxxx000100xxxxxxxxxx n 1166 SVE2 shrnb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx000101xxxxxxxxxx n 1167 SVE2 shrnt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 -01000100xx010010100xxxxxxxxxxxxx n 383 SVE2 shsub z_size_bhsd_0 : p10_mrg_lo 
z_size_bhsd_0 z_size_bhsd_5 -01000100xx010110100xxxxxxxxxxxxx n 1146 SVE2 shsubr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010010100xxxxxxxxxxxxx n 383 SVE2 shsub z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010110100xxxxxxxxxxxxx n 1146 SVE2 shsubr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 01000101xx0xxxxx111101xxxxxxxxxx n 384 SVE2 sli z_tszl19_bhsd_0 : z_tszl19_bhsd_0 z_tszl19_bhsd_5 tszl19_imm3_16 0100010100100011111000xxxxxxxxxx n 593 SVESM4 sm4e z_msz_bhsd_0 : z_msz_bhsd_0 z_msz_bhsd_5 01000101001xxxxx111100xxxxxxxxxx n 594 SVESM4 sm4ekey z_msz_bhsd_0 : z_msz_bhsd_5 z_msz_bhsd_16 -01000100xx010100101xxxxxxxxxxxxx n 387 SVE2 smaxp z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx010110101xxxxxxxxxxxxx n 391 SVE2 sminp z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010100101xxxxxxxxxxxxx n 387 SVE2 smaxp z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010110101xxxxxxxxxxxxx n 391 SVE2 sminp z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 01000100xx0xxxxx010000xxxxxxxxxx n 1099 SVE2 smlalb z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx1000x0xxxxxxxxxx n 1099 SVE2 smlalb z_d_0 : z_d_0 z_s_5 z4_s_16 i2_index_11 01000100101xxxxx1000x0xxxxxxxxxx n 1099 SVE2 smlalb z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 @@ -158,7 +158,8 @@ 01000101xx0xxxxx011101xxxxxxxxxx n 1104 SVE2 smullt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx1100x1xxxxxxxxxx n 1104 SVE2 smullt z_d_0 : z_s_5 z4_s_16 i2_index_11 01000100101xxxxx1100x1xxxxxxxxxx n 1104 SVE2 smullt z_s_0 : z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 -01000100xx001000101xxxxxxxxxxxxx n 402 SVE2 sqabs z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 +00000101xx101101100xxxxxxxxxxxxx n 882 SVE2 splice z_size_bhsd_0 : p10_lo.gov z_size_bhsd_5 z_size_bhsd_5p1 +01000100xx001000101xxxxxxxxxxxxx n 402 SVE2 sqabs z_size_bhsd_0 : p10_mrg_lo.gov 
z_size_bhsd_5 01000101xx00000111011xxxxxxxxxxx n 1168 SVE2 sqcadd z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 imm1_ew_10 01000100xx0xxxxx011000xxxxxxxxxx n 1105 SVE2 sqdmlalb z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx0010x0xxxxxxxxxx n 1105 SVE2 sqdmlalb z_d_0 : z_d_0 z_s_5 z4_s_16 i2_index_11 @@ -184,7 +185,7 @@ 01000101xx0xxxxx011001xxxxxxxxxx n 1112 SVE2 sqdmullt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx1110x1xxxxxxxxxx n 1112 SVE2 sqdmullt z_d_0 : z_s_5 z4_s_16 i2_index_11 01000100101xxxxx1110x1xxxxxxxxxx n 1112 SVE2 sqdmullt z_s_0 : z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 -01000100xx001001101xxxxxxxxxxxxx n 411 SVE2 sqneg z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5 +01000100xx001001101xxxxxxxxxxxxx n 411 SVE2 sqneg z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_5 01000100xx0xxxxx0011xxxxxxxxxxxx n 1169 SVE2 sqrdcmlah z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 z_size_bhsd_16 imm2_nesw_10 01000100101xxxxx0111xxxxxxxxxxxx n 1169 SVE2 sqrdcmlah z_msz_bhsd_0 : z_msz_bhsd_0 z_msz_bhsd_5 z3_msz_bhsd_16 i2_index_19 imm2_nesw_10 01000100111xxxxx0111xxxxxxxxxxxx n 1169 SVE2 sqrdcmlah z_s_0 : z_s_0 z_s_5 z4_s_16 i1_index_20 imm2_nesw_10 @@ -200,30 +201,30 @@ 01000100111xxxxx111101xxxxxxxxxx n 413 SVE2 sqrdmulh z_d_0 : z_d_5 z4_d_16 i1_index_20 010001000x1xxxxx111101xxxxxxxxxx n 413 SVE2 sqrdmulh z_h_0 : z_h_5 z3_h_16 i3_index_19 01000100101xxxxx111101xxxxxxxxxx n 413 SVE2 sqrdmulh z_s_0 : z_s_5 z3_s_16 i2_index_19 -01000100xx001010100xxxxxxxxxxxxx n 414 SVE2 sqrshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx001110100xxxxxxxxxxxxx n 1147 SVE2 sqrshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx001010100xxxxxxxxxxxxx n 414 SVE2 sqrshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx001110100xxxxxxxxxxxxx n 1147 SVE2 sqrshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xxxxx001010xxxxxxxxxx n 1170 SVE2 sqrshrnb z_tszl19_bhs_0 : 
z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx001011xxxxxxxxxx n 1171 SVE2 sqrshrnt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx000010xxxxxxxxxx n 1172 SVE2 sqrshrunb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx000011xxxxxxxxxx n 1173 SVE2 sqrshrunt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 -01000100xx001000100xxxxxxxxxxxxx n 419 SVE2 sqshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx000110100xxxxxxxxxxxxx n 419 SVE2 sqshl z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5 -01000100xx001100100xxxxxxxxxxxxx n 1148 SVE2 sqshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx001111100xxxxxxxxxxxxx n 420 SVE2 sqshlu z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5 +01000100xx001000100xxxxxxxxxxxxx n 419 SVE2 sqshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx000110100xxxxxxxxxxxxx n 419 SVE2 sqshl z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5 +01000100xx001100100xxxxxxxxxxxxx n 1148 SVE2 sqshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx001111100xxxxxxxxxxxxx n 420 SVE2 sqshlu z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5 010001010x1xxxxx001000xxxxxxxxxx n 1174 SVE2 sqshrnb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx001001xxxxxxxxxx n 1175 SVE2 sqshrnt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx000000xxxxxxxxxx n 1176 SVE2 sqshrunb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx000001xxxxxxxxxx n 1177 SVE2 sqshrunt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 -01000100xx011110100xxxxxxxxxxxxx n 1149 SVE2 sqsubr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx011110100xxxxxxxxxxxxx n 1149 SVE2 sqsubr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xx000010000xxxxxxxxxx n 1139 SVE2 sqxtnb 
z_wtszl19_bhsd_0 : z_wtszl19p1_bhsd_5 010001010x1xx000010001xxxxxxxxxx n 1140 SVE2 sqxtnt z_wtszl19_bhsd_0 : z_wtszl19_bhsd_0 z_wtszl19p1_bhsd_5 010001010x1xx000010100xxxxxxxxxx n 1141 SVE2 sqxtunb z_wtszl19_bhsd_0 : z_wtszl19p1_bhsd_5 010001010x1xx000010101xxxxxxxxxx n 1142 SVE2 sqxtunt z_wtszl19_bhsd_0 : z_wtszl19_bhsd_0 z_wtszl19p1_bhsd_5 -01000100xx010100100xxxxxxxxxxxxx n 430 SVE2 srhadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010100100xxxxxxxxxxxxx n 430 SVE2 srhadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 01000101xx0xxxxx111100xxxxxxxxxx n 431 SVE2 sri z_tszl19_bhsd_0 : z_tszl19_bhsd_0 z_tszl19_bhsd_5 tszl19_imm3_16p1 -01000100xx000010100xxxxxxxxxxxxx n 432 SVE2 srshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx000110100xxxxxxxxxxxxx n 1150 SVE2 srshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx001100100xxxxxxxxxxxxx n 433 SVE2 srshr z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5p1 +01000100xx000010100xxxxxxxxxxxxx n 432 SVE2 srshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx000110100xxxxxxxxxxxxx n 1150 SVE2 srshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx001100100xxxxxxxxxxxxx n 433 SVE2 srshr z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5p1 01000101xx0xxxxx111010xxxxxxxxxx n 434 SVE2 srsra z_tszl19_bhsd_0 : z_tszl19_bhsd_0 z_tszl19_bhsd_5 tszl19_imm3_16p1 010001010x0xxxxx101000xxxxxxxxxx n 1178 SVE2 sshllb z_tszl19p1_hsd_0 : z_tszl19_bhs_5 tszl19lo_imm3_16 010001010x0xxxxx101001xxxxxxxxxx n 1179 SVE2 sshllt z_tszl19p1_hsd_0 : z_tszl19_bhs_5 tszl19lo_imm3_16 @@ -234,32 +235,33 @@ 01000101xx0xxxxx100011xxxxxxxxxx n 1116 SVE2 ssubltb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx010100xxxxxxxxxx n 1117 SVE2 ssubwb z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 01000101xx0xxxxx010101xxxxxxxxxx n 1118 SVE2 ssubwt z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 
-11100100000xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100100010xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_s_0 p10_lo -11100101100xxxxx001xxxxxxxxxxxxx n 1004 SVE2 stnt1d svemem_vec_30sd_gpr16 : z_d_0 p10_lo -11100100100xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100100110xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_s_0 p10_lo -11100101000xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100101010xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_s_0 p10_lo +11100100000xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_d_0 p10_lo.gov +11100100010xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_s_0 p10_lo.gov +11100101100xxxxx001xxxxxxxxxxxxx n 1004 SVE2 stnt1d svemem_vec_30sd_gpr16 : z_d_0 p10_lo.gov +11100100100xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_d_0 p10_lo.gov +11100100110xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_s_0 p10_lo.gov +11100101000xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_d_0 p10_lo.gov +11100101010xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_s_0 p10_lo.gov 01000101xx1xxxxx011100xxxxxxxxxx n 1119 SVE2 subhnb z_sizep1_bhs_0 : z_size_hsd_5 z_size_hsd_16 01000101xx1xxxxx011101xxxxxxxxxx n 1120 SVE2 subhnt z_sizep1_bhs_0 : z_sizep1_bhs_0 z_size_hsd_5 z_size_hsd_16 -01000100xx011100100xxxxxxxxxxxxx n 474 SVE2 suqadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx011100100xxxxxxxxxxxxx n 474 SVE2 suqadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000101xx1xxxxx001010xxxxxxxxxx n 490 SVE2 tbl z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_5p1 z_size_bhsd_16 00000101xx1xxxxx001011xxxxxxxxxx n 492 SVE2 tbx z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 z_size_bhsd_16 01000101xx0xxxxx111111xxxxxxxxxx n 496 SVE2 uaba z_size_bhsd_0 : 
z_size_bhsd_0 z_size_bhsd_5 z_size_bhsd_16 01000101xx0xxxxx110010xxxxxxxxxx n 1121 SVE2 uabalb z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx110011xxxxxxxxxx n 1122 SVE2 uabalt z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx001110xxxxxxxxxx n 1123 SVE2 uabdlb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx001111xxxxxxxxxx n 1124 SVE2 uabdlt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 -01000100xx000101101xxxxxxxxxxxxx n 502 SVE2 uadalp z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo z_sizep1_bhs_5 +01000100xx000101101xxxxxxxxxxxxx n 502 SVE2 uadalp z_size_hsd_0 : z_size_hsd_0 p10_mrg_lo.gov z_sizep1_bhs_5 01000101xx0xxxxx000010xxxxxxxxxx n 1125 SVE2 uaddlb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx000011xxxxxxxxxx n 1126 SVE2 uaddlt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx010010xxxxxxxxxx n 1127 SVE2 uaddwb z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 01000101xx0xxxxx010011xxxxxxxxxx n 1128 SVE2 uaddwt z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 -01000100xx010001100xxxxxxxxxxxxx n 513 SVE2 uhadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx010011100xxxxxxxxxxxxx n 514 SVE2 uhsub z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx010111100xxxxxxxxxxxxx n 1151 SVE2 uhsubr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx010101101xxxxxxxxxxxxx n 517 SVE2 umaxp z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx010111101xxxxxxxxxxxxx n 520 SVE2 uminp z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx010001100xxxxxxxxxxxxx n 513 SVE2 uhadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010011100xxxxxxxxxxxxx n 514 SVE2 uhsub z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010111100xxxxxxxxxxxxx n 1151 SVE2 uhsubr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010101101xxxxxxxxxxxxx n 517 SVE2 umaxp z_size_bhsd_0 : 
p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx010111101xxxxxxxxxxxxx n 520 SVE2 uminp z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 01000100xx0xxxxx010010xxxxxxxxxx n 1129 SVE2 umlalb z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx1001x0xxxxxxxxxx n 1129 SVE2 umlalb z_d_0 : z_d_0 z_s_5 z4_s_16 i2_index_11 01000100101xxxxx1001x0xxxxxxxxxx n 1129 SVE2 umlalb z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 @@ -278,28 +280,28 @@ 01000101xx0xxxxx011111xxxxxxxxxx n 1134 SVE2 umullt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000100111xxxxx1101x1xxxxxxxxxx n 1134 SVE2 umullt z_d_0 : z_s_5 z4_s_16 i2_index_11 01000100101xxxxx1101x1xxxxxxxxxx n 1134 SVE2 umullt z_s_0 : z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11 -01000100xx001011100xxxxxxxxxxxxx n 532 SVE2 uqrshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx001111100xxxxxxxxxxxxx n 1152 SVE2 uqrshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx001011100xxxxxxxxxxxxx n 532 SVE2 uqrshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx001111100xxxxxxxxxxxxx n 1152 SVE2 uqrshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xxxxx001110xxxxxxxxxx n 1180 SVE2 uqrshrnb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx001111xxxxxxxxxx n 1181 SVE2 uqrshrnt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 -01000100xx001001100xxxxxxxxxxxxx n 535 SVE2 uqshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx000111100xxxxxxxxxxxxx n 535 SVE2 uqshl z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5 -01000100xx001101100xxxxxxxxxxxxx n 1153 SVE2 uqshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx001001100xxxxxxxxxxxxx n 535 SVE2 uqshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx000111100xxxxxxxxxxxxx n 535 SVE2 uqshl z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5 
+01000100xx001101100xxxxxxxxxxxxx n 1153 SVE2 uqshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xxxxx001100xxxxxxxxxx n 1182 SVE2 uqshrnb z_tszl19_bhs_0 : z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 010001010x1xxxxx001101xxxxxxxxxx n 1183 SVE2 uqshrnt z_tszl19_bhs_0 : z_tszl19_bhs_0 z_tszl19p1_hsd_5 tszl19lo_imm3_16p1 -01000100xx011111100xxxxxxxxxxxxx n 1154 SVE2 uqsubr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx011111100xxxxxxxxxxxxx n 1154 SVE2 uqsubr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 010001010x1xx000010010xxxxxxxxxx n 1143 SVE2 uqxtnb z_wtszl19_bhsd_0 : z_wtszl19p1_bhsd_5 010001010x1xx000010011xxxxxxxxxx n 1144 SVE2 uqxtnt z_wtszl19_bhsd_0 : z_wtszl19_bhsd_0 z_wtszl19p1_bhsd_5 -0100010010000000101xxxxxxxxxxxxx n 541 SVE2 urecpe z_s_0 : p10_mrg_lo z_s_5 -01000100xx010101100xxxxxxxxxxxxx n 542 SVE2 urhadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx000011100xxxxxxxxxxxxx n 543 SVE2 urshl z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -01000100xx000111100xxxxxxxxxxxxx n 1155 SVE2 urshlr z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 -00000100xx001101100xxxxxxxxxxxxx n 544 SVE2 urshr z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5p1 -0100010010000001101xxxxxxxxxxxxx n 545 SVE2 ursqrte z_s_0 : p10_mrg_lo z_s_5 +0100010010000000101xxxxxxxxxxxxx n 541 SVE2 urecpe z_s_0 : p10_mrg_lo.gov z_s_5 +01000100xx010101100xxxxxxxxxxxxx n 542 SVE2 urhadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx000011100xxxxxxxxxxxxx n 543 SVE2 urshl z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +01000100xx000111100xxxxxxxxxxxxx n 1155 SVE2 urshlr z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 +00000100xx001101100xxxxxxxxxxxxx n 544 SVE2 urshr z_tszl8_bhsd_0 : p10_mrg_lo.gov z_tszl8_bhsd_0 tszl8_imm3_5p1 +0100010010000001101xxxxxxxxxxxxx n 545 SVE2 ursqrte z_s_0 : p10_mrg_lo.gov z_s_5 01000101xx0xxxxx111011xxxxxxxxxx n 546 SVE2 ursra 
z_tszl19_bhsd_0 : z_tszl19_bhsd_0 z_tszl19_bhsd_5 tszl19_imm3_16p1 010001010x0xxxxx101010xxxxxxxxxx n 1184 SVE2 ushllb z_tszl19p1_hsd_0 : z_tszl19_bhs_5 tszl19lo_imm3_16 010001010x0xxxxx101011xxxxxxxxxx n 1185 SVE2 ushllt z_tszl19p1_hsd_0 : z_tszl19_bhs_5 tszl19lo_imm3_16 -01000100xx011101100xxxxxxxxxxxxx n 551 SVE2 usqadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 +01000100xx011101100xxxxxxxxxxxxx n 551 SVE2 usqadd z_size_bhsd_0 : p10_mrg_lo.gov z_size_bhsd_0 z_size_bhsd_5 01000101xx0xxxxx111001xxxxxxxxxx n 552 SVE2 usra z_tszl19_bhsd_0 : z_tszl19_bhsd_0 z_tszl19_bhsd_5 tszl19_imm3_16p1 01000101xx0xxxxx000110xxxxxxxxxx n 1135 SVE2 usublb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx000111xxxxxxxxxx n 1136 SVE2 usublt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 diff --git a/core/ir/aarch64/codec_v80.txt b/core/ir/aarch64/codec_v80.txt index 76429ea1940..4e41d2f735a 100644 --- a/core/ir/aarch64/codec_v80.txt +++ b/core/ir/aarch64/codec_v80.txt @@ -68,6 +68,14 @@ # If several operands handle the same 'x' bit then the automatically generated # encoder will check that consistent bit patterns are generated. +# Opndtype instances may be annotated by adding various flags after a '.'. The +# codec may then use these flags to generate extra code for that operand +# or for the instruction it is a part of. This is to allow certain properties +# to be set on the instr without having to use a custom encode/decode function +# as well as reducing the need for special operand encoders/decoders if +# only a flag needs to be set or similar. Multiple flags can be supported like +# p10.gov.scatter + # The enum field is managed by the codec sorter, and should be unique per # opcode. If you're adding a new entry just leave it out and run # codecsort.py --rewrite and it will assign the proper enum value. 
@@ -221,8 +229,8 @@ x1011010100xxxxxxxxx01xxxxxxxxxx r 81 BASE csneg wx0 : wx5 110101010000101101111101001xxxxx n 1058 DPB2 dc_cvadp : memx0 110101010000101101111100001xxxxx n 1059 DPB dc_cvap : memx0 110101010000101101111011001xxxxx n 573 BASE dc_cvau : memx0 -110101010000101101110100011xxxxx n 1209 MTE dc_gva memx0 : -110101010000101101110100100xxxxx n 1210 MTE dc_gzva memx0 : +110101010000101101110100011xxxxx n 1209 MTE dc_gva memx0 : +110101010000101101110100100xxxxx n 1210 MTE dc_gzva memx0 : 110101010000100001110110010xxxxx n 574 BASE dc_isw : x0 110101010000100001110110001xxxxx n 575 BASE dc_ivac : memx0 110101010000101101110100001xxxxx n 568 BASE dc_zva memx0 : diff --git a/core/ir/aarch64/codec_v82.txt b/core/ir/aarch64/codec_v82.txt index 7c84851af1b..07a06547264 100644 --- a/core/ir/aarch64/codec_v82.txt +++ b/core/ir/aarch64/codec_v82.txt @@ -129,7 +129,6 @@ x001111011100001000000xxxxxxxxxx n 120 FP16 fcvtnu wx0 : h5 0x001110110xxxxx000001xxxxxxxxxx n 136 FP16 fminnm dq0 : dq5 dq16 h_sz 0x101110110xxxxx000001xxxxxxxxxx n 137 FP16 fminnmp dq0 : dq5 dq16 h_sz 0101111010110000110010xxxxxxxxxx n 137 FP16 fminnmp h0 : s5 h_sz -0000111010110000110010xxxxxxxxxx n 138 FP16 fminnmv h0 : d5 0x00111010110000110010xxxxxxxxxx n 138 FP16 fminnmv h0 : dq5 h_sz 0101111010110000111110xxxxxxxxxx n 139 FP16 fminp h0 : s5 h_sz 0x101110110xxxxx001101xxxxxxxxxx n 139 FP16 fminp dq0 : dq5 dq16 h_sz diff --git a/core/ir/aarch64/codec_v85.txt b/core/ir/aarch64/codec_v85.txt new file mode 100644 index 00000000000..8166ec0846d --- /dev/null +++ b/core/ir/aarch64/codec_v85.txt @@ -0,0 +1,38 @@ +# ********************************************************** +# Copyright (c) 2024 ARM Limited. All rights reserved. 
+# ********************************************************** + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of ARM Limited nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL ARM LIMITED OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. + +# This file defines instruction encodings for v8.5 instructions. + +# See header comments in codec_v80.txt and opnd_defs.txt to understand how +# instructions are defined for the purposes of decode and encode code +# generation. 
+ +# Instruction definitions: +110101010000001100100100xx011111 n 1211 BTI bti : imm2_6 diff --git a/core/ir/aarch64/disassemble.c b/core/ir/aarch64/disassemble.c index e433968ae3a..63efbb9bcb5 100644 --- a/core/ir/aarch64/disassemble.c +++ b/core/ir/aarch64/disassemble.c @@ -55,6 +55,7 @@ static const char *const pred_names[] = { "le", /* DR_PRED_LE */ "al", /* DR_PRED_AL */ "nv", /* DR_PRED_NV */ + "", /* DR_PRED_MASKED */ }; int @@ -149,7 +150,8 @@ void print_opcode_name(instr_t *instr, const char *name, char *buf, size_t bufsz, size_t *sofar DR_PARAM_OUT) { - if (instr_get_predicate(instr) != DR_PRED_NONE) { + if (instr_get_predicate(instr) != DR_PRED_NONE && + instr_get_predicate(instr) != DR_PRED_MASKED) { if (instr_get_opcode(instr) == OP_bcond) { print_to_buffer(buf, bufsz, sofar, "b.%s", pred_names[instr_get_predicate(instr)]); diff --git a/core/ir/aarch64/instr.c b/core/ir/aarch64/instr.c index 8e0d6fb7371..63a0ec57f2c 100644 --- a/core/ir/aarch64/instr.c +++ b/core/ir/aarch64/instr.c @@ -34,19 +34,21 @@ #include "../globals.h" #include "instr.h" #include "decode.h" - +#include "encode_api.h" #include "opcode_names.h" +/* XXX i#6690: currently only A64 is supported for instruction encoding. + * We want to add support for A64 decoding and synthetic ISA encoding as well. + * XXX i#1684: move this function to core/ir/instr_shared.c once we can support + * all architectures in the same build of DR. 
+ */ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - return (mode == DR_ISA_ARM_A64); -} - -dr_isa_mode_t -instr_get_isa_mode(instr_t *instr) -{ - return DR_ISA_ARM_A64; + if (mode != DR_ISA_ARM_A64) + return false; + instr->isa_mode = DR_ISA_ARM_A64; + return true; } int diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index ccee4a382f1..4ca4ec2987f 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -856,6 +856,19 @@ */ #define INSTR_CREATE_ic_ialluis(dc) instr_create_0dst_0src(dc, OP_ic_ialluis) +/** + * Creates a BTI instruction to guard against the execution of instructions + * which are not the intended target of a branch, and is a NOP on hardware + * which does not support FEAT_BTI. BTI belongs to the hints instruction class. + * This macro is used to encode the forms: + * \verbatim + * BTI # + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param imm The imm representing the appropriate bti symbol + */ +#define INSTR_CREATE_bti(dc, imm) instr_create_0dst_1src(dc, OP_bti, imm) + /** * Creates a CLREX instruction. * \param dc The void * dcontext used to allocate memory for the instr_t. @@ -5271,7 +5284,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_orr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_orr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_orr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an EOR instruction. @@ -5286,7 +5299,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_eor_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_eor, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_eor, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an AND instruction. @@ -5301,7 +5314,7 @@ * \param Zm The second source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_and_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_and, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_and, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a BIC instruction. @@ -5316,7 +5329,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_bic_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_bic, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_bic, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a MOVPRFX instruction. @@ -5345,7 +5358,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_movprfx_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_movprfx, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_movprfx, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SQADD instruction. @@ -5420,7 +5433,7 @@ * \param Zm The third source vector register, Z (Scalable) */ #define INSTR_CREATE_sub_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sub, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sub, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SUB instruction. @@ -5465,7 +5478,7 @@ * \param Zm The third source vector register, Z (Scalable) */ #define INSTR_CREATE_subr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_subr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_subr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SUBR instruction. @@ -5555,7 +5568,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_add_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_add, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_add, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a ADD instruction. 
@@ -5601,8 +5614,10 @@ * \param simm The signed immediate imm * \param shift The immediate shiftOp for simm */ -#define INSTR_CREATE_cpy_sve_shift_pred(dc, Zd, Pg, simm, shift) \ - instr_create_1dst_4src(dc, OP_cpy, Zd, Pg, simm, OPND_CREATE_LSL(), shift) +#define INSTR_CREATE_cpy_sve_shift_pred(dc, Zd, Pg, simm, shift) \ + INSTR_PRED( \ + instr_create_1dst_4src(dc, OP_cpy, Zd, Pg, simm, OPND_CREATE_LSL(), shift), \ + DR_PRED_MASKED) /** * Creates a CPY instruction. @@ -5621,7 +5636,7 @@ * S (Singleword, 32 bits), or D (Doubleword, 64 bits). */ #define INSTR_CREATE_cpy_sve_pred(dc, Zd, Pg, Rn_or_Vn) \ - instr_create_1dst_2src(dc, OP_cpy, Zd, Pg, Rn_or_Vn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cpy, Zd, Pg, Rn_or_Vn), DR_PRED_MASKED) /** * Creates a PTEST instruction. @@ -5635,7 +5650,7 @@ * \param Pn The first source predicate register, P (Predicate) */ #define INSTR_CREATE_ptest_sve_pred(dc, Pg, Pn) \ - instr_create_0dst_2src(dc, OP_ptest, Pg, Pn) + INSTR_PRED(instr_create_0dst_2src(dc, OP_ptest, Pg, Pn), DR_PRED_MASKED) /** * Creates a MAD instruction. @@ -5651,7 +5666,7 @@ * \param Za The third source vector register, Z (Scalable). */ #define INSTR_CREATE_mad_sve_pred(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_mad, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_mad, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a MLA instruction. @@ -5667,7 +5682,7 @@ * \param Zm The third source vector register, Z (Scalable). */ #define INSTR_CREATE_mla_sve_pred(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_mla, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_mla, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a MLS instruction. @@ -5683,7 +5698,7 @@ * \param Zm The third source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_mls_sve_pred(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_mls, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_mls, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a MSB instruction. @@ -5699,7 +5714,7 @@ * \param Za The third source vector register, Z (Scalable). */ #define INSTR_CREATE_msb_sve_pred(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_msb, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_msb, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a MUL instruction. @@ -5714,7 +5729,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_mul_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_mul, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_mul, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a MUL instruction. @@ -5743,7 +5758,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_smulh_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_smulh, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_smulh, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMULH instruction. @@ -5758,7 +5773,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_umulh_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_umulh, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_umulh, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FEXPA instruction. @@ -5831,7 +5846,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_abs_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_abs, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_abs, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CNOT instruction. 
@@ -5846,7 +5861,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_cnot_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_cnot, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cnot, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a NEG instruction. @@ -5861,7 +5876,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_neg_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_neg, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_neg, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SABD instruction. @@ -5876,7 +5891,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_sabd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sabd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sabd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SMAX instruction. @@ -5891,7 +5906,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_smax_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_smax, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_smax, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SMAX instruction. @@ -5920,7 +5935,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_smin_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_smin, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_smin, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SMIN instruction. @@ -5949,7 +5964,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_uabd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uabd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uabd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FACGE instruction. 
@@ -5965,7 +5980,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_facge_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_facge, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_facge, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FACGT instruction. @@ -5981,7 +5996,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_facgt_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_facgt, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_facgt, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a SDIV instruction. @@ -5996,7 +6011,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_sdiv_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sdiv, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sdiv, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SDIVR instruction. @@ -6011,7 +6026,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_sdivr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sdivr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sdivr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UDIV instruction. @@ -6026,7 +6041,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_udiv_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_udiv, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_udiv, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UDIVR instruction. @@ -6041,7 +6056,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_udivr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_udivr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_udivr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMAX instruction. 
@@ -6056,7 +6071,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_umax_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_umax, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_umax, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMAX instruction. @@ -6085,7 +6100,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_umin_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_umin, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_umin, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMIN instruction. @@ -6114,7 +6129,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_sxtb_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sxtb, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sxtb, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SXTH instruction. @@ -6129,7 +6144,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_sxth_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sxth, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sxth, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SXTW instruction. @@ -6144,7 +6159,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_sxtw_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sxtw, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sxtw, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UXTB instruction. @@ -6159,7 +6174,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_uxtb_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_uxtb, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_uxtb, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UXTH instruction. 
@@ -6174,7 +6189,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_uxth_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_uxth, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_uxth, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UXTW instruction. @@ -6189,7 +6204,7 @@ * \param Zn The source vector register, Z (Scalable) */ #define INSTR_CREATE_uxtw_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_uxtw, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_uxtw, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCMEQ instruction. @@ -6203,8 +6218,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmeq_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmeq, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmeq_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmeq, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMEQ instruction. @@ -6220,7 +6237,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmeq_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_fcmeq, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmeq, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FCMGE instruction. @@ -6234,8 +6251,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmge_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmge, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmge_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmge, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMGE instruction. 
@@ -6251,7 +6270,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmge_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_fcmge, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmge, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FCMGT instruction. @@ -6265,8 +6284,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmgt_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmgt, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmgt_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmgt, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMGT instruction. @@ -6282,7 +6303,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmgt_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_fcmgt, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmgt, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FCMLE instruction. @@ -6296,8 +6317,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmle_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmle, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmle_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmle, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMLT instruction. 
@@ -6311,8 +6334,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmlt_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmlt, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmlt_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmlt, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMNE instruction. @@ -6326,8 +6351,10 @@ * \param Pg The governing predicate register, P (Predicate) * \param Zn The first source vector register, Z (Scalable) */ -#define INSTR_CREATE_fcmne_sve_zero_pred(dc, Pd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcmne, Pd, Pg, Zn, opnd_create_immed_float(0.0)) +#define INSTR_CREATE_fcmne_sve_zero_pred(dc, Pd, Pg, Zn) \ + INSTR_PRED( \ + instr_create_1dst_3src(dc, OP_fcmne, Pd, Pg, Zn, opnd_create_immed_float(0.0)), \ + DR_PRED_MASKED) /** * Creates a FCMNE instruction. @@ -6343,7 +6370,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmne_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_fcmne, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmne, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FCMUO instruction. @@ -6359,7 +6386,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmuo_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_fcmuo, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmuo, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FCMLE instruction. @@ -6375,7 +6402,7 @@ * \param Zn The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmle_sve_pred(dc, Pd, Pg, Zm, Zn) \ - instr_create_1dst_3src(dc, OP_fcmle, Pd, Pg, Zm, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmle, Pd, Pg, Zm, Zn), DR_PRED_MASKED) /** * Creates a FCMLT instruction. 
@@ -6391,7 +6418,7 @@ * \param Zn The second source vector register, Z (Scalable) */ #define INSTR_CREATE_fcmlt_sve_pred(dc, Pd, Pg, Zm, Zn) \ - instr_create_1dst_3src(dc, OP_fcmlt, Pd, Pg, Zm, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcmlt, Pd, Pg, Zm, Zn), DR_PRED_MASKED) /** * Creates a CMPEQ instruction. @@ -6407,7 +6434,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmpeq_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmpeq, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpeq, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPEQ instruction. @@ -6424,7 +6451,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmpeq_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmpeq, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpeq, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPGE instruction. @@ -6440,7 +6467,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmpge_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmpge, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpge, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPGE instruction. @@ -6457,7 +6484,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmpge_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmpge, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpge, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPGT instruction. @@ -6473,7 +6500,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmpgt_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmpgt, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpgt, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPGT instruction. 
@@ -6490,7 +6517,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmpgt_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmpgt, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpgt, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPHI instruction. @@ -6506,7 +6533,7 @@ * \param imm The immediate imm */ #define INSTR_CREATE_cmphi_sve_pred_imm(dc, Pd, Pg, Zn, imm) \ - instr_create_1dst_3src(dc, OP_cmphi, Pd, Pg, Zn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmphi, Pd, Pg, Zn, imm), DR_PRED_MASKED) /** * Creates a CMPHI instruction. @@ -6523,7 +6550,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmphi_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmphi, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmphi, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPHS instruction. @@ -6539,7 +6566,7 @@ * \param imm The immediate imm */ #define INSTR_CREATE_cmphs_sve_pred_imm(dc, Pd, Pg, Zn, imm) \ - instr_create_1dst_3src(dc, OP_cmphs, Pd, Pg, Zn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmphs, Pd, Pg, Zn, imm), DR_PRED_MASKED) /** * Creates a CMPHS instruction. @@ -6556,7 +6583,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmphs_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmphs, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmphs, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPLE instruction. @@ -6572,7 +6599,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmple_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmple, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmple, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPLE instruction. 
@@ -6588,7 +6615,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmple_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmple, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmple, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPLO instruction. @@ -6604,7 +6631,7 @@ * \param imm The immediate imm */ #define INSTR_CREATE_cmplo_sve_pred_imm(dc, Pd, Pg, Zn, imm) \ - instr_create_1dst_3src(dc, OP_cmplo, Pd, Pg, Zn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmplo, Pd, Pg, Zn, imm), DR_PRED_MASKED) /** * Creates a CMPLO instruction. @@ -6620,7 +6647,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmplo_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmplo, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmplo, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPLS instruction. @@ -6636,7 +6663,7 @@ * \param imm The immediate imm */ #define INSTR_CREATE_cmpls_sve_pred_imm(dc, Pd, Pg, Zn, imm) \ - instr_create_1dst_3src(dc, OP_cmpls, Pd, Pg, Zn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpls, Pd, Pg, Zn, imm), DR_PRED_MASKED) /** * Creates a CMPLS instruction. @@ -6652,7 +6679,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmpls_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmpls, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpls, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPLT instruction. @@ -6668,7 +6695,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmplt_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmplt, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmplt, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPLT instruction. 
@@ -6684,7 +6711,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmplt_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmplt, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmplt, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a CMPNE instruction. @@ -6700,7 +6727,7 @@ * \param simm The signed immediate imm */ #define INSTR_CREATE_cmpne_sve_pred_simm(dc, Pd, Pg, Zn, simm) \ - instr_create_1dst_3src(dc, OP_cmpne, Pd, Pg, Zn, simm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpne, Pd, Pg, Zn, simm), DR_PRED_MASKED) /** * Creates a CMPNE instruction. @@ -6717,7 +6744,7 @@ * \param Zm The second source vector register, Z (Scalable) */ #define INSTR_CREATE_cmpne_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_cmpne, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_cmpne, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a SETFFR instruction. @@ -6754,7 +6781,7 @@ * \param Pg The governing predicate register, P (Predicate) */ #define INSTR_CREATE_rdffr_sve_pred(dc, Pd, Pg) \ - instr_create_1dst_1src(dc, OP_rdffr, Pd, Pg) + INSTR_PRED(instr_create_1dst_1src(dc, OP_rdffr, Pd, Pg), DR_PRED_MASKED) /** * Creates a RDFFRS instruction. @@ -6768,7 +6795,7 @@ * \param Pg The governing predicate register, P (Predicate) */ #define INSTR_CREATE_rdffrs_sve_pred(dc, Pd, Pg) \ - instr_create_1dst_1src(dc, OP_rdffrs, Pd, Pg) + INSTR_PRED(instr_create_1dst_1src(dc, OP_rdffrs, Pd, Pg), DR_PRED_MASKED) /** * Creates a WRFFR instruction. @@ -6795,7 +6822,7 @@ * \param Pn The source predicate register, P (Predicate). */ #define INSTR_CREATE_cntp_sve_pred(dc, Rd, Pg, Pn) \ - instr_create_1dst_2src(dc, OP_cntp, Rd, Pg, Pn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cntp, Rd, Pg, Pn), DR_PRED_MASKED) /** * Creates a DECP instruction. @@ -7093,7 +7120,7 @@ * \param Pm The second source predicate register, P (Predicate). 
*/ #define INSTR_CREATE_and_sve_pred_b(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_and, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_and, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates an AND instruction. @@ -7124,7 +7151,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_ands_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_ands, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_ands, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a BIC instruction. @@ -7140,7 +7167,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_bic_sve_pred_b(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_bic, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_bic, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a BIC instruction. @@ -7171,7 +7198,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_bics_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_bics, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_bics, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates an EOR instruction. @@ -7187,7 +7214,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_eor_sve_pred_b(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_eor, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_eor, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NOT instruction. @@ -7234,7 +7261,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_eors_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_eors, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_eors, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NAND instruction. @@ -7250,7 +7277,7 @@ * \param Pm The second source predicate register, P (Predicate). 
*/ #define INSTR_CREATE_nand_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_nand, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_nand, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NANDS instruction. @@ -7266,7 +7293,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_nands_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_nands, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_nands, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NOR instruction. @@ -7282,7 +7309,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_nor_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_nor, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_nor, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NORS instruction. @@ -7298,7 +7325,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_nors_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_nors, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_nors, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a NOT instruction. @@ -7313,7 +7340,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_not_sve_pred_vec(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_not, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_not, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an ORN instruction. @@ -7329,7 +7356,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_orn_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_orn, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_orn, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates an ORNS instruction. @@ -7345,7 +7372,7 @@ * \param Pm The second source predicate register, P (Predicate). 
*/ #define INSTR_CREATE_orns_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_orns, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_orns, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates an ORR instruction. @@ -7361,7 +7388,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_orr_sve_pred_b(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_orr, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_orr, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates an ORR instruction. @@ -7392,7 +7419,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_orrs_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_orrs, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_orrs, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a CLASTA instruction. @@ -7408,7 +7435,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_clasta_sve_scalar(dc, Rdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clasta, Rdn, Pg, Rdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clasta, Rdn, Pg, Rdn, Zm), DR_PRED_MASKED) /** * Creates a CLASTA instruction. @@ -7424,7 +7451,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_clasta_sve_simd_fp(dc, Vdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clasta, Vdn, Pg, Vdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clasta, Vdn, Pg, Vdn, Zm), DR_PRED_MASKED) /** * Creates a CLASTA instruction. @@ -7439,7 +7466,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_clasta_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clasta, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clasta, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a CLASTB instruction. @@ -7455,7 +7482,7 @@ * \param Zm The second source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_clastb_sve_scalar(dc, Rdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clastb, Rdn, Pg, Rdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clastb, Rdn, Pg, Rdn, Zm), DR_PRED_MASKED) /** * Creates a CLASTB instruction. @@ -7471,7 +7498,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_clastb_sve_simd_fp(dc, Vdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clastb, Vdn, Pg, Vdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clastb, Vdn, Pg, Vdn, Zm), DR_PRED_MASKED) /** * Creates a CLASTB instruction. @@ -7486,7 +7513,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_clastb_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_clastb, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_clastb, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LASTA instruction. @@ -7502,7 +7529,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_lasta_sve_scalar(dc, Rd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_lasta, Rd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_lasta, Rd, Pg, Zn), DR_PRED_MASKED) /** * Creates a LASTA instruction. @@ -7518,7 +7545,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_lasta_sve_simd_fp(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_lasta, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_lasta, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a LASTB instruction. @@ -7534,7 +7561,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_lastb_sve_scalar(dc, Rd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_lastb, Rd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_lastb, Rd, Pg, Zn), DR_PRED_MASKED) /** * Creates a LASTB instruction. @@ -7550,7 +7577,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_lastb_sve_simd_fp(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_lastb, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_lastb, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CNT instruction. @@ -7565,7 +7592,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_cnt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_cnt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cnt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CNTB instruction. @@ -8450,7 +8477,7 @@ * \param Pn The source predicate register, P (Predicate). */ #define INSTR_CREATE_brka_sve_pred(dc, Pd, Pg, Pn) \ - instr_create_1dst_2src(dc, OP_brka, Pd, Pg, Pn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_brka, Pd, Pg, Pn), DR_PRED_MASKED) /** * Creates a BRKAS instruction. @@ -8465,7 +8492,7 @@ * \param Pn The source predicate register, P (Predicate). */ #define INSTR_CREATE_brkas_sve_pred(dc, Pd, Pg, Pn) \ - instr_create_1dst_2src(dc, OP_brkas, Pd, Pg, Pn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_brkas, Pd, Pg, Pn), DR_PRED_MASKED) /** * Creates a BRKB instruction. @@ -8480,7 +8507,7 @@ * \param Pn The source predicate register, P (Predicate). */ #define INSTR_CREATE_brkb_sve_pred(dc, Pd, Pg, Pn) \ - instr_create_1dst_2src(dc, OP_brkb, Pd, Pg, Pn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_brkb, Pd, Pg, Pn), DR_PRED_MASKED) /** * Creates a BRKBS instruction. @@ -8495,7 +8522,7 @@ * \param Pn The source predicate register, P (Predicate). */ #define INSTR_CREATE_brkbs_sve_pred(dc, Pd, Pg, Pn) \ - instr_create_1dst_2src(dc, OP_brkbs, Pd, Pg, Pn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_brkbs, Pd, Pg, Pn), DR_PRED_MASKED) /** * Creates a BRKN instruction. @@ -8511,7 +8538,7 @@ * \param Pn The first source predicate register, P (Predicate). 
*/ #define INSTR_CREATE_brkn_sve_pred(dc, Pdm, Pg, Pn) \ - instr_create_1dst_3src(dc, OP_brkn, Pdm, Pg, Pn, Pdm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkn, Pdm, Pg, Pn, Pdm), DR_PRED_MASKED) /** * Creates a BRKNS instruction. @@ -8527,7 +8554,7 @@ * \param Pn The first source predicate register, P (Predicate). */ #define INSTR_CREATE_brkns_sve_pred(dc, Pdm, Pg, Pn) \ - instr_create_1dst_3src(dc, OP_brkns, Pdm, Pg, Pn, Pdm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkns, Pdm, Pg, Pn, Pdm), DR_PRED_MASKED) /** * Creates a BRKPA instruction. @@ -8543,7 +8570,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_brkpa_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_brkpa, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkpa, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a BRKPAS instruction. @@ -8559,7 +8586,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_brkpas_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_brkpas, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkpas, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a BRKPB instruction. @@ -8575,7 +8602,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_brkpb_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_brkpb, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkpb, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a BRKPBS instruction. @@ -8591,7 +8618,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_brkpbs_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_brkpbs, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_brkpbs, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a WHILELE instruction. @@ -8664,7 +8691,7 @@ /** * Creates a TBL instruction. 
* - * This macro is used to encode the forms: + * This macro is used to encode the form: * \verbatim * TBL <Zd>.<T>, { <Zn>.<T> }, <Zm>.<T> * \endverbatim @@ -8676,6 +8703,23 @@ #define INSTR_CREATE_tbl_sve(dc, Zd, Zn, Zm) \ instr_create_1dst_2src(dc, OP_tbl, Zd, Zn, Zm) +/** + * Creates a TBL instruction. + * + * This macro is used to encode the form: + \verbatim + TBL <Zd>.<T>, { <Zn1>.<T>, <Zn2>.<T> }, <Zm>.<T> + \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zd The destination vector register. Can be Z.b, Z.h, Z.s or Z.d. + * \param Zn The first source vector register. Can be Z.b, Z.h, Z.s or Z.d. + * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. + * + * The Zn2 parameter is derived from Zn. + */ +#define INSTR_CREATE_tbl_sve_mulvec(dc, Zd, Zn, Zm) \ + instr_create_1dst_3src(dc, OP_tbl, Zd, Zn, opnd_create_increment_reg(Zn, 1), Zm) + /** * Creates a DUP instruction. * @@ -8773,17 +8817,36 @@ /** * Creates a SPLICE instruction (destructive). * - * This macro is used to encode the forms: + * This macro is used to encode the form: * \verbatim * SPLICE <Zdn>.<T>, <Pv>, <Zdn>.<T>, <Zm>.<T> * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zdn The second source and destination vector register, Z (Scalable). - * \param Pv The first source predicate register, P (Predicate). - * \param Zm The third source vector register, Z (Scalable). + * \param Pv The governing predicate register, P (Predicate). + * \param Zm The last source vector register, Z (Scalable). + */ +#define INSTR_CREATE_splice_sve_des(dc, Zdn, Pv, Zm) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_splice, Zdn, Pv, Zdn, Zm), DR_PRED_MASKED) + +/** + * Creates a SPLICE instruction. + * + * This macro is used to encode the form: + \verbatim + SPLICE <Zd>.<T>, <Pv>, { <Zn1>.<T>, <Zn2>.<T> } + \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zd The destination vector register. Can be Z.b, Z.h, Z.s or Z.d.
+ * \param Pv The governing predicate register, P (Predicate). + * \param Zn The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. + * + * The Zn2 parameter is derived from Zn. */ -#define INSTR_CREATE_splice_sve(dc, Zdn, Pv, Zm) \ - instr_create_1dst_3src(dc, OP_splice, Zdn, Pv, Zdn, Zm) +#define INSTR_CREATE_splice_sve_con(dc, Zd, Pv, Zn) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_splice, Zd, Pv, Zn, \ + opnd_create_increment_reg(Zn, 1)), \ + DR_PRED_MASKED) /** * Creates a REV instruction. @@ -8824,7 +8887,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_revb_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_revb, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_revb, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a REVH instruction. @@ -8839,7 +8902,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_revh_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_revh, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_revh, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a REVW instruction. @@ -8854,7 +8917,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_revw_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_revw, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_revw, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a COMPACT instruction. @@ -8869,7 +8932,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_compact_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_compact, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_compact, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a PUNPKHI instruction. @@ -9186,7 +9249,7 @@ * \param Pg The governing predicate register, P (Predicate). */ #define INSTR_CREATE_pfirst_sve(dc, Pdn, Pg) \ - instr_create_1dst_2src(dc, OP_pfirst, Pdn, Pg, Pdn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_pfirst, Pdn, Pg, Pdn), DR_PRED_MASKED) /** * Creates a SEL instruction. 
@@ -9202,7 +9265,7 @@ * \param Pm The second source predicate register, P (Predicate). */ #define INSTR_CREATE_sel_sve_pred(dc, Pd, Pg, Pn, Pm) \ - instr_create_1dst_3src(dc, OP_sel, Pd, Pg, Pn, Pm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sel, Pd, Pg, Pn, Pm), DR_PRED_MASKED) /** * Creates a SEL instruction. @@ -9213,12 +9276,12 @@ * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zd The destination vector register, Z (Scalable). - * \param Pv The first source predicate register, P (Predicate). - * \param Zn The second source vector register, Z (Scalable). - * \param Zm The third source vector register, Z (Scalable). + * \param Pv The governing predicate register, P (Predicate). + * \param Zn The first source vector register, Z (Scalable). + * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_sel_sve_vector(dc, Zd, Pv, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_sel, Zd, Pv, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sel, Zd, Pv, Zn, Zm), DR_PRED_MASKED) /** * Creates an MOV instruction. @@ -9231,9 +9294,11 @@ * \param Pd The destination predicate register, P (Predicate). * \param Pn The first source predicate register, P (Predicate). */ -#define INSTR_CREATE_mov_sve_pred(dc, Pd, Pn) \ - instr_create_1dst_3src(dc, OP_orr, Pd, \ - opnd_create_predicate_reg(opnd_get_reg(Pn), false), Pn, Pn) +#define INSTR_CREATE_mov_sve_pred(dc, Pd, Pn) \ + INSTR_PRED( \ + instr_create_1dst_3src( \ + dc, OP_orr, Pd, opnd_create_predicate_reg(opnd_get_reg(Pn), false), Pn, Pn), \ + DR_PRED_MASKED) /** * Creates an MOVS instruction. @@ -9248,7 +9313,7 @@ * \param Pn The first source predicate register, P (Predicate). */ #define INSTR_CREATE_movs_sve_pred(dc, Pd, Pg, Pn) \ - instr_create_1dst_3src(dc, OP_ands, Pd, Pg, Pn, Pn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_ands, Pd, Pg, Pn, Pn), DR_PRED_MASKED) /** * Creates a PTRUE instruction. 
@@ -9306,7 +9371,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_asr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_asr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_asr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an ASR instruction. @@ -9321,7 +9386,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_asr_sve_pred_wide(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_asr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_asr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an ASR instruction. @@ -9351,7 +9416,7 @@ * \param imm The immediate imm, one indexed. */ #define INSTR_CREATE_asrd_sve_pred(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_asrd, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_asrd, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates an ASRR instruction. @@ -9366,7 +9431,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_asrr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_asrr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_asrr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a CLS instruction. @@ -9381,7 +9446,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_cls_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_cls, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cls, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CLZ instruction. @@ -9396,7 +9461,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_clz_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_clz, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_clz, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CNT instruction. @@ -9411,7 +9476,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_cnt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_cnt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_cnt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a LSL instruction. @@ -9441,7 +9506,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_lsl_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lsl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lsl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LSL instruction. @@ -9456,7 +9521,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_lsl_sve_pred_wide(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lsl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lsl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LSL instruction. @@ -9486,7 +9551,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_lslr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lslr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lslr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LSR instruction. @@ -9516,7 +9581,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_lsr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lsr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lsr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LSR instruction. @@ -9531,7 +9596,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_lsr_sve_pred_wide(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lsr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lsr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a LSR instruction. @@ -9561,7 +9626,7 @@ * \param Zm The second source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_lsrr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_lsrr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_lsrr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a RBIT instruction. @@ -9576,7 +9641,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_rbit_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_rbit, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_rbit, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an ANDV instruction. @@ -9593,7 +9658,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_andv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_andv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_andv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates an EORV instruction. @@ -9610,7 +9675,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_eorv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_eorv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_eorv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FADDA instruction. @@ -9627,7 +9692,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fadda_sve_pred(dc, Vdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fadda, Vdn, Pg, Vdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fadda, Vdn, Pg, Vdn, Zm), DR_PRED_MASKED) /** * Creates a FADDV instruction. @@ -9643,7 +9708,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_faddv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_faddv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_faddv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FMAXNMV instruction. @@ -9659,7 +9724,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fmaxnmv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fmaxnmv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fmaxnmv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FMAXV instruction. @@ -9675,7 +9740,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fmaxv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fmaxv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fmaxv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FMINNMV instruction. @@ -9691,7 +9756,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fminnmv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fminnmv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fminnmv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FMINV instruction. @@ -9707,7 +9772,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fminv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fminv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fminv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates an ORV instruction. @@ -9724,7 +9789,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_orv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_orv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_orv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SADDV instruction. @@ -9739,7 +9804,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_saddv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_saddv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_saddv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SMAXV instruction. @@ -9756,7 +9821,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_smaxv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_smaxv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_smaxv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SMINV instruction. @@ -9773,7 +9838,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_sminv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sminv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sminv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UADDV instruction. @@ -9788,7 +9853,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_uaddv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_uaddv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_uaddv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UMAXV instruction. @@ -9805,7 +9870,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_umaxv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_umaxv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_umaxv, Vd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UMINV instruction. @@ -9822,7 +9887,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_uminv_sve_pred(dc, Vd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_uminv, Vd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_uminv, Vd, Pg, Zn), DR_PRED_MASKED) /* * Creates a FCPY instruction. @@ -9833,7 +9898,7 @@ * \param imm The floating-point immediate value to be copied. */ #define INSTR_CREATE_fcpy_sve_pred(dc, Zd, Pg, imm) \ - instr_create_1dst_2src(dc, OP_fcpy, Zd, Pg, imm) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcpy, Zd, Pg, imm), DR_PRED_MASKED) /** * Creates a FDUP instruction. @@ -9867,7 +9932,7 @@ * OPSZ_1) */ #define INSTR_CREATE_ld1rb_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RH instruction. 
@@ -9887,7 +9952,7 @@ * OPSZ_2) */ #define INSTR_CREATE_ld1rh_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rh, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rh, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RW instruction. @@ -9906,7 +9971,7 @@ * OPSZ_4) */ #define INSTR_CREATE_ld1rw_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rw, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rw, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RD instruction. @@ -9924,7 +9989,7 @@ * OPSZ_8) */ #define INSTR_CREATE_ld1rd_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rd, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rd, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RSB instruction. @@ -9944,7 +10009,7 @@ * OPSZ_1) */ #define INSTR_CREATE_ld1rsb_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rsb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rsb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RSH instruction. @@ -9963,7 +10028,7 @@ * OPSZ_2) */ #define INSTR_CREATE_ld1rsh_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rsh, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rsh, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RSW instruction. @@ -9981,7 +10046,7 @@ * OPSZ_4) */ #define INSTR_CREATE_ld1rsw_sve(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rsw, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rsw, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates an INDEX instruction. @@ -10023,7 +10088,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fcvt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fcvt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcvt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCVTZS instruction. @@ -10044,7 +10109,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fcvtzs_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fcvtzs, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcvtzs, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCVTZU instruction. @@ -10065,7 +10130,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fcvtzu_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fcvtzu, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcvtzu, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTA instruction. @@ -10080,7 +10145,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frinta_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frinta, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frinta, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTI instruction. @@ -10095,7 +10160,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frinti_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frinti, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frinti, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTM instruction. @@ -10110,7 +10175,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frintm_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frintm, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frintm, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTN instruction. @@ -10125,7 +10190,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frintn_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frintn, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frintn, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTP instruction. @@ -10140,7 +10205,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_frintp_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frintp, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frintp, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTX instruction. @@ -10155,7 +10220,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frintx_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frintx, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frintx, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRINTZ instruction. @@ -10170,7 +10235,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frintz_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frintz, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frintz, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SCVTF instruction. @@ -10191,7 +10256,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_scvtf_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_scvtf, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_scvtf, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UCVTF instruction. @@ -10212,7 +10277,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_ucvtf_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ucvtf, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ucvtf, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a CTERMEQ instruction. @@ -10257,7 +10322,7 @@ * \param Pv The first source predicate register, P (Predicate). */ #define INSTR_CREATE_pnext_sve(dc, Pdn, Pv) \ - instr_create_1dst_2src(dc, OP_pnext, Pdn, Pv, Pdn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_pnext, Pdn, Pv, Pdn), DR_PRED_MASKED) /** * Creates a FABD instruction. @@ -10272,7 +10337,7 @@ * \param Zm The second source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fabd_sve(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fabd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fabd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FABS instruction. @@ -10287,7 +10352,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fabs_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fabs, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fabs, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FDIV instruction. @@ -10302,7 +10367,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fdiv_sve(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fdiv, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fdiv, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FDIVR instruction. @@ -10317,7 +10382,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fdivr_sve(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fdivr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fdivr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMAD instruction. @@ -10333,7 +10398,7 @@ * \param Za The third source vector register, Z (Scalable). */ #define INSTR_CREATE_fmad_sve(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_fmad, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fmad, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a FMULX instruction. @@ -10348,7 +10413,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmulx_sve(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmulx, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmulx, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FNEG instruction. @@ -10363,7 +10428,7 @@ * \param Zn The source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fneg_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fneg, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fneg, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FNMAD instruction. @@ -10379,7 +10444,7 @@ * \param Za The third source vector register, Z (Scalable). */ #define INSTR_CREATE_fnmad_sve(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_fnmad, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fnmad, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a FNMLA instruction. @@ -10395,7 +10460,7 @@ * \param Zm The third source vector register, Z (Scalable). */ #define INSTR_CREATE_fnmla_sve(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_fnmla, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fnmla, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FNMLS instruction. @@ -10411,7 +10476,7 @@ * \param Zm The third source vector register, Z (Scalable). */ #define INSTR_CREATE_fnmls_sve(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_fnmls, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fnmls, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FNMSB instruction. @@ -10427,7 +10492,7 @@ * \param Za The third source vector register, Z (Scalable). */ #define INSTR_CREATE_fnmsb_sve_pred(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_fnmsb, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fnmsb, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a FRECPE instruction. @@ -10470,7 +10535,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_frecpx_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_frecpx, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_frecpx, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FRSQRTE instruction. @@ -10514,7 +10579,7 @@ * \param Zm The second source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fscale_sve(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fscale, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fscale, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FSQRT instruction. @@ -10529,7 +10594,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_fsqrt_sve(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fsqrt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fsqrt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FADD instruction. @@ -10544,7 +10609,7 @@ * \param imm Floating point constant, either 0.5 or 1.0. */ #define INSTR_CREATE_fadd_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fadd, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fadd, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FADD instruction. @@ -10559,7 +10624,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FADD instruction. @@ -10589,7 +10654,7 @@ * \param imm Floating point constant, either 0.5 or 1.0. */ #define INSTR_CREATE_fsub_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fsub, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fsub, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FSUB instruction. @@ -10604,7 +10669,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fsub_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fsub, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fsub, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FSUB instruction. @@ -10634,7 +10699,7 @@ * \param imm Floating point constant, either 0.5 or 1.0. 
*/ #define INSTR_CREATE_fsubr_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fsubr, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fsubr, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FSUBR instruction. @@ -10649,7 +10714,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fsubr_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fsubr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fsubr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMAX instruction. @@ -10664,7 +10729,7 @@ * \param imm Floating point constant, either 0.0 or 1.0. */ #define INSTR_CREATE_fmax_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fmax, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmax, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FMAX instruction. @@ -10679,7 +10744,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmax_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmax, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmax, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMAXNM instruction. @@ -10694,7 +10759,7 @@ * \param imm Floating point constant, either 0.0 or 1.0. */ #define INSTR_CREATE_fmaxnm_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fmaxnm, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmaxnm, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FMAXNM instruction. @@ -10709,7 +10774,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmaxnm_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmaxnm, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmaxnm, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMIN instruction. @@ -10724,7 +10789,7 @@ * \param imm Floating point constant, either 0.0 or 1.0. 
*/ #define INSTR_CREATE_fmin_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fmin, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmin, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FMIN instruction. @@ -10739,7 +10804,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmin_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmin, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmin, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMINNM instruction. @@ -10754,7 +10819,7 @@ * \param imm Floating point constant, either 0.0 or 1.0. */ #define INSTR_CREATE_fminnm_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fminnm, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fminnm, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FMINNM instruction. @@ -10769,7 +10834,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fminnm_sve_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fminnm, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fminnm, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMLA instruction. @@ -10785,7 +10850,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmla_sve_vector(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_fmla, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fmla, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FMLA instruction. @@ -10819,7 +10884,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmls_sve_vector(dc, Zda, Pg, Zn, Zm) \ - instr_create_1dst_4src(dc, OP_fmls, Zda, Zda, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fmls, Zda, Zda, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a FMLS instruction. @@ -10853,7 +10918,7 @@ * \param Za The third source vector register, Z (Scalable). 
*/ #define INSTR_CREATE_fmsb_sve(dc, Zdn, Pg, Zm, Za) \ - instr_create_1dst_4src(dc, OP_fmsb, Zdn, Zdn, Pg, Zm, Za) + INSTR_PRED(instr_create_1dst_4src(dc, OP_fmsb, Zdn, Zdn, Pg, Zm, Za), DR_PRED_MASKED) /** * Creates a FMUL instruction. @@ -10868,7 +10933,7 @@ * \param imm Floating point constant, either 0.5 or 2.0. */ #define INSTR_CREATE_fmul_sve(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_fmul, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmul, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a FMUL instruction. @@ -10883,7 +10948,7 @@ * \param Zm The second source vector register, Z (Scalable). */ #define INSTR_CREATE_fmul_sve_pred_vector(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmul, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmul, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMUL instruction. @@ -10984,23 +11049,23 @@ * For the [\{, \}] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * DR_EXTEND_UXTX, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, DR_EXTEND_UXTX, 0, OPSZ_1, 0) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, 
opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_1, 0) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ldff1b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1D instruction. @@ -11020,26 +11085,26 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \, LSL #3}] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 3) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_8, 0) * For the [\, \.D, LSL #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_8, 3) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_8, 0) * For the [\, \.D, \ #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) + * 
opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_8, 3) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_8, 0) */ #define INSTR_CREATE_ldff1d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1H instruction. @@ -11064,35 +11129,35 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \, LSL #1}] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 1, 0, 0, OPSZ_2, 1) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\, \.D, LSL #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_2, 1) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 
0, 0, OPSZ_2, 0) * For the [\, \.D, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_2, 1) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_2, 0) * For the [\, \.S, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, true, 0, OPSZ_2, 1) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_2, 0) */ #define INSTR_CREATE_ldff1h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1SB instruction. 
@@ -11114,25 +11179,26 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \}] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, false, 0, 0, OPSZ_1) - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_base_disp_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, false, 0, 0, OPSZ_1) + * For the [\.S{, #\}] variant: + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_1, 0) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ldff1sb_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1SH instruction. 
@@ -11156,35 +11222,35 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \, LSL #1}] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_16, 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 1, 0, 0, OPSZ_2, 1) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\, \.D, LSL #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_2, 1) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) * For the [\, \.D, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_2, 1) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_2, 0) * For the [\, \.S, \ #1] variant: 
- * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, true, 0, OPSZ_2, 1) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_2, 0) */ #define INSTR_CREATE_ldff1sh_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1SW instruction. @@ -11193,12 +11259,12 @@ * \verbatim * LDFF1SW { .D }, /Z, [{, , LSL #2}] * LDFF1SW { .D }, /Z, [.D{, #}] - * LDFF1W { .D }, /Z, [, .D, LSL #2] - * LDFF1W { .D }, /Z, [, .D] - * LDFF1W { .D }, /Z, [, .D, #2] - * LDFF1W { .D }, /Z, [, .D, ] - * LDFF1W { .S }, /Z, [, .S, #2] - * LDFF1W { .S }, /Z, [, .S, ] + * LDFF1SW { .D }, /Z, [, .D, LSL #2] + * LDFF1SW { .D }, /Z, [, .D] + * LDFF1SW { .D }, /Z, [, .D, #2] + * LDFF1SW { .D }, /Z, [, .D, ] + * LDFF1SW { .S }, /Z, [, .S, #2] + * LDFF1SW { .S }, /Z, [, .S, ] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -11206,32 +11272,32 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \, LSL #2}] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_16, 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 1, 0, 0, OPSZ_4, 2) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\, \.D, LSL #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_4, 2) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) * For the [\, \.D, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_4, 2) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_4, 0) * For the [\, \.S, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, true, 0, OPSZ_4, 2) * For the [\, \.S, \] variant: - *
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_4, 0) */ #define INSTR_CREATE_ldff1sw_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDFF1W instruction. @@ -11249,17 +11315,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\{, \, LSL #2}] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 1, 0, 0, OPSZ_4, 2) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) */ #define INSTR_CREATE_ldff1w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldff1w, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldff1w, Zt, Rn, Pg), DR_PRED_MASKED) /** @@ -11275,8 +11341,9 @@ * \param Zm The second source vector register, Z (Scalable). * \param rot The immediate rot, must be 90 or 270. 
*/ -#define INSTR_CREATE_fcadd_sve_pred(dc, Zdn, Pg, Zm, rot) \ - instr_create_1dst_4src(dc, OP_fcadd, Zdn, Pg, Zdn, Zm, rot) +#define INSTR_CREATE_fcadd_sve_pred(dc, Zdn, Pg, Zm, rot) \ + INSTR_PRED(instr_create_1dst_4src(dc, OP_fcadd, Zdn, Pg, Zdn, Zm, rot), \ + DR_PRED_MASKED) /** * Creates a FCMLA instruction. @@ -11292,8 +11359,9 @@ * \param Zm The third source vector register, Z (Scalable). * \param rot The immediate rot, must be 0, 90, 180, or 270. */ -#define INSTR_CREATE_fcmla_sve_vector(dc, Zda, Pg, Zn, Zm, rot) \ - instr_create_1dst_5src(dc, OP_fcmla, Zda, Zda, Pg, Zn, Zm, rot) +#define INSTR_CREATE_fcmla_sve_vector(dc, Zda, Pg, Zn, Zm, rot) \ + INSTR_PRED(instr_create_1dst_5src(dc, OP_fcmla, Zda, Zda, Pg, Zn, Zm, rot), \ + DR_PRED_MASKED) /** * Creates a FCMLA instruction. @@ -11338,38 +11406,27 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) - * For the B element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the H element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the S element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) - * For the D element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, byte_disp, OPSZ_1) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, 
opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm, 0, OPSZ_1, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm, 0, OPSZ_1, 0) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_1, 0) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ld1b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1b, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1b, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1ROB instruction. @@ -11387,7 +11444,7 @@ * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) */ #define INSTR_CREATE_ld1rob_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rob, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rob, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RQB instruction. 
@@ -11404,13 +11461,13 @@ * constructed with the function: * For the [\{, #\}] variant: * opnd_create_base_disp_aarch64( - * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_1) * For the [\, \] variant: * opnd_create_base_disp_shift_aarch64( - * Xn, Xm, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16, 0) + * Xn, Xm, DR_EXTEND_UXTX, false, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ld1rqb_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rqb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rqb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RQH instruction. @@ -11427,13 +11484,13 @@ * constructed with the function: * For the [\{, #\}] variant: * opnd_create_base_disp_aarch64( - * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_2) * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64( - * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 1) + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) */ #define INSTR_CREATE_ld1rqh_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rqh, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rqh, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RQW instruction. @@ -11450,13 +11507,13 @@ * constructed with the function: * For the [\{, #\}] variant: * opnd_create_base_disp_aarch64( - * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_4) * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64( - * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 2) + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) */ #define INSTR_CREATE_ld1rqw_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rqw, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rqw, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1RQD instruction.
@@ -11473,13 +11530,13 @@ * constructed with the function: * For the [\{, #\}] variant: * opnd_create_base_disp_aarch64( - * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_8) * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64( - * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 3) + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) */ #define INSTR_CREATE_ld1rqd_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1rqd, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1rqd, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LD1SB instruction. @@ -11506,33 +11563,26 @@ * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) - * For the H element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the S element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) - * For the D element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, byte_disp, OPSZ_1) * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm, 0, OPSZ_1, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm, 0, OPSZ_1, 0) * For the [\, \.D] variant: - * 
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_1, 0) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ld1sb_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ld1sb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1sb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNT1B instruction. @@ -11550,16 +11600,19 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, opnd_size_from_bytes( - * dr_get_sve_vector_length() / 8)) - * For the [\{, #\, MUL VL}] variant: opnd_create_base_disp(Rn, - * DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the vector+scalar variant: opnd_create_base_disp_aarch64(Zn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) + * opnd_create_base_disp_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) + * For the [\.D{, \}] variant: + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) + * For the [\.S{, \}] variant: + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ldnt1b_sve_pred(dc, Zt, Pg, Rn) \ - 
instr_create_1dst_2src(dc, OP_ldnt1b, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1b, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a ST1B instruction. @@ -11583,26 +11636,25 @@ * For the [\, \] variant: * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * - * opnd_size_to_bytes(Ts)))) For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) + * For the [\.S{, #\}] variant: + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_1, 0) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_1, 0) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_st1b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_st1b, Rn, Zt, Pg) + 
INSTR_PRED(instr_create_1dst_2src(dc, OP_st1b, Rn, Zt, Pg), DR_PRED_MASKED) /** * Creates a STNT1B instruction. @@ -11620,26 +11672,18 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(dr_get_sve_vector_length() / - * 8)) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) * For the [\.D{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 8), - * 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) * For the [\.S{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 4), - * 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_stnt1b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_stnt1b, Rn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1b, Rn, Zt, Pg), DR_PRED_MASKED) /** * Creates a BFCVT instruction. @@ -11654,7 +11698,7 @@ * \param Zn The source vector register, Z (Scalable). */ #define INSTR_CREATE_bfcvt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_bfcvt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_bfcvt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a BFDOT instruction. 
@@ -11896,7 +11940,7 @@ * DR_EXTEND_UXTX, false, 0, 0, OPSZ_0, 0) */ #define INSTR_CREATE_prfb_sve_pred(dc, prfop, Pg, Rn) \ - instr_create_0dst_3src(dc, OP_prfb, prfop, Pg, Rn) + INSTR_PRED(instr_create_0dst_3src(dc, OP_prfb, prfop, Pg, Rn), DR_PRED_MASKED) /** * Creates a PRFD instruction. @@ -11938,7 +11982,7 @@ * DR_EXTEND_UXTX, true, 0, 0, OPSZ_0, 3) */ #define INSTR_CREATE_prfd_sve_pred(dc, prfop, Pg, Rn) \ - instr_create_0dst_3src(dc, OP_prfd, prfop, Pg, Rn) + INSTR_PRED(instr_create_0dst_3src(dc, OP_prfd, prfop, Pg, Rn), DR_PRED_MASKED) /** * Creates a PRFH instruction. @@ -11980,7 +12024,7 @@ * DR_EXTEND_UXTX, true, 0, 0, OPSZ_0, 1) */ #define INSTR_CREATE_prfh_sve_pred(dc, prfop, Pg, Rn) \ - instr_create_0dst_3src(dc, OP_prfh, prfop, Pg, Rn) + INSTR_PRED(instr_create_0dst_3src(dc, OP_prfh, prfop, Pg, Rn), DR_PRED_MASKED) /** * Creates a PRFW instruction. @@ -12022,7 +12066,7 @@ * DR_EXTEND_UXTX, true, 0, 0, OPSZ_0, 2) */ #define INSTR_CREATE_prfw_sve_pred(dc, prfop, Pg, Rn) \ - instr_create_0dst_3src(dc, OP_prfw, prfop, Pg, Rn) + INSTR_PRED(instr_create_0dst_3src(dc, OP_prfw, prfop, Pg, Rn), DR_PRED_MASKED) /** * Creates an ADR instruction. @@ -12060,14 +12104,16 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) + * + * The Zt2 parameter is derived from Zt. 
*/ -#define INSTR_CREATE_ld2b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_2dst_2src(dc, OP_ld2b, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) +#define INSTR_CREATE_ld2b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_2dst_2src(dc, OP_ld2b, Zt, opnd_create_increment_reg(Zt, 1), \ + Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD3B instruction. @@ -12083,15 +12129,16 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_1) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_ld3b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_3dst_2src(dc, OP_ld3b, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Rn, Pg) +#define INSTR_CREATE_ld3b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_3dst_2src(dc, OP_ld3b, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD4B instruction. 
@@ -12107,16 +12154,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_ld4b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_4dst_2src(dc, OP_ld4b, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Rn, Pg) +#define INSTR_CREATE_ld4b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_4dst_2src(dc, OP_ld4b, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a ST2B instruction. @@ -12132,14 +12180,16 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_1) + * + * The Zt2 parameter is derived from Zt. 
*/ -#define INSTR_CREATE_st2b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_3src(dc, OP_st2b, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) +#define INSTR_CREATE_st2b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_st2b, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), Pg), \ + DR_PRED_MASKED) /** * Creates a ST3B instruction. @@ -12155,15 +12205,17 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_st3b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_4src(dc, OP_st3b, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Pg) +#define INSTR_CREATE_st3b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_4src(dc, OP_st3b, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Pg), \ + DR_PRED_MASKED) /** * Creates a ST4B instruction. 
@@ -12179,16 +12231,18 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \] variant: - * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_1) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_st4b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_5src(dc, OP_st4b, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Pg) +#define INSTR_CREATE_st4b_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_5src(dc, OP_st4b, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Pg), \ + DR_PRED_MASKED) /** * Creates a LD1H instruction. 
@@ -12216,45 +12270,37 @@ * \param Zn The first source vector base register with an immediate offset, * constructed with the function: * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\, \.D, LSL #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_2, 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * 0, 0, OPSZ_2, 0) * For the [\, \.D, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_2, 1) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_2, 0) * For the [\, \.S, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, true, 0, OPSZ_2, 1) * For the [\, \.S, \] variant: - * 
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_2, 0) * For the [\, \, LSL #1] variants: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * /8/16/32), 1) - * For the H element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the S element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the D element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) + * opnd_create_base_disp_shift_aarch64( + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) */ #define INSTR_CREATE_ld1h_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ld1h, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1h, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a LD1SH instruction. 
@@ -12280,42 +12326,37 @@ * \param Zn The first source vector base register with an immediate offset, * constructed with the function: * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\, \.D, LSL #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_2, 1) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) * For the [\, \.D, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_2, 1) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_2, 0) * For the [\, \.S, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 
true, 0, OPSZ_2, 1) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_2, 0) * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 16/32), 1) depending on Zt's element size. - * For the S element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the D element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) */ #define INSTR_CREATE_ld1sh_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ld1sh, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1sh, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a LD1W instruction. 
@@ -12341,42 +12382,39 @@ * \param Zn The first source vector base register with an immediate offset, * constructed with the function: * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\, \.D, LSL #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_4, 2) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) * For the [\, \.D, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_4, 2) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_4, 0) * For the [\, \.S, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 
true, 0, OPSZ_4, 2) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_4, 0) * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8/16), 2) depending on Zt's element size. - * For the S element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the D element size [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) */ -#define INSTR_CREATE_ld1w_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ld1w, Zt, Zn, Pg) +#define INSTR_CREATE_ld1w_sve_pred(dc, Zt, Pg, Zn) \ + INSTR_PRED( \ + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1w, Zt, Zn, Pg), DR_PRED_MASKED), \ + DR_PRED_MASKED) /** * Creates a LD1D instruction. 
@@ -12397,29 +12435,28 @@ * \param Zn The first source vector base register with an immediate offset, * constructed with the function: * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_8, 0) * For the [\, \.D, LSL #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_8, 3) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_8, 0) * For the [\, \.D, \ #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_8, 3) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_8, 0) * For the variant \, \, LSL #3]: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, - * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_8) */ #define INSTR_CREATE_ld1d_sve_pred(dc, Zt, Pg, Zn) \ - 
instr_create_1dst_2src(dc, OP_ld1d, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1d, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a LD1SW instruction. @@ -12436,18 +12473,16 @@ * \param Zn The first source vector base register with an immediate offset, * constructed with the function: * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 16), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) */ #define INSTR_CREATE_ld1sw_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ld1sw, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ld1sw, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a ST1H instruction. 
@@ -12471,39 +12506,37 @@ * \param Zn The second source vector base register with an immediate offset, * constructed with the function: * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_2, 0) * For the [\, \.D, LSL #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_2, 1) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) * For the [\, \.D, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_2, 1) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_2, 0) * For the [\, \.S, \ #1] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, 
extend, true, 0, OPSZ_2, 1) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_2, 0) * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * /8/16/32), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / opnd_size_to_bytes(Ts))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) */ #define INSTR_CREATE_st1h_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_st1h, Zn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_st1h, Zn, Zt, Pg), DR_PRED_MASKED) /** * Creates a ST1W instruction. @@ -12527,40 +12560,37 @@ * \param Zn The second source vector base register with an immediate offset, * constructed with the function: * For the [\.S{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_4, 0) * For the [\, \.D, LSL #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_4, 2) * For the [\, \.D] 
variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) * For the [\, \.D, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_4, 2) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_4, 0) * For the [\, \.S, \ #2] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, true, 0, OPSZ_4, 2) * For the [\, \.S, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_4, extend, 0, 0, OPSZ_4, 0) * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8/16), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * - * opnd_size_to_bytes(Ts)))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) */ #define INSTR_CREATE_st1w_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_st1w, Zn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_st1w, Zn, Zt, Pg), DR_PRED_MASKED) /** * Creates a ST1D instruction. 
@@ -12581,29 +12611,28 @@ * \param Zn The second source vector base register with an immediate offset, * constructed with the function: * For the [\.D{, #\}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, - * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5, 0, OPSZ_8, 0) * For the [\, \.D, LSL #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, true, 0, OPSZ_8, 3) * For the [\, \.D] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_8, 0) * For the [\, \.D, \ #3] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, true, 0, OPSZ_8, 3) * For the [\, \.D, \] variant: - * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) + * opnd_create_vector_base_disp_aarch64( + * Xn, Zm, OPSZ_8, extend, 0, 0, OPSZ_8, 0) * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 3) For the [\{, #\, MUL VL}] variant: opnd_create_base_disp(Rn, - * DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * - * opnd_size_to_bytes(Ts)))) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) + * For the [\{, #\, MUL VL}] variant: + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, 
OPSZ_8) */ #define INSTR_CREATE_st1d_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_st1d, Zn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_st1d, Zn, Zt, Pg), DR_PRED_MASKED) /** * Creates a LD2D instruction. @@ -12619,14 +12648,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2 parameter is derived from Zt. */ -#define INSTR_CREATE_ld2d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_2dst_2src(dc, OP_ld2d, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) +#define INSTR_CREATE_ld2d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_2dst_2src(dc, OP_ld2d, Zt, opnd_create_increment_reg(Zt, 1), \ + Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD2H instruction. @@ -12642,14 +12674,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2 parameter is derived from Zt. 
*/ -#define INSTR_CREATE_ld2h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_2dst_2src(dc, OP_ld2h, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) +#define INSTR_CREATE_ld2h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_2dst_2src(dc, OP_ld2h, Zt, opnd_create_increment_reg(Zt, 1), \ + Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD2W instruction. @@ -12665,15 +12700,15 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 4)) - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) */ -#define INSTR_CREATE_ld2w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_2dst_2src(dc, OP_ld2w, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) +#define INSTR_CREATE_ld2w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_2dst_2src(dc, OP_ld2w, Zt, opnd_create_increment_reg(Zt, 1), \ + Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD3D instruction. 
@@ -12689,15 +12724,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_ld3d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_3dst_2src(dc, OP_ld3d, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Rn, Pg) +#define INSTR_CREATE_ld3d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_3dst_2src(dc, OP_ld3d, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD3H instruction. @@ -12713,15 +12750,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2 and Zt3 parameters are derived from Zt. 
*/ -#define INSTR_CREATE_ld3h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_3dst_2src(dc, OP_ld3h, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Rn, Pg) +#define INSTR_CREATE_ld3h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_3dst_2src(dc, OP_ld3h, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD3W instruction. @@ -12737,15 +12776,17 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_ld3w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_3dst_2src(dc, OP_ld3w, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Rn, Pg) +#define INSTR_CREATE_ld3w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_3dst_2src(dc, OP_ld3w, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD4D instruction. 
@@ -12761,16 +12802,18 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_ld4d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_4dst_2src(dc, OP_ld4d, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Rn, Pg) +#define INSTR_CREATE_ld4d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_4dst_2src(dc, OP_ld4d, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD4H instruction. @@ -12786,16 +12829,18 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. 
*/ -#define INSTR_CREATE_ld4h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_4dst_2src(dc, OP_ld4h, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Rn, Pg) +#define INSTR_CREATE_ld4h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_4dst_2src(dc, OP_ld4h, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LD4W instruction. @@ -12811,16 +12856,18 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_ld4w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_4dst_2src(dc, OP_ld4w, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Rn, Pg) +#define INSTR_CREATE_ld4w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_4dst_2src(dc, OP_ld4w, Zt, opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Rn, Pg), \ + DR_PRED_MASKED) /** * Creates a LDNT1D instruction. 
@@ -12837,19 +12884,16 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_8) * For the [\.D{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes()), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_8, 0) */ #define INSTR_CREATE_ldnt1d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnt1d, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1d, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNT1H instruction. 
@@ -12867,24 +12911,19 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) * For the [\.D{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 4), - * 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) * For the [\.S{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) */ #define INSTR_CREATE_ldnt1h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnt1h, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1h, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNT1W instruction. 
@@ -12902,22 +12941,19 @@ * \param Rn The first source base register with a register offset, * constructed with the function: * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) * For the [\.D{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) * For the [\.S{, \}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, - * 0, opnd_size_from_bytes(proc_get_vector_length_bytes()), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) */ #define INSTR_CREATE_ldnt1w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnt1w, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1w, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a ST2D instruction. 
@@ -12933,14 +12969,17 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2 parameter is derived from Zt. */ -#define INSTR_CREATE_st2d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_3src(dc, OP_st2d, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) +#define INSTR_CREATE_st2d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_st2d, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), Pg), \ + DR_PRED_MASKED) /** * Creates a ST2H instruction. @@ -12956,14 +12995,17 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2 parameter is derived from Zt. 
*/ -#define INSTR_CREATE_st2h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_3src(dc, OP_st2h, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) +#define INSTR_CREATE_st2h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_st2h, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), Pg), \ + DR_PRED_MASKED) /** * Creates a ST2W instruction. @@ -12979,14 +13021,17 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\, \, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) + * + * The Zt2 parameter is derived from Zt. */ -#define INSTR_CREATE_st2w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_3src(dc, OP_st2w, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) +#define INSTR_CREATE_st2w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_3src(dc, OP_st2w, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), Pg), \ + DR_PRED_MASKED) /** * Creates a ST3D instruction. 
@@ -13002,15 +13047,18 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_st3d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_4src(dc, OP_st3d, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Pg) +#define INSTR_CREATE_st3d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_4src(dc, OP_st3d, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Pg), \ + DR_PRED_MASKED) /** * Creates a ST3H instruction. @@ -13026,15 +13074,18 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2 and Zt3 parameters are derived from Zt.
*/ -#define INSTR_CREATE_st3h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_4src(dc, OP_st3h, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Pg) +#define INSTR_CREATE_st3h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_4src(dc, OP_st3h, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Pg), \ + DR_PRED_MASKED) /** * Creates a ST3W instruction. @@ -13050,15 +13101,18 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) + * + * The Zt2 and Zt3 parameters are derived from Zt. */ -#define INSTR_CREATE_st3w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_4src(dc, OP_st3w, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), Pg) +#define INSTR_CREATE_st3w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_4src(dc, OP_st3w, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), Pg), \ + DR_PRED_MASKED) /** * Creates a ST4D instruction.
@@ -13074,16 +13128,19 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 3) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_8) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_st4d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_5src(dc, OP_st4d, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Pg) +#define INSTR_CREATE_st4d_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_5src(dc, OP_st4d, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Pg), \ + DR_PRED_MASKED) /** * Creates a ST4H instruction. @@ -13099,16 +13156,19 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_2) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt.
*/ -#define INSTR_CREATE_st4h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_5src(dc, OP_st4h, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Pg) +#define INSTR_CREATE_st4h_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_5src(dc, OP_st4h, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Pg), \ + DR_PRED_MASKED) /** * Creates a ST4W instruction. @@ -13124,16 +13184,19 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, OPSZ_4) + * + * The Zt2, Zt3 and Zt4 parameters are derived from Zt. */ -#define INSTR_CREATE_st4w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_5src(dc, OP_st4w, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ - opnd_create_increment_reg(Zt, 2), \ - opnd_create_increment_reg(Zt, 3), Pg) +#define INSTR_CREATE_st4w_sve_pred(dc, Zt, Pg, Rn) \ + INSTR_PRED(instr_create_1dst_5src(dc, OP_st4w, Rn, Zt, \ + opnd_create_increment_reg(Zt, 1), \ + opnd_create_increment_reg(Zt, 2), \ + opnd_create_increment_reg(Zt, 3), Pg), \ + DR_PRED_MASKED) /** * Creates a STNT1D instruction.
@@ -13150,20 +13213,16 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #3] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 3) - * For the [\<Xn|SP\>, \<Xm\>, LSL #3] variant: + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_8, 3) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_8) * For the [\<Zn\>.D{, \<Xm\>}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes()), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_8, 0) */ #define INSTR_CREATE_stnt1d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_stnt1d, Rn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1d, Rn, Zt, Pg), DR_PRED_MASKED) /** * Creates a STNT1H instruction.
@@ -13181,23 +13240,19 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #1] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 1) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_2, 1) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) * For the [\<Zn\>.D{, \<Xm\>}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 4), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) * For the [\<Zn\>.S{, \<Xm\>}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) */ #define INSTR_CREATE_stnt1h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_stnt1h, Rn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1h, Rn, Zt, Pg), DR_PRED_MASKED) /** * Creates a STNT1W instruction.
@@ -13215,23 +13270,19 @@ * \param Rn The second source base register with a register offset, * constructed with the function: * For the [\<Xn|SP\>, \<Xm\>, LSL #2] variant: - * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() - * / 8), 2) + * opnd_create_base_disp_shift_aarch64( + * Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_4, 2) * For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) * For the [\<Zn\>.D{, \<Xm\>}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) * For the [\<Zn\>.S{, \<Xm\>}] variant: - * opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, - * DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(proc_get_vector_length_bytes()), 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) */ #define INSTR_CREATE_stnt1w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_stnt1w, Rn, Zt, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1w, Rn, Zt, Pg), DR_PRED_MASKED) /** * Creates a LDNF1B instruction. @@ -13248,21 +13299,10 @@ * \param Pg The governing predicate register, P (Predicate).
* \param Rn The first source base register with an immediate offset, * constructed with the function: - * For the B element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the H element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the S element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) - * For the D element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) */ #define INSTR_CREATE_ldnf1b_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1b, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1b, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1D instruction. @@ -13276,11 +13316,10 @@ * \param Pg The governing predicate register, P (Predicate). * \param Rn The first source base register with an immediate offset, * constructed with the function: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_8) */ #define INSTR_CREATE_ldnf1d_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1d, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1d, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1H instruction. @@ -13296,18 +13335,10 @@ * \param Pg The governing predicate register, P (Predicate). 
* \param Rn The first source base register with an immediate offset, * constructed with the function: - * For the H element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the S element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the D element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) */ #define INSTR_CREATE_ldnf1h_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1h, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1h, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1SB instruction. @@ -13323,18 +13354,10 @@ * \param Pg The governing predicate register, P (Predicate). * \param Rn The first source base register with an immediate offset, * constructed with the function: - * For the H element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the S element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) - * For the D element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_1) */ #define INSTR_CREATE_ldnf1sb_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1sb, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1sb, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1SH instruction. @@ -13349,15 +13372,10 @@ * \param Pg The governing predicate register, P (Predicate). 
* \param Rn The first source base register with an immediate offset, * constructed with the function: - * For the S element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) - * For the D element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_2) */ #define INSTR_CREATE_ldnf1sh_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1sh, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1sh, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1SW instruction. @@ -13371,11 +13389,10 @@ * \param Pg The governing predicate register, P (Predicate). * \param Rn The first source base register with an immediate offset, * constructed with the function: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) */ #define INSTR_CREATE_ldnf1sw_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1sw, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1sw, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDNF1W instruction. @@ -13390,15 +13407,10 @@ * \param Pg The governing predicate register, P (Predicate). 
* \param Rn The first source base register with an immediate offset, * constructed with the function: - * For the S element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) - * For the D element size variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) + * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, OPSZ_4) */ #define INSTR_CREATE_ldnf1w_sve_pred(dc, Zt, Pg, Rn) \ - instr_create_1dst_2src(dc, OP_ldnf1w, Zt, Rn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnf1w, Zt, Rn, Pg), DR_PRED_MASKED) /** * Creates a LDAPUR instruction. @@ -14334,7 +14346,7 @@ * \param Zn The second source vector register, Z (Scalable). */ #define INSTR_CREATE_bfcvtnt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_bfcvtnt, Zd, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_bfcvtnt, Zd, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an AESD instruction. @@ -16207,7 +16219,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_addp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_addp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_addp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FADDP instruction. @@ -16223,7 +16235,7 @@ * \param Zm The second source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_faddp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_faddp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_faddp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMAXNMP instruction. @@ -16239,7 +16251,7 @@ * \param Zm The second source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_fmaxnmp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmaxnmp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmaxnmp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMAXP instruction. 
@@ -16255,7 +16267,7 @@ * \param Zm The second source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_fmaxp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fmaxp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fmaxp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMINNMP instruction. @@ -16271,7 +16283,7 @@ * \param Zm The second source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_fminnmp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fminnmp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fminnmp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FMINP instruction. @@ -16287,7 +16299,7 @@ * \param Zm The second source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_fminp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_fminp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fminp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a HISTCNT instruction. @@ -16303,7 +16315,7 @@ * \param Zm The second source vector register. Can be Z.s or Z.d. */ #define INSTR_CREATE_histcnt_sve_pred(dc, Zd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_histcnt, Zd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_histcnt, Zd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a SHADD instruction. @@ -16319,7 +16331,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_shadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_shadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_shadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SHSUB instruction. @@ -16335,7 +16347,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. 
*/ #define INSTR_CREATE_shsub_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_shsub, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_shsub, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SHSUBR instruction. @@ -16351,7 +16363,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_shsubr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_shsubr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_shsubr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SMAXP instruction. @@ -16367,7 +16379,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_smaxp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_smaxp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_smaxp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SMINP instruction. @@ -16383,7 +16395,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sminp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sminp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sminp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SQRSHL instruction. @@ -16399,7 +16411,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqrshl_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sqrshl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqrshl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SQRSHLR instruction. @@ -16415,7 +16427,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqrshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sqrshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqrshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SQSHL instruction. 
@@ -16433,7 +16445,7 @@ * or an immediate */ #define INSTR_CREATE_sqshl_sve_pred(dc, Zdn, Pg, Zm_imm) \ - instr_create_1dst_3src(dc, OP_sqshl, Zdn, Pg, Zdn, Zm_imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqshl, Zdn, Pg, Zdn, Zm_imm), DR_PRED_MASKED) /** * Creates a SQSHLR instruction. @@ -16449,7 +16461,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sqshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SQSUBR instruction. @@ -16465,7 +16477,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqsubr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_sqsubr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqsubr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SRHADD instruction. @@ -16481,7 +16493,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_srhadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_srhadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_srhadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SRSHL instruction. @@ -16497,7 +16509,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_srshl_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_srshl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_srshl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SRSHLR instruction. @@ -16513,7 +16525,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_srshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_srshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_srshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a SUQADD instruction. 
@@ -16529,7 +16541,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_suqadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_suqadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_suqadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UHADD instruction. @@ -16545,7 +16557,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uhadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uhadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uhadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UHSUB instruction. @@ -16561,7 +16573,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uhsub_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uhsub, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uhsub, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UHSUBR instruction. @@ -16577,7 +16589,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uhsubr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uhsubr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uhsubr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMAXP instruction. @@ -16593,7 +16605,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_umaxp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_umaxp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_umaxp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UMINP instruction. @@ -16609,7 +16621,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. 
*/ #define INSTR_CREATE_uminp_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uminp, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uminp, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UQRSHL instruction. @@ -16625,7 +16637,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uqrshl_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uqrshl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uqrshl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UQRSHLR instruction. @@ -16641,7 +16653,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uqrshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uqrshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uqrshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UQSHL instruction. @@ -16659,7 +16671,7 @@ * or can be an immediate. */ #define INSTR_CREATE_uqshl_sve_pred(dc, Zdn, Pg, Zm_imm) \ - instr_create_1dst_3src(dc, OP_uqshl, Zdn, Pg, Zdn, Zm_imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uqshl, Zdn, Pg, Zdn, Zm_imm), DR_PRED_MASKED) /** * Creates an UQSHLR instruction. @@ -16675,7 +16687,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uqshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uqshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uqshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an UQSUBR instruction. @@ -16691,7 +16703,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_uqsubr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_uqsubr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uqsubr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an URHADD instruction. @@ -16707,7 +16719,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. 
*/ #define INSTR_CREATE_urhadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_urhadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_urhadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an URSHL instruction. @@ -16723,7 +16735,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_urshl_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_urshl, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_urshl, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an URSHLR instruction. @@ -16739,7 +16751,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_urshlr_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_urshlr, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_urshlr, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates an USQADD instruction. @@ -16755,7 +16767,7 @@ * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_usqadd_sve_pred(dc, Zdn, Pg, Zm) \ - instr_create_1dst_3src(dc, OP_usqadd, Zdn, Pg, Zdn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_usqadd, Zdn, Pg, Zdn, Zm), DR_PRED_MASKED) /** * Creates a FCVTLT instruction. @@ -16771,7 +16783,7 @@ * \param Zn The source vector register. Can be Z.h or Z.s. */ #define INSTR_CREATE_fcvtlt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fcvtlt, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcvtlt, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCVTNT instruction. @@ -16787,7 +16799,7 @@ * \param Zn The second source vector register. Can be Z.d or Z.s. */ #define INSTR_CREATE_fcvtnt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcvtnt, Zd, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcvtnt, Zd, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCVTX instruction. @@ -16802,7 +16814,7 @@ * \param Zn The source vector register, Z.d. 
*/ #define INSTR_CREATE_fcvtx_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_fcvtx, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_fcvtx, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FCVTXNT instruction. @@ -16817,7 +16829,7 @@ * \param Zn The second source vector register, Z.d. */ #define INSTR_CREATE_fcvtxnt_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_fcvtxnt, Zd, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_fcvtxnt, Zd, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a FLOGB instruction. @@ -16832,7 +16844,7 @@ * \param Zn The source vector register. Can be Z.h, Z.s or Z.d. */ #define INSTR_CREATE_flogb_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_flogb, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_flogb, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SADALP instruction. @@ -16848,7 +16860,7 @@ * \param Zn The second source vector register. Can be Z.b, Z.h or Z.s. */ #define INSTR_CREATE_sadalp_sve_pred(dc, Zda, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_sadalp, Zda, Zda, Pg, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sadalp, Zda, Zda, Pg, Zn), DR_PRED_MASKED) /** * Creates a SQABS instruction. @@ -16863,7 +16875,7 @@ * \param Zn The source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqabs_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sqabs, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sqabs, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a SQNEG instruction. @@ -16878,7 +16890,7 @@ * \param Zn The source vector register. Can be Z.b, Z.h, Z.s or Z.d. */ #define INSTR_CREATE_sqneg_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_sqneg, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_sqneg, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an UADALP instruction. @@ -16894,7 +16906,7 @@ * \param Zn The second source vector register. Can be Z.b, Z.h or Z.s. 
*/ #define INSTR_CREATE_uadalp_sve_pred(dc, Zda, Pg, Zn) \ - instr_create_1dst_3src(dc, OP_uadalp, Zda, Zda, Pg, Zn) + INSTR_PRED(instr_create_1dst_3src(dc, OP_uadalp, Zda, Zda, Pg, Zn), DR_PRED_MASKED) /** * Creates a CADD instruction. @@ -17132,7 +17144,7 @@ * \param imm The immediate imm. */ #define INSTR_CREATE_sqshlu_sve_pred(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_sqshlu, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_sqshlu, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a SQSHRNB instruction. @@ -17226,7 +17238,7 @@ * \param imm The immediate imm1. */ #define INSTR_CREATE_srshr_sve_pred(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_srshr, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_srshr, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates a SRSRA instruction. @@ -17366,7 +17378,7 @@ * \param imm The immediate imm1. */ #define INSTR_CREATE_urshr_sve_pred(dc, Zdn, Pg, imm) \ - instr_create_1dst_3src(dc, OP_urshr, Zdn, Pg, Zdn, imm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_urshr, Zdn, Pg, Zdn, imm), DR_PRED_MASKED) /** * Creates an URSRA instruction. @@ -17459,11 +17471,13 @@ * \param Pg The governing predicate register, P (Predicate). * \param Zn The first source vector base register with a register offset, * constructed with the function: - * opnd_create_vector_base_disp_aarch64(Zn, Rm, - * OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0) + * For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) + * For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_1, 0) */ #define INSTR_CREATE_ldnt1sb_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ldnt1sb, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sb, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a LDNT1SH instruction. @@ -17478,11 +17492,13 @@ * \param Pg The governing predicate register, P (Predicate).
* \param Zn The first source vector base register with a register offset, * constructed with the function: - * opnd_create_vector_base_disp_aarch64(Zn, Rm, - * OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8, 0) + * For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) + * For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, OPSZ_2, 0) */ #define INSTR_CREATE_ldnt1sh_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ldnt1sh, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sh, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates a LDNT1SW instruction. @@ -17496,11 +17512,11 @@ * \param Pg The governing predicate register, P (Predicate). * \param Zn The first source vector base register with a register offset, * constructed with the function: - * opnd_create_vector_base_disp_aarch64(Zn, Rm, - * OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16, 0) + * opnd_create_vector_base_disp_aarch64( + * Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, OPSZ_4, 0) */ #define INSTR_CREATE_ldnt1sw_sve_pred(dc, Zt, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ldnt1sw, Zt, Zn, Pg) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sw, Zt, Zn, Pg), DR_PRED_MASKED) /** * Creates an UZP1 instruction. @@ -17586,7 +17602,7 @@ * \param Zm The second source vector register. Can be Z.b or Z.h. */ #define INSTR_CREATE_match_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_match, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_match, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates a NMATCH instruction. @@ -17602,7 +17618,7 @@ * \param Zm The second source vector register. Can be Z.b or Z.h. */ #define INSTR_CREATE_nmatch_sve_pred(dc, Pd, Pg, Zn, Zm) \ - instr_create_1dst_3src(dc, OP_nmatch, Pd, Pg, Zn, Zm) + INSTR_PRED(instr_create_1dst_3src(dc, OP_nmatch, Pd, Pg, Zn, Zm), DR_PRED_MASKED) /** * Creates an URECPE instruction.
@@ -17617,7 +17633,7 @@ * \param Zn The source vector register, Z.s. */ #define INSTR_CREATE_urecpe_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_urecpe, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_urecpe, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates an URSQRTE instruction. @@ -17632,7 +17648,7 @@ * \param Zn The source vector register, Z.s. */ #define INSTR_CREATE_ursqrte_sve_pred(dc, Zd, Pg, Zn) \ - instr_create_1dst_2src(dc, OP_ursqrte, Zd, Pg, Zn) + INSTR_PRED(instr_create_1dst_2src(dc, OP_ursqrte, Zd, Pg, Zn), DR_PRED_MASKED) /** * Creates a WHILEGE instruction. diff --git a/core/ir/aarch64/opnd_defs.txt b/core/ir/aarch64/opnd_defs.txt index 89f355aae08..089f8944d6d 100644 --- a/core/ir/aarch64/opnd_defs.txt +++ b/core/ir/aarch64/opnd_defs.txt @@ -108,6 +108,7 @@ --------------------------x----- fpimm1_half_one_5 # 1 bit floating-point index, represents 0.5 or 1.0 --------------------------x----- fpimm1_zero_one_5 # 1 bit floating-point index, represents 0.0 or 1.0 --------------------------x----- fpimm1_half_two_5 # 1 bit floating-point index, represents 0.5 or 2.0 +------------------------xx------ imm2_6 # 2 bit immediate from 6-7 ------------------------xxx----- op2 # 3 bit immediate from 5-7 -----------------------xxxx----- p_b_5 # P register with a byte element size -----------------------xxxx----- p5 # P register @@ -212,7 +213,6 @@ -----------xxxxx---------------- x16p1 # ... 
add 1 -----------xxxxx---------------- d16 # D register -----------xxxxx---------------- q16 # Q register ------------xxxxx---------------- z16 # Z register -----------xxxxx---------------- z_b_16 # Z register with b size elements -----------xxxxx---------------- z_h_16 # Z register with h size elements -----------xxxxx---------------- z_s_16 # Z register with h size elements @@ -221,7 +221,6 @@ -----------xxxxx---------------- b16 # B register -----------xxxxx---------------- h16 # H register -----------xxxxx---------------- s16 # S register ------------xxxxx------xxxxx----- svemem_gprs_b1 # memory reg from Rm and Rn fields transferring 1 bytes per element -----------xxxxx---xxx---------- imm8_10 # 8 bit imm at pos 10, split across 20:16 and 12:10 -----------xxxxx-??---xxxxx----- sveprf_gpr_vec64 # SVE prefetch memory address (64-bit offset) [, .D{, }] -----------xxxxxx--------------- imm6_15 # 6 bit immediate from 20:15 @@ -252,10 +251,6 @@ ----------xxxxxxxxxxxx---------- imm12 # immediate for ADD/SUB ----------xxxxxxxxxxxxxxxxx----- mem12q # size is 16 bytes ----------xxxxxxxxxxxxxxxxx----- prf12 # size is 0 bytes (prefetch variant of mem12) ----------??-xxxx------xxxxx----- svemem_ssz_gpr_simm4 # SVE memory operand [{, #}], - # gets memory size from bits 22:21 ----------??-xxxx------xxxxx----- svemem_gpr_simm4_vl_xreg # SVE memory operand [{, #, MUL VL}] - # multiple src/dest registers or single non-temporals ---------????------------------- hsd_immh_sz # encoding of vector element size in immh field ---------????------------------- bhsd_immh_sz # encoding of vector element size in immh field ---------????--------------xxxxx hsd_immh_reg0 # hsd register, depending on immh field @@ -326,6 +321,7 @@ --------xx------------xxxxx----- p_size_bhsd_5 # sve predicate vector reg, elsz depending on size --------xx------------xxxxx----- p_size_hsd_5 # sve predicate vector reg, elsz depending on size --------xx------------xxxxx----- z_size_bhsd_5 # sve vector reg, elsz 
depending on size +--------xx------------xxxxx----- z_size_bhsd_5p1 # sve vector reg, elsz depending on size, plus 1 --------xx------------xxxxx----- z_size_bhs_5 # sve vector reg, elsz depending on size --------xx------------xxxxx----- z_size_bh_5 # sve vector reg, elsz depending on size --------xx------------xxxxx----- z_sizep1_bhs_5 # sve vector reg, elsz depending on size, plus 1 @@ -354,16 +350,17 @@ -------??--xxxxx------xxxxx----- svemem_vec_s_imm5 # SVE memory address [.S{, #}] -------??--xxxxx------xxxxx----- svemem_vec_d_imm5 # SVE memory address [.D{, #}] -------??--xxxxx------xxxxx----- sveprf_gpr_shf # SVE memory address [, , LSL #x] for prefetch operation +-------??--xxxxx------xxxxx----- svemem_gpr_shf # SVE memory address [, , LSL #x] +-------??--xxxxx------xxxxx----- svemem_gpr_shf_signed # SVE memory address [, , LSL #x] for signed load operations -------??-?xxxxx------xxxxx----- svemem_gpr_vec64 # SVE memory address (64-bit offset) [, .D{, }] -------??-xxxxxxx-----xxxxx----- mem7_tag # Write bytes is fixed at 16bytes, post/pre/offset is in 24:23, with memory tag scaling -------???-xxxxx------xxxxx----- svemem_vec_22sd_gpr16 # SVE memort operand [.S/D{, }] -------????-xxxx------xxxxx----- svemem_gpr_simm4_vl_1reg # SVE memory operand [{, #, MUL VL}] # 1 src/dest register --------????xxxxx------xxxxx----- svemem_ssz_gpr_shf # SVE memory operand [, , LSL #x] --------????xxxxx------xxxxx----- svemem_msz_gpr_shf # SVE memory address [, , LSL #x] --------????xxxxx------xxxxx----- svemem_msz_stgpr_shf # SVE memory address [, , LSL #x] --------????xxxxx------xxxxx----- svemem_gpr_shf # GPR offset and base reg for SVE ld/st, with optional shift --------????xxxxx------xxxxx----- svemem_gprs_bhsdx # memory reg from Rm and Rn fields transferring x bytes per element +-------????-xxxx------xxxxx----- svemem_gpr_simm4_vl_xreg # SVE memory operand [{, #, MUL VL}] + # multiple src/dest registers or single non-temporals +-------????-xxxx------xxxxx----- 
svemem_ssz_gpr_simm4 # SVE memory operand [{, #}], + # replicating loads -------????xxxxx-x----xxxxx----- svemem_gpr_vec32_st # SVE memory address (32-bit offset) [, ., ] -------xx------------------xxxxx z_msz_bhsd_0 # z register with element size determined by msz -------xx------------------xxxxx z_msz_bhsd_0p1 # z register with element size determined by msz, plus 1 diff --git a/core/ir/arm/instr.c b/core/ir/arm/instr.c index 7097f19045e..08c7e90014f 100644 --- a/core/ir/arm/instr.c +++ b/core/ir/arm/instr.c @@ -32,28 +32,24 @@ #include "../globals.h" #include "instr.h" +#include "encode_api.h" #include "decode.h" -/* FIXME i#1551: add A64 and Thumb support throughout */ - +/* XXX i#6690: currently only A32 and Thumb is supported for instruction encoding. + * We want to add support for A32 and Thumb decoding and synthetic ISA encoding as well. + * XXX i#1684: move this function to core/ir/instr_shared.c once we can support + * all architectures in the same build of DR. + */ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - if (mode == DR_ISA_ARM_THUMB) - instr->flags |= INSTR_THUMB_MODE; - else if (mode == DR_ISA_ARM_A32) - instr->flags &= ~INSTR_THUMB_MODE; - else + if (mode != DR_ISA_ARM_THUMB && mode != DR_ISA_ARM_A32) { return false; + } + instr->isa_mode = mode; return true; } -dr_isa_mode_t -instr_get_isa_mode(instr_t *instr) -{ - return TEST(INSTR_THUMB_MODE, instr->flags) ? DR_ISA_ARM_THUMB : DR_ISA_ARM_A32; -} - int instr_length_arch(dcontext_t *dcontext, instr_t *instr) { diff --git a/core/ir/decode_shared.c b/core/ir/decode_shared.c index 10b3c784464..8a190790e69 100644 --- a/core/ir/decode_shared.c +++ b/core/ir/decode_shared.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2021 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2001-2010 VMware, Inc. All rights reserved. 
* **********************************************************/ @@ -179,17 +179,18 @@ int sve_veclen; int sve_veclens[] = { 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048 }; -void +bool dr_set_sve_vector_length(int vl) { - /* TODO i#3044: Vector length will be read from h/w when running on SVE. */ - for (int i = 0; i < sizeof(sve_veclens); i++) { + for (int i = 0; i < sizeof(sve_veclens) / sizeof(sve_veclens[0]); i++) { if (vl == sve_veclens[i]) { sve_veclen = vl; - return; + return true; } } - CLIENT_ASSERT(false, "invalid SVE vector length"); + /* Make unusual values visible in case our internal uses mess up. */ + ASSERT_CURIOSITY(false); + return false; } int diff --git a/core/ir/encode_api.h b/core/ir/encode_api.h index 8987e55b683..be9a94c79ae 100644 --- a/core/ir/encode_api.h +++ b/core/ir/encode_api.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2021 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -74,15 +74,18 @@ DR_API dr_isa_mode_t dr_get_isa_mode(void *drcontext); +DR_API /** * AArch64 Scalable Vector Extension's vector length in bits is one of: * 128 256 384 512 640 768 896 1024 1152 1280 1408 1536 1664 1792 1920 2048 + * Returns whether successful. * TODO i#3044: This function will only allow setting vector length if not * running on SVE. */ -void +bool dr_set_sve_vector_length(int vl); +DR_API /** * Read AArch64 Scalable Vector Extension's vector length, in bits. */ diff --git a/core/ir/instr.h b/core/ir/instr.h index e27bca47918..76dc3a02c82 100644 --- a/core/ir/instr.h +++ b/core/ir/instr.h @@ -202,19 +202,8 @@ enum { INSTR_DO_NOT_EMIT = 0x10000000, /* PR 251479: re-relativization support: is instr->rip_rel_pos valid? 
*/ INSTR_RIP_REL_VALID = 0x20000000, -#ifdef X86 - /* PR 278329: each instr stores its own mode */ - INSTR_X86_MODE = 0x40000000, -#elif defined(ARM) - /* We assume we don't need to distinguish A64 from A32 as you cannot swap - * between them in user mode. Thus we only need one flag. - * XXX: we might want more power for drdecode, though the global isa_mode - * should be sufficient there. - */ - INSTR_THUMB_MODE = 0x40000000, -#endif /* PR 267260: distinguish our own mangling from client-added instrs */ - INSTR_OUR_MANGLING = 0x80000000, + INSTR_OUR_MANGLING = 0x40000000, }; #define DR_TUPLE_TYPE_BITS 4 @@ -516,6 +505,21 @@ instr_t * instr_set_translation_mangling_epilogue(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr); +#ifdef AARCH64 +/* Sets the DR_PRED_MASKED flag on the instruction to indicate that + * this instruction is predicated and execution depends on the value of a + * predicate register + */ +void +instr_set_has_register_predication(instr_t *instr); + +/* Checks if DR_PRED_MASKED is set on the instruction, which indicates +it has a governing predicate register. 
+*/ +bool +instr_has_register_predication(instr_t *instr); +#endif + app_pc instr_compute_address_priv(instr_t *instr, priv_mcontext_t *mc); diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index 9fc140d7a36..11f05e0c6eb 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -138,7 +138,10 @@ typedef enum _dr_pred_type_t { DR_PRED_LE, /**< ARM condition: 1101 Signed <= (Z == 1 or N != V) */ DR_PRED_AL, /**< ARM condition: 1110 Always (unconditional) */ # ifdef AARCH64 - DR_PRED_NV, /**< ARM condition: 1111 Never, meaning always */ + DR_PRED_NV, /**< ARM condition: 1111 Never, meaning always */ + DR_PRED_MASKED, /**< Used for AArch64 SVE instructions with governing predicate + * registers + */ # endif # ifdef ARM DR_PRED_OP, /**< ARM condition: 1111 Part of opcode */ @@ -307,6 +310,12 @@ struct _instr_t { byte num_dsts; byte num_srcs; + /* Instruction ISA mode to support multiple architectures in the same build of DR + * (xref i#6698 i#1684). + * This field holds values of type #dr_isa_mode_t. + */ + byte isa_mode; + union { struct { /* for efficiency everyone has a 1st src opnd, since we often just @@ -1908,6 +1917,8 @@ instr_is_rep_string_op(instr_t *instr); /** * Indicates which category the instruction corresponds to. + * Update instr_get_category_name() in core/ir/instr_shared.c + * when adding new categories in this enum. */ typedef enum { DR_INSTR_CATEGORY_UNCATEGORIZED = 0x0, /**< Uncategorized. */ @@ -1934,6 +1945,15 @@ typedef enum { DR_FP_MATH, /**< Performs arithmetic or conditional operations. */ } dr_fp_type_t; +DR_API +/** + * Assumes \p category is a DR_INSTR_CATEGORY_ constant. + * See #dr_instr_category_t. + * Returns \p category name in string format. + */ +const char * +instr_get_category_name(dr_instr_category_t category); + DR_API /** * Returns true iff \p instr is a floating point instruction.
@@ -2539,17 +2559,17 @@ instr_is_reg_spill_or_restore(void *drcontext, instr_t *instr, bool *tls DR_PARA /* we only care about these 11 flags, and mostly only about the first 6 * we consider an undefined effect on a flag to be a write */ -# define EFLAGS_READ_CF 0x00000001 /**< Reads CF (Carry Flag). */ -# define EFLAGS_READ_PF 0x00000002 /**< Reads PF (Parity Flag). */ -# define EFLAGS_READ_AF 0x00000004 /**< Reads AF (Auxiliary Carry Flag). */ -# define EFLAGS_READ_ZF 0x00000008 /**< Reads ZF (Zero Flag). */ -# define EFLAGS_READ_SF 0x00000010 /**< Reads SF (Sign Flag). */ -# define EFLAGS_READ_TF 0x00000020 /**< Reads TF (Trap Flag). */ -# define EFLAGS_READ_IF 0x00000040 /**< Reads IF (Interrupt Enable Flag). */ -# define EFLAGS_READ_DF 0x00000080 /**< Reads DF (Direction Flag). */ -# define EFLAGS_READ_OF 0x00000100 /**< Reads OF (Overflow Flag). */ -# define EFLAGS_READ_NT 0x00000200 /**< Reads NT (Nested Task). */ -# define EFLAGS_READ_RF 0x00000400 /**< Reads RF (Resume Flag). */ +# define EFLAGS_READ_CF 0x00000001 /**< Reads CF (Carry Flag). */ +# define EFLAGS_READ_PF 0x00000002 /**< Reads PF (Parity Flag). */ +# define EFLAGS_READ_AF 0x00000004 /**< Reads AF (Auxiliary Carry Flag). */ +# define EFLAGS_READ_ZF 0x00000008 /**< Reads ZF (Zero Flag). */ +# define EFLAGS_READ_SF 0x00000010 /**< Reads SF (Sign Flag). */ +# define EFLAGS_READ_TF 0x00000020 /**< Reads TF (Trap Flag). */ +# define EFLAGS_READ_IF 0x00000040 /**< Reads IF (Interrupt Enable Flag). */ +# define EFLAGS_READ_DF 0x00000080 /**< Reads DF (Direction Flag). */ +# define EFLAGS_READ_OF 0x00000100 /**< Reads OF (Overflow Flag). */ +# define EFLAGS_READ_NT 0x00000200 /**< Reads NT (Nested Task). */ +# define EFLAGS_READ_RF 0x00000400 /**< Reads RF (Resume Flag). */ # define EFLAGS_WRITE_CF 0x00000800 /**< Writes CF (Carry Flag). */ # define EFLAGS_WRITE_PF 0x00001000 /**< Writes PF (Parity Flag). 
*/ diff --git a/core/ir/instr_inline_api.h b/core/ir/instr_inline_api.h index 4284e901e53..8c9840bdce1 100644 --- a/core/ir/instr_inline_api.h +++ b/core/ir/instr_inline_api.h @@ -175,6 +175,13 @@ opnd_is_predicate_zero(opnd_t op) return opnd_is_predicate_reg(op) && ((op.aux.flags & DR_OPND_IS_ZERO_PREDICATE) != 0); } +INSTR_INLINE +bool +opnd_is_governing(opnd_t op) +{ + return opnd_is_predicate_reg(op) && ((op.aux.flags & DR_OPND_IS_GOVERNING) != 0); +} + # if defined(X64) || defined(ARM) # ifdef X86 # define OPND_IS_REL_ADDR(op) ((op).kind == REL_ADDR_kind) diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index aa0a25f0c5d..a3943249feb 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -84,16 +84,19 @@ instr_t * instr_create(void *drcontext) { + bool is_instr_isa_mode_set = false; dcontext_t *dcontext = (dcontext_t *)drcontext; instr_t *instr = (instr_t *)heap_alloc(dcontext, sizeof(instr_t) HEAPACCT(ACCT_IR)); /* everything initializes to 0, even flags, to indicate * an uninitialized instruction */ memset((void *)instr, 0, sizeof(instr_t)); #if defined(X86) && defined(X64) - instr_set_isa_mode(instr, X64_CACHE_MODE_DC(dcontext) ? DR_ISA_AMD64 : DR_ISA_IA32); -#elif defined(ARM) - instr_set_isa_mode(instr, dr_get_isa_mode(dcontext)); + is_instr_isa_mode_set = instr_set_isa_mode( + instr, X64_CACHE_MODE_DC(dcontext) ? 
DR_ISA_AMD64 : DR_ISA_IA32); +#else + is_instr_isa_mode_set = instr_set_isa_mode(instr, dr_get_isa_mode(dcontext)); #endif + CLIENT_ASSERT(is_instr_isa_mode_set, "setting instruction ISA mode unsuccessful"); return instr; } @@ -442,6 +445,12 @@ private_instr_encode(dcontext_t *dcontext, instr_t *instr, bool always_cache) return len; } +dr_isa_mode_t +instr_get_isa_mode(instr_t *instr) +{ + return (dr_isa_mode_t)instr->isa_mode; +} + #define inlined_instr_get_opcode(instr) \ (IF_DEBUG_(CLIENT_ASSERT(sizeof(*instr) == sizeof(instr_t), "invalid type"))( \ ((instr)->opcode == OP_UNDECODED) \ @@ -470,6 +479,25 @@ instr_get_category(instr_t *instr) /* in rest of file, directly de-reference for performance (PR 622253) */ #define instr_get_category inlined_instr_get_category +const char * +instr_get_category_name(dr_instr_category_t category) +{ + switch (category) { + case DR_INSTR_CATEGORY_UNCATEGORIZED: return "uncategorized"; + case DR_INSTR_CATEGORY_FP: return "fp"; + case DR_INSTR_CATEGORY_LOAD: return "load"; + case DR_INSTR_CATEGORY_STORE: return "store"; + case DR_INSTR_CATEGORY_BRANCH: return "branch"; + case DR_INSTR_CATEGORY_SIMD: return "simd"; + case DR_INSTR_CATEGORY_STATE: return "state"; + case DR_INSTR_CATEGORY_MOVE: return "move"; + case DR_INSTR_CATEGORY_CONVERT: return "convert"; + case DR_INSTR_CATEGORY_MATH: return "math"; + case DR_INSTR_CATEGORY_OTHER: return "other"; + default: return ""; + } +} + static inline void instr_being_modified(instr_t *instr, bool raw_bits_valid) { @@ -2577,6 +2605,20 @@ instr_set_translation_mangling_epilogue(dcontext_t *dcontext, instrlist_t *ilist return instr; } +#ifdef AARCH64 +void +instr_set_has_register_predication(instr_t *instr) +{ + instr_set_predicate(instr, DR_PRED_MASKED); +} + +bool +instr_has_register_predication(instr_t *instr) +{ + return instr_get_predicate(instr) == DR_PRED_MASKED; +} +#endif + /* Emulates instruction to find the address of the index-th memory operand. 
* Either or both OUT variables can be NULL. */ diff --git a/core/ir/opnd_api.h b/core/ir/opnd_api.h index 30b573ef711..b62a5807440 100644 --- a/core/ir/opnd_api.h +++ b/core/ir/opnd_api.h @@ -1011,9 +1011,9 @@ enum { DR_REG_SPSR_UND, /**< The "spsr_und" register. */ DR_REG_SPSR_FIQ, /**< The "spsr_fiq" register. */ # else - DR_REG_CPSR, /**< The "cpsr" register. */ - DR_REG_SPSR, /**< The "spsr" register. */ - DR_REG_FPSCR, /**< The "fpscr" register. */ + DR_REG_CPSR, /**< The "cpsr" register. */ + DR_REG_SPSR, /**< The "spsr" register. */ + DR_REG_FPSCR, /**< The "fpscr" register. */ # endif /* AArch32 Thread Registers */ @@ -1083,13 +1083,13 @@ enum { DR_REG_SP = DR_REG_XSP, /**< The stack pointer register. */ DR_REG_LR = DR_REG_X30, /**< The link register. */ # else - DR_REG_SP = DR_REG_R13, /**< The stack pointer register. */ - DR_REG_LR = DR_REG_R14, /**< The link register. */ - DR_REG_PC = DR_REG_R15, /**< The program counter register. */ + DR_REG_SP = DR_REG_R13, /**< The stack pointer register. */ + DR_REG_LR = DR_REG_R14, /**< The link register. */ + DR_REG_PC = DR_REG_R15, /**< The program counter register. */ # endif - DR_REG_SL = DR_REG_R10, /**< Alias for the r10 register. */ - DR_REG_FP = DR_REG_R11, /**< Alias for the r11 register. */ - DR_REG_IP = DR_REG_R12, /**< Alias for the r12 register. */ + DR_REG_SL = DR_REG_R10, /**< Alias for the r10 register. */ + DR_REG_FP = DR_REG_R11, /**< Alias for the r11 register. */ + DR_REG_IP = DR_REG_R12, /**< Alias for the r12 register. */ # ifndef AARCH64 /** Alias for cpsr register (thus this is the full cpsr, not just the apsr bits). */ DR_REG_APSR = DR_REG_CPSR, @@ -1101,8 +1101,8 @@ enum { /** Thread Pointer/ID Register, Read-Only, EL0. 
*/ DR_REG_TPIDRRO_EL0 = DR_REG_TPIDRURO, /* ARMv7 Thread Registers */ - DR_REG_CP15_C13_2 = DR_REG_TPIDRURW, /**< User Read/Write Thread ID Register */ - DR_REG_CP15_C13_3 = DR_REG_TPIDRURO, /**< User Read-Only Thread ID Register */ + DR_REG_CP15_C13_2 = DR_REG_TPIDRURW, /**< User Read/Write Thread ID Register */ + DR_REG_CP15_C13_3 = DR_REG_TPIDRURO, /**< User Read-Only Thread ID Register */ # ifdef AARCH64 DR_REG_LAST_VALID_ENUM = DR_REG_CNTVCT_EL0, /**< Last valid register enum */ @@ -1128,7 +1128,7 @@ enum { DR_NUM_GPR_REGS = DR_REG_STOP_GPR - DR_REG_START_GPR + 1, /**< Count of GPR regs. */ # ifdef AARCH64 - DR_NUM_SIMD_VECTOR_REGS = DR_REG_Z31 - DR_REG_Z0 + 1, /**< Count of SIMD regs. */ + DR_NUM_SIMD_VECTOR_REGS = DR_REG_Z31 - DR_REG_Z0 + 1, /**< Count of SIMD regs. */ # else /* XXX: maybe we want more distinct names that provide counts for 64-bit D or 32-bit * S registers. @@ -1773,7 +1773,7 @@ typedef enum _dr_opnd_flags_t { */ DR_OPND_IS_VECTOR = 0x100, /** - * Predicate registers can either be merging, zero or neither. If one of these + * SVE predicate registers can either be merging, zero or neither. If one of these * are set then they are either a merge or zero otherwise aren't either. */ DR_OPND_IS_MERGE_PREDICATE = 0x200, @@ -1788,6 +1788,17 @@ typedef enum _dr_opnd_flags_t { * This is used by RISCV64 for immediates display format. */ DR_OPND_IMM_PRINT_DECIMAL = 0x1000, + + /** + * The register number is not in the instruction encoding but is calculated + * based on another register + */ + DR_OPND_IMPLICIT = 0x2000, + /** + * The register is a SVE governing predicate register: it is used to select + * which elements of a vector are actually read or written to in AArch64 SVE + */ + DR_OPND_IS_GOVERNING = 0x4000, } dr_opnd_flags_t; #ifdef DR_FAST_IR @@ -2563,22 +2574,28 @@ opnd_is_element_vector_reg(opnd_t opnd); DR_API INSTR_INLINE -/** Returns true iff \p opnd is a predicate register. */ +/** Returns true iff \p opnd is an SVE predicate register. 
*/ bool opnd_is_predicate_reg(opnd_t opnd); DR_API INSTR_INLINE -/** Returns true iff \p opnd is a merging predicate register. */ +/** Returns true iff \p opnd is an SVE merging predicate register. */ bool opnd_is_predicate_merge(opnd_t opnd); DR_API INSTR_INLINE -/** Returns true iff \p opnd is a zeroing predicate register. */ +/** Returns true iff \p opnd is an SVE zeroing predicate register. */ bool opnd_is_predicate_zero(opnd_t opnd); +DR_API +INSTR_INLINE +/** Returns true iff \p opnd is an SVE governing predicate register. */ +bool +opnd_is_governing(opnd_t opnd); + DR_API /** * Returns true iff \p opnd uses vector indexing via a VSIB byte. This diff --git a/core/ir/opnd_shared.c b/core/ir/opnd_shared.c index 6009133970e..f78d56a8d6e 100644 --- a/core/ir/opnd_shared.c +++ b/core/ir/opnd_shared.c @@ -1496,6 +1496,7 @@ opnd_create_increment_reg(opnd_t opnd, uint increment) opnd.value.reg_and_element_size.element_size; inc_opnd.size = opnd.size; /* indicates full size of reg */ inc_opnd.aux.flags = opnd.aux.flags; + inc_opnd.aux.flags |= DR_OPND_IMPLICIT; return inc_opnd; } diff --git a/core/ir/riscv64/codec.c b/core/ir/riscv64/codec.c index 034c6ccfce8..a0a31e889f6 100644 --- a/core/ir/riscv64/codec.c +++ b/core/ir/riscv64/codec.c @@ -90,8 +90,8 @@ typedef bool (*opnd_dec_func_t)(dcontext_t *dc, uint32_t inst, int op_sz, byte * * Helper functions. */ -#define INFO_NDST(opcode) GET_FIELD((opcode), 31, 31); -#define INFO_NSRC(opcode) GET_FIELD((opcode), 30, 28); +#define INFO_NDST(opcode) GET_FIELD((opcode), 31, 30); +#define INFO_NSRC(opcode) GET_FIELD((opcode), 29, 27); /* * End of helper functions. @@ -1049,7 +1049,9 @@ decode_v_l_rs1_disp_opnd(dcontext_t *dc, uint32_t inst, int op_sz, byte *pc, byte *orig_pc, int idx, instr_t *out) { reg_t reg = DR_REG_X0 + GET_FIELD(inst, 19, 15); - int32_t imm = SIGN_EXTEND(GET_FIELD(inst, 31, 20), 12); + /* Immediate part of LR.W/D is always 0. */ + int32_t imm = + GET_FIELD(inst, 6, 0) == 0b0101111 ?
0 : SIGN_EXTEND(GET_FIELD(inst, 31, 20), 12); opnd_t opnd = opnd_add_flags(opnd_create_base_disp(reg, DR_REG_NULL, 0, imm, op_sz), DR_OPND_IMM_PRINT_DECIMAL); instr_set_src(out, idx, opnd); @@ -1072,7 +1074,10 @@ decode_v_s_rs1_disp_opnd(dcontext_t *dc, uint32_t inst, int op_sz, byte *pc, byte *orig_pc, int idx, instr_t *out) { reg_t reg = DR_REG_X0 + GET_FIELD(inst, 19, 15); - int32_t imm = (GET_FIELD(inst, 31, 25) << 5) | GET_FIELD(inst, 11, 7); + /* Immediate part of SC.W/D is always 0. */ + int32_t imm = GET_FIELD(inst, 6, 0) == 0b0101111 + ? 0 + : (GET_FIELD(inst, 31, 25) << 5) | GET_FIELD(inst, 11, 7); imm = SIGN_EXTEND(imm, 12); opnd_t opnd = opnd_add_flags(opnd_create_base_disp(reg, DR_REG_NULL, 0, imm, op_sz), DR_OPND_IMM_PRINT_DECIMAL); @@ -1259,6 +1264,7 @@ opnd_dec_func_t opnd_decoders[] = { [RISCV64_FLD_IIMM_0] = decode_iimm_0_opnd, [RISCV64_FLD_ICRS1] = decode_icrs1_opnd, [RISCV64_FLD_ICRS1__] = decode_icrs1___opnd, + [RISCV64_FLD_I_S_RS1_DISP] = decode_v_s_rs1_disp_opnd, }; /* Decode RVC quadrant 0. 
@@ -1515,11 +1521,20 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr) instr_set_opcode(instr, info->info.type); instr_set_num_opnds(dcontext, instr, ndst, nsrc); - CLIENT_ASSERT(info->info.dst1_type < RISCV64_FLD_CNT, "Invalid dst1_type."); - if (ndst > 0 && - !opnd_decoders[info->info.dst1_type](dcontext, inst, info->info.dst1_size, pc, - orig_pc, 0, instr)) - goto decode_failure; + switch (ndst) { + case 2: + CLIENT_ASSERT(info->info.dst2_type < RISCV64_FLD_CNT, "Invalid dst2_type."); + if (!opnd_decoders[info->info.dst2_type](dcontext, inst, info->info.dst2_size, pc, + orig_pc, 1, instr)) + goto decode_failure; + case 1: + CLIENT_ASSERT(info->info.dst1_type < RISCV64_FLD_CNT, "Invalid dst1_type."); + if (!opnd_decoders[info->info.dst1_type](dcontext, inst, info->info.dst1_size, pc, + orig_pc, 0, instr)) + goto decode_failure; + case 0: break; + default: ASSERT_NOT_REACHED(); + } switch (nsrc) { case 4: CLIENT_ASSERT(info->info.dst2_type < RISCV64_FLD_CNT, "Invalid dst2_type."); @@ -2474,8 +2489,10 @@ encode_v_l_rs1_disp_opnd(instr_t *instr, byte *pc, int idx, uint32_t *out, opnd_t opnd = instr_get_src(instr, idx); uint32_t reg = opnd_get_base(opnd) - DR_REG_X0; *out |= SET_FIELD(reg, 19, 15); - int32_t imm = opnd_get_disp(opnd); - *out |= SET_FIELD(imm, 31, 20); + if (instr->opcode != OP_lr_w && instr->opcode != OP_lr_d) { + int32_t imm = opnd_get_disp(opnd); + *out |= SET_FIELD(imm, 31, 20); + } return true; } @@ -2497,8 +2514,10 @@ encode_v_s_rs1_disp_opnd(instr_t *instr, byte *pc, int idx, uint32_t *out, opnd_t opnd = instr_get_dst(instr, idx); uint32_t reg = opnd_get_base(opnd) - DR_REG_X0; *out |= SET_FIELD(reg, 19, 15); - int32_t imm = opnd_get_disp(opnd); - *out |= SET_FIELD(imm, 11, 7) | SET_FIELD(imm >> 5, 31, 25); + if (instr->opcode != OP_sc_w && instr->opcode != OP_sc_d) { + int32_t imm = opnd_get_disp(opnd); + *out |= SET_FIELD(imm, 11, 7) | SET_FIELD(imm >> 5, 31, 25); + } return true; } @@ -2566,6 +2585,7 @@ 
opnd_enc_func_t opnd_encoders[] = { [RISCV64_FLD_IIMM_0] = encode_implicit_opnd, [RISCV64_FLD_ICRS1] = encode_implicit_opnd, [RISCV64_FLD_ICRS1__] = encode_implicit_opnd, + [RISCV64_FLD_I_S_RS1_DISP] = encode_implicit_opnd, }; uint @@ -2582,9 +2602,18 @@ encode_common(byte *pc, instr_t *instr, decode_info_t *di) CLIENT_ASSERT(ndst >= 0 || ndst <= 1, "Invalid number of destination operands."); CLIENT_ASSERT(nsrc >= 0 || nsrc <= 4, "Invalid number of source operands."); - CLIENT_ASSERT(info->info.dst1_type < RISCV64_FLD_CNT, "Invalid dst1_type."); - if (ndst > 0 && !opnd_encoders[info->info.dst1_type](instr, pc, 0, &inst, di)) - goto encode_failure; + switch (ndst) { + case 2: + CLIENT_ASSERT(info->info.dst2_type < RISCV64_FLD_CNT, "Invalid dst2_type."); + if (!opnd_encoders[info->info.dst2_type](instr, pc, 1, &inst, di)) + goto encode_failure; + case 1: + CLIENT_ASSERT(info->info.dst1_type < RISCV64_FLD_CNT, "Invalid dst1_type."); + if (!opnd_encoders[info->info.dst1_type](instr, pc, 0, &inst, di)) + goto encode_failure; + case 0: break; + default: ASSERT_NOT_REACHED(); + } switch (nsrc) { case 4: CLIENT_ASSERT(info->info.dst2_type < RISCV64_FLD_CNT, "Invalid dst2_type."); diff --git a/core/ir/riscv64/codec.h b/core/ir/riscv64/codec.h index 332e40c5a02..6028a3a253d 100644 --- a/core/ir/riscv64/codec.h +++ b/core/ir/riscv64/codec.h @@ -255,6 +255,7 @@ typedef enum { RISCV64_FLD_IIMM_0, RISCV64_FLD_ICRS1, RISCV64_FLD_ICRS1__, + RISCV64_FLD_I_S_RS1_DISP, RISCV64_FLD_CNT, /* Keep this last */ } riscv64_fld_t; diff --git a/core/ir/riscv64/codec.py b/core/ir/riscv64/codec.py index 2cad2cb08c9..04041127550 100755 --- a/core/ir/riscv64/codec.py +++ b/core/ir/riscv64/codec.py @@ -596,7 +596,13 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool, is_implicit: bool, 'ld': 'OPSZ_8', 'lbu': 'OPSZ_1', 'lhu': 'OPSZ_2', 'lwu': 'OPSZ_4', 'sb': 'OPSZ_1', 'sh': 'OPSZ_2', 'sw': 'OPSZ_4', 'sd': 'OPSZ_8', 'flw': 'OPSZ_4', 'fld': 'OPSZ_8', 'fsw': 'OPSZ_4', 'fsd': 'OPSZ_8', - 
'flq': 'OPSZ_16', 'fsq': 'OPSZ_16' + 'flq': 'OPSZ_16', 'fsq': 'OPSZ_16', 'lr.w': 'OPSZ_4', 'lr.d': 'OPSZ_8', + 'amoswap.w': 'OPSZ_4', 'amoadd.w': 'OPSZ_4', 'amoxor.w': 'OPSZ_4', + 'amoand.w': 'OPSZ_4', 'amoor.w': 'OPSZ_4', 'amomin.w': 'OPSZ_4', + 'amomax.w': 'OPSZ_4', 'amominu.w': 'OPSZ_4', 'amomaxu.w': 'OPSZ_4', + 'amoswap.d': 'OPSZ_8', 'amoadd.d': 'OPSZ_8', 'amoxor.d': 'OPSZ_8', + 'amoand.d': 'OPSZ_8', 'amoor.d': 'OPSZ_8', 'amomin.d': 'OPSZ_8', + 'amomax.d': 'OPSZ_8', 'amominu.d': 'OPSZ_8', 'amomaxu.d': 'OPSZ_8', }, 'imm(rs1)', 'The register-relative memory source location (reg+imm).' @@ -611,7 +617,7 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool, is_implicit: bool, 'ld': 'OPSZ_8', 'lbu': 'OPSZ_1', 'lhu': 'OPSZ_2', 'lwu': 'OPSZ_4', 'sb': 'OPSZ_1', 'sh': 'OPSZ_2', 'sw': 'OPSZ_4', 'sd': 'OPSZ_8', 'flw': 'OPSZ_4', 'fld': 'OPSZ_8', 'fsw': 'OPSZ_4', 'fsd': 'OPSZ_8', - 'flq': 'OPSZ_16', 'fsq': 'OPSZ_16' + 'flq': 'OPSZ_16', 'fsq': 'OPSZ_16', 'sc.w': 'OPSZ_4', 'sc.d': 'OPSZ_8' }, 'imm(rs1)', 'The register-relative memory target location (reg+imm).' @@ -697,6 +703,22 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool, is_implicit: bool, 'rs1', 'Implicit rs1, same as CRD__.', ) + I_S_RS1_DISP = (63, + 'Mem', + True, + True, + True, + { + 'amoswap.w': 'OPSZ_4', 'amoadd.w': 'OPSZ_4', 'amoxor.w': 'OPSZ_4', + 'amoand.w': 'OPSZ_4', 'amoor.w': 'OPSZ_4', 'amomin.w': 'OPSZ_4', + 'amomax.w': 'OPSZ_4', 'amominu.w': 'OPSZ_4', 'amomaxu.w': 'OPSZ_4', + 'amoswap.d': 'OPSZ_8', 'amoadd.d': 'OPSZ_8', 'amoxor.d': 'OPSZ_8', + 'amoand.d': 'OPSZ_8', 'amoor.d': 'OPSZ_8', 'amomin.d': 'OPSZ_8', + 'amomax.d': 'OPSZ_8', 'amominu.d': 'OPSZ_8', 'amomaxu.d': 'OPSZ_8', + }, + 'imm(rs1)', + 'The register-relative memory target location (reg+imm).' 
+ ) def __str__(self) -> str: return self.name.lower().replace("fp", "(fp)") @@ -830,6 +852,8 @@ def __fixup_compressed_inst(self, inst: Instruction): def __fixup_uncompressed_inst(self, inst: Instruction): opc = (inst.match & inst.mask) & 0x7F + funct3 = ((inst.match & inst.mask) >> 12) & 0x7 + rs3 = ((inst.match & inst.mask) >> 27) & 0x1f if opc in [0b0000011, 0b0000111]: # LOAD instructions dbg(f'fixup: {inst.name} {[f.name for f in inst.flds]}') inst.flds[0] = Field.V_L_RS1_DISP @@ -840,6 +864,22 @@ def __fixup_uncompressed_inst(self, inst: Instruction): inst.flds[2] = Field.V_S_RS1_DISP inst.flds.pop(0) dbg(f' -> {" " * len(inst.name)} {[f.name for f in inst.flds]}') + elif opc == 0b0101111 and (funct3 == 0b010 or funct3 == 0b011): + if rs3 == 0x2: # LR.W/D instructions + dbg(f'fixup: {inst.name} {[f.name for f in inst.flds]}') + inst.flds[1] = Field.V_L_RS1_DISP + dbg(f' -> {" " * len(inst.name)} {[f.name for f in inst.flds]}') + elif rs3 == 0x3: # SC.W/D instructions + dbg(f'fixup: {inst.name} {[f.name for f in inst.flds]}') + inst.flds[2] = Field.V_S_RS1_DISP + # Swap the rd and mem operand positions so that mem becomes the + # first operand to be consistent with AArch64. 
+ inst.flds[2], inst.flds[3] = inst.flds[3], inst.flds[2] + dbg(f' -> {" " * len(inst.name)} {[f.name for f in inst.flds]}') + else: # AMO instructions + dbg(f'fixup: {inst.name} {[f.name for f in inst.flds]}') + inst.flds[2] = Field.V_L_RS1_DISP + inst.flds.append(Field.I_S_RS1_DISP) elif inst.mask == 0x1f07fff and inst.match in [0x6013, 0x106013, 0x306013]: # prefetch.[irw] instructions dbg(f'fixup: {inst.name} {[f.name for f in inst.flds]}') @@ -1211,10 +1251,12 @@ def generate_instr_info_trie(self, out_file, op_offset) -> bool: asm_args += ' ' asm_args += ', '.join([f.asm_name() for f in flds]) isrc = 1 + idst = 0 for f in flds: if f.is_dest: - oidx = 0 + oidx = idst ndst += 1 + idst = 4 else: oidx = isrc isrc += 1 @@ -1225,7 +1267,7 @@ def generate_instr_info_trie(self, out_file, op_offset) -> bool: instr_infos.append(f'''[OP_{i.formatted_name()}] = {{ /* {i.name}{asm_args} */ .info = {{ .type = OP_{i.formatted_name()}, - .opcode = 0x{(ndst << 31) | (nsrc << 28):08x}, /* {ndst} dst, {nsrc} src */ + .opcode = 0x{(ndst << 30) | (nsrc << 27):08x}, /* {ndst} dst, {nsrc} src */ .name = "{i.name}",{''.join(opnds)} .code = (((uint64){hex(i.match)}) << 32) | ({hex(i.mask)}), }}, diff --git a/core/ir/riscv64/instr.c b/core/ir/riscv64/instr.c index 2a28ceece39..e35550ce62f 100644 --- a/core/ir/riscv64/instr.c +++ b/core/ir/riscv64/instr.c @@ -32,17 +32,20 @@ #include "../globals.h" #include "instr.h" +#include "encode_api.h" +/* XXX i#6690: currently only RISCV64 is supported for instruction encoding. + * We want to add support for RISCV64 decoding and synthetic ISA encoding as well. + * XXX i#1684: move this function to core/ir/instr_shared.c once we can support + * all architectures in the same build of DR. 
+ */ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { - return (mode == DR_ISA_RV64IMAFDC); -} - -dr_isa_mode_t -instr_get_isa_mode(instr_t *instr) -{ - return DR_ISA_RV64IMAFDC; + if (mode != DR_ISA_RV64IMAFDC) + return false; + instr->isa_mode = DR_ISA_RV64IMAFDC; + return true; } int diff --git a/core/ir/riscv64/isl/README.md b/core/ir/riscv64/isl/README.md index 48c95798f3c..527081c1137 100644 --- a/core/ir/riscv64/isl/README.md +++ b/core/ir/riscv64/isl/README.md @@ -123,6 +123,7 @@ start with i, which means implicit. - iimm_0 - icrs1 - icrs1__ +- i_s_rs1_disp This maps into `riscv64_fld_t` enum in `codec.h` and `Field` enum in `codec.py` generator. diff --git a/core/ir/x86/instr.c b/core/ir/x86/instr.c index 9db609e7e81..b9cf49eea69 100644 --- a/core/ir/x86/instr.c +++ b/core/ir/x86/instr.c @@ -40,6 +40,7 @@ #include "instr.h" #include "decode.h" #include "decode_private.h" +#include "encode_api.h" #include "instr_create_shared.h" #ifdef X64 @@ -51,9 +52,9 @@ void instr_set_x86_mode(instr_t *instr, bool x86) { if (x86) - instr->flags |= INSTR_X86_MODE; + instr->isa_mode = DR_ISA_IA32; else - instr->flags &= ~INSTR_X86_MODE; + instr->isa_mode = DR_ISA_AMD64; } /* @@ -63,37 +64,29 @@ instr_set_x86_mode(instr_t *instr, bool x86) bool instr_get_x86_mode(instr_t *instr) { - return TEST(INSTR_X86_MODE, instr->flags); + return instr->isa_mode == DR_ISA_IA32; } #endif +/* XXX i#6690: currently only x86 and x64 are supported for instruction encoding. + * We want to add support for x86 and x64 decoding and synthetic ISA encoding as well. + * XXX i#1684: move this function to core/ir/instr_shared.c once we can support + * all architectures in the same build of DR. 
+ */ bool instr_set_isa_mode(instr_t *instr, dr_isa_mode_t mode) { #ifdef X64 - if (mode == DR_ISA_IA32) - instr_set_x86_mode(instr, true); - else if (mode == DR_ISA_AMD64) - instr_set_x86_mode(instr, false); - else + if (mode != DR_ISA_IA32 && mode != DR_ISA_AMD64) return false; #else if (mode != DR_ISA_IA32) return false; #endif + instr->isa_mode = mode; return true; } -dr_isa_mode_t -instr_get_isa_mode(instr_t *instr) -{ -#ifdef X64 - return TEST(INSTR_X86_MODE, instr->flags) ? DR_ISA_IA32 : DR_ISA_AMD64; -#else - return DR_ISA_IA32; -#endif -} - int instr_length_arch(dcontext_t *dcontext, instr_t *instr) { diff --git a/core/lib/dr_tools.h b/core/lib/dr_tools.h index 263ee7aae47..57563a4f383 100644 --- a/core/lib/dr_tools.h +++ b/core/lib/dr_tools.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2023, Inc. All rights reserved. + * Copyright (c) 2010-2024, Inc. All rights reserved. * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -48,9 +48,21 @@ DR_API * \warning This context cannot be used as the drcontext for a thread * running under DR control! It is only for standalone programs that * wish to use DR as a library of disassembly, etc. routines. + * \warning This context is not fully thread-safe as it stores some state + * (such as #dr_isa_mode_t and other fields related to AArch32 encoding + * and decoding) that is global and may be prone to data races. + * For example, having different threads use dr_set_isa_mode() to set + * different ISA modes at the same time can result in a data race. + * Furthermore, encoding and decoding of AArch32 instructions in parallel + * may also result in a data race. + * Code that uses a standalone DR context across multiple threads should + * implement its own lock/unlock mechanism to avoid such data races + * when using dr_set_isa_mode() or encoding/decoding AArch32 instructions. 
* \return NULL on failure, such as running on an unsupported operating * system version. */ +/* TODO i#6690: Add better multi-thread standalone decoding support. + */ void * dr_standalone_init(void); @@ -66,6 +78,9 @@ dr_standalone_exit(void); /** * Use this dcontext for use with the standalone static decoder library. * Pass it whenever a decoding-related API routine asks for a context. + * Note that this GLOBAL_DCONTEXT is returned by dr_standalone_init(); + * beware of its limitations (especially about thread-safety) described + * there. */ # define GLOBAL_DCONTEXT ((void *)-1) #endif @@ -103,6 +118,16 @@ DR_API bool dr_using_all_private_caches(void); +DR_API +/** + * Returns false if DynamoRIO is being used as a "regular" standalone library + * (see dr_standalone_init() and \ref page_standalone). + * Returns true if DynamoRIO is controlling the application by running + * its code through a software code cache. + */ +bool +dr_running_under_dynamorio(void); + DR_API /** \deprecated Replaced by dr_set_process_exit_behavior() */ void diff --git a/core/lib/globals_shared.h b/core/lib/globals_shared.h index 06748b4d77b..392b875b07d 100644 --- a/core/lib/globals_shared.h +++ b/core/lib/globals_shared.h @@ -933,9 +933,9 @@ enum { #else NUDGE_NUDGER_FREE_STACK = 0x02, /* nudger will free the nudge thread's stack so the * nudge thread itself shouldn't */ - NUDGE_FREE_ARG = 0x04, /* nudge arg is in a separate allocation and should - * be freed by the nudge thread */ #endif + NUDGE_FREE_ARG = 0x04, /* nudge arg is in a separate allocation and should + * be freed by the nudge thread */ }; typedef struct { diff --git a/core/lib/instrument.c b/core/lib/instrument.c index 5e3a8d763dd..8ca5dc5d5cb 100644 --- a/core/lib/instrument.c +++ b/core/lib/instrument.c @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2023 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. 
* Copyright (c) 2010-2011 Massachusetts Institute of Technology All rights reserved. * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. * ******************************************************************************/ @@ -2509,6 +2509,13 @@ dr_using_all_private_caches(void) return !SHARED_FRAGMENTS_ENABLED(); } +DR_API +bool +dr_running_under_dynamorio(void) +{ + return !standalone_library; +} + DR_API void dr_request_synchronized_exit(void) @@ -7376,11 +7383,9 @@ dr_insert_get_stolen_reg_value(void *drcontext, instrlist_t *ilist, instr_t *ins "dr_insert_get_stolen_reg: reg has wrong size\n"); CLIENT_ASSERT(!reg_is_stolen(reg), "dr_insert_get_stolen_reg: reg is used by DynamoRIO\n"); -#ifdef AARCHXX +#if defined(AARCHXX) || defined(RISCV64) instrlist_meta_preinsert( ilist, instr, instr_create_restore_from_tls(drcontext, reg, TLS_REG_STOLEN_SLOT)); -#elif defined(RISCV64) - CLIENT_ASSERT(false, "NYI on RISCV64"); #endif return true; } diff --git a/core/nudge.c b/core/nudge.c index 4df3a4aa549..e8ab897d92b 100644 --- a/core/nudge.c +++ b/core/nudge.c @@ -42,6 +42,10 @@ #else #endif /* WINDOWS */ +#ifdef LINUX +# include "synch.h" +#endif + #ifdef HOT_PATCHING_INTERFACE # include "hotpatch.h" /* for hotp_nudge_update() */ #endif @@ -430,12 +434,30 @@ handle_nudge(dcontext_t *dcontext, nudge_arg_t *arg) SYSLOG_INTERNAL_WARNING("nudge reset ignored since resets are disabled"); } } -#ifdef WINDOWS +#if defined(WINDOWS) || defined(LINUX) /* The detach handler is last since in the common case it doesn't return. */ if (TEST(NUDGE_GENERIC(detach), nudge_action_mask)) { +# ifdef WINDOWS dcontext->free_app_stack = false; nudge_action_mask &= ~NUDGE_GENERIC(detach); detach_helper(DETACH_NORMAL_TYPE); +# else + nudge_action_mask &= ~NUDGE_GENERIC(detach); + /* This is not using stack_alloc() because we can't have this being cleaned up + * via normal cleanup paths. 
*/ + heap_error_code_t error_code_reserve, error_code_commit; + void *d_r_detachstack = + os_heap_reserve(NULL, DYNAMORIO_STACK_SIZE, &error_code_reserve, false); + /* XXX: This memory is not freed. */ + if (!os_heap_commit(d_r_detachstack, DYNAMORIO_STACK_SIZE, + MEMPROT_READ | MEMPROT_WRITE, &error_code_commit)) { + ASSERT_NOT_REACHED(); + } + call_switch_stack(dcontext, + (byte *)((ptr_uint_t)d_r_detachstack + DYNAMORIO_STACK_SIZE), + (void (*)(void *))detach_externally_on_new_stack, NULL, true); + ASSERT_NOT_REACHED(); +# endif } #endif } diff --git a/core/optionsx.h b/core/optionsx.h index 52f98c1f868..de257ea8182 100644 --- a/core/optionsx.h +++ b/core/optionsx.h @@ -923,8 +923,11 @@ OPTION_DEFAULT(bool, elide_back_calls, true, OPTION_DEFAULT(uint, selfmod_max_writes, 5, "maximum write instrs per selfmod fragment") /* If this is too large, clients with heavyweight instrumentation hit the * "exceeded maximum size" failure. + * On RISC-V, direct branch has a range of +/- 4 KiB -- for extreme use cases, such as + * putting a clean call before every app instruction, 15 is a safe value to use. */ -OPTION_DEFAULT(uint, max_bb_instrs, 256, "maximum instrs per basic block") +OPTION_DEFAULT(uint, max_bb_instrs, IF_RISCV64_ELSE(15, 256), + "maximum instrs per basic block") PC_OPTION_DEFAULT(bool, process_SEH_push, IF_RETURN_AFTER_CALL_ELSE(true, false), "break bb's at an SEH push so we can see the frame pushed on in " "interp, required for -borland_SEH_rct") diff --git a/core/os_shared.h b/core/os_shared.h index b2ef7d568c4..5b828e66259 100644 --- a/core/os_shared.h +++ b/core/os_shared.h @@ -204,15 +204,21 @@ is_thread_currently_native(thread_record_t *tr); */ bool thread_get_mcontext(thread_record_t *tr, priv_mcontext_t *mc); + +#ifdef LINUX +bool +thread_get_nudged_mcontext(thread_record_t *tr, priv_mcontext_t *mc); +#endif + bool thread_set_mcontext(thread_record_t *tr, priv_mcontext_t *mc); /* Takes an os-specific context. Does not return. 
*/ void -thread_set_self_context(void *cxt); +thread_set_self_context(void *cxt, bool is_detach_external); /* Only sets the priv_mcontext_t state. Does not return. */ void -thread_set_self_mcontext(priv_mcontext_t *mc); +thread_set_self_mcontext(priv_mcontext_t *mc, bool is_detach_external); /* Assumes target thread is suspended */ bool diff --git a/core/synch.c b/core/synch.c index 4c96c2b0e88..3af5add4afe 100644 --- a/core/synch.c +++ b/core/synch.c @@ -761,9 +761,9 @@ check_wait_at_safe_spot(dcontext_t *dcontext, thread_synch_permission_t cur_stat * being at the synch point vs in the cache. */ if (set_mcontext) - thread_set_self_mcontext((priv_mcontext_t *)cxt); + thread_set_self_mcontext((priv_mcontext_t *)cxt, false); else - thread_set_self_context((void *)cxt); + thread_set_self_context((void *)cxt, false); ASSERT_NOT_REACHED(); } } @@ -1965,6 +1965,77 @@ send_all_other_threads_native(void) return; } +static void +detach_set_mcontext_helper(thread_record_t *thread) +{ + priv_mcontext_t mc; + LOG(GLOBAL, LOG_ALL, 2, "Detach: translating " TIDFMT "\n", thread); + DEBUG_DECLARE(bool ok =) + thread_get_mcontext(thread, &mc); + ASSERT(ok); + /* For a thread at a syscall, we use SA_RESTART for our suspend signal, + * so the kernel will adjust the restart point back to the syscall for us + * where expected. This is an artificial signal we're introducing, so an + * app that assumes no signals and assumes its non-auto-restart syscalls + * don't need loops could be broken. + */ + LOG(GLOBAL, LOG_ALL, 3, + /* Having the code bytes can help diagnose post-detach where the code + * cache is gone. 
+ */ + "Detach: pre-xl8 pc=%p (%02x %02x %02x %02x %02x), xsp=%p " + "for thread " TIDFMT "\n", + mc.pc, *mc.pc, *(mc.pc + 1), *(mc.pc + 2), *(mc.pc + 3), *(mc.pc + 4), mc.xsp, + thread->id); + DEBUG_DECLARE(ok =) + translate_mcontext(thread, &mc, true /*restore mem*/, NULL /*f*/); + ASSERT(ok); + if (!thread->under_dynamo_control) { + LOG(GLOBAL, LOG_ALL, 1, "Detach : thread " TIDFMT " already running natively\n", + thread->id); + /* we do need to restore the app ret addr, for native_exec */ + if (!DYNAMO_OPTION(thin_client) && DYNAMO_OPTION(native_exec) && + !vmvector_empty(native_exec_areas)) { + put_back_native_retaddrs(thread->dcontext); + } + } + detach_finalize_translation(thread, &mc); + LOG(GLOBAL, LOG_ALL, 1, "Detach: pc=" PFX " for thread " TIDFMT "\n", mc.pc, + thread->id); + ASSERT(!is_dynamo_address(mc.pc) && !in_fcache(mc.pc)); + /* XXX case 7457: if the thread is suspended after it received a fault + * but before the kernel copied the faulting context to the user mode + * structures for the handler, it could result in a codemod exception + * that wouldn't happen natively! 
+ */ + DEBUG_DECLARE(ok =) + thread_set_mcontext(thread, &mc); + ASSERT(ok); + /* i#249: restore app's PEB/TEB fields */ + IF_WINDOWS(restore_peb_pointer_for_thread(thread->dcontext)); +} + +static void +detach_cleanup_helper(thread_record_t *thread _IF_WINDOWS(bool detach_stacked_callbacks)) +{ + DEBUG_DECLARE(int exit_res =) + dynamo_shared_exit(thread _IF_WINDOWS(detach_stacked_callbacks)); + ASSERT(exit_res == SUCCESS); + detach_finalize_cleanup(); + + stack_free(d_r_initstack, DYNAMORIO_STACK_SIZE); + + dynamo_exit_post_detach(); + + doing_detach = false; + started_detach = false; + + SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); + dynamo_detaching_flag = LOCK_FREE_STATE; + EXITING_DR(); + options_detach(); +} + void detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats) { @@ -1977,8 +2048,6 @@ detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats) bool detach_stacked_callbacks; bool *cleanup_tpc; #endif - DEBUG_DECLARE(bool ok;) - DEBUG_DECLARE(int exit_res;) /* synch-all flags: */ uint flags = 0; @@ -2152,7 +2221,6 @@ detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats) LOG(GLOBAL, LOG_ALL, 1, "Detach: starting to translate contexts\n"); for (i = 0; i < num_threads; i++) { - priv_mcontext_t mc; if (threads[i]->dcontext == my_dcontext) { my_idx = i; my_tr = threads[i]; @@ -2166,54 +2234,7 @@ detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats) LOG(GLOBAL, LOG_ALL, 2, "Detach: not translating " TIDFMT "\n", threads[i]->id); } else { - LOG(GLOBAL, LOG_ALL, 2, "Detach: translating " TIDFMT "\n", threads[i]->id); - DEBUG_DECLARE(ok =) - thread_get_mcontext(threads[i], &mc); - ASSERT(ok); - /* For a thread at a syscall, we use SA_RESTART for our suspend signal, - * so the kernel will adjust the restart point back to the syscall for us - * where expected. 
This is an artifical signal we're introducing, so an - * app that assumes no signals and assumes its non-auto-restart syscalls - * don't need loops could be broken. - */ - LOG(GLOBAL, LOG_ALL, 3, - /* Having the code bytes can help diagnose post-detach where the code - * cache is gone. - */ - "Detach: pre-xl8 pc=%p (%02x %02x %02x %02x %02x), xsp=%p " - "for thread " TIDFMT "\n", - mc.pc, *mc.pc, *(mc.pc + 1), *(mc.pc + 2), *(mc.pc + 3), *(mc.pc + 4), - mc.xsp, threads[i]->id); - DEBUG_DECLARE(ok =) - translate_mcontext(threads[i], &mc, true /*restore mem*/, NULL /*f*/); - ASSERT(ok); - - if (!threads[i]->under_dynamo_control) { - LOG(GLOBAL, LOG_ALL, 1, - "Detach : thread " TIDFMT " already running natively\n", - threads[i]->id); - /* we do need to restore the app ret addr, for native_exec */ - if (!DYNAMO_OPTION(thin_client) && DYNAMO_OPTION(native_exec) && - !vmvector_empty(native_exec_areas)) { - put_back_native_retaddrs(threads[i]->dcontext); - } - } - detach_finalize_translation(threads[i], &mc); - - LOG(GLOBAL, LOG_ALL, 1, "Detach: pc=" PFX " for thread " TIDFMT "\n", mc.pc, - threads[i]->id); - ASSERT(!is_dynamo_address(mc.pc) && !in_fcache(mc.pc)); - /* XXX case 7457: if the thread is suspended after it received a fault - * but before the kernel copied the faulting context to the user mode - * structures for the handler, it could result in a codemod exception - * that wouldn't happen natively! - */ - DEBUG_DECLARE(ok =) - thread_set_mcontext(threads[i], &mc); - ASSERT(ok); - - /* i#249: restore app's PEB/TEB fields */ - IF_WINDOWS(restore_peb_pointer_for_thread(threads[i]->dcontext)); + detach_set_mcontext_helper(threads[i]); } /* Resumes the thread, which will do kernel-visible cleanup of * signal state. 
Resume happens within the synch_all region where @@ -2272,20 +2293,148 @@ detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats) SYSLOG_INTERNAL_INFO("Detaching from process, entering final cleanup"); if (drstats != NULL) stats_get_snapshot(drstats); - DEBUG_DECLARE(exit_res =) - dynamo_shared_exit(my_tr _IF_WINDOWS(detach_stacked_callbacks)); - ASSERT(exit_res == SUCCESS); - detach_finalize_cleanup(); - - stack_free(d_r_initstack, DYNAMORIO_STACK_SIZE); - - dynamo_exit_post_detach(); - - doing_detach = false; - started_detach = false; + detach_cleanup_helper(my_tr _IF_WINDOWS(detach_stacked_callbacks)); +} - SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); - dynamo_detaching_flag = LOCK_FREE_STATE; - EXITING_DR(); - options_detach(); +#ifdef LINUX +void +detach_externally_on_new_stack() +{ + dcontext_t *my_dcontext; + priv_mcontext_t my_mcontext; + thread_record_t **threads; + thread_record_t *my_tr = NULL; + int i, num_threads, my_idx = -1; + thread_id_t my_id; + DEBUG_DECLARE(bool ok;) + /* synch-all flags: */ + uint flags = 0; + /* For Unix, such privilege problems are rarer but we would still prefer to + * continue if we hit a problem. + */ + flags |= THREAD_SYNCH_SUSPEND_FAILURE_IGNORE; + /* i#297: we only synch client threads after process exit event. */ + flags |= THREAD_SYNCH_SKIP_CLIENT_THREAD; + ENTERING_DR(); + /* dynamo_detaching_flag is not really a lock, and since no one ever waits + * on it we can't deadlock on it either. + */ + if (!atomic_compare_exchange(&dynamo_detaching_flag, LOCK_FREE_STATE, LOCK_SET_STATE)) + return; + instrument_pre_detach_event(); + /* Unprotect .data for exit cleanup. 
+ * XXX: more secure to not do this until we've synched, but then need + * alternative prot for started_detach and init_apc_go_native* + */ + SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); + ASSERT(!started_detach); + started_detach = true; + ASSERT(dynamo_initialized); + ASSERT(!dynamo_exited); + my_id = d_r_get_thread_id(); + my_dcontext = get_thread_private_dcontext(); + ASSERT(my_dcontext != NULL); + LOG(GLOBAL, LOG_ALL, 1, "Detach: thread %d starting detach process\n", my_id); + SYSLOG(SYSLOG_INFORMATION, INFO_DETACHING, 2, get_application_name(), + get_application_pid()); + /* synch with flush */ + if (my_dcontext != NULL) + enter_threadexit(my_dcontext); + /* i#2270: we ignore alarm signals during detach to reduce races. */ + signal_remove_alarm_handlers(my_dcontext); + /* suspend all DR-controlled threads at safe locations */ + if (!synch_with_all_threads(THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT, &threads, + &num_threads, + /* Case 6821: allow other synch-all-thread uses + * that beat us to not wait on us. We still have + * a problem if we go first since we must xfer + * other threads. + */ + THREAD_SYNCH_NO_LOCKS_NO_XFER, flags)) { + REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_SYNCHRONIZE_THREADS, 2, + get_application_name(), get_application_pid()); + } + /* Now we own the thread_initexit_lock. We'll release the locks grabbed in + * synch_with_all_threads below after cleaning up all the threads in case we + * need to grab it during process exit cleanup. + */ + ASSERT(mutex_testlock(&all_threads_synch_lock) && + mutex_testlock(&thread_initexit_lock)); + ASSERT(!doing_detach); + doing_detach = true; + detacher_tid = d_r_get_thread_id(); +# ifdef HOT_PATCHING_INTERFACE + /* In hotp_only mode, we must remove patches when detaching; we don't want + * to leave in all our hooks and detach; that will definitely crash the app. 
+ */ + if (DYNAMO_OPTION(hotp_only)) + hotp_only_detach_helper(); +# endif + if (!DYNAMO_OPTION(thin_client)) + revert_memory_regions(); + unhook_vsyscall(); + LOG(GLOBAL, LOG_ALL, 1, + "Detach : unpatched ntdll.dll and fixed memory permissions\n"); + /* perform exit tasks that require full thread data structs */ + dynamo_process_exit_with_thread_info(); + LOG(GLOBAL, LOG_ALL, 1, "Detach: starting to translate contexts\n"); + for (i = 0; i < num_threads; i++) { + if (threads[i]->dcontext == my_dcontext) { + my_idx = i; + my_tr = threads[i]; + DEBUG_DECLARE(ok =) + thread_get_nudged_mcontext(threads[i], &my_mcontext); + DEBUG_DECLARE(ok =) + translate_mcontext(threads[i], &my_mcontext, true /*restore mem*/, + NULL /*f*/); + continue; + } else if (IS_CLIENT_THREAD(threads[i]->dcontext)) { + /* i#297 we will kill client-owned threads later after app exit events + * in dynamo_shared_exit(). + */ + continue; + } else if (detach_do_not_translate(threads[i])) { + LOG(GLOBAL, LOG_ALL, 2, "Detach: not translating " TIDFMT "\n", + threads[i]->id); + } else { + detach_set_mcontext_helper(threads[i]); + } + /* Resumes the thread, which will do kernel-visible cleanup of + * signal state. Resume happens within the synch_all region where + * the thread_initexit_lock is held so that we can clean up thread + * data later. + */ + os_signal_thread_detach(threads[i]->dcontext); + LOG(GLOBAL, LOG_ALL, 1, "Detach: thread " TIDFMT " is being resumed as native\n", + threads[i]->id); + os_thread_resume(threads[i]); + } + LOG(GLOBAL, LOG_ALL, 1, "Detach: waiting for threads to fully detach\n"); + for (i = 0; i < num_threads; i++) { + if (i != my_idx && !IS_CLIENT_THREAD(threads[i]->dcontext)) + os_wait_thread_detached(threads[i]->dcontext); + } + /* Clean up each thread now that everyone has gone native. Needs to be + * done with the thread_initexit_lock held, which is true within a synched + * region. 
+ */ + for (i = 0; i < num_threads; i++) { + if (i != my_idx && !IS_CLIENT_THREAD(threads[i]->dcontext)) { + LOG(GLOBAL, LOG_ALL, 1, "Detach: cleaning up thread " TIDFMT " %s\n", + threads[i]->id, IF_WINDOWS_ELSE(cleanup_tpc[i] ? "and its TPC" : "", "")); + dynamo_other_thread_exit(threads[i] _IF_WINDOWS(!cleanup_tpc[i])); + } + } + if (my_idx != -1) { + /* pre-client thread cleanup (PR 536058) */ + dynamo_thread_exit_pre_client(my_dcontext, my_tr->id); + } + LOG(GLOBAL, LOG_ALL, 1, "Detach: Letting secondary threads go native\n"); + end_synch_with_all_threads(threads, num_threads, false /*don't resume */); + threads = NULL; + LOG(GLOBAL, LOG_ALL, 1, "Detach: Entering final cleanup and unload\n"); + SYSLOG_INTERNAL_INFO("Detaching from process, entering final cleanup"); + detach_cleanup_helper(my_tr); + thread_set_self_mcontext(&my_mcontext, true); } +#endif diff --git a/core/synch.h b/core/synch.h index 90e1382c8f4..461d429a625 100644 --- a/core/synch.h +++ b/core/synch.h @@ -263,6 +263,11 @@ send_all_other_threads_native(void); void detach_on_permanent_stack(bool internal, bool do_cleanup, dr_stats_t *drstats); +#ifdef LINUX +void +detach_externally_on_new_stack(); +#endif + /*** exported for detach only ***/ bool diff --git a/core/unix/loader.c b/core/unix/loader.c index 88c8ec35e3f..0e29bd5fca6 100644 --- a/core/unix/loader.c +++ b/core/unix/loader.c @@ -1,5 +1,5 @@ /* ******************************************************************************* - * Copyright (c) 2011-2023 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved. 
* *******************************************************************************/ @@ -157,7 +157,7 @@ static privmod_t * privload_locate_and_load(const char *impname, privmod_t *dependent, bool reachable); static void -privload_call_lib_func(fp_t func); +privload_call_lib_func(dcontext_t *dcontext, privmod_t *privmod, fp_t func); static void privload_relocate_mod(privmod_t *mod); @@ -618,7 +618,7 @@ privload_call_entry(dcontext_t *dcontext, privmod_t *privmod, uint reason) if (opd->init != NULL) { LOG(GLOBAL, LOG_LOADER, 4, "%s: calling %s init func " PFX "\n", __FUNCTION__, privmod->name, opd->init); - privload_call_lib_func(opd->init); + privload_call_lib_func(dcontext, privmod, opd->init); } if (opd->init_array != NULL) { uint i; @@ -626,7 +626,7 @@ privload_call_entry(dcontext_t *dcontext, privmod_t *privmod, uint reason) if (opd->init_array[i] != NULL) { /* be paranoid */ LOG(GLOBAL, LOG_LOADER, 4, "%s: calling %s init array func " PFX "\n", __FUNCTION__, privmod->name, opd->init_array[i]); - privload_call_lib_func(opd->init_array[i]); + privload_call_lib_func(dcontext, privmod, opd->init_array[i]); } } } @@ -648,7 +648,7 @@ privload_call_entry(dcontext_t *dcontext, privmod_t *privmod, uint reason) if (opd->fini != NULL) { LOG(GLOBAL, LOG_LOADER, 4, "%s: calling %s fini func " PFX "\n", __FUNCTION__, privmod->name, opd->fini); - privload_call_lib_func(opd->fini); + privload_call_lib_func(dcontext, privmod, opd->fini); } if (opd->fini_array != NULL) { uint i; @@ -656,7 +656,7 @@ privload_call_entry(dcontext_t *dcontext, privmod_t *privmod, uint reason) if (opd->fini_array[i] != NULL) { /* be paranoid */ LOG(GLOBAL, LOG_LOADER, 4, "%s: calling %s fini array func " PFX "\n", __FUNCTION__, privmod->name, opd->fini_array[i]); - privload_call_lib_func(opd->fini_array[i]); + privload_call_lib_func(dcontext, privmod, opd->fini_array[i]); } } } @@ -723,6 +723,15 @@ privload_os_finalize(privmod_t *privmod) if ((ver[0] == '\0' || ver[0] < '2') || ver[1] != '.' 
|| ver[2] < '3' || (ver[2] == '3' && ver[3] < '4')) return; +# ifndef X86 + /* XXX i#6611: We have privload_set_pthread_tls_fields() setting the pthread tid + * field on x86, but not other arches. Since we have the glibc version here and + * believe this to be limited to 2.37 we warn about it here. + */ + if (ver[2] == '3' && ver[3] >= '7') { + SYSLOG_INTERNAL_WARNING("glibc 2.37+ i#6611 pthread tid fix NYI for non-x86"); + } +# endif if (privmod_ld_linux == NULL) { SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed ld"); return; @@ -734,15 +743,21 @@ privload_os_finalize(privmod_t *privmod) SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed glro"); return; } + int GLRO_dl_tls_static_size_OFFS; + int GLRO_dl_tls_static_align_OFFS; # ifdef X64 - const int GLRO_dl_tls_static_size_OFFS = 0x2a8; - const int GLRO_dl_tls_static_align_OFFS = 0x2b0; + // The offsets changed between 2.38 and 2.39. + if (ver[2] == '3' && ver[3] < '9') { + GLRO_dl_tls_static_size_OFFS = 0x2a8; + GLRO_dl_tls_static_align_OFFS = 0x2b0; + } else { + GLRO_dl_tls_static_size_OFFS = 0x2c8; + GLRO_dl_tls_static_align_OFFS = 0x2d0; + } # else // The offsets changed between 2.35 and 2.36. - const int GLRO_dl_tls_static_size_OFFS = - (ver[2] == '3' && ver[3] == '5') ? 0x328 : 0x31c; - const int GLRO_dl_tls_static_align_OFFS = - (ver[2] == '3' && ver[3] == '5') ? 0x32c : 0x320; + GLRO_dl_tls_static_size_OFFS = (ver[2] == '3' && ver[3] == '5') ? 0x328 : 0x31c; + GLRO_dl_tls_static_align_OFFS = (ver[2] == '3' && ver[3] == '5') ? 
0x32c : 0x320; # endif size_t val = 4096, written; if (!safe_write_ex(glro + GLRO_dl_tls_static_size_OFFS, sizeof(val), &val, @@ -1064,7 +1079,7 @@ get_private_library_address(app_pc modbase, const char *name) } static void -privload_call_lib_func(fp_t func) +privload_call_lib_func(dcontext_t *dcontext, privmod_t *privmod, fp_t func) { char dummy_str[] = "dummy"; char *dummy_argv[2]; @@ -1076,7 +1091,12 @@ privload_call_lib_func(fp_t func) */ dummy_argv[0] = dummy_str; dummy_argv[1] = NULL; - func(1, dummy_argv, our_environ); + TRY_EXCEPT_ALLOW_NO_DCONTEXT( + dcontext, { func(1, dummy_argv, our_environ); }, + { /* EXCEPT */ + SYSLOG_INTERNAL_ERROR("Private library %s init/fini func " PFX " crashed", + privmod->name, func); + }); } bool diff --git a/core/unix/loader_linux.c b/core/unix/loader_linux.c index f194e6ad00e..eb307818b87 100644 --- a/core/unix/loader_linux.c +++ b/core/unix/loader_linux.c @@ -39,6 +39,7 @@ #include "../module_shared.h" #include "os_private.h" #include "../ir/instr.h" /* SEG_GS/SEG_FS */ +#include "../ir/decode.h" #include "module.h" #include "module_private.h" #include "../heap.h" /* HEAPACCT */ @@ -283,7 +284,108 @@ privload_mod_tls_init(privmod_t *mod) } static void -privload_copy_tls_block(app_pc priv_tls_base, uint mod_idx) +privload_set_pthread_tls_fields(privmod_t *mod, app_pc priv_tls_base) +{ + /* Set pthreads fields for glibc 2.34+ where pthreads is inside libc and has + * special undocumented initialization by ld.so. + */ + if (strstr(mod->name, "libc.so") != mod->name) + return; + os_privmod_data_t *opd = (os_privmod_data_t *)mod->os_privmod_data; + /* i#6611: Find the pthread tid TLS field offset by decoding a function known + * to reference it. 
+ */ +# define PTHREAD_TID_FUNC_NAME "pthread_mutex_consistent" + void (*tid_using_func)(bool) = (void (*)(bool))get_proc_address_from_os_data( + &opd->os_data, opd->load_delta, PTHREAD_TID_FUNC_NAME, NULL); + if (tid_using_func == NULL) + return; + LOG(GLOBAL, LOG_LOADER, 2, "%s: decoding %s to find tid offset\n", __FUNCTION__, + PTHREAD_TID_FUNC_NAME); + app_pc pc = (app_pc)tid_using_func; + instr_t instr; + dcontext_t *dcontext = get_thread_private_dcontext(); + if (dcontext == NULL) + dcontext = GLOBAL_DCONTEXT; + instr_init(dcontext, &instr); + /* This is a small function with 10-15 instructions, and we stop when we hit + * a return. We set a just-in-case upper limit of 64 to ensure we don't + * loop for too long if something goes wrong. + */ +# define MAX_INSTRS_TO_DECODE 64 + int instr_count = 0; + while (instr_count < MAX_INSTRS_TO_DECODE) { + IF_DEBUG(app_pc prev_pc = pc;) + pc = decode(dcontext, pc, &instr); + if (pc == NULL || !instr_valid(&instr)) { + SYSLOG_INTERNAL_WARNING("%s: failed to decode from %p\n", __FUNCTION__, + prev_pc); + break; + } + if (instr_is_return(&instr)) { + SYSLOG_INTERNAL_WARNING("%s: failed to find TLS offset\n", __FUNCTION__); + break; + } + long *tid_slot = NULL; +# ifdef X86 + /* We're looking for the only far ref in the function, like this: + * 8ac26: 64 8b 04 25 d0 02 00 mov %fs:0x2d0,%eax + * 8ac2d: 00 + */ + if (instr_get_opcode(&instr) == OP_mov_ld && + opnd_is_far_base_disp(instr_get_src(&instr, 0)) && + opnd_get_segment(instr_get_src(&instr, 0)) == LIB_SEG_TLS) { + int offs = opnd_get_disp(instr_get_src(&instr, 0)); + tid_slot = (long *)(priv_tls_base + offs); + } +# elif defined(AARCH64) + /* TODO i#6611: We have to decode something like this to come up with + * privlib_tls_base - 0x700 + 208 but we need a 2.37+ machine to test on: + * 13dec: d53bd042 mrs x2, tpidr_el0 + * 13df0: 52800000 mov w0, #0x0 + * 13df4: d11c0042 sub x2, x2, #0x700 + * 13df8: b940d042 ldr w2, [x2, #208] + */ + /* We have a glibc 2.37+ 
SYSLOG_INTERNAL_WARNING in privload_os_finalize(). */ + break; +# else + /* XXX i#6611: Not supported yet. */ + /* We have a glibc 2.37+ SYSLOG_INTERNAL_WARNING in privload_os_finalize(). */ + break; +# endif + if (tid_slot != NULL) { + long cur_tid; + thread_id_t real_tid = get_sys_thread_id(); + if (!d_r_safe_read(tid_slot, sizeof(cur_tid), &cur_tid)) { + SYSLOG_INTERNAL_WARNING("%s: failed to read tid from slot %p\n", + __FUNCTION__, tid_slot); + } else if (cur_tid == real_tid) { + LOG(GLOBAL, LOG_LOADER, 2, "%s: tid slot is already correct\n", + __FUNCTION__); + } else { + LOG(GLOBAL, LOG_LOADER, 2, "%s: writing tid " TIDFMT " to slot %p\n", + __FUNCTION__, real_tid, tid_slot); + size_t written; + if (!safe_write_ex(tid_slot, sizeof(*tid_slot), &real_tid, &written) || + written != sizeof(*tid_slot)) { + SYSLOG_INTERNAL_WARNING("%s: failed to write tid to slot %p\n", + __FUNCTION__, tid_slot); + } + } + break; + } + instr_reset(dcontext, &instr); + ++instr_count; + } + if (instr_count >= MAX_INSTRS_TO_DECODE) { + SYSLOG_INTERNAL_WARNING("%s: decoding hit max instr count before target or ret\n", + __FUNCTION__); + } + instr_free(dcontext, &instr); +} + +static void +privload_copy_tls_block(privmod_t *mod, app_pc priv_tls_base, uint mod_idx) { os_privmod_data_t *opd = tls_info.mods[mod_idx]->os_privmod_data; void *dest; @@ -306,6 +408,7 @@ privload_copy_tls_block(app_pc priv_tls_base, uint mod_idx) */ ASSERT(opd->tls_block_size >= opd->tls_image_size); memset(dest + opd->tls_image_size, 0, opd->tls_block_size - opd->tls_image_size); + privload_set_pthread_tls_fields(mod, priv_tls_base); } /* Called post-reloc. 
*/ @@ -320,7 +423,7 @@ privload_mod_tls_primary_thread_init(privmod_t *mod) os_local_state_t *os_tls = get_os_tls(); app_pc priv_tls_base = os_tls->os_seg_info.priv_lib_tls_base; os_privmod_data_t *opd = (os_privmod_data_t *)mod->os_privmod_data; - privload_copy_tls_block(priv_tls_base, opd->tls_modid); + privload_copy_tls_block(mod, priv_tls_base, opd->tls_modid); } #endif @@ -401,7 +504,7 @@ privload_tls_init(void *app_tp) if (dynamo_initialized) { uint i; for (i = 0; i < tls_info.num_mods; i++) - privload_copy_tls_block(dr_tp, i); + privload_copy_tls_block(tls_info.mods[i], dr_tp, i); } return dr_tp; diff --git a/core/unix/module_elf.h b/core/unix/module_elf.h index 5e1ad537207..08e1b292143 100644 --- a/core/unix/module_elf.h +++ b/core/unix/module_elf.h @@ -42,6 +42,11 @@ # define DT_RELR 36 #endif +/* Workaround for EM_RISCV not being defined in elf.h on RHEL-7. */ +#ifndef EM_RISCV +# define EM_RISCV 243 +#endif + /* XXX i#1345: support mixed-mode 32-bit and 64-bit in one process. * There is no official support for that on Linux or Mac and for now we do * not support it either, especially not mixing libraries. diff --git a/core/unix/os.c b/core/unix/os.c index 6949390b5f3..6d7f85dcf88 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -3979,6 +3979,21 @@ thread_get_mcontext(thread_record_t *tr, priv_mcontext_t *mc) return true; } +#ifdef LINUX +bool +thread_get_nudged_mcontext(thread_record_t *tr, priv_mcontext_t *mc) +{ + /* This only works for a thread that just received a nudge signal. 
*/ + os_thread_data_t *ostd = (os_thread_data_t *)tr->dcontext->os_field; + ASSERT(ostd != NULL); + ASSERT(ostd->nudged_sigcxt != NULL); + sigcontext_to_mcontext(mc, ostd->nudged_sigcxt, DR_MC_ALL); + IF_ARM(dr_set_isa_mode(tr->dcontext, get_sigcontext_isa_mode(ostd->nudged_sigcxt), + NULL)); + return true; +} +#endif + bool thread_set_mcontext(thread_record_t *tr, priv_mcontext_t *mc) { diff --git a/core/unix/os_private.h b/core/unix/os_private.h index 1cfef5572a5..861cab9dc20 100644 --- a/core/unix/os_private.h +++ b/core/unix/os_private.h @@ -165,6 +165,11 @@ typedef struct _os_thread_data_t { KSYNCH_TYPE resumed; sig_full_cxt_t *suspended_sigcxt; +#ifdef LINUX + /* For detachment on Linux. */ + sig_full_cxt_t *nudged_sigcxt; +#endif + /* PR 297902: for thread termination */ bool terminate; /* Any function that sets this flag must also notify possibly waiting diff --git a/core/unix/rseq_linux.c b/core/unix/rseq_linux.c index 41300a080ad..08fad619860 100644 --- a/core/unix/rseq_linux.c +++ b/core/unix/rseq_linux.c @@ -581,7 +581,9 @@ rseq_process_module(module_area_t *ma, bool at_map, bool saw_glibc_rseq_reg) ELF_SECTION_HEADER_TYPE *sec_hdr = NULL; char *strtab; ssize_t load_offs = ma->start - ma->os_data.base_address; - if (at_map && elf_hdr->e_shoff + ma->start < ma->end) { + if (at_map && + elf_hdr->e_shoff + elf_hdr->e_shnum * elf_hdr->e_shentsize + ma->start < + ma->end) { sec_map = elf_hdr->e_shoff + ma->start; sec_hdr = (ELF_SECTION_HEADER_TYPE *)sec_map; /* We assume strtab is there too. */ diff --git a/core/unix/signal.c b/core/unix/signal.c index 90767800081..98648d2e84b 100644 --- a/core/unix/signal.c +++ b/core/unix/signal.c @@ -817,12 +817,18 @@ create_clone_record(dcontext_t *dcontext, reg_t *app_thread_xsp) * cl_args->stack. But we expect the highest (non-inclusive) * in the clone record's app_thread_xsp. 
*/ - record->app_thread_xsp = dr_clone_args->stack + dr_clone_args->stack_size; + if (dr_clone_args->stack == 0) + record->app_thread_xsp = get_mcontext(dcontext)->xsp; + else + record->app_thread_xsp = dr_clone_args->stack + dr_clone_args->stack_size; record->clone_flags = dr_clone_args->flags; record->app_clone_args = app_clone_args; } else { #endif - record->app_thread_xsp = *app_thread_xsp; + if (*app_thread_xsp == 0) + record->app_thread_xsp = get_mcontext(dcontext)->xsp; + else + record->app_thread_xsp = *app_thread_xsp; record->clone_flags = dcontext->sys_param0; IF_LINUX(record->app_clone_args = NULL); #ifdef LINUX @@ -3166,10 +3172,10 @@ translate_sigcontext(dcontext_t *dcontext, kernel_ucontext_t *uc, bool avoid_fai /* Takes an os-specific context */ void -thread_set_self_context(void *cxt) +thread_set_self_context(void *cxt, bool is_detach_external) { #ifdef X86 - if (!INTERNAL_OPTION(use_sigreturn_setcontext)) { + if (!INTERNAL_OPTION(use_sigreturn_setcontext) || is_detach_external) { sigcontext_t *sc = (sigcontext_t *)cxt; dr_jmp_buf_t buf; buf.xbx = sc->SC_XBX; @@ -3311,7 +3317,7 @@ thread_set_segment_registers(sigcontext_t *sc) /* Takes a priv_mcontext_t */ void -thread_set_self_mcontext(priv_mcontext_t *mc) +thread_set_self_mcontext(priv_mcontext_t *mc, bool is_detach_external) { kernel_ucontext_t ucxt; sig_full_cxt_t sc_full; @@ -3325,7 +3331,7 @@ thread_set_self_mcontext(priv_mcontext_t *mc) IF_ARM( set_pc_mode_in_cpsr(sc_full.sc, dr_get_isa_mode(get_thread_private_dcontext()))); /* thread_set_self_context will fill in the real fp/simd state for x86 */ - thread_set_self_context((void *)sc_full.sc); + thread_set_self_context((void *)sc_full.sc, is_detach_external); ASSERT_NOT_REACHED(); } @@ -4142,6 +4148,7 @@ send_signal_to_client(dcontext_t *dcontext, int sig, sigframe_rt_t *frame, /* i#207: fragment tag and fcache start pc on fault. 
*/ si.fault_fragment_info.tag = NULL; si.fault_fragment_info.cache_start_pc = NULL; + si.fault_fragment_info.ilist = NULL; /* i#182/PR 449996: we provide the pre-translation context */ if (raw_sc != NULL) { fragment_t wrapper; @@ -4755,8 +4762,8 @@ find_next_fragment_from_gencode(dcontext_t *dcontext, sigcontext_t *sc) if (f == NULL && sc->SC_XCX != 0) f = fragment_lookup(dcontext, (app_pc)sc->SC_XCX); #elif defined(RISCV64) - /* FIXME i#3544: Not implemented */ - ASSERT_NOT_IMPLEMENTED(false); + if (f == NULL && sc->SC_A2 != 0) + f = fragment_lookup(dcontext, (app_pc)(sc->SC_A2)); #else # error Unsupported arch. #endif @@ -7917,10 +7924,15 @@ signal_to_itimer_type(int sig) static bool alarm_signal_has_DR_only_itimer(dcontext_t *dcontext, int signal) { - thread_sig_info_t *info = (thread_sig_info_t *)dcontext->signal_field; int which = signal_to_itimer_type(signal); if (which == -1) return false; +#ifdef LINUX + if (dcontext == GLOBAL_DCONTEXT) { + return false; + } +#endif + thread_sig_info_t *info = (thread_sig_info_t *)dcontext->signal_field; if (info->shared_itimer) acquire_recursive_lock(&(*info->itimer)[which].lock); bool DR_only = @@ -8480,8 +8492,13 @@ handle_suspend_signal(dcontext_t *dcontext, kernel_siginfo_t *siginfo, if (is_sigqueue_supported() && SUSPEND_SIGNAL == NUDGESIG_SIGNUM) { nudge_arg_t *arg = (nudge_arg_t *)siginfo; - if (!TEST(NUDGE_IS_SUSPEND, arg->flags)) + if (!TEST(NUDGE_IS_SUSPEND, arg->flags)) { +#ifdef LINUX + sig_full_initialize(&sc_full, ucxt); + ostd->nudged_sigcxt = &sc_full; +#endif return handle_nudge_signal(dcontext, siginfo, ucxt); + } } /* We distinguish from an app signal further below from the rare case of an diff --git a/core/unix/signal_private.h b/core/unix/signal_private.h index 251145be0f9..cd0d0dd2694 100644 --- a/core/unix/signal_private.h +++ b/core/unix/signal_private.h @@ -214,7 +214,11 @@ typedef _STRUCT_UCONTEXT /* == __darwin_ucontext */ kernel_ucontext_t; * (these are from 
/usr/src/linux/arch/i386/kernel/signal.c for kernel 2.4.17) */ -# define RETCODE_SIZE 8 +# if defined(X86) +# define RETCODE_SIZE 8 +# elif defined(ARM) +# define RETCODE_SIZE 16 +# endif typedef struct sigframe { # ifdef X86 @@ -280,11 +284,12 @@ typedef struct rt_sigframe { # elif defined(AARCHXX) kernel_siginfo_t info; kernel_ucontext_t uc; +# ifdef ARM char retcode[RETCODE_SIZE]; +# endif # elif defined(RISCV64) kernel_siginfo_t info; kernel_ucontext_t uc; - char retcode[RETCODE_SIZE]; # endif #elif defined(MACOS) diff --git a/core/vmareas.c b/core/vmareas.c index 7ce19f0c85e..9658dde7690 100644 --- a/core/vmareas.c +++ b/core/vmareas.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2023 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -8385,7 +8385,7 @@ check_in_last_thread_vm_area(dcontext_t *dcontext, app_pc pc) data->last_area->start <= pc); } /* last decoded app pc may be in last shared area instead */ - if (!in_last && DYNAMO_OPTION(shared_bbs)) { + if (!in_last && DYNAMO_OPTION(shared_bbs) && shared_data != NULL) { /* We avoid the high-ranked shared_vm_areas lock which can easily cause * rank order violations (i#3346). We're trying to catch the scenario * where a shared bb is being built and we fault decoding it. There, diff --git a/core/win32/events.mc b/core/win32/events.mc index 5b5a9e338f8..569bf164df3 100644 --- a/core/win32/events.mc +++ b/core/win32/events.mc @@ -533,7 +533,7 @@ Severity = Error Facility = DRCore SymbolicName = MSG_INSTRUMENTATION_TOO_LARGE Language=English -Application %1!s! (%2!s!). Basic block or trace instrumentation exceeded maximum size. Try lowering -max_bb_instrs and/or -max_trace_bbs. +Application %1!s! (%2!s!). Basic block or trace instrumentation exceeded maximum size. 
Try lowering -max_bb_instrs and/or -max_trace_bbs and/or set -disable_traces. . MessageId = diff --git a/core/win32/loader.c b/core/win32/loader.c index 9acf13d1777..005ee062c8f 100644 --- a/core/win32/loader.c +++ b/core/win32/loader.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2021 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2009-2010 Derek Bruening All rights reserved. * **********************************************************/ @@ -1501,7 +1501,11 @@ privload_call_entry(dcontext_t *dcontext, privmod_t *privmod, uint reason) /* i#2221: combase's entry fails on win10. So far ignoring it * hasn't cause any problems with simple clients. */ - str_case_prefix(privmod->name, "combase"))) { + str_case_prefix(privmod->name, "combase") || + /* i#6570: bcrypt's entry suddenly started failing. Ignoring it + * is working so far; if that changes we'll have to dig into it. + */ + str_case_prefix(privmod->name, "bcrypt"))) { LOG(GLOBAL, LOG_LOADER, 1, "%s: ignoring failure of %s entry\n", __FUNCTION__, privmod->name); res = TRUE; diff --git a/core/win32/os.c b/core/win32/os.c index b5dc3b3793f..2f294643ef2 100644 --- a/core/win32/os.c +++ b/core/win32/os.c @@ -2795,7 +2795,7 @@ thread_attach_setup(priv_mcontext_t *mc) * sets the context back). 
*/ mc->pc = data->continuation_pc; - thread_set_self_mcontext(mc); + thread_set_self_mcontext(mc, false); ASSERT_NOT_REACHED(); } /* Preclude double takeover if we become suspended while in ntdll */ @@ -5201,7 +5201,7 @@ thread_set_context(thread_record_t *tr, CONTEXT *context) /* Takes an os-specific context */ void -thread_set_self_context(void *cxt) +thread_set_self_context(void *cxt, bool is_detach_external) { /* We use NtContinue to avoid privilege issues with NtSetContext */ nt_continue((CONTEXT *)cxt); @@ -5210,7 +5210,7 @@ thread_set_self_context(void *cxt) /* Takes a priv_mcontext_t */ void -thread_set_self_mcontext(priv_mcontext_t *mc) +thread_set_self_mcontext(priv_mcontext_t *mc, bool is_detach_external) { /* We can't use heap for our CONTEXT as we have no opportunity to free it. * We assume call paths can handle a large stack buffer as size something @@ -5232,7 +5232,7 @@ thread_set_self_mcontext(priv_mcontext_t *mc) cxt = nt_initialize_context(buf, bufsz, cxt_flags); /* need ss and cs for setting my own context */ mcontext_to_context(cxt, mc, true /* set_cur_seg */); - thread_set_self_context(cxt); + thread_set_self_context(cxt, false); ASSERT_NOT_REACHED(); } diff --git a/ext/drpttracer/drpttracer.dox b/ext/drpttracer/drpttracer.dox index 08ec8273b79..bba5bc832f9 100644 --- a/ext/drpttracer/drpttracer.dox +++ b/ext/drpttracer/drpttracer.dox @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -82,8 +82,8 @@ The create function lets the client specify the following parameters: - pt_size_shift: The size shift of PT trace's ring buffer. It must be greater than 0, and the buffer size is 2^pt_size_shift * PAGE_SIZE. - - sideband_size_shift: The size shift of PT sideband data's ring buffer. 
It must be greater -than 0, and the buffer size is 2^sideband_size_shift * PAGE_SIZE. + - sideband_size_shift: The size shift of PT sideband data's ring buffer. It must be +greater than 0, and the buffer size is 2^sideband_size_shift * PAGE_SIZE. \note Linux perf sets the buffer size to 4MiB by default. Therefore, it is best for clients to set trace and sideband buffers larger than 4Mib. @@ -141,4 +141,42 @@ flag to drpttracer_start_tracing(). data from \p drpttracer doesn't contain sideband data; it only contains the trace data and metadata. +\section sec_unit_tests Unit Tests + +We have some unit tests that verify the kernel tracing feature in drmemtrace, which +also uses the `drpttracer` DynamoRIO extension. These tests are not +built and run by default because they require superuser permission. They are also +disabled automatically if the host system does not support the Intel-PT feature. + +To run these tests, pass `-DRUN_SUDO_TESTS=ON` to cmake when building DynamoRIO. E.g., + +``` +$ cmake -DRUN_SUDO_TESTS=ON -DBUILD_TESTS=ON +$ make -j +$ ctest -R 'drpttracer|drcacheoff.kernel' +``` + +On some systems, one may see errors like the following: + +``` +408: *** postcmd failed (1): drpt2ir: [28430, IP:ffffffffc11dd000] get next +408: instruction error: no memory mapped at this address +``` + +This is because our kcore copy logic may have missed copying some instructions from +`/proc/kcore`. We rely on `/proc/modules` and `/proc/kallsyms` to point to relevant +kernel code regions. Symbols for JIT code like eBPF are not included by default. The +following workarounds may help in cases where the missing memory region belongs to +BPF JIT code. They make the BPF JIT code symbols visible in `/proc/kallsyms`. + +``` +$ sudo bash -c "echo 0 > /proc/sys/net/core/bpf_jit_harden" +$ sudo bash -c "echo 1 > /proc/sys/net/core/bpf_jit_kallsyms" +``` + +You may want to record the existing values in these configs so you can revert them +after running the tests.
See +https://docs.kernel.org/admin-guide/sysctl/net.html#proc-sys-net-core-network-core-options +for more details. + */ diff --git a/ext/drsyms/CMakeLists.txt b/ext/drsyms/CMakeLists.txt index b8b4571e255..8b9627911a8 100644 --- a/ext/drsyms/CMakeLists.txt +++ b/ext/drsyms/CMakeLists.txt @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2010-2022 Google, Inc. All rights reserved. +# Copyright (c) 2010-2024 Google, Inc. All rights reserved. # Copyright (c) 2010 VMware, Inc. All rights reserved. # ********************************************************** @@ -69,6 +69,8 @@ endif () # we need libc b/c our elftoolchain libraries use it set(DynamoRIO_USE_LIBC ON) +set(USE_ELFUTILS OFF) + # We use our own .lib file to support VS2005 whose dbghelp.lib doesn't have some # routines we want to use. if (WIN32) @@ -126,22 +128,108 @@ if (WIN32) elseif (UNIX) set(srcs drsyms_unix_frontend.c drsyms_unix_common.c - drsyms_dwarf.c demangle.cc drsyms_common.c) + demangle.cc drsyms_common.c) if (APPLE) - set(srcs ${srcs} drsyms_macho.c) + set(srcs ${srcs} drsyms_dwarf.c drsyms_macho.c) set(dwarf_libpath "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc-macho${ARCH}/lib${BITS}/libdwarf.a") set(elftc_libpath "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc-macho${ARCH}/lib${BITS}/libelftc.a") - else (APPLE) - set(srcs ${srcs} drsyms_elf.c) + elseif (ANDROID) + # TODO i#5926: Use elfutils for Android. First we need to get zlib installed + # in our test environment. 
+ set(srcs ${srcs} drsyms_dwarf.c drsyms_elf.c) set(dwarf_libpath "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc${ARCH}/lib${BITS}/libdwarf.a") set(elftc_libpath "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc${ARCH}/lib${BITS}/libelftc.a") set(elf_libpath "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc${ARCH}/lib${BITS}/libelf.a") - endif (APPLE) + else () + set(elftc_libpath + "${PROJECT_SOURCE_DIR}/ext/drsyms/libelftc${ARCH}/lib${BITS}/libelftc.a") + + message(STATUS "Using elfutils") + # TODO i#5926: Use elfutils everywhere. We start out with just Linux. + set(USE_ELFUTILS ON) + + set(elfutils_dir "${PROJECT_SOURCE_DIR}/third_party/elfutils") + if (NOT EXISTS "${elfutils_dir}") + message(FATAL_ERROR "Missing required submodule ${elfutils_dir}") + endif () + + # Apply patches. + # XXX i#5926: Better to fork the elfutils repo and apply these in the fork + # and have the fork be the source of our submodule? We could store config.h + # there too. + find_program(PATCH patch DOC "patch") + if (NOT PATCH) + message(FATAL_ERROR "Unable to find patch") + endif () + file(GLOB patches "${CMAKE_CURRENT_SOURCE_DIR}/elfutils/*.patch") + foreach (patch ${patches}) + get_filename_component(patch_base ${patch} NAME) + string(REGEX REPLACE ".patch$" "" patch_base ${patch_base}) + file(GLOB orig_path "${elfutils_dir}/*/${patch_base}") + list(LENGTH orig_path glob_count) + if (NOT glob_count EQUAL 1) + message(FATAL_ERROR "Failed to find single source for ${patch}") + endif () + list(APPEND patch_srcs ${orig_path}) + set(patch_path "${CMAKE_CURRENT_BINARY_DIR}/${patch_base}") + execute_process(COMMAND ${PATCH} -p1 -d "${elfutils_dir}" -o "${patch_path}" + INPUT_FILE "${patch}" + RESULT_VARIABLE patch_result ERROR_VARIABLE patch_err) + if (patch_result) + message(FATAL_ERROR "Failed to apply ${patch}: ${patch_err}") + endif () + endforeach () + + # Add the elfutils library build rules we need. We want PIC static libs. 
+ foreach (lib elf;dw;dwelf;ebl) + file(GLOB ${lib}_files "${elfutils_dir}/lib${lib}/*.c") + foreach (orig_path ${patch_srcs}) + if ("${orig_path}" IN_LIST ${lib}_files) + # Swap in our patched file. + list(REMOVE_ITEM ${lib}_files "${orig_path}") + get_filename_component(base ${orig_path} NAME) + set(patch_path "${CMAKE_CURRENT_BINARY_DIR}/${base}") + list(APPEND ${lib}_files "${patch_path}") + message(STATUS "Swapped in patched ${patch_path}") + endif () + endforeach () + add_library(${lib}_pic STATIC ${${lib}_files}) + # We want to directly use DR's allocator instead of relying on its private loader + # redirecting in order to support static usage with no loader. + # ld is not actually used, so we can't use its -wrap=malloc feature. + # Instead we rely on the preprocessor. + set_target_properties(${lib}_pic PROPERTIES + # We have a presumably-widely-applicable config.h in drsyms/elfutils. + INCLUDE_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/elfutils;${elfutils_dir}/lib;${elfutils_dir}/libasm;${elfutils_dir}/libebl;${elfutils_dir}/libdwelf;${elfutils_dir}/libdwfl" + COMPILE_DEFINITIONS + "_GNU_SOURCE;HAVE_CONFIG_H;_FORTIFY_SOURCE=3;PIC;SHARED;SYMBOL_VERSIONING;malloc=__wrap_malloc;calloc=__wrap_calloc;realloc=__wrap_realloc;free=__wrap_free;strdup=__wrap_strdup" + COMPILE_FLAGS "-std=gnu99 -Wall -g -O2 -fPIC") + DR_export_target(${lib}_pic) + install_exported_target(${lib}_pic ${INSTALL_EXT_LIB}) + copy_target_to_device(${lib}_pic "${location_suffix}") + endforeach () + # libdw uses pthread_rwlock_* routines. 
+ link_with_pthread(dw_pic) + + include_directories("${elfutils_dir}/libelf") + include_directories("${elfutils_dir}/libdw") + set(srcs ${srcs} drsyms_dw.c drsyms_elf.c) + add_definitions(-DUSE_ELFUTILS) + if (ZLIB_FOUND) + add_definitions(-DHAS_ZLIB) + include_directories(${ZLIB_INCLUDE_DIRS}) + else () + message(FATAL_ERROR "zlib not found but required to build drsyms_static on Linux") + endif () + # Avoid stdbool.h from libdw.h defining _Bool after dr_defines.h uses char. + add_definitions(-DDR__Bool_EXISTS) + endif () set(srcs_static ${srcs}) endif (WIN32) @@ -162,6 +250,14 @@ macro(configure_drsyms_target target) endif () # we always use the elftoolchain library when building with cmake append_property_list(TARGET ${target} COMPILE_DEFINITIONS "DRSYM_HAVE_LIBELFTC") + # For elfutils the linking is the same for shared and static so we place here. + if (USE_ELFUTILS) + target_link_libraries(${target} dw_pic) + if (LINUX) + target_link_libraries(${target} dwelf_pic elf_pic) + endif () + target_link_libraries(${target} ebl_pic ${ZLIB_LIBRARIES}) + endif () endmacro(configure_drsyms_target) configure_drsyms_target(drsyms) @@ -186,10 +282,13 @@ configure_extension(drsyms_static ON) configure_drsyms_target(drsyms_static) use_DynamoRIO_extension(drsyms_static drcontainers) -target_link_libraries(drsyms dwarf elftc) -if (LINUX) - target_link_libraries(drsyms elf) +if (NOT USE_ELFUTILS) + target_link_libraries(drsyms dwarf) + if (LINUX) + target_link_libraries(drsyms elf) + endif () endif () +target_link_libraries(drsyms elftc) # i#693: CMake will try to export the path to the static libs we use via # IMPORTED_LINK_INTERFACE_LIBRARIES_NOCONFIG, but they won't exist on the # user's machine. Clearing this property prevents that. @@ -197,18 +296,21 @@ endif () # INTERFACE_LINK_LIBRARIES property if policy CMP0022 is NEW. 
set_target_properties(drsyms PROPERTIES INTERFACE_LINK_LIBRARIES "") -# If drsyms is built static we need to include these with an exports path +# If drsyms is built static we need to include libelftc libs with an exports path # in DynamoRIOTarget*.cmake and not with the source path here: -add_library(dwarf STATIC IMPORTED) -set_property(TARGET dwarf PROPERTY IMPORTED_LOCATION "${dwarf_libpath}") +if (NOT USE_ELFUTILS) + add_library(dwarf STATIC IMPORTED) + set_property(TARGET dwarf PROPERTY IMPORTED_LOCATION "${dwarf_libpath}") + target_link_libraries(drsyms_static dwarf) + if (LINUX) + add_library(elf STATIC IMPORTED) + set_property(TARGET elf PROPERTY IMPORTED_LOCATION "${elf_libpath}") + target_link_libraries(drsyms_static elf) + endif (LINUX) +endif () add_library(elftc STATIC IMPORTED) set_property(TARGET elftc PROPERTY IMPORTED_LOCATION "${elftc_libpath}") -target_link_libraries(drsyms_static dwarf elftc) -if (LINUX) - add_library(elf STATIC IMPORTED) - set_property(TARGET elf PROPERTY IMPORTED_LOCATION "${elf_libpath}") - target_link_libraries(drsyms_static elf) -endif (LINUX) +target_link_libraries(drsyms_static elftc) if (UNIX) # Avoid missing symbols in static library build from g++ libs when # drsyms_bench is linked with gcc instead of g++ (i#715, happens w/ cmake diff --git a/ext/drsyms/drsyms.dox b/ext/drsyms/drsyms.dox index d056dccadee..245e1af3aa8 100755 --- a/ext/drsyms/drsyms.dox +++ b/ext/drsyms/drsyms.dox @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2023 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -86,16 +86,6 @@ and \p libelftc built from the elftoolchain project and requires no setup. 
-\subsection sec_drsyms_unsupported_dwarfv5 DWARFv5 not supported yet - -The elftoolchain project does -not support DWARFv5 yet (ticket). -As a result, drsyms is not able to read line information output by some compiler -varsions (i#5926); -e.g. this is the default behavior of g++-11. Possible workarounds are to use -a different compiler version that outputs DWARF version 2 to 4, or set -"-gdwarf-4" in the g++/gcc invocation to explicitly select the DWARFv4 format. - \section sec_drsyms_paths Search Paths On Linux, \p drsyms will look in the default debug directories for symbols diff --git a/ext/drsyms/drsyms_dw.c b/ext/drsyms/drsyms_dw.c new file mode 100644 index 00000000000..6b0525764ad --- /dev/null +++ b/ext/drsyms/drsyms_dw.c @@ -0,0 +1,467 @@ +/* ********************************************************** + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of VMware, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. 
 OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* DRSyms DynamoRIO Extension */ + +/* Symbol lookup routines for DWARF via elfutils' libdw. */ + +#include "dr_api.h" +#include "drsyms.h" +#include "drsyms_private.h" +#include "drsyms_obj.h" + +#include "libdw.h" + +#include <stdlib.h> /* qsort */ +#include <string.h> + +/* For debugging */ +static bool verbose = false; + +/* dwarf_errmsg(-1) uses the most recent error. */ +#define NOTIFY_DWARF() \ + do { \ + if (verbose) { \ + dr_fprintf(STDERR, "drsyms: Dwarf error: %s\n", dwarf_errmsg(-1)); \ + } \ + } while (0) + +typedef struct _dwarf_module_t { + byte *load_base; + dwarf_lib_handle_t dbg; + /* we cache the last CU we looked up */ + Dwarf_Die lines_cu; + Dwarf_Lines *lines; + size_t num_lines; + /* Amount to adjust all offsets for __PAGEZERO + PIE (i#1365) */ + ssize_t offs_adjust; +} dwarf_module_t; + +typedef enum { + SEARCH_FOUND = 0, + SEARCH_MAYBE = 1, + SEARCH_NOT_FOUND = 2, +} search_result_t; + +static search_result_t +search_addr2line_in_cu(dwarf_module_t *mod, Dwarf_Addr pc, Dwarf_Die *cu_die, + drsym_info_t *sym_info DR_PARAM_OUT); + +/****************************************************************************** + * DWARF parsing code. + */ + +#if 0 /* NOCHECK see below: do we need this? */ +/* Find the next DIE matching this tag. Uses the internal state of dbg to + * determine where to start searching.
+ */ +static Dwarf_Die +next_die_matching_tag(dwarf_lib_handle_t dbg, Dwarf_Tag search_tag) +{ + Dwarf_Half tag = 0; + Dwarf_Die *die = NULL; + + while (dwarf_siblingof(dbg, die, &die, &de) == DW_DLV_OK) { + if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { + NOTIFY_DWARF(de); + die = NULL; + break; + } + if (tag == search_tag) + break; + } + return die; +} +#endif + +/* Iterate over all the CUs in the module to find the CU containing the given + * PC. + */ +static Dwarf_Die * +find_cu_die_via_iter(dwarf_lib_handle_t dbg, Dwarf_Addr pc, Dwarf_Die *cu_die) +{ + Dwarf_Die *res = NULL; + Dwarf_Off cu_offset = 0, prev_offset = 0; + size_t hsize; + + while (dwarf_nextcu(dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + /* NOCHECK: do we need to "Scan forward in the tag soup for a CU DIE" via + * next_die_matching_tag(dbg, DW_TAG_compile_unit) as drsyms_dwarf does? + * Wouldn't dwarf_nextcu always find a CU DIE?? + * Ditto for the other 2 dwarf_nextcu loops below. + */ + if (dwarf_offdie(dbg, prev_offset + hsize, cu_die) != NULL) { + /* We found a CU die, now check if it's the one we wanted. */ + Dwarf_Addr lo_pc, hi_pc; + if (dwarf_lowpc(cu_die, &lo_pc) != 0 || dwarf_highpc(cu_die, &hi_pc) != 0) { + NOTIFY_DWARF(); + break; + } + if (lo_pc <= pc && pc < hi_pc) { + res = cu_die; + break; + } + } + prev_offset = cu_offset; + } + + while (dwarf_nextcu(dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + /* Reset the internal CU header state. 
*/ + } + + return res; +} + +static Dwarf_Die * +find_cu_die(dwarf_lib_handle_t dbg, Dwarf_Addr pc, Dwarf_Die *cu_die) +{ + Dwarf_Aranges *arlist; + size_t arcnt; + Dwarf_Arange *ar; + Dwarf_Off die_offs; + if (dwarf_getaranges(dbg, &arlist, &arcnt) != 0) { + NOTIFY_DWARF(); + return NULL; + } + ar = dwarf_getarange_addr(arlist, pc); + if (ar == NULL || dwarf_getarangeinfo(ar, NULL, NULL, &die_offs) != 0 || + dwarf_offdie(dbg, die_offs, cu_die) == NULL) { + NOTIFY_DWARF(); + /* Try to find it by walking all CU's and looking at their lowpc+highpc + * entries, which should work if each has a single contiguous + * range. Note that Cygwin and MinGW gcc don't seem to include + * lowpc+highpc in their CU's. + */ + return find_cu_die_via_iter(dbg, pc, cu_die); + } + return cu_die; +} + +/* Given a module and a PC, fill out sym_info with line information. + */ +bool +drsym_dwarf_search_addr2line(void *mod_in, Dwarf_Addr pc, + drsym_info_t *sym_info DR_PARAM_OUT) +{ + dwarf_module_t *mod = (dwarf_module_t *)mod_in; + Dwarf_Die cu_die; + Dwarf_Off cu_offset = 0, prev_offset = 0; + size_t hsize; + bool success = false; + search_result_t res; + + pc += mod->offs_adjust; + + /* On failure, these should be zeroed. + */ + sym_info->file_available_size = 0; + if (sym_info->file != NULL) + sym_info->file[0] = '\0'; + sym_info->line = 0; + sym_info->line_offs = 0; + + /* First try cutting down the search space by finding the CU (i.e., the .c + * file) that this function belongs to. + */ + if (find_cu_die(mod->dbg, pc, &cu_die) == NULL) { + NOTIFY("%s: failed to find CU die for " PFX ", searching all CUs\n", __FUNCTION__, + (ptr_uint_t)pc); + } else { + return (search_addr2line_in_cu(mod, pc, &cu_die, sym_info) != SEARCH_NOT_FOUND); + } + + /* We failed to find a CU containing this PC. Some compilers (clang) don't + * put lo_pc hi_pc attributes on compilation units. In this case, we + * iterate all the CUs and dig into the dwarf tag soup for all of them.
+ */ + while (dwarf_nextcu(mod->dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + /* Scan forward in the tag soup for a CU DIE. */ + if (dwarf_offdie(mod->dbg, prev_offset + hsize, &cu_die) != NULL) { + /* We found a CU die, now check if it's the one we wanted. */ + res = search_addr2line_in_cu(mod, pc, &cu_die, sym_info); + if (res == SEARCH_FOUND) { + success = true; + break; + } else if (res == SEARCH_MAYBE) { + success = true; + /* try to find a better fit: continue searching */ + } + } + prev_offset = cu_offset; + } + + while (dwarf_nextcu(mod->dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + /* Reset the internal CU header state. */ + } + + return success; +} + +static size_t +get_lines_from_cu(dwarf_module_t *mod, Dwarf_Die *cu_die, + Dwarf_Lines **lines_out DR_PARAM_OUT) +{ + if (memcmp(&mod->lines_cu, cu_die, sizeof(mod->lines_cu)) != 0) { + Dwarf_Lines *lines; + size_t num_lines; + if (dwarf_getsrclines(cu_die, &lines, &num_lines) != 0) { + NOTIFY_DWARF(); + return -1; + } + /* XXX: Confirm that libdw sorts, unlike libelftc; seems to in + * libdw/dwarf_getsrclines.c, so we don't re-sort here. + */ + mod->lines_cu = *cu_die; + mod->lines = lines; + mod->num_lines = num_lines; + } + *lines_out = mod->lines; + return mod->num_lines; +} + +static search_result_t +search_addr2line_in_cu(dwarf_module_t *mod, Dwarf_Addr pc, Dwarf_Die *cu_die, + drsym_info_t *sym_info DR_PARAM_OUT) +{ + Dwarf_Lines *lines = NULL; + size_t num_lines; + int i; + Dwarf_Addr lineaddr, next_lineaddr = 0; + Dwarf_Line *dw_line; + search_result_t res = SEARCH_NOT_FOUND; + + num_lines = get_lines_from_cu(mod, cu_die, &lines); + if (num_lines < 0) + return SEARCH_NOT_FOUND; + + if (verbose) { + const char *name = dwarf_diename(cu_die); + if (name != NULL) { + NOTIFY("%s: searching cu %s for pc 0" PFX "\n", __FUNCTION__, name, + (ptr_uint_t)pc); + } + } + + /* We could binary search this, but we assume dwarf_srclines is the + * bottleneck. 
+ */ + dw_line = NULL; + for (i = 0; i < num_lines - 1; i++) { + Dwarf_Line *line = dwarf_onesrcline(lines, i); + Dwarf_Line *next_line = dwarf_onesrcline(lines, i + 1); + if (line == NULL || next_line == NULL || dwarf_lineaddr(line, &lineaddr) != 0 || + dwarf_lineaddr(next_line, &next_lineaddr) != 0) { + NOTIFY_DWARF(); + break; + } + NOTIFY("%s: pc " PFX " vs line " PFX "-" PFX "\n", __FUNCTION__, (ptr_uint_t)pc, + (ptr_uint_t)lineaddr, (ptr_uint_t)next_lineaddr); + if (lineaddr <= pc && pc < next_lineaddr) { + dw_line = line; + res = SEARCH_FOUND; + break; + } + } + /* Handle the case when the PC is from the last line of the CU. */ + if (i == num_lines - 1 && dw_line == NULL && next_lineaddr <= pc) { + NOTIFY("%s: pc " PFX " vs last line " PFX "\n", __FUNCTION__, (ptr_uint_t)pc, + (ptr_uint_t)next_lineaddr); + dw_line = dwarf_onesrcline(lines, num_lines - 1); + if (dw_line != NULL) + res = SEARCH_MAYBE; + } + + /* If we found dw_line, use it to fill out sym_info. */ + if (dw_line != NULL) { + const char *file; + int lineno; + + file = dwarf_linesrc(dw_line, NULL, NULL); + if (file == NULL || dwarf_lineno(dw_line, &lineno) != 0 || + dwarf_lineaddr(dw_line, &lineaddr) != 0) { + NOTIFY_DWARF(); + res = SEARCH_NOT_FOUND; + } else { + /* File comes from .debug_str and therefore lives until + * drsym_exit, but caller has provided space that we must copy into. + */ + sym_info->file_available_size = strlen(file); + if (sym_info->file != NULL) { + strncpy(sym_info->file, file, sym_info->file_size); + sym_info->file[sym_info->file_size - 1] = '\0'; + } + sym_info->line = lineno; + sym_info->line_offs = (size_t)(pc - lineaddr); + } + } + + return res; +} + +/* Return value: 0 means success but break; 1 means success and continue; + * -1 means error. 
+ */ +static int +enumerate_lines_in_cu(dwarf_module_t *mod, Dwarf_Die *cu_die, + drsym_enumerate_lines_cb callback, void *data) +{ + Dwarf_Lines *lines = NULL; + ssize_t num_lines; /* signed: get_lines_from_cu() reports failure as -1 */ + int i; + drsym_line_info_t info; + + info.cu_name = dwarf_diename(cu_die); + if (info.cu_name == NULL) { + /* i#1477: it is possible that a DIE entry has a NULL name */ + NOTIFY_DWARF(); + } + + num_lines = get_lines_from_cu(mod, cu_die, &lines); + if (num_lines < 0) { + /* This cu has no line info. Don't bail: keep going. */ + info.file = NULL; + info.line = 0; + info.line_addr = 0; + if (!(*callback)(&info, data)) + return 0; + return 1; + } + + for (i = 0; i < num_lines; i++) { + int lineno; + Dwarf_Addr lineaddr; + Dwarf_Line *line = dwarf_onesrcline(lines, i); + + /* We do not want to bail on failure of any of these: we want to + * provide as much information as possible. + */ + info.file = dwarf_linesrc(line, NULL, NULL); + if (info.file == NULL) + NOTIFY_DWARF(); + + if (dwarf_lineno(line, &lineno) != 0) { + NOTIFY_DWARF(); + info.line = 0; + } else + info.line = lineno; + + if (dwarf_lineaddr(line, &lineaddr) != 0) { + NOTIFY_DWARF(); + info.line_addr = 0; + } else { + info.line_addr = (size_t)(lineaddr - (Dwarf_Addr)(ptr_uint_t)mod->load_base - + mod->offs_adjust); + } + if (!(*callback)(&info, data)) + return 0; + } + + return 1; +} + +drsym_error_t +drsym_dwarf_enumerate_lines(void *mod_in, drsym_enumerate_lines_cb callback, void *data) +{ + drsym_error_t success = DRSYM_SUCCESS; + dwarf_module_t *mod = (dwarf_module_t *)mod_in; + Dwarf_Die cu_die; + Dwarf_Off cu_offset = 0, prev_offset = 0; + size_t hsize; + + /* Enumerate all CU's */ + while (dwarf_nextcu(mod->dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + if (dwarf_offdie(mod->dbg, prev_offset + hsize, &cu_die) != NULL) { + int res = enumerate_lines_in_cu(mod, &cu_die, callback, data); + if (res < 0) + success = DRSYM_ERROR_LINE_NOT_AVAILABLE; + if (res <= 0) + break; + } + prev_offset = cu_offset; + } + 
+ while (dwarf_nextcu(mod->dbg, cu_offset, &cu_offset, &hsize, NULL, NULL, NULL) == 0) { + /* Reset the internal CU header state. */ + } + + return success; +} + +void * +drsym_dwarf_init(dwarf_lib_handle_t dbg) +{ + dwarf_module_t *mod = (dwarf_module_t *)dr_global_alloc(sizeof(*mod)); + memset(mod, 0, sizeof(*mod)); + mod->dbg = dbg; + return mod; +} + +void +drsym_dwarf_exit(void *mod_in) +{ + dwarf_module_t *mod = (dwarf_module_t *)mod_in; + dwarf_end(mod->dbg); + dr_global_free(mod, sizeof(*mod)); +} + +void +drsym_dwarf_set_obj_offs(void *mod_in, ssize_t adjust) +{ + dwarf_module_t *mod = (dwarf_module_t *)mod_in; + mod->offs_adjust = adjust; +} + +void +drsym_dwarf_set_load_base(void *mod_in, byte *load_base) +{ + dwarf_module_t *mod = (dwarf_module_t *)mod_in; + mod->load_base = load_base; +} + +#if defined(WINDOWS) && defined(STATIC_LIB) +/* if we build as a static library with "/MT /link /nodefaultlib libcmt.lib", + * somehow we're missing strdup + */ +char * +strdup(const char *s) +{ + char *res; + size_t len; + if (s == NULL) + return NULL; + len = strlen(s) + 1; + res = (char *)malloc(len); + if (res != NULL) /* malloc can fail: then return NULL instead of copying */ + memcpy(res, s, len); /* len includes the terminating NUL */ + return res; +} +#endif diff --git a/ext/drsyms/drsyms_dwarf.c b/ext/drsyms/drsyms_dwarf.c index 4077da82acc..deb586732b2 100644 --- a/ext/drsyms/drsyms_dwarf.c +++ b/ext/drsyms/drsyms_dwarf.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2015 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -32,11 +32,13 @@ /* DRSyms DynamoRIO Extension */ -/* Symbol lookup routines for DWARF */ +/* Symbol lookup routines for DWARF via elftoolchain's libdwarf. */ +/* TODO i#5926: Use elfutils everywhere and remove this file in favor of drsyms_dw.c. 
*/ #include "dr_api.h" #include "drsyms.h" #include "drsyms_private.h" +#include "drsyms_obj.h" #include "dwarf.h" #include "libdwarf.h" @@ -56,7 +58,7 @@ static bool verbose = false; typedef struct _dwarf_module_t { byte *load_base; - Dwarf_Debug dbg; + dwarf_lib_handle_t dbg; /* we cache the last CU we looked up */ Dwarf_Die lines_cu; Dwarf_Line *lines; @@ -83,7 +85,7 @@ search_addr2line_in_cu(dwarf_module_t *mod, Dwarf_Addr pc, Dwarf_Die cu_die, * determine where to start searching. */ static Dwarf_Die -next_die_matching_tag(Dwarf_Debug dbg, Dwarf_Tag search_tag) +next_die_matching_tag(dwarf_lib_handle_t dbg, Dwarf_Tag search_tag) { Dwarf_Half tag = 0; Dwarf_Die die = NULL; @@ -105,7 +107,7 @@ next_die_matching_tag(Dwarf_Debug dbg, Dwarf_Tag search_tag) * PC. */ static Dwarf_Die -find_cu_die_via_iter(Dwarf_Debug dbg, Dwarf_Addr pc) +find_cu_die_via_iter(dwarf_lib_handle_t dbg, Dwarf_Addr pc) { Dwarf_Die die = NULL; Dwarf_Unsigned cu_offset = 0; @@ -142,7 +144,7 @@ find_cu_die_via_iter(Dwarf_Debug dbg, Dwarf_Addr pc) } static Dwarf_Die -find_cu_die(Dwarf_Debug dbg, Dwarf_Addr pc) +find_cu_die(dwarf_lib_handle_t dbg, Dwarf_Addr pc) { Dwarf_Error de; /* expensive to init (DrM#1770) */ Dwarf_Die cu_die = NULL; @@ -443,7 +445,7 @@ drsym_dwarf_enumerate_lines(void *mod_in, drsym_enumerate_lines_cb callback, voi } void * -drsym_dwarf_init(Dwarf_Debug dbg) +drsym_dwarf_init(dwarf_lib_handle_t dbg) { dwarf_module_t *mod = (dwarf_module_t *)dr_global_alloc(sizeof(*mod)); memset(mod, 0, sizeof(*mod)); diff --git a/ext/drsyms/drsyms_elf.c b/ext/drsyms/drsyms_elf.c index f2c843a9da1..a24de379b0f 100644 --- a/ext/drsyms/drsyms_elf.c +++ b/ext/drsyms/drsyms_elf.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2020 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -40,8 +40,12 @@ #include "drsyms_obj.h" #include "libelf.h" -#include "dwarf.h" -#include "libdwarf.h" +#ifdef USE_ELFUTILS +# include "libdw.h" +#else +# include "dwarf.h" +# include "libdwarf.h" +#endif #include #include @@ -61,7 +65,7 @@ # endif #endif -static bool verbose = 0; +static int verbose = 0; #undef NOTIFY #ifdef DEBUG @@ -107,6 +111,9 @@ static bool verbose = 0; # define Elf_Shdr Elf64_Shdr # define Elf_Sym Elf64_Sym # define ELF_ST_TYPE ELF64_ST_TYPE +# ifdef USE_ELFUTILS +# define Elf_Note Elf64_Nhdr +# endif #else # define elf_getehdr elf32_getehdr # define elf_getphdr elf32_getphdr @@ -116,6 +123,9 @@ static bool verbose = 0; # define Elf_Shdr Elf32_Shdr # define Elf_Sym Elf32_Sym # define ELF_ST_TYPE ELF32_ST_TYPE +# ifdef USE_ELFUTILS +# define Elf_Note Elf32_Nhdr +# endif #endif typedef struct _elf_info_t { @@ -323,9 +333,15 @@ drsym_obj_mod_init_post(void *mod_in, byte *map_base, void *dwarf_info) } bool -drsym_obj_dwarf_init(void *mod_in, Dwarf_Debug *dbg) +drsym_obj_dwarf_init(void *mod_in, dwarf_lib_handle_t *dbg) { elf_info_t *mod = (elf_info_t *)mod_in; +#ifdef USE_ELFUTILS + // Need to use elfutils Elf* from elf_memory (after calling elf_version(EV_CURRENT)) + *dbg = dwarf_begin_elf(mod->elf, DWARF_C_READ, NULL); + if (*dbg == NULL) + return false; +#else Dwarf_Error de; /* expensive to init (DrM#1770) */ if (mod == NULL) return false; @@ -333,6 +349,7 @@ drsym_obj_dwarf_init(void *mod_in, Dwarf_Debug *dbg) NOTIFY_DWARF(de); return false; } +#endif return true; } @@ -521,3 +538,40 @@ drsym_obj_debug_path(void) { return "/usr/lib/debug"; } + +#ifdef USE_ELFUTILS +/*************************************************************************** + * elfutils libz helpers. + */ + +/* XXX: If we were guaranteed that the libz deflate calls from libelf were + * always in the same thread we could avoid the global heap lock and use + * thread-local heap. 
+ */ +void * +drsym_redirect_malloc(void *context, uint items, uint per_size) +{ + void *mem; + size_t size = (size_t)items * per_size; /* widen first: uint * uint wraps at 32 bits */ + if (!dr_running_under_dynamorio()) + return malloc(size); + size += sizeof(size_t); + mem = dr_custom_alloc(NULL, 0, size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); + if (mem == NULL) + return NULL; + *((size_t *)mem) = size; + return (byte *)mem + sizeof(size_t); +} + +void +drsym_redirect_free(void *context, void *ptr) +{ + if (!dr_running_under_dynamorio()) + free(ptr); /* a void function must not return an expression in C */ + else if (ptr != NULL) { + byte *mem = (byte *)ptr; + mem -= sizeof(size_t); + dr_custom_free(NULL, 0, mem, *((size_t *)mem)); + } +} +#endif diff --git a/ext/drsyms/drsyms_macho.c b/ext/drsyms/drsyms_macho.c index 18ce4b61df1..1db0fa545bd 100644 --- a/ext/drsyms/drsyms_macho.c +++ b/ext/drsyms/drsyms_macho.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2014-2020 Google, Inc. All rights reserved. + * Copyright (c) 2014-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -430,7 +430,7 @@ drsym_obj_mod_init_post(void *mod_in, byte *map_base, void *dwarf_info) } bool -drsym_obj_dwarf_init(void *mod_in, Dwarf_Debug *dbg) +drsym_obj_dwarf_init(void *mod_in, dwarf_lib_handle_t *dbg) { macho_info_t *mod = (macho_info_t *)mod_in; Dwarf_Error de; /* expensive to init (DrM#1770) */ diff --git a/ext/drsyms/drsyms_obj.h b/ext/drsyms/drsyms_obj.h index 71160e273be..f94deb3ca30 100644 --- a/ext/drsyms/drsyms_obj.h +++ b/ext/drsyms/drsyms_obj.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2020 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -38,13 +38,25 @@ #define DRSYMS_ARCH_H #include "drsyms.h" -#include "dwarf.h" -#include "libdwarf.h" +#ifdef USE_ELFUTILS +# include "libdw.h" +#else +# include "dwarf.h" +# include "libdwarf.h" +#endif /*************************************************************************** * Platform-specific: Linux (ELF) or Cygwin/MinGW (PECOFF) */ +/* TODO i#5926: Use elfutils everywhere. We start out with just Linux. */ +#ifdef USE_ELFUTILS +typedef Dwarf *dwarf_lib_handle_t; +#else +/* elftoolchain */ +typedef Dwarf_Debug dwarf_lib_handle_t; +#endif + void drsym_obj_init(void); @@ -64,7 +76,7 @@ bool drsym_obj_mod_init_post(void *mod_in, byte *map_base, void *dwarf_info); bool -drsym_obj_dwarf_init(void *mod_in, Dwarf_Debug *dbg); +drsym_obj_dwarf_init(void *mod_in, dwarf_lib_handle_t *dbg); void drsym_obj_mod_exit(void *mod_in); @@ -109,7 +121,7 @@ drsym_obj_build_id(void *mod_in); */ void * -drsym_dwarf_init(Dwarf_Debug dbg); +drsym_dwarf_init(dwarf_lib_handle_t dbg); void drsym_dwarf_exit(void *mod_in); diff --git a/ext/drsyms/drsyms_pecoff.c b/ext/drsyms/drsyms_pecoff.c index 7e001a0212c..a4e2e979015 100644 --- a/ext/drsyms/drsyms_pecoff.c +++ b/ext/drsyms/drsyms_pecoff.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2012-2020 Google, Inc. All rights reserved. + * Copyright (c) 2012-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -256,7 +256,7 @@ drsym_obj_mod_init_post(void *mod_in, byte *map_base, void *dwarf_info) } bool -drsym_obj_dwarf_init(void *mod_in, Dwarf_Debug *dbg) +drsym_obj_dwarf_init(void *mod_in, dwarf_lib_handle_t *dbg) { pecoff_data_t *mod = (pecoff_data_t *)mod_in; Dwarf_Error de; /* expensive to init (DrM#1770) */ diff --git a/ext/drsyms/drsyms_unix_common.c b/ext/drsyms/drsyms_unix_common.c index 3827e77591c..d57f64237b4 100644 --- a/ext/drsyms/drsyms_unix_common.c +++ b/ext/drsyms/drsyms_unix_common.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2020 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -40,9 +40,6 @@ #include "drsyms_obj.h" #include "hashtable.h" -#include "dwarf.h" -#include "libdwarf.h" - #include /* strlen */ #include #include /* offsetof */ @@ -191,7 +188,7 @@ load_module(const char *modpath) } /* else stick with mod */ } if (newmod == NULL) { - Dwarf_Debug dbg; + dwarf_lib_handle_t dbg; /* If there is no .gnu_debuglink, initialize parsing. */ #ifdef WINDOWS /* i#1395: support switching to expots-only for MinGW, for which we diff --git a/ext/drsyms/elfutils/config.h b/ext/drsyms/elfutils/config.h new file mode 100644 index 00000000000..bc684b2d518 --- /dev/null +++ b/ext/drsyms/elfutils/config.h @@ -0,0 +1,211 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Building with -fsanitize=undefined or not */ +#define CHECK_UNDEFINED 0 + +/* Should ar and ranlib use -D behavior by default? 
*/ +#define DEFAULT_AR_DETERMINISTIC false + +/* Build dummy libdebuginfod */ +/* #undef DUMMY_LIBDEBUGINFOD */ + +/* Build debuginfod */ +/* #undef ENABLE_DEBUGINFOD */ + +/* Enable libdebuginfod */ +/* #undef ENABLE_LIBDEBUGINFOD */ + +/* Define to 1 if translation of program messages to the user's native + language is requested. */ +#define ENABLE_NLS 1 + +/* Define to 1 if you have the Mac OS X function + CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ +/* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */ + +/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in + the CoreFoundation framework. */ +/* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */ + +/* define if the compiler supports basic C++11 syntax */ +#define HAVE_CXX11 1 + +/* Define if the GNU dcgettext() function is already present or preinstalled. + */ +#define HAVE_DCGETTEXT 1 + +/* Define to 1 if you have the declaration of `mempcpy', and to 0 if you + don't. */ +#define HAVE_DECL_MEMPCPY 1 + +/* Define to 1 if you have the declaration of `memrchr', and to 0 if you + don't. */ +#define HAVE_DECL_MEMRCHR 1 + +/* Define to 1 if you have the declaration of `powerof2', and to 0 if you + don't. */ +#define HAVE_DECL_POWEROF2 1 + +/* Define to 1 if you have the declaration of `rawmemchr', and to 0 if you + don't. */ +#define HAVE_DECL_RAWMEMCHR 1 + +/* Define to 1 if you have the declaration of `reallocarray', and to 0 if you + don't. */ +#define HAVE_DECL_REALLOCARRAY 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ERROR_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ERR_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_EXECINFO_H 1 + +/* Defined if __attribute__((fallthrough)) is supported */ +#define HAVE_FALLTHROUGH 1 + +/* Defined if __attribute__((gcc_struct)) is supported */ +#if defined(__x86_64__) || defined(__i386__) +# define HAVE_GCC_STRUCT 1 +#endif + +/* Define to 1 if you have the `getrlimit' function. */ +#define HAVE_GETRLIMIT 1 + +/* Define if the GNU gettext() function is already present or preinstalled. */ +#define HAVE_GETTEXT 1 + +/* Define if you have the iconv() function and it works. */ +/* #undef HAVE_ICONV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `mremap' function. */ +#define HAVE_MREMAP 1 + +/* Define to 1 if you have the `process_vm_readv' function. */ +#define HAVE_PROCESS_VM_READV 1 + +/* Enable pthread_setname_np */ +#define HAVE_PTHREAD_SETNAME_NP 1 + +/* Define to 1 if you have the `sched_getaffinity' function. */ +#define HAVE_SCHED_GETAFFINITY 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SCHED_H 1 + +/* Define to 1 if `stdatomic.h` is provided by the system, 0 otherwise. */ +#define HAVE_STDATOMIC_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define if you have `strerror_r'. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if defines struct user_regs_struct */ +#define HAVE_SYS_USER_REGS 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_UNISTD_H 1 + +/* Defined if __attribute__((visibility())) is supported */ +#define HAVE_VISIBILITY 1 + +/* Name of package */ +#define PACKAGE "elfutils" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "https://sourceware.org/bugzilla" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "elfutils" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "elfutils 0.190" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "elfutils" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "http://elfutils.org/" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "0.190" + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* Define to 1 if strerror_r returns char *. */ +#define STRERROR_R_CHAR_P 1 + +/* Support bzip2 decompression via -lbz2. */ +/* #undef USE_BZLIB */ + +/* Defined if demangling is enabled */ +/* #undef USE_DEMANGLE */ + +/* Defined if libraries should be thread-safe. */ +/* #undef USE_LOCKS */ + +/* Support LZMA (xz) decompression via -llzma. */ +/* #undef USE_LZMA */ + +/* Support gzip decompression via -lz. */ +#define USE_ZLIB 1 + +/* Support ZSTD (zst) decompression via -lzstd. */ +/* #undef USE_ZSTD */ + +/* zstd compression support */ +/* #undef USE_ZSTD_COMPRESS */ + +/* Version number of package */ +#define VERSION "0.190" + +/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a + `char[]'. */ +#define YYTEXT_POINTER 1 + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define for large files, on AIX-style hosts. 
*/ +/* #undef _LARGE_FILES */ + +#include diff --git a/ext/drsyms/elfutils/dwarf_begin_elf.c.patch b/ext/drsyms/elfutils/dwarf_begin_elf.c.patch new file mode 100644 index 00000000000..ca912b8c5e0 --- /dev/null +++ b/ext/drsyms/elfutils/dwarf_begin_elf.c.patch @@ -0,0 +1,12 @@ +diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c +index 323a91d0..3c695586 100644 +--- a/libdw/dwarf_begin_elf.c ++++ b/libdw/dwarf_begin_elf.c +@@ -278,7 +278,8 @@ __libdw_elfpath (int fd) + /* strlen ("/proc/self/fd/") = 14 + strlen () = 10 + 1 = 25. */ + char devfdpath[25]; + sprintf (devfdpath, "/proc/self/fd/%u", fd); +- return realpath (devfdpath, NULL); ++ char *dest = malloc(PATH_MAX); ++ return realpath (devfdpath, dest); + } diff --git a/ext/drsyms/elfutils/dwarf_getsrclines.c.patch b/ext/drsyms/elfutils/dwarf_getsrclines.c.patch new file mode 100644 index 00000000000..c760fa0776f --- /dev/null +++ b/ext/drsyms/elfutils/dwarf_getsrclines.c.patch @@ -0,0 +1,15 @@ +diff --git a/libdw/dwarf_getsrclines.c b/libdw/dwarf_getsrclines.c +index 69e10c7b..473255dd 100644 +--- a/libdw/dwarf_getsrclines.c ++++ b/libdw/dwarf_getsrclines.c +@@ -171,8 +171,8 @@ read_srclines (Dwarf *dbg, + the stack. Stack allocate some entries, only dynamically malloc + when more than MAX. */ + #define MAX_STACK_ALLOC 4096 +-#define MAX_STACK_LINES (MAX_STACK_ALLOC / 2) +-#define MAX_STACK_FILES (MAX_STACK_ALLOC / 4) ++#define MAX_STACK_LINES (MAX_STACK_ALLOC / 32) ++#define MAX_STACK_FILES (MAX_STACK_ALLOC / 32) + #define MAX_STACK_DIRS (MAX_STACK_ALLOC / 16) + + /* Initial statement program state (except for stmt_list, see below). 
*/ diff --git a/ext/drsyms/elfutils/elf_compress.c.patch b/ext/drsyms/elfutils/elf_compress.c.patch new file mode 100644 index 00000000000..61d0228391c --- /dev/null +++ b/ext/drsyms/elfutils/elf_compress.c.patch @@ -0,0 +1,35 @@ +diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c +index 0ad6a32a..4903620f 100644 +--- a/libelf/elf_compress.c ++++ b/libelf/elf_compress.c +@@ -71,8 +71,10 @@ __libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data, + size_t used = hsize; + + z_stream z; +- z.zalloc = Z_NULL; +- z.zfree = Z_NULL; ++ extern void *drsym_redirect_malloc(void *, uint, uint); ++ extern void drsym_redirect_free(void *, void *); ++ z.zalloc = drsym_redirect_malloc; ++ z.zfree = drsym_redirect_free; + z.opaque = Z_NULL; + int zrc = deflateInit (&z, Z_BEST_COMPRESSION); + if (zrc != Z_OK) +@@ -375,12 +377,16 @@ __libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out) + return NULL; + } + ++ extern void *drsym_redirect_malloc(void *, uint, uint); ++ extern void drsym_redirect_free(void *, void *); + z_stream z = + { + .next_in = buf_in, + .avail_in = size_in, + .next_out = buf_out, +- .avail_out = size_out ++ .avail_out = size_out, ++ .zalloc = drsym_redirect_malloc, ++ .zfree = drsym_redirect_free + }; + int zrc = inflateInit (&z); + while (z.avail_in > 0 && likely (zrc == Z_OK)) diff --git a/ext/drsyms/elfutils/elf_memory.c.patch b/ext/drsyms/elfutils/elf_memory.c.patch new file mode 100644 index 00000000000..248457837d8 --- /dev/null +++ b/ext/drsyms/elfutils/elf_memory.c.patch @@ -0,0 +1,11 @@ +diff --git a/libelf/elf_memory.c b/libelf/elf_memory.c +index a47f1d24..13d77cb7 100644 +--- a/libelf/elf_memory.c ++++ b/libelf/elf_memory.c +@@ -46,5 +46,5 @@ elf_memory (char *image, size_t size) + return NULL; + } + +- return __libelf_read_mmaped_file (-1, image, 0, size, ELF_C_READ, NULL); ++ return __libelf_read_mmaped_file (-1, image, 0, size, ELF_C_READ_MMAP, NULL); + } diff --git a/ext/drsyms/libelftc-aarch64/lib64/libdwarf.a 
b/ext/drsyms/libelftc-aarch64/lib64/libdwarf.a deleted file mode 100644 index 6d258f0a58c..00000000000 Binary files a/ext/drsyms/libelftc-aarch64/lib64/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-aarch64/lib64/libelf.a b/ext/drsyms/libelftc-aarch64/lib64/libelf.a deleted file mode 100644 index 8a6dffc8122..00000000000 Binary files a/ext/drsyms/libelftc-aarch64/lib64/libelf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-arm/lib32-eabi/libdwarf.a b/ext/drsyms/libelftc-arm/lib32-eabi/libdwarf.a deleted file mode 100644 index 9928deeb770..00000000000 Binary files a/ext/drsyms/libelftc-arm/lib32-eabi/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-arm/lib32-eabi/libelf.a b/ext/drsyms/libelftc-arm/lib32-eabi/libelf.a deleted file mode 100644 index 043110ef3f3..00000000000 Binary files a/ext/drsyms/libelftc-arm/lib32-eabi/libelf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-arm/lib32-eabihf/libdwarf.a b/ext/drsyms/libelftc-arm/lib32-eabihf/libdwarf.a deleted file mode 100644 index 1322ff36d83..00000000000 Binary files a/ext/drsyms/libelftc-arm/lib32-eabihf/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-arm/lib32-eabihf/libelf.a b/ext/drsyms/libelftc-arm/lib32-eabihf/libelf.a deleted file mode 100644 index 443dfd2f864..00000000000 Binary files a/ext/drsyms/libelftc-arm/lib32-eabihf/libelf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-riscv64/lib64/libdwarf.a b/ext/drsyms/libelftc-riscv64/lib64/libdwarf.a deleted file mode 100644 index 8b18f2c6b84..00000000000 Binary files a/ext/drsyms/libelftc-riscv64/lib64/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc-riscv64/lib64/libelf.a b/ext/drsyms/libelftc-riscv64/lib64/libelf.a deleted file mode 100644 index 635cebc87cb..00000000000 Binary files a/ext/drsyms/libelftc-riscv64/lib64/libelf.a and /dev/null differ diff --git a/ext/drsyms/libelftc/lib32/libdwarf.a b/ext/drsyms/libelftc/lib32/libdwarf.a deleted file mode 100644 index 
286b2f616f5..00000000000 Binary files a/ext/drsyms/libelftc/lib32/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc/lib32/libelf.a b/ext/drsyms/libelftc/lib32/libelf.a deleted file mode 100644 index a7fd873bfba..00000000000 Binary files a/ext/drsyms/libelftc/lib32/libelf.a and /dev/null differ diff --git a/ext/drsyms/libelftc/lib64/libdwarf.a b/ext/drsyms/libelftc/lib64/libdwarf.a deleted file mode 100644 index 7422c72ef17..00000000000 Binary files a/ext/drsyms/libelftc/lib64/libdwarf.a and /dev/null differ diff --git a/ext/drsyms/libelftc/lib64/libelf.a b/ext/drsyms/libelftc/lib64/libelf.a deleted file mode 100644 index 866b28c48bc..00000000000 Binary files a/ext/drsyms/libelftc/lib64/libelf.a and /dev/null differ diff --git a/ext/drutil/drutil.c b/ext/drutil/drutil.c index 6f7e1e38881..c0da4e43748 100644 --- a/ext/drutil/drutil.c +++ b/ext/drutil/drutil.c @@ -162,16 +162,11 @@ static bool drutil_insert_get_mem_addr_x86(void *drcontext, instrlist_t *bb, instr_t *where, opnd_t memref, reg_id_t dst, reg_id_t scratch, DR_PARAM_OUT bool *scratch_used); -#elif defined(AARCHXX) +#elif defined(AARCHXX) || defined(RISCV64) static bool -drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, - opnd_t memref, reg_id_t dst, reg_id_t scratch, - DR_PARAM_OUT bool *scratch_used); -#elif defined(RISCV64) -static bool -drutil_insert_get_mem_addr_riscv64(void *drcontext, instrlist_t *bb, instr_t *where, - opnd_t memref, reg_id_t dst, reg_id_t scratch, - DR_PARAM_OUT bool *scratch_used); +drutil_insert_get_mem_addr_risc(void *drcontext, instrlist_t *bb, instr_t *where, + opnd_t memref, reg_id_t dst, reg_id_t scratch, + DR_PARAM_OUT bool *scratch_used); #endif /* X86/ARM/RISCV64 */ /* Could be optimized to have scratch==dst for many common cases, but @@ -195,12 +190,9 @@ drutil_insert_get_mem_addr_ex(void *drcontext, instrlist_t *bb, instr_t *where, #if defined(X86) return drutil_insert_get_mem_addr_x86(drcontext, bb, where, memref, dst, 
scratch, scratch_used); -#elif defined(AARCHXX) - return drutil_insert_get_mem_addr_arm(drcontext, bb, where, memref, dst, scratch, - scratch_used); -#elif defined(RISCV64) - return drutil_insert_get_mem_addr_riscv64(drcontext, bb, where, memref, dst, scratch, - scratch_used); +#elif defined(AARCHXX) || defined(RISCV64) + return drutil_insert_get_mem_addr_risc(drcontext, bb, where, memref, dst, scratch, + scratch_used); #endif } @@ -212,12 +204,9 @@ drutil_insert_get_mem_addr(void *drcontext, instrlist_t *bb, instr_t *where, #if defined(X86) return drutil_insert_get_mem_addr_x86(drcontext, bb, where, memref, dst, scratch, NULL); -#elif defined(AARCHXX) - return drutil_insert_get_mem_addr_arm(drcontext, bb, where, memref, dst, scratch, - NULL); -#elif defined(RISCV64) - return drutil_insert_get_mem_addr_riscv64(drcontext, bb, where, memref, dst, scratch, - NULL); +#elif defined(AARCHXX) || defined(RISCV64) + return drutil_insert_get_mem_addr_risc(drcontext, bb, where, memref, dst, scratch, + NULL); #endif } @@ -341,7 +330,7 @@ drutil_insert_get_mem_addr_x86(void *drcontext, instrlist_t *bb, instr_t *where, } return true; } -#elif defined(AARCHXX) +#elif defined(AARCHXX) || defined(RISCV64) # ifdef ARM static bool @@ -394,11 +383,11 @@ replace_stolen_reg(void *drcontext, instrlist_t *bb, instr_t *where, opnd_t memr } static bool -drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, - opnd_t memref, reg_id_t dst, reg_id_t scratch, - DR_PARAM_OUT bool *scratch_used) +drutil_insert_get_mem_addr_risc(void *drcontext, instrlist_t *bb, instr_t *where, + opnd_t memref, reg_id_t dst, reg_id_t scratch, + DR_PARAM_OUT bool *scratch_used) { - if (!opnd_is_base_disp(memref) IF_AARCH64(&&!opnd_is_rel_addr(memref))) + if (!opnd_is_base_disp(memref) IF_AARCHXX_OR_RISCV64(&&!opnd_is_rel_addr(memref))) return false; # ifdef ARM if (opnd_get_base(memref) == DR_REG_PC) { @@ -414,26 +403,28 @@ drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, 
instr_t *where, instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)target, opnd_create_reg(dst), bb, where, NULL, NULL); } -# else /* AARCH64 */ +# else /* AARCH64/RISCV64 */ if (opnd_is_rel_addr(memref)) { instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)opnd_get_addr(memref), opnd_create_reg(dst), bb, where, NULL, NULL); return true; } -# endif /* ARM/AARCH64 */ +# endif /* ARM/AARCH64/RISCV64 */ else { instr_t *instr; reg_id_t base = opnd_get_base(memref); reg_id_t index = opnd_get_index(memref); - bool negated = TEST(DR_OPND_NEGATED, opnd_get_flags(memref)); int disp = opnd_get_disp(memref); reg_id_t stolen = dr_get_stolen_reg(); +# ifdef AARCHXX + bool negated = TEST(DR_OPND_NEGATED, opnd_get_flags(memref)); /* On ARM, disp is never negative; on AArch64, we do not use DR_OPND_NEGATED. */ ASSERT(IF_ARM_ELSE(disp >= 0, !negated), "DR_OPND_NEGATED internal error"); if (disp < 0) { disp = -disp; negated = !negated; } +# endif # ifdef AARCH64 /* In cases where only the lower 32 bits of the index register are * used, we need to widen to 64 bits in order to handle stolen @@ -463,17 +454,19 @@ drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, # endif } if (index == REG_NULL && opnd_get_disp(memref) != 0) { - /* first try "add dst, base, #disp" */ - instr = negated - ? INSTR_CREATE_sub(drcontext, opnd_create_reg(dst), opnd_create_reg(base), - OPND_CREATE_INT(disp)) - : XINST_CREATE_add_2src(drcontext, opnd_create_reg(dst), - opnd_create_reg(base), OPND_CREATE_INT(disp)); + /* First try "add dst, base, #disp". */ + instr = IF_AARCHXX(negated ? 
INSTR_CREATE_sub(drcontext, opnd_create_reg(dst), + opnd_create_reg(base), + OPND_CREATE_INT(disp)) + :) + XINST_CREATE_add_2src(drcontext, opnd_create_reg(dst), + opnd_create_reg(base), OPND_CREATE_INT(disp)); # define MAX_ADD_IMM_DISP (1 << 12) if (IF_ARM_ELSE(instr_is_encoding_possible(instr), disp < MAX_ADD_IMM_DISP)) { PRE(bb, where, instr); return true; } +# undef MAX_ADD_IMM_DISP instr_destroy(drcontext, instr); /* The memref may have a disp that cannot be directly encoded into an * add_imm instr, so we use movw to put disp into the scratch instead @@ -499,7 +492,8 @@ drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, : INSTR_CREATE_add_shimm(drcontext, opnd_create_reg(dst), opnd_create_reg(base), opnd_create_reg(index), OPND_CREATE_INT(shift), OPND_CREATE_INT(amount)); -# else /* AARCH64 */ + PRE(bb, where, instr); +# elif defined(AARCH64) uint amount; dr_extend_type_t extend = opnd_get_index_extend(memref, NULL, &amount); instr = negated @@ -511,8 +505,11 @@ drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, opnd_create_reg(base), opnd_create_reg(index), OPND_CREATE_INT(extend), OPND_CREATE_INT(amount)); -# endif /* ARM/AARCH64 */ PRE(bb, where, instr); +# else /* RISCV64 */ + ASSERT(false, + "Unreachable, there is no base + index addressing mode in RISC-V."); +# endif /* AARCHXX/RISCV64 */ } else if (base != dst) { PRE(bb, where, XINST_CREATE_move(drcontext, opnd_create_reg(dst), @@ -521,16 +518,6 @@ drutil_insert_get_mem_addr_arm(void *drcontext, instrlist_t *bb, instr_t *where, } return true; } -#elif defined(RISCV64) -static bool -drutil_insert_get_mem_addr_riscv64(void *drcontext, instrlist_t *bb, instr_t *where, - opnd_t memref, reg_id_t dst, reg_id_t scratch, - DR_PARAM_OUT bool *scratch_used) -{ - /* FIXME i#3544: Not implemented */ - ASSERT(false, "Not implemented"); - return false; -} #endif /* X86/AARCHXX/RISCV64 */ DR_EXPORT diff --git a/ext/drwrap/drwrap.c b/ext/drwrap/drwrap.c index 
3881f0868e8..1f4621334ca 100644 --- a/ext/drwrap/drwrap.c +++ b/ext/drwrap/drwrap.c @@ -2021,9 +2021,8 @@ drwrap_in_callee(void *arg1, reg_t xsp _IF_NOT_X86(reg_t lr)) NOTIFY(2, "%s: level %d function " PFX "\n", __FUNCTION__, pt->wrap_level + 1, pc); app_pc retaddr = IF_X86_ELSE(get_retaddr_from_stack(xsp), (app_pc)lr); -#ifdef X86 if (TEST(DRWRAP_REPLACE_RETADDR, global_flags)) { - /* In case of a tailcall for X86, the return address has already been replaced by + /* In case of a tailcall, the return address has already been replaced by * the sentinel in the stack, we need to retrieve the return address from the * outer level. */ @@ -2034,7 +2033,6 @@ drwrap_in_callee(void *arg1, reg_t xsp _IF_NOT_X86(reg_t lr)) pt->wrap_level, retaddr); } } -#endif drwrap_context_init(drcontext, &wrapcxt, pc, &mc, DRWRAP_WHERE_PRE_FUNC, retaddr); drwrap_in_callee_check_unwind(drcontext, pt, &mc); diff --git a/ext/drx/scatter_gather_aarch64.c b/ext/drx/scatter_gather_aarch64.c index 4a4a620e602..85bc58f1fdb 100644 --- a/ext/drx/scatter_gather_aarch64.c +++ b/ext/drx/scatter_gather_aarch64.c @@ -168,98 +168,115 @@ get_scatter_gather_info(instr_t *instr, DR_PARAM_OUT scatter_gather_info_t *sg_i sg_info->extend = opnd_get_index_extend(memopnd, &sg_info->scaled, &sg_info->extend_amount); - sg_info->scatter_gather_size = opnd_get_size(memopnd); + sg_info->scalar_value_size = opnd_get_size(memopnd); switch (instr_get_opcode(instr)) { -#define DRX_CASE(op, _reg_count, _scalar_value_size, _is_scalar_value_signed, \ - _is_replicating, _faulting_behavior) \ - case OP_##op: \ - sg_info->reg_count = _reg_count; \ - sg_info->scalar_value_size = _scalar_value_size; \ - sg_info->is_scalar_value_signed = _is_scalar_value_signed; \ - sg_info->is_replicating = _is_replicating; \ - sg_info->faulting_behavior = _faulting_behavior; \ +#define DRX_CASE(op, _reg_count, _is_scalar_value_signed, _faulting_behavior) \ + case OP_##op: \ + sg_info->reg_count = _reg_count; \ + sg_info->is_scalar_value_signed 
= _is_scalar_value_signed; \ + sg_info->is_replicating = false; \ + sg_info->faulting_behavior = _faulting_behavior; \ + /* The size of the vector in memory is: \ + * number_of_elements = (reg_count * vector_length) / element_size \ + * size = number_of_elements * value_size \ + * = (reg_count * vector_length / element_size) * value_size \ + * = (reg_count * vector_length * value_size) / element_size \ + */ \ + sg_info->scatter_gather_size = \ + opnd_size_from_bytes((sg_info->reg_count * proc_get_vector_length_bytes() * \ + opnd_size_in_bytes(sg_info->scalar_value_size)) / \ + opnd_size_in_bytes(sg_info->element_size)); \ break - DRX_CASE(ld1b, 1, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1h, 1, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1w, 1, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1d, 1, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1sb, 1, OPSZ_1, true, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1sh, 1, OPSZ_2, true, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld1sw, 1, OPSZ_4, true, false, DRX_NORMAL_FAULTING); - - DRX_CASE(ldff1b, 1, OPSZ_1, false, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1h, 1, OPSZ_2, false, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1w, 1, OPSZ_4, false, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1d, 1, OPSZ_8, false, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1sb, 1, OPSZ_1, true, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1sh, 1, OPSZ_2, true, false, DRX_FIRST_FAULTING); - DRX_CASE(ldff1sw, 1, OPSZ_4, true, false, DRX_FIRST_FAULTING); - - DRX_CASE(ldnf1b, 1, OPSZ_1, false, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1h, 1, OPSZ_2, false, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1w, 1, OPSZ_4, false, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1d, 1, OPSZ_8, false, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1sb, 1, OPSZ_1, true, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1sh, 1, OPSZ_2, true, false, DRX_NON_FAULTING); - DRX_CASE(ldnf1sw, 1, OPSZ_4, true, false, DRX_NON_FAULTING); - - 
DRX_CASE(ldnt1b, 1, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1h, 1, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1w, 1, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1d, 1, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1sb, 1, OPSZ_1, true, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1sh, 1, OPSZ_2, true, false, DRX_NORMAL_FAULTING); - DRX_CASE(ldnt1sw, 1, OPSZ_4, true, false, DRX_NORMAL_FAULTING); - - DRX_CASE(st1b, 1, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st1h, 1, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st1w, 1, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st1d, 1, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(stnt1b, 1, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(stnt1h, 1, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(stnt1w, 1, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(stnt1d, 1, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(ld2b, 2, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld2h, 2, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld2w, 2, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld2d, 2, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(st2b, 2, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st2h, 2, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st2w, 2, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st2d, 2, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(ld3b, 3, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld3h, 3, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld3w, 3, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld3d, 3, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(st3b, 3, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st3h, 3, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st3w, 3, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st3d, 3, OPSZ_8, false, false, 
DRX_NORMAL_FAULTING); - - DRX_CASE(ld4b, 4, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld4h, 4, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld4w, 4, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(ld4d, 4, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(st4b, 4, OPSZ_1, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st4h, 4, OPSZ_2, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st4w, 4, OPSZ_4, false, false, DRX_NORMAL_FAULTING); - DRX_CASE(st4d, 4, OPSZ_8, false, false, DRX_NORMAL_FAULTING); - - DRX_CASE(ld1rob, 1, OPSZ_1, false, true, DRX_NORMAL_FAULTING); - - DRX_CASE(ld1rqb, 1, OPSZ_1, false, true, DRX_NORMAL_FAULTING); - DRX_CASE(ld1rqh, 1, OPSZ_2, false, true, DRX_NORMAL_FAULTING); - DRX_CASE(ld1rqw, 1, OPSZ_4, false, true, DRX_NORMAL_FAULTING); - DRX_CASE(ld1rqd, 1, OPSZ_8, false, true, DRX_NORMAL_FAULTING); + DRX_CASE(ld1b, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld1h, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld1w, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld1d, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld1sb, 1, true, DRX_NORMAL_FAULTING); + DRX_CASE(ld1sh, 1, true, DRX_NORMAL_FAULTING); + DRX_CASE(ld1sw, 1, true, DRX_NORMAL_FAULTING); + + DRX_CASE(ldff1b, 1, false, DRX_FIRST_FAULTING); + DRX_CASE(ldff1h, 1, false, DRX_FIRST_FAULTING); + DRX_CASE(ldff1w, 1, false, DRX_FIRST_FAULTING); + DRX_CASE(ldff1d, 1, false, DRX_FIRST_FAULTING); + DRX_CASE(ldff1sb, 1, true, DRX_FIRST_FAULTING); + DRX_CASE(ldff1sh, 1, true, DRX_FIRST_FAULTING); + DRX_CASE(ldff1sw, 1, true, DRX_FIRST_FAULTING); + + DRX_CASE(ldnf1b, 1, false, DRX_NON_FAULTING); + DRX_CASE(ldnf1h, 1, false, DRX_NON_FAULTING); + DRX_CASE(ldnf1w, 1, false, DRX_NON_FAULTING); + DRX_CASE(ldnf1d, 1, false, DRX_NON_FAULTING); + DRX_CASE(ldnf1sb, 1, true, DRX_NON_FAULTING); + DRX_CASE(ldnf1sh, 1, true, DRX_NON_FAULTING); + DRX_CASE(ldnf1sw, 1, true, DRX_NON_FAULTING); + + DRX_CASE(ldnt1b, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1h, 1, false, 
DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1w, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1d, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1sb, 1, true, DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1sh, 1, true, DRX_NORMAL_FAULTING); + DRX_CASE(ldnt1sw, 1, true, DRX_NORMAL_FAULTING); + + DRX_CASE(st1b, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(st1h, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(st1w, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(st1d, 1, false, DRX_NORMAL_FAULTING); + + DRX_CASE(stnt1b, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(stnt1h, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(stnt1w, 1, false, DRX_NORMAL_FAULTING); + DRX_CASE(stnt1d, 1, false, DRX_NORMAL_FAULTING); + + DRX_CASE(ld2b, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld2h, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld2w, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld2d, 2, false, DRX_NORMAL_FAULTING); + + DRX_CASE(st2b, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(st2h, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(st2w, 2, false, DRX_NORMAL_FAULTING); + DRX_CASE(st2d, 2, false, DRX_NORMAL_FAULTING); + + DRX_CASE(ld3b, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld3h, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld3w, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld3d, 3, false, DRX_NORMAL_FAULTING); + + DRX_CASE(st3b, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(st3h, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(st3w, 3, false, DRX_NORMAL_FAULTING); + DRX_CASE(st3d, 3, false, DRX_NORMAL_FAULTING); + + DRX_CASE(ld4b, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld4h, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld4w, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(ld4d, 4, false, DRX_NORMAL_FAULTING); + + DRX_CASE(st4b, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(st4h, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(st4w, 4, false, DRX_NORMAL_FAULTING); + DRX_CASE(st4d, 4, false, DRX_NORMAL_FAULTING); +#define DRX_CASE_REP(op, loaded_vector_size) \ + case OP_##op: \ + sg_info->reg_count = 1; \ + sg_info->scatter_gather_size = 
loaded_vector_size; \ + sg_info->is_scalar_value_signed = false; \ + sg_info->is_replicating = true; \ + sg_info->faulting_behavior = DRX_NORMAL_FAULTING; \ + break + + DRX_CASE_REP(ld1rob, OPSZ_32); + + DRX_CASE_REP(ld1rqb, OPSZ_16); + DRX_CASE_REP(ld1rqh, OPSZ_16); + DRX_CASE_REP(ld1rqw, OPSZ_16); + DRX_CASE_REP(ld1rqd, OPSZ_16); #undef DRX_CASE +#undef DRX_CASE_REP default: DR_ASSERT_MSG(false, "Invalid scatter/gather instruction"); } @@ -1453,7 +1470,7 @@ drx_expand_scatter_gather(void *drcontext, instrlist_t *bb, DR_PARAM_OUT bool *e governing_pred, scratch_vec); } } else { - /* scalar+vector or vector+immediate scatter/gather */ + /* scalar+vector, vector+immediate, or vector+scalar scatter/gather */ reg_id_t scalar_base; reg_id_t scalar_index; if (reg_is_z(sg_info.index_reg)) { diff --git a/make/CMake_aarch64_gen_codec.cmake b/make/CMake_aarch64_gen_codec.cmake index fad346df947..06785ff9e54 100644 --- a/make/CMake_aarch64_gen_codec.cmake +++ b/make/CMake_aarch64_gen_codec.cmake @@ -82,6 +82,8 @@ add_custom_command( ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v81.txt ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v82.txt ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v83.txt + ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v84.txt + ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v85.txt ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_v86.txt ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_sve.txt ${PROJECT_SOURCE_DIR}/core/ir/${ARCH_NAME}/codec_sve2.txt diff --git a/make/aarch64_check_codec_order.py b/make/aarch64_check_codec_order.py index 9b83ab20175..ccc78ac6acc 100755 --- a/make/aarch64_check_codec_order.py +++ b/make/aarch64_check_codec_order.py @@ -108,9 +108,9 @@ def main(): print(' OK!') # The Arm AArch64's architecture versions supported by the DynamoRIO codec. - # Currently, v8.0 is fully supported, while v8.1, v8.2, v8.3, v8.4, v8.6, SVE, - # and SVE2 are partially supported. 
- isa_versions = ['v80', 'v81', 'v82', 'v83', 'v84', 'v86', 'sve', 'sve2'] + # Currently, v8.0 is fully supported, while v8.1, v8.2, v8.3, v8.4, v8.5, + # v8.6, SVE, and SVE2 are partially supported. + isa_versions = ['v80', 'v81', 'v82', 'v83', 'v84', 'v85', 'v86', 'sve', 'sve2'] codecsort_py = os.path.join(src_dir, "codecsort.py") diff --git a/make/cpp2asm_support.cmake b/make/cpp2asm_support.cmake index 98ac55f6154..36637aa8ab6 100644 --- a/make/cpp2asm_support.cmake +++ b/make/cpp2asm_support.cmake @@ -228,7 +228,9 @@ elseif (UNIX) # Some tests and libgcc/arm use deprecated instructions, disable warnings. set(ASM_FLAGS "${ASM_FLAGS} -mfpu=neon -mno-warn-deprecated") elseif (DR_HOST_AARCH64) - if (proc_supports_sve) + if (proc_supports_sve2) + set(ASM_FLAGS "${ASM_FLAGS} ${ASMFLAGS_SVE2}") + elseif (proc_supports_sve) set(ASM_FLAGS "${ASM_FLAGS} ${ASMFLAGS_SVE}") endif () endif () @@ -323,6 +325,8 @@ elseif (proc_supports_avx2) set(rule_flags "${rule_flags} ${CFLAGS_AVX2}") elseif (proc_supports_avx) set(rule_flags "${rule_flags} ${CFLAGS_AVX}") +elseif (proc_supports_sve2) + set(rule_flags "${rule_flags} ${CFLAGS_SVE2}") elseif (proc_supports_sve) set(rule_flags "${rule_flags} ${CFLAGS_SVE}") endif () diff --git a/make/utils.cmake b/make/utils.cmake index 6ba37c2df89..720c1524114 100644 --- a/make/utils.cmake +++ b/make/utils.cmake @@ -355,6 +355,32 @@ function (check_sve_processor_and_compiler_support out vl_out) endif () endfunction (check_sve_processor_and_compiler_support) +function (check_sve2_processor_and_compiler_support out) + include(CheckCSourceRuns) + set(sve2_prog "int main() { + asm(\"histcnt z0.d, p0/z, z0.d, z0.d\"); + return 0; + }") + set(CMAKE_REQUIRED_FLAGS ${CFLAGS_SVE2}) + if (CMAKE_CROSSCOMPILING) + # If we are cross-compiling check_c_source_runs() can't run the executable on the + # host to find out whether the target processor supports SVE2, so we assume it + # doesn't. 
+ set(proc_found_sve2_EXITCODE 1 CACHE STRING + "Set to 0 if target processor/emulator supports SVE2 to enable SVE2 tests" + FORCE) + else () + check_c_source_runs("${sve2_prog}" proc_found_sve2) + endif () + if (proc_found_sve2) + message(STATUS "Compiler and processor support SVE2.") + else () + message(STATUS "WARNING: Compiler or processor do not support SVE2. " + "Skipping tests") + endif () + set(${out} ${proc_found_sve2} PARENT_SCOPE) +endfunction (check_sve2_processor_and_compiler_support) + function (get_processor_vendor out) set(cpu_vendor "") if (APPLE) diff --git a/suite/runsuite.cmake b/suite/runsuite.cmake index 56e12a8aaa2..09162bc8acd 100644 --- a/suite/runsuite.cmake +++ b/suite/runsuite.cmake @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2010-2022 Google, Inc. All rights reserved. +# Copyright (c) 2010-2024 Google, Inc. All rights reserved. # Copyright (c) 2009-2010 VMware, Inc. All rights reserved. # ********************************************************** @@ -91,6 +91,11 @@ if (UNIX AND NOT APPLE AND NOT ANDROID AND NOT cross_riscv64_linux_only) # just a few tests. set(extra_ctest_args INCLUDE_LABEL UBUNTU_22) set(arg_debug_only ON) + elseif (arg_32_only AND NOT cross_aarchxx_linux_only AND NOT cross_android_only) + # TODO i#6417: The switch to AMD VMs for GA CI has broken many of our tests. + # This includes timeouts which increase suite length. + # Until we get this x86-32 job back green, we drop back to a small set of tests. + set(extra_ctest_args INCLUDE_LABEL UBUNTU_22) endif () endif () @@ -138,9 +143,10 @@ endif() if (TEST_LONG) set(DO_ALL_BUILDS ON) - # i#2974: We skip tests marked _FLAKY since we have no other mechanism to - # have CDash ignore them and avoid going red and sending emails. - # We rely on our CI for a history of _FLAKY results. + # i#2974: Skip tests marked _FLAKY to avoid test runs going red. 
+ # This is the less preferred way of marking flaky tests, and is for use for + # lower priority tests. The preferred mechanism is to use the ignored section + # in runsuite_wrapper.pl. We rely on our CI for a history of _FLAKY results. set(base_cache "${base_cache} ${build_tests} TEST_LONG:BOOL=ON diff --git a/suite/runsuite_wrapper.pl b/suite/runsuite_wrapper.pl index a5169d3c258..e9135e9331c 100755 --- a/suite/runsuite_wrapper.pl +++ b/suite/runsuite_wrapper.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # ********************************************************** -# Copyright () 2016-2023 Google, Inc. All rights reserved. +# Copyright () 2016-2024 Google, Inc. All rights reserved. # ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -203,6 +203,7 @@ my $issue_no = ""; my %ignore_failures_32 = (); my %ignore_failures_64 = (); + my %ignore_failures_sve = (); if ($^O eq 'cygwin' || $^O eq 'MSWin32') { # FIXME i#2145: ignoring certain Windows CI test failures until @@ -245,6 +246,10 @@ 'code_api,thread_private,disable_traces|common.decode-stress' => 1, # i#1807 'code_api,thread_private,tracedump_binary|common.decode-stress' => 1, # i#1807 'code_api|client.file_io' => 1, # i#5802 + 'code_api|tool.drcacheoff.windows-invar' => 1, # i#6599 + 'code_api|tool.drcacheoff.invariant_checker' => 1, # i#6599 + 'code_api|tool.drcacheoff.getretaddr_record_replace_retaddr' => 1, # i#6599 + 'code_api|tool.record_filter' => 1, # i#6599 ); %ignore_failures_64 = ( @@ -304,6 +309,9 @@ # We list this without any "options|" which will match all variations. 'common.floatpc_xl8all' => 1, # i#2267 'code_api|client.file_io' => 1, # i#5802 + # These we have failed to reproduce after many attempts under tmate. 
+ 'code_api|tool.drcacheoff.burst_traceopts' => 1, # i#6423 + 'code_api|tool.drcacheoff.burst_replaceall' => 1, # i#5412 ); if ($is_long) { # These are important tests so we only ignore in the long suite, @@ -339,15 +347,37 @@ 'code_api|linux.fib-conflict-early' => 1, 'code_api|linux.mangle_asynch' => 1, 'code_api,tracedump_text,tracedump_origins,syntax_intel|common.loglevel' => 1, # i#1807 - 'code_api|client.attach_test' => 1, # i#5740 - 'code_api|client.attach_blocking' => 1, # i#5740 'code_api|tool.drcacheoff.rseq' => 1, # i#5734 'code_api|tool.drcacheoff.windows-zlib' => 1, # i#5507 ); + # FIXME i#5365: fix flaky AArch64 tests running on SVE hardware. + # Note that apart from tool.drcachesim.scattergather-aarch64, these + # have NOT been built with SVE compiler options and are seen to + # fail intermittently on SVE hardware. + %ignore_failures_sve = ('code_api|tool.drcacheoff.burst_threads_counts' => 1, + 'code_api|tool.drcachesim.scattergather-aarch64' => 1, # i#3320 + 'code_api|tool.drcachesim.threads' => 1, # i#3320 + 'code_api|tool.drcachesim.threads-with-config-file' => 1, # i#3320 + 'code_api|tool.drcachesim.coherence' => 1, # i#3320 + 'code_api|tool.drcachesim.miss_analyzer' => 1, # i#3320 + 'code_api|tool.drcacheoff.burst_threads' => 1, + 'code_api|tool.drcacheoff.burst_threads_counts' => 1, + 'code_api|tool.drcacheoff.burst_threadL0filter' => 1, + 'code_api|tool.drcacheoff.burst_threadfilter' => 1, + 'code_api|api.static_signal' => 1, + ); + # Establish if tests are running on SVE hardware. + system('cat /proc/cpuinfo | grep Features | head -1 | grep sve > /dev/null'); + my $is_sve = ($? >> 8 == 0) ? 
1 : 0; if ($is_32) { $issue_no = "#2416"; } else { - $issue_no = "#2417"; + if ($is_sve) { + $issue_no = "#5365"; + } + else { + $issue_no = "#2417"; + } } } elsif ($is_x86_64 && ($ENV{'DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY'} eq 'yes') && $args =~ /64_only/) { # These AArch64 cross-compiled tests fail on x86-64 QEMU but pass @@ -387,7 +417,10 @@ 'prof_pcs,thread_private|common.nativeexec_bindnow_opt' => 1, # i#2052 ); %ignore_failures_64 = ( + 'code_api|api.rseq' => 1, # i#6185 i#1807 'code_api|tool.drcacheoff.burst_threadfilter' => 1, # i#2941 + 'code_api|client.attach_test' => 1, # i#6452 + 'code_api|client.detach_test' => 1, # i#6536 # These are from the long suite. 'code_api,opt_memory|common.loglevel' => 1, # i#1807 'code_api,opt_speed|common.decode-stress' => 1, # i#1807 @@ -431,7 +464,9 @@ if (($is_32 && ($ignore_failures_32{$test} || $ignore_failures_32{$test_base_name})) || (!$is_32 && ($ignore_failures_64{$test} || - $ignore_failures_64{$test_base_name}))) { + $ignore_failures_64{$test_base_name} || + $ignore_failures_sve{$test} || + $ignore_failures_sve{$test_base_name}))) { $lines[$j] = "\t(ignore: i" . $issue_no . ") " . $lines[$j]; $num_ignore++; } elsif ($test =~ /_FLAKY$/) { diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 9d511f656f0..fa2ac4bf7d1 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2010-2023 Google, Inc. All rights reserved. +# Copyright (c) 2010-2024 Google, Inc. All rights reserved. # Copyright (c) 2009-2010 VMware, Inc. All rights reserved. # Copyright (c) 2016-2023 ARM Limited. All rights reserved. # ********************************************************** @@ -490,8 +490,8 @@ endfunction(append_link_flags) # TODO i#6429 Change this allowlist to a blocklist. 
set(sve_tests simple_app api.ir api.ir_negative api.ir_v81 api.ir_v82 api.ir_v83 api.ir_v84 - api.ir_v86 api.ir_sve api.ir_sve2 api.ir-static api.drdecode common.broadfun - common.fib common.nzcv common.getretaddr common.segfault + api.ir_v85 api.ir_v86 api.ir_sve api.ir_sve2 api.ir-static api.drdecode + common.broadfun common.nzcv common.getretaddr common.segfault common.allasm_aarch64_isa common.allasm_aarch64_cache allasm_aarch64_prefetch allasm_aarch64_flush libutil.frontend_test libutil.drconfig_test client.call-retarget client.modules client.annotation-concurrency @@ -511,8 +511,8 @@ set(sve_tests api.static_reattach_client_flags api.static_crash api.static_sideline_FLAKY api.static_symbols api.static_maps_mixup_yesvars api.static_maps_mixup_novars_FLAKY api.thread_churn client.app_args - client.destructor builtin_prefetch tool.multiproc stride_benchmark - tool.fib_plus tool.heap_test tool.drcacheoff.gencode linux.eintr + client.destructor builtin_prefetch stride_benchmark + tool.heap_test tool.drcacheoff.gencode linux.execve-sub linux.execve-null linux.execve-config linux.execv linux.execve-rec linux.exit linux.fork linux.fork-sleep linux.infinite linux.longjmp linux.prctl linux.mmap linux.zero-length-mem-ranges @@ -520,7 +520,7 @@ set(sve_tests linux.threadexit linux.threadexit2 linux.signalfd linux.alarm linux.signal_racesys linux.signal_pre_syscall linux.bad-signal-stack linux.sigsuspend linux.sigmask linux.mangle_asynch linux.app_tls - linux.readlink linux.fib-conflict linux.fib-static linux.fib-pie linux.vfork + linux.readlink linux.vfork pthreads.pthreads pthreads.pthreads_exit pthreads.ptsig pthreads.pthreads_fork_FLAKY security-linux.trampoline linux.infloop linux.rseq_disable security-common.codemod security-common.ret_noncall_trace @@ -1118,6 +1118,10 @@ function(template2expect outexpect template runops key) set(rundefs "${rundefs} -D__AVX__ -D__AVX512F__") elseif (DEFINED ${key}_runavx) set(rundefs "${rundefs} -D__AVX__") + elseif (DEFINED 
${key}_runsve2) + set(rundefs "${rundefs} -D__ARM_FEATURE_SVE") + set(rundefs "${rundefs} -D__ARM_FEATURE_SVE2") + set(rundefs "${rundefs} -D__ARM_FEATURE_SVE_BITS=${proc_sve_vl}") elseif (DEFINED ${key}_runsve) set(rundefs "${rundefs} -D__ARM_FEATURE_SVE") set(rundefs "${rundefs} -D__ARM_FEATURE_SVE_BITS=${proc_sve_vl}") @@ -1411,6 +1415,9 @@ function(torun test key source native standalone_dr dr_ops exe_ops added_out pas # is failing). set(exe_ops "${exe_ops};-v;-attach") endif () + if ("${runall}" MATCHES "") + set(exe_ops "${exe_ops};-v;") + endif () if ("${runall}" MATCHES "") set(exe_ops "${exe_ops};-block") endif () @@ -1532,6 +1539,7 @@ function(torun test key source native standalone_dr dr_ops exe_ops added_out pas -D postcmd=${${key}_postcmd} -D postcmd2=${${key}_postcmd2} -D postcmd3=${${key}_postcmd3} + -D postcmd4=${${key}_postcmd4} -D failok=${${key}_failok} -D cmp=${CMAKE_CURRENT_BINARY_DIR}/${expectbase}.expect -D code=${${key}_code} @@ -1624,9 +1632,14 @@ function(torun test key source native standalone_dr dr_ops exe_ops added_out pas # Though we mostly use drrun's -s we set this too in case the requested # value is higher than ctest's default. set_tests_properties(${test} PROPERTIES TIMEOUT ${${key}_timeout}) - elseif (is_runcmp) - # Runcmp generally doesn't use drrun or runstats so we need a ctest timeout. + elseif (is_runcmp OR is_runall) + # Runcmp/runall generally don't use drrun or runstats so we need a ctest timeout. set_tests_properties(${test} PROPERTIES TIMEOUT ${TEST_SECONDS}) + else () + # Even though we expect drrun -s to enforce a timeout, set one in ctest just + # in case, but give time for drrun first. 
+ math(EXPR timeout "${TEST_SECONDS}+30") + set_tests_properties(${test} PROPERTIES TIMEOUT ${timeout}) endif () # Though we use drrun and runstats -s timeout parameters, we have @@ -1804,7 +1817,12 @@ macro(set_avx_flags target) endmacro(set_avx_flags) macro(set_sve_flags target) - if (proc_supports_sve) + if (proc_supports_sve2) + if (TARGET ${target}) # Support calling on non-exe target. + append_property_string(TARGET ${target} COMPILE_FLAGS "${CFLAGS_SVE2}") + endif () + set(${target}_runsve2 1) + elseif (proc_supports_sve) if (TARGET ${target}) # Support calling on non-exe target. append_property_string(TARGET ${target} COMPILE_FLAGS "${CFLAGS_SVE}") endif () @@ -2011,6 +2029,7 @@ if (NOT ANDROID) tobuild_api(api.ir_v82 api/ir_aarch64_v82.c "" "" OFF OFF OFF) tobuild_api(api.ir_v83 api/ir_aarch64_v83.c "" "" OFF OFF OFF) tobuild_api(api.ir_v84 api/ir_aarch64_v84.c "" "" OFF OFF OFF) + tobuild_api(api.ir_v85 api/ir_aarch64_v85.c "" "" OFF OFF OFF) tobuild_api(api.ir_v86 api/ir_aarch64_v86.c "" "" OFF OFF OFF) tobuild_api(api.ir_sve api/ir_aarch64_sve.c "" "" OFF OFF OFF) tobuild_api(api.ir_sve2 api/ir_aarch64_sve2.c "" "" OFF OFF OFF) @@ -2578,9 +2597,9 @@ endif (ANNOTATIONS AND NOT ARM) if (NOT ANDROID) # TODO i#38: Port test to Android. tobuild_ci(client.attach_test client-interface/attach_test.runall "" "" "") - if (WIN32) + if ((LINUX OR WIN32) AND NOT RISCV64) tobuild_ci(client.detach_test client-interface/detach_test.runall "" "" "") - endif (WIN32) + endif () if (UNIX) # Test attaching during a blocking syscall. torunonly_ci(client.attach_blocking linux.infloop client.attach_test.dll @@ -2941,9 +2960,13 @@ if (X86 OR AARCH64) "" "" "") set_sve_flags(client.drx-scattergather) - if (proc_supports_sve) + if (proc_supports_sve OR proc_supports_sve2) # Run the tests natively as well to confirm the test reference data is correct. 
- set(client.drx-scattergather-native_runsve 1) + if (proc_supports_sve2) + set(client.drx-scattergather-native_runsve2 1) + else () + set(client.drx-scattergather-native_runsve 1) + endif () set(client.drx-scattergather-native_test_sample_client 1) torunonly_native(client.drx-scattergather-native client.drx-scattergather drx-scattergather-aarch64 "client-interface/drx-scattergather-aarch64.cpp" "") @@ -3007,7 +3030,7 @@ if (X86 OR AARCH64) "-early_inject" "") append_pure_asm_app_link_flags(allasm_repstr) endif () - endif() + endif () endif () if (NOT RISCV64) @@ -3074,17 +3097,40 @@ endif () if (NOT ANDROID AND NOT RISCV64) # TODO i#3544: Port tests to RISC-V 64 # XXX i#1874: get working on Android - tobuild_appdll(client.drsyms-test client-interface/drsyms-test.cpp) - get_target_path_for_execution(drsyms_libpath client.drsyms-test.appdll "${location_suffix}") - tobuild_ci(client.drsyms-test client-interface/drsyms-test.cpp "" - "" "${drsyms_libpath}") - # Disable optimizations for the exe and appdll to allow stack tracing. - disable_optimizations_for_file(client-interface/drsyms-test.cpp) - disable_optimizations_for_file(client-interface/drsyms-test.appdll.cpp) - # Debug libc seems to mess up the test. - use_MT_not_MTd(client-interface/drsyms-test.appdll.cpp) - use_DynamoRIO_extension(client.drsyms-test.dll drsyms) - use_DynamoRIO_extension(client.drsyms-test.dll drwrap) # Makes testing easy + macro (add_drsyms_test suffix flag) + tobuild_appdll(client.drsyms${suffix}-test client-interface/drsyms-test.cpp) + get_target_path_for_execution(drsyms_libpath client.drsyms${suffix}-test.appdll + "${location_suffix}") + tobuild_ci(client.drsyms${suffix}-test client-interface/drsyms-test.cpp "" + "" "${drsyms_libpath}") + # Disable optimizations for the exe and appdll to allow stack tracing. + disable_optimizations_for_file(client-interface/drsyms-test.cpp) + disable_optimizations_for_file(client-interface/drsyms-test.appdll.cpp) + # Debug libc seems to mess up the test. 
+ use_MT_not_MTd(client-interface/drsyms-test.appdll.cpp) + use_DynamoRIO_extension(client.drsyms${suffix}-test.dll drsyms) + use_DynamoRIO_extension(client.drsyms${suffix}-test.dll drwrap) # Makes testing easy + if (NOT "${flag}" STREQUAL "") + append_property_string(TARGET client.drsyms${suffix}-test + COMPILE_FLAGS "${flag}") + append_property_string(TARGET client.drsyms${suffix}-test.appdll + COMPILE_FLAGS "${flag}") + endif () + endmacro () + + add_drsyms_test("" "") + if (LINUX) + # We don't try to figure out the default: we just try each variant in addition to + # whatever the default is. + CHECK_C_COMPILER_FLAG("-gdwarf-4" gdwarf4_avail) + if (gdwarf4_avail) + add_drsyms_test("-dwarf4" "-gdwarf-4") + endif () + CHECK_C_COMPILER_FLAG("-gdwarf-5" gdwarf5_avail) + if (gdwarf5_avail) + add_drsyms_test("-dwarf5" "-gdwarf-5") + endif () + endif () # TODO i#2414: Port to Windows, Mac, and Android. if (LINUX AND HAVE_LIBUNWIND_H) @@ -3196,6 +3242,8 @@ elseif (AARCH64) "-q;${CMAKE_CURRENT_SOURCE_DIR}/api/dis-a64-v83.txt" OFF OFF) torunonly_api(api.dis-a64-v84 api.dis-a64 api/dis-a64.c "" "-q;${CMAKE_CURRENT_SOURCE_DIR}/api/dis-a64-v84.txt" OFF OFF) + torunonly_api(api.dis-a64-v85 api.dis-a64 api/dis-a64.c "" + "-q;${CMAKE_CURRENT_SOURCE_DIR}/api/dis-a64-v85.txt" OFF OFF) torunonly_api(api.dis-a64-v86 api.dis-a64 api/dis-a64.c "" "-q;${CMAKE_CURRENT_SOURCE_DIR}/api/dis-a64-v86.txt" OFF OFF) torunonly_api(api.dis-a64-sve api.dis-a64 api/dis-a64.c "" @@ -3506,7 +3554,9 @@ if (BUILD_SAMPLES) torunonly_ci(sample.${sample}_scattergather client.drx-scattergather ${sample} client-interface/drx-scattergather-aarch64.cpp "" "" "") set(sample.${sample}_scattergather_test_sample_client 1) - if (proc_supports_sve) + if (proc_supports_sve2) + set(sample.${sample}_scattergather_runsve2 1) + elseif (proc_supports_sve) set(sample.${sample}_scattergather_runsve 1) endif () endif () @@ -3802,6 +3852,41 @@ if (BUILD_CLIENTS) "-indir ${thread_trace_dir} -simulator_type schedule_stats 
-core_sharded -sched_quantum 10000000" "") set(tool.schedule_stats_nopreempt_rawtemp ON) # no preprocessor + + torunonly_simtool(core_serial ${ci_shared_app} + "-indir ${thread_trace_dir} -simulator_type schedule_stats:basic_counts -core_serial" + "") + set(tool.core_serial_rawtemp ON) # no preprocessor + + set(cpu_sched_path "${thread_trace_dir}/cpu_schedule.bin.zip") + torunonly_simtool(simulate_as_traced ${ci_shared_app} + "-indir ${thread_trace_dir} -core_serial -cpu_schedule_file ${cpu_sched_path} -cores 7" + "") + set(tool.simulate_as_traced_rawtemp ON) # no preprocessor + + set(switch_file + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/mock_switch_sequences.x64.zip") + torunonly_simtool(switch_insertion ${ci_shared_app} + "-indir ${thread_trace_dir} -simulator_type basic_counts -core_sharded -sched_quantum 1000 -sched_switch_file ${switch_file}" + "") + set(tool.switch_insertion_rawtemp ON) # no preprocessor + + # Sanity test that core-sharded at least runs without errors on our other tools. + torunonly_simtool(core_sharded ${ci_shared_app} + "-indir ${thread_trace_dir} -simulator_type reuse_time:reuse_distance:histogram:opcode_mix:syscall_mix -core_sharded" + "") + set(tool.core_sharded_rawtemp ON) # no preprocessor + + # Test analysis of core-sharded-on-disk traces. 
+ set(core_sharded_dir + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.threadsig-core-sharded.x64.tracedir") + torunonly_simtool(core_on_disk ${ci_shared_app} + "-indir ${core_sharded_dir} -simulator_type basic_counts" "") + set(tool.core_on_disk_rawtemp ON) # no preprocessor + + torunonly_simtool(core_on_disk_schedule ${ci_shared_app} + "-indir ${core_sharded_dir} -simulator_type schedule_stats" "") + set(tool.core_on_disk_schedule_rawtemp ON) # no preprocessor endif () endif () @@ -3838,6 +3923,8 @@ if (BUILD_CLIENTS) set(tool.drcachesim.scattergather-${ARCH_NAME}_runavx512 1) elseif (proc_supports_avx) set(tool.drcachesim.scattergather-${ARCH_NAME}_runavx 1) + elseif (proc_supports_sve2) + set(tool.drcachesim.scattergather-${ARCH_NAME}_runsve2 1) elseif (proc_supports_sve) set(tool.drcachesim.scattergather-${ARCH_NAME}_runsve 1) endif () @@ -3939,6 +4026,14 @@ if (BUILD_CLIENTS) torunonly_drcacheoff(raw-none ${ci_shared_app} "-raw_compress none" "" "") set(tool.drcacheoff.raw-none_expectbase "offline-simple") + # Test that malloc & co. are not invoked. + # We disable the lz4 default as both lz4 and snappy call + # dr_allow_unsafe_static_behavior(). + # We enable function tracing for a non-library-exported function to exercise drsyms. + torunonly_drcacheoff(check-malloc ${ci_shared_app} + "-raw_compress none -record_function 'main|2'" "" "") + set(tool.drcacheoff.check-malloc_expectbase "offline-simple") + # Test reading a trace in sharded snappy-compressed files. 
if (libsnappy) # with a parallel tool (basic_counts) @@ -3984,9 +4079,15 @@ if (BUILD_CLIENTS) endif () endif () - torunonly_drcacheoff(interval-count-output ${ci_shared_app} "" + torunonly_drcacheoff(interval-microseconds-count-output ${ci_shared_app} "" "@-simulator_type@basic_counts@-interval_microseconds@1M" "") + torunonly_drcacheoff(interval-instr-count-output ${ci_shared_app} "" + "@-simulator_type@basic_counts@-interval_instr_count@10000" "") + + torunonly_drcacheoff(interval-opcode-mix-output ${ci_shared_app} "" + "@-simulator_type@opcode_mix@-interval_instr_count@10000" "") + # As for the online test, we check that only 1 thread is in the final trace. torunonly_drcacheoff(max-global client.annotation-concurrency # Include function tracing to sanity test combining with delay and max. @@ -4161,12 +4262,19 @@ if (BUILD_CLIENTS) torunonly_drcacheoff(view ${ci_shared_app} "" "@-simulator_type@view@-sim_refs@16384" "") + unset(tool.drcacheoff.view_rawtemp) # Use preprocessor + if (AARCH64 AND proc_supports_sve) + set(tool.drcacheoff.view_runsve 1) + endif () set(tool.drcacheoff.func_view_full_run ON) # Fails on Windows if truncated. torunonly_drcacheoff(func_view common.fib "-record_function fib|1" "@-simulator_type@func_view" "only_5") endif (NOT RISCV64) if (DR_HOST_X86 AND DR_HOST_X64 AND LINUX) + torunonly_drcacheoff(opcode_categories allasm_x86_64 "" + "@-simulator_type@opcode_mix" "") + # Requires sudo to access pagemap. # XXX: Should we not enable this outside of the Github suite where we know # we have passwordless sudo? 
The pause for a password may cause problems @@ -4307,7 +4415,7 @@ if (BUILD_CLIENTS) if (LINUX) set(tool.drcacheoff.burst_syscall_inject_nodr ON) torunonly_drcacheoff(burst_syscall_inject tool.drcacheoff.burst_syscall_inject "" - "@-simulator_type@basic_counts@-syscall_template_file@drmemtrace.tool.drcacheoff.burst_syscall_inject.*.dir/raw/syscall_trace_template" + "@-simulator_type@invariant_checker@-syscall_template_file@drmemtrace.tool.drcacheoff.burst_syscall_inject.*.dir/raw/syscall_trace_template" "") endif () @@ -4398,7 +4506,11 @@ if (BUILD_CLIENTS) if (X86 AND proc_supports_avx512) set(tool.drcacheoff.allasm-scattergather-basic-counts_runavx512 1) elseif (AARCH64) - set(tool.drcacheoff.allasm-scattergather-basic-counts_runsve 1) + if (proc_supports_sve2) + set(tool.drcacheoff.allasm-scattergather-basic-counts_runsve2 1) + elseif (proc_supports_sve) + set(tool.drcacheoff.allasm-scattergather-basic-counts_runsve 1) + endif () endif () set(tool.drcacheoff.allasm-scattergather-basic-counts_expectbase "offline-allasm-scattergather-basic-counts-${ARCH_NAME}") @@ -4409,12 +4521,32 @@ if (BUILD_CLIENTS) if (X86 AND proc_supports_avx512) set(tool.drcachesim.allasm-scattergather-basic-counts_runavx512 1) elseif (AARCH64) - set(tool.drcachesim.allasm-scattergather-basic-counts_runsve 1) + if (proc_supports_sve2) + set(tool.drcachesim.allasm-scattergather-basic-counts_runsve2 1) + elseif (proc_supports_sve) + set(tool.drcachesim.allasm-scattergather-basic-counts_runsve 1) + endif () endif () set(tool.drcachesim.allasm-scattergather-basic-counts_expectbase "allasm-scattergather-basic-counts-${ARCH_NAME}") endif () + if (UNIX AND AARCH64 AND proc_supports_sve) + torunonly_drcacheoff(allasm-scattergather-vl-view allasm_scattergather + "" "@-simulator_type@view" "") + unset(tool.drcacheoff.allasm-scattergather-vl-view_rawtemp) # use preprocessor + set(tool.drcacheoff.allasm-scattergather-vl-view_runsve 1) + set(tool.drcacheoff.allasm-scattergather-vl-view_expectbase + 
"offline-allasm-scattergather-vl-view-${ARCH_NAME}") + + torunonly_drcachesim(allasm-scattergather-vl-view allasm_scattergather + "-simulator_type view" "") + unset(tool.drcachesim.allasm-scattergather-vl-view_rawtemp) # use preprocessor + set(tool.drcachesim.allasm-scattergather-vl-view_runsve 1) + set(tool.drcachesim.allasm-scattergather-vl-view_expectbase + "allasm-scattergather-vl-view-${ARCH_NAME}") + endif () + if (UNIX AND X86 AND X64) torunonly_drcacheoff(allasm-repstr-basic-counts allasm_repstr "" "@-simulator_type@basic_counts" "") @@ -4475,6 +4607,101 @@ if (BUILD_CLIENTS) set(${testname_full}_postcmd2 "${histo_path}@-test_mode@-test_mode_name@kernel_xfer_app@-trace_dir@${testname_full}.*.dir/trace") + # Test the standalone record filter tool (beyond its unit tests). + # Assumes the record filter output dir is named "${testname}.filtered.dir". + macro (torun_record_filter testname exename template_name launch_cmd analyzer) + if (WIN32) + # Speed the test up (takes 1+ minutes otherwise). + set(extra_ops "-trace_after_instrs 5K -exit_after_tracing 50K") + else () + set(extra_ops "") + endif () + torunonly_ci(${testname} ${exename} drcachesim + "${template_name}.c" + "-offline -subdir_prefix ${testname} ${extra_ops}" "" "") + set(${testname}_toolname "drcachesim") + set(${testname}_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") + set(outdir "${testname}.filtered.dir") + set(${testname}_runcmp "${CMAKE_CURRENT_SOURCE_DIR}/runmulti.cmake") + set(${testname}_precmd + "foreach@${CMAKE_COMMAND}@-E@remove_directory@${testname}.*.dir") + # Post-process the trace and sanity check it. + # Use a smaller chunk threshold to test multiple chunks. + set(${testname}_postcmd + "${drcachesim_path}@-chunk_instr_count@1000@-indir@${testname}.*.dir@-simulator_type@invariant_checker") + # Run the record filter tool with a null filter. 
+ set(${testname}_postcmd2 "${CMAKE_COMMAND}@-E@make_directory@${outdir}") + set(${testname}_postcmd3 ${launch_cmd}) + # Run the analyzer on the result. + set(${testname}_postcmd4 + "${drcachesim_path}@-indir@${outdir}@-simulator_type@${analyzer}") + endmacro () + + set(testname "tool.record_filter") + get_target_path_for_execution(filter_path record_filter_launcher "${location_suffix}") + prefix_cmd_if_necessary(filter_path ON ${filter_path}) + torun_record_filter("${testname}" ${ci_shared_app} "record_filter-offline" + # We assume the app name starts with "s" here to avoid colliding with + # our output dir, while still letting the single precmd remove both. + "${filter_path}@-trace_dir@${testname}.s*.dir/trace@-output_dir@${testname}.filtered.dir" + "invariant_checker") + + # Single-threaded app on 4 cores to test start-idle cores. + set(testname "tool.record_filter_bycore_uni") + torun_record_filter("${testname}" ${ci_shared_app} + "record_filter_bycore_uni" + # We assume the app name starts with "s" here to avoid colliding with + # our output dir, while still letting the single precmd remove both. + "${drcachesim_path}@-simulator_type@record_filter@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir" + "schedule_stats") + + if (UNIX) # Windows multi-thread tests are too slow. + set(testname "tool.record_filter_bycore_multi") + torun_record_filter("${testname}" pthreads.ptsig + "record_filter_bycore_multi" + # We use the app name start char "p" here to avoid colliding with + # our output dir, while still letting the single precmd remove both. + "${drcachesim_path}@-simulator_type@record_filter@-indir@${testname}.p*.dir/trace@-core_sharded@-cores@3@-outdir@${testname}.filtered.dir" + "schedule_stats") + endif () + + if (X86 AND X64 AND ZLIB_FOUND) + # Test the trim filter. 
+ set(zip_path + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip") + set(outdir ${PROJECT_BINARY_DIR}/trim_filter) + file(MAKE_DIRECTORY ${outdir}) + # The filter launcher doesn't have an -infile parameter. + set(srcdir ${PROJECT_BINARY_DIR}/trim_filter_src) + file(MAKE_DIRECTORY ${srcdir}) + file(COPY ${zip_path} DESTINATION ${srcdir}) + torunonly_api(tool.drcacheoff.trim "${drcachesim_path}" "offline-trim" "" + "-simulator_type;view;-indir;${outdir};${test_mode_flag}" + OFF OFF) + set(tool.drcacheoff.trim_runcmp "${CMAKE_CURRENT_SOURCE_DIR}/runmulti.cmake") + # The filter overwrites any existing file in the dir from a prior run. + set(tool.drcacheoff.trim_precmd + "${drcachesim_path}@-simulator_type@record_filter@-trim_before_timestamp@13352268558646120@-trim_after_timestamp@13352268558646661@-indir@${srcdir}@-outdir@${outdir}") + set(tool.drcacheoff.trim_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") + set(tool.drcacheoff.trim_rawtemp ON) # no preprocessor + + # Test the record_filter in as-traced mode. 
+ set(trace_dir + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.threadsig.x64.tracedir") + set(sched_file "${trace_dir}/cpu_schedule.bin.zip") + set(outdir ${CMAKE_CURRENT_BINARY_DIR}/filter_as_traced) + file(MAKE_DIRECTORY ${outdir}) + torunonly_api(tool.record_filter_as_traced "${drcachesim_path}" + "record_filter_as_traced" + "" "-simulator_type;schedule_stats;-indir;${outdir}" OFF OFF) + set(tool.record_filter_as_traced_runcmp "${CMAKE_CURRENT_SOURCE_DIR}/runmulti.cmake") + set(tool.record_filter_as_traced_precmd + "${drcachesim_path}@-simulator_type@record_filter@-cpu_schedule_file@${sched_file}@-core_sharded@-cores@7@-indir@${trace_dir}@-outdir@${outdir}") + set(tool.record_filter_as_traced_basedir + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") + set(tool.record_filter_as_traced_rawtemp ON) # no preprocessor + endif () + if (AARCH64) set(testname_full "tool.drcacheoff.allasm-aarch64-prefetch-counts") torunonly_ci(${testname_full} allasm_aarch64_prefetch drcachesim @@ -4604,10 +4831,12 @@ if (BUILD_CLIENTS) "@-simulator_type@opcode_mix") torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" "" "@-simulator_type@syscall_mix") + torunonly_drcacheoff_kernel(invariant-checker ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@invariant_checker@-test_mode_name@kernel_syscall_pt_trace") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (proc_supports_pt) - if (X86) + if ((AARCHXX AND NOT ANDROID) OR X86) torunonly_drcacheoff(getretaddr_record_replace_retaddr common.getretaddr "-record_replace_retaddr -record_function tailcall_with_retaddr|1&foo|1" "@-simulator_type@invariant_checker" "") @@ -5922,23 +6151,42 @@ if (RISCV64) code_api|api.ir code_api|api.ir-static code_api|client.app_args + code_api|client.app_args code_api|client.blackbox + code_api|client.blackbox + code_api|client.crashmsg code_api|client.crashmsg code_api|client.execfault + code_api|client.execfault code_api|client.mangle_suspend + 
code_api|client.mangle_suspend + code_api|client.null_instrument code_api|client.null_instrument code_api|client.option_parse + code_api|client.option_parse + code_api|client.partial_module_map code_api|client.partial_module_map code_api|client.stack-overflow + code_api|client.stack-overflow code_api|client.truncate + code_api|client.unregister code_api|common.broadfun + code_api|common.hello + code_api|libutil.drconfig_test code_api|libutil.drconfig_test code_api|libutil.frontend_test + code_api|libutil.frontend_test + code_api|linux.exit code_api|linux.exit code_api|linux.infinite + code_api|linux.infinite code_api|linux.longjmp + code_api|linux.longjmp + code_api|linux.prctl code_api|linux.prctl code_api|linux.signalfd + code_api|pthreads.pthreads + code_api|pthreads.pthreads_exit code_api|pthreads.pthreads_exit code_api|sample.bbbuf code_api|sample.bbcount @@ -5947,12 +6195,22 @@ if (RISCV64) code_api|sample.empty code_api|sample.inline code_api|sample.inscount + code_api|sample.inscount.cleancall + code_api|sample.inscount.prof-pcs.cleancall code_api|sample.opcode_count code_api|sample.signal code_api|sample.stl_test + code_api|sample.stl_test code_api|sample.syscall + code_api|sample.syscall + code_api|security-linux.trampoline code_api|security-linux.trampoline + code_api|tool.drcachesim.missfile-config-file + code_api|tool.drcachesim.syscall-mix + code_api|tool.drcov.fib code_api|tool.drdisas + code_api|tool.histogram + code_api|tool.reuse_distance no_code_api,no_intercept_all_signals|linux.sigaction PROPERTIES LABELS RUNS_ON_QEMU) if (DEBUG) diff --git a/suite/tests/api/dis-a64-sve.txt b/suite/tests/api/dis-a64-sve.txt index 339245ca8a0..0cd5528bd7d 100644 --- a/suite/tests/api/dis-a64-sve.txt +++ b/suite/tests/api/dis-a64-sve.txt @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2022-2023 ARM Limited. All rights reserved. +# Copyright (c) 2022-2024 ARM Limited. All rights reserved. 
# ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -10802,744 +10802,744 @@ 05e39fff : lastb d31, p7, z31.d : lastb %p7 %z31.d -> %d31 # LD1B { .S }, /Z, [, .S, ] (LD1B-Z.P.BZ-S.x32.unscaled) -84004000 : ld1b z0.s, p0/Z, [x0, z0.s, UXTW] : ld1b (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s -84054482 : ld1b z2.s, p1/Z, [x4, z5.s, UXTW] : ld1b (%x4,%z5.s,uxtw)[8byte] %p1/z -> %z2.s -840748c4 : ld1b z4.s, p2/Z, [x6, z7.s, UXTW] : ld1b (%x6,%z7.s,uxtw)[8byte] %p2/z -> %z4.s -84094906 : ld1b z6.s, p2/Z, [x8, z9.s, UXTW] : ld1b (%x8,%z9.s,uxtw)[8byte] %p2/z -> %z6.s -840b4d48 : ld1b z8.s, p3/Z, [x10, z11.s, UXTW] : ld1b (%x10,%z11.s,uxtw)[8byte] %p3/z -> %z8.s -840d4d6a : ld1b z10.s, p3/Z, [x11, z13.s, UXTW] : ld1b (%x11,%z13.s,uxtw)[8byte] %p3/z -> %z10.s -840f51ac : ld1b z12.s, p4/Z, [x13, z15.s, UXTW] : ld1b (%x13,%z15.s,uxtw)[8byte] %p4/z -> %z12.s -841151ee : ld1b z14.s, p4/Z, [x15, z17.s, UXTW] : ld1b (%x15,%z17.s,uxtw)[8byte] %p4/z -> %z14.s -84135630 : ld1b z16.s, p5/Z, [x17, z19.s, UXTW] : ld1b (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s -84145671 : ld1b z17.s, p5/Z, [x19, z20.s, UXTW] : ld1b (%x19,%z20.s,uxtw)[8byte] %p5/z -> %z17.s -841656b3 : ld1b z19.s, p5/Z, [x21, z22.s, UXTW] : ld1b (%x21,%z22.s,uxtw)[8byte] %p5/z -> %z19.s -84185af5 : ld1b z21.s, p6/Z, [x23, z24.s, UXTW] : ld1b (%x23,%z24.s,uxtw)[8byte] %p6/z -> %z21.s -841a5b17 : ld1b z23.s, p6/Z, [x24, z26.s, UXTW] : ld1b (%x24,%z26.s,uxtw)[8byte] %p6/z -> %z23.s -841c5f59 : ld1b z25.s, p7/Z, [x26, z28.s, UXTW] : ld1b (%x26,%z28.s,uxtw)[8byte] %p7/z -> %z25.s -841e5f9b : ld1b z27.s, p7/Z, [x28, z30.s, UXTW] : ld1b (%x28,%z30.s,uxtw)[8byte] %p7/z -> %z27.s -841f5fff : ld1b z31.s, p7/Z, [sp, z31.s, UXTW] : ld1b (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s -84404000 : ld1b z0.s, p0/Z, [x0, z0.s, SXTW] : ld1b (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s -84454482 : ld1b z2.s, p1/Z, [x4, z5.s, SXTW] : ld1b (%x4,%z5.s,sxtw)[8byte] %p1/z -> %z2.s 
-844748c4 : ld1b z4.s, p2/Z, [x6, z7.s, SXTW] : ld1b (%x6,%z7.s,sxtw)[8byte] %p2/z -> %z4.s -84494906 : ld1b z6.s, p2/Z, [x8, z9.s, SXTW] : ld1b (%x8,%z9.s,sxtw)[8byte] %p2/z -> %z6.s -844b4d48 : ld1b z8.s, p3/Z, [x10, z11.s, SXTW] : ld1b (%x10,%z11.s,sxtw)[8byte] %p3/z -> %z8.s -844d4d6a : ld1b z10.s, p3/Z, [x11, z13.s, SXTW] : ld1b (%x11,%z13.s,sxtw)[8byte] %p3/z -> %z10.s -844f51ac : ld1b z12.s, p4/Z, [x13, z15.s, SXTW] : ld1b (%x13,%z15.s,sxtw)[8byte] %p4/z -> %z12.s -845151ee : ld1b z14.s, p4/Z, [x15, z17.s, SXTW] : ld1b (%x15,%z17.s,sxtw)[8byte] %p4/z -> %z14.s -84535630 : ld1b z16.s, p5/Z, [x17, z19.s, SXTW] : ld1b (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s -84545671 : ld1b z17.s, p5/Z, [x19, z20.s, SXTW] : ld1b (%x19,%z20.s,sxtw)[8byte] %p5/z -> %z17.s -845656b3 : ld1b z19.s, p5/Z, [x21, z22.s, SXTW] : ld1b (%x21,%z22.s,sxtw)[8byte] %p5/z -> %z19.s -84585af5 : ld1b z21.s, p6/Z, [x23, z24.s, SXTW] : ld1b (%x23,%z24.s,sxtw)[8byte] %p6/z -> %z21.s -845a5b17 : ld1b z23.s, p6/Z, [x24, z26.s, SXTW] : ld1b (%x24,%z26.s,sxtw)[8byte] %p6/z -> %z23.s -845c5f59 : ld1b z25.s, p7/Z, [x26, z28.s, SXTW] : ld1b (%x26,%z28.s,sxtw)[8byte] %p7/z -> %z25.s -845e5f9b : ld1b z27.s, p7/Z, [x28, z30.s, SXTW] : ld1b (%x28,%z30.s,sxtw)[8byte] %p7/z -> %z27.s -845f5fff : ld1b z31.s, p7/Z, [sp, z31.s, SXTW] : ld1b (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s +84004000 : ld1b z0.s, p0/Z, [x0, z0.s, UXTW] : ld1b (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s +84054482 : ld1b z2.s, p1/Z, [x4, z5.s, UXTW] : ld1b (%x4,%z5.s,uxtw)[1byte] %p1/z -> %z2.s +840748c4 : ld1b z4.s, p2/Z, [x6, z7.s, UXTW] : ld1b (%x6,%z7.s,uxtw)[1byte] %p2/z -> %z4.s +84094906 : ld1b z6.s, p2/Z, [x8, z9.s, UXTW] : ld1b (%x8,%z9.s,uxtw)[1byte] %p2/z -> %z6.s +840b4d48 : ld1b z8.s, p3/Z, [x10, z11.s, UXTW] : ld1b (%x10,%z11.s,uxtw)[1byte] %p3/z -> %z8.s +840d4d6a : ld1b z10.s, p3/Z, [x11, z13.s, UXTW] : ld1b (%x11,%z13.s,uxtw)[1byte] %p3/z -> %z10.s +840f51ac : ld1b z12.s, p4/Z, [x13, z15.s, UXTW] : ld1b (%x13,%z15.s,uxtw)[1byte] 
%p4/z -> %z12.s +841151ee : ld1b z14.s, p4/Z, [x15, z17.s, UXTW] : ld1b (%x15,%z17.s,uxtw)[1byte] %p4/z -> %z14.s +84135630 : ld1b z16.s, p5/Z, [x17, z19.s, UXTW] : ld1b (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s +84145671 : ld1b z17.s, p5/Z, [x19, z20.s, UXTW] : ld1b (%x19,%z20.s,uxtw)[1byte] %p5/z -> %z17.s +841656b3 : ld1b z19.s, p5/Z, [x21, z22.s, UXTW] : ld1b (%x21,%z22.s,uxtw)[1byte] %p5/z -> %z19.s +84185af5 : ld1b z21.s, p6/Z, [x23, z24.s, UXTW] : ld1b (%x23,%z24.s,uxtw)[1byte] %p6/z -> %z21.s +841a5b17 : ld1b z23.s, p6/Z, [x24, z26.s, UXTW] : ld1b (%x24,%z26.s,uxtw)[1byte] %p6/z -> %z23.s +841c5f59 : ld1b z25.s, p7/Z, [x26, z28.s, UXTW] : ld1b (%x26,%z28.s,uxtw)[1byte] %p7/z -> %z25.s +841e5f9b : ld1b z27.s, p7/Z, [x28, z30.s, UXTW] : ld1b (%x28,%z30.s,uxtw)[1byte] %p7/z -> %z27.s +841f5fff : ld1b z31.s, p7/Z, [sp, z31.s, UXTW] : ld1b (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s +84404000 : ld1b z0.s, p0/Z, [x0, z0.s, SXTW] : ld1b (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s +84454482 : ld1b z2.s, p1/Z, [x4, z5.s, SXTW] : ld1b (%x4,%z5.s,sxtw)[1byte] %p1/z -> %z2.s +844748c4 : ld1b z4.s, p2/Z, [x6, z7.s, SXTW] : ld1b (%x6,%z7.s,sxtw)[1byte] %p2/z -> %z4.s +84494906 : ld1b z6.s, p2/Z, [x8, z9.s, SXTW] : ld1b (%x8,%z9.s,sxtw)[1byte] %p2/z -> %z6.s +844b4d48 : ld1b z8.s, p3/Z, [x10, z11.s, SXTW] : ld1b (%x10,%z11.s,sxtw)[1byte] %p3/z -> %z8.s +844d4d6a : ld1b z10.s, p3/Z, [x11, z13.s, SXTW] : ld1b (%x11,%z13.s,sxtw)[1byte] %p3/z -> %z10.s +844f51ac : ld1b z12.s, p4/Z, [x13, z15.s, SXTW] : ld1b (%x13,%z15.s,sxtw)[1byte] %p4/z -> %z12.s +845151ee : ld1b z14.s, p4/Z, [x15, z17.s, SXTW] : ld1b (%x15,%z17.s,sxtw)[1byte] %p4/z -> %z14.s +84535630 : ld1b z16.s, p5/Z, [x17, z19.s, SXTW] : ld1b (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s +84545671 : ld1b z17.s, p5/Z, [x19, z20.s, SXTW] : ld1b (%x19,%z20.s,sxtw)[1byte] %p5/z -> %z17.s +845656b3 : ld1b z19.s, p5/Z, [x21, z22.s, SXTW] : ld1b (%x21,%z22.s,sxtw)[1byte] %p5/z -> %z19.s +84585af5 : ld1b z21.s, p6/Z, [x23, z24.s, SXTW] : 
ld1b (%x23,%z24.s,sxtw)[1byte] %p6/z -> %z21.s +845a5b17 : ld1b z23.s, p6/Z, [x24, z26.s, SXTW] : ld1b (%x24,%z26.s,sxtw)[1byte] %p6/z -> %z23.s +845c5f59 : ld1b z25.s, p7/Z, [x26, z28.s, SXTW] : ld1b (%x26,%z28.s,sxtw)[1byte] %p7/z -> %z25.s +845e5f9b : ld1b z27.s, p7/Z, [x28, z30.s, SXTW] : ld1b (%x28,%z30.s,sxtw)[1byte] %p7/z -> %z27.s +845f5fff : ld1b z31.s, p7/Z, [sp, z31.s, SXTW] : ld1b (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s # LD1B { .S }, /Z, [.S{, #}] (LD1B-Z.P.AI-S) -8420c000 : ld1b z0.s, p0/Z, [z0.s, #0] : ld1b (%z0.s)[8byte] %p0/z -> %z0.s -8422c482 : ld1b z2.s, p1/Z, [z4.s, #2] : ld1b +0x02(%z4.s)[8byte] %p1/z -> %z2.s -8424c8c4 : ld1b z4.s, p2/Z, [z6.s, #4] : ld1b +0x04(%z6.s)[8byte] %p2/z -> %z4.s -8426c906 : ld1b z6.s, p2/Z, [z8.s, #6] : ld1b +0x06(%z8.s)[8byte] %p2/z -> %z6.s -8428cd48 : ld1b z8.s, p3/Z, [z10.s, #8] : ld1b +0x08(%z10.s)[8byte] %p3/z -> %z8.s -842acd8a : ld1b z10.s, p3/Z, [z12.s, #10] : ld1b +0x0a(%z12.s)[8byte] %p3/z -> %z10.s -842cd1cc : ld1b z12.s, p4/Z, [z14.s, #12] : ld1b +0x0c(%z14.s)[8byte] %p4/z -> %z12.s -842ed20e : ld1b z14.s, p4/Z, [z16.s, #14] : ld1b +0x0e(%z16.s)[8byte] %p4/z -> %z14.s -8430d650 : ld1b z16.s, p5/Z, [z18.s, #16] : ld1b +0x10(%z18.s)[8byte] %p5/z -> %z16.s -8431d671 : ld1b z17.s, p5/Z, [z19.s, #17] : ld1b +0x11(%z19.s)[8byte] %p5/z -> %z17.s -8433d6b3 : ld1b z19.s, p5/Z, [z21.s, #19] : ld1b +0x13(%z21.s)[8byte] %p5/z -> %z19.s -8435daf5 : ld1b z21.s, p6/Z, [z23.s, #21] : ld1b +0x15(%z23.s)[8byte] %p6/z -> %z21.s -8437db37 : ld1b z23.s, p6/Z, [z25.s, #23] : ld1b +0x17(%z25.s)[8byte] %p6/z -> %z23.s -8439df79 : ld1b z25.s, p7/Z, [z27.s, #25] : ld1b +0x19(%z27.s)[8byte] %p7/z -> %z25.s -843bdfbb : ld1b z27.s, p7/Z, [z29.s, #27] : ld1b +0x1b(%z29.s)[8byte] %p7/z -> %z27.s -843fdfff : ld1b z31.s, p7/Z, [z31.s, #31] : ld1b +0x1f(%z31.s)[8byte] %p7/z -> %z31.s +8420c000 : ld1b z0.s, p0/Z, [z0.s, #0] : ld1b (%z0.s)[1byte] %p0/z -> %z0.s +8422c482 : ld1b z2.s, p1/Z, [z4.s, #2] : ld1b +0x02(%z4.s)[1byte] %p1/z -> 
%z2.s +8424c8c4 : ld1b z4.s, p2/Z, [z6.s, #4] : ld1b +0x04(%z6.s)[1byte] %p2/z -> %z4.s +8426c906 : ld1b z6.s, p2/Z, [z8.s, #6] : ld1b +0x06(%z8.s)[1byte] %p2/z -> %z6.s +8428cd48 : ld1b z8.s, p3/Z, [z10.s, #8] : ld1b +0x08(%z10.s)[1byte] %p3/z -> %z8.s +842acd8a : ld1b z10.s, p3/Z, [z12.s, #10] : ld1b +0x0a(%z12.s)[1byte] %p3/z -> %z10.s +842cd1cc : ld1b z12.s, p4/Z, [z14.s, #12] : ld1b +0x0c(%z14.s)[1byte] %p4/z -> %z12.s +842ed20e : ld1b z14.s, p4/Z, [z16.s, #14] : ld1b +0x0e(%z16.s)[1byte] %p4/z -> %z14.s +8430d650 : ld1b z16.s, p5/Z, [z18.s, #16] : ld1b +0x10(%z18.s)[1byte] %p5/z -> %z16.s +8431d671 : ld1b z17.s, p5/Z, [z19.s, #17] : ld1b +0x11(%z19.s)[1byte] %p5/z -> %z17.s +8433d6b3 : ld1b z19.s, p5/Z, [z21.s, #19] : ld1b +0x13(%z21.s)[1byte] %p5/z -> %z19.s +8435daf5 : ld1b z21.s, p6/Z, [z23.s, #21] : ld1b +0x15(%z23.s)[1byte] %p6/z -> %z21.s +8437db37 : ld1b z23.s, p6/Z, [z25.s, #23] : ld1b +0x17(%z25.s)[1byte] %p6/z -> %z23.s +8439df79 : ld1b z25.s, p7/Z, [z27.s, #25] : ld1b +0x19(%z27.s)[1byte] %p7/z -> %z25.s +843bdfbb : ld1b z27.s, p7/Z, [z29.s, #27] : ld1b +0x1b(%z29.s)[1byte] %p7/z -> %z27.s +843fdfff : ld1b z31.s, p7/Z, [z31.s, #31] : ld1b +0x1f(%z31.s)[1byte] %p7/z -> %z31.s # LD1B { .B }, /Z, [, ] (LD1B-Z.P.BR-U8) -a4004000 : ld1b z0.b, p0/Z, [x0, x0] : ld1b (%x0,%x0)[32byte] %p0/z -> %z0.b -a4054482 : ld1b z2.b, p1/Z, [x4, x5] : ld1b (%x4,%x5)[32byte] %p1/z -> %z2.b -a40748c4 : ld1b z4.b, p2/Z, [x6, x7] : ld1b (%x6,%x7)[32byte] %p2/z -> %z4.b -a4094906 : ld1b z6.b, p2/Z, [x8, x9] : ld1b (%x8,%x9)[32byte] %p2/z -> %z6.b -a40b4d48 : ld1b z8.b, p3/Z, [x10, x11] : ld1b (%x10,%x11)[32byte] %p3/z -> %z8.b -a40c4d6a : ld1b z10.b, p3/Z, [x11, x12] : ld1b (%x11,%x12)[32byte] %p3/z -> %z10.b -a40e51ac : ld1b z12.b, p4/Z, [x13, x14] : ld1b (%x13,%x14)[32byte] %p4/z -> %z12.b -a41051ee : ld1b z14.b, p4/Z, [x15, x16] : ld1b (%x15,%x16)[32byte] %p4/z -> %z14.b -a4125630 : ld1b z16.b, p5/Z, [x17, x18] : ld1b (%x17,%x18)[32byte] %p5/z -> %z16.b -a4145671 : ld1b 
z17.b, p5/Z, [x19, x20] : ld1b (%x19,%x20)[32byte] %p5/z -> %z17.b -a41656b3 : ld1b z19.b, p5/Z, [x21, x22] : ld1b (%x21,%x22)[32byte] %p5/z -> %z19.b -a4185af5 : ld1b z21.b, p6/Z, [x23, x24] : ld1b (%x23,%x24)[32byte] %p6/z -> %z21.b -a4195b17 : ld1b z23.b, p6/Z, [x24, x25] : ld1b (%x24,%x25)[32byte] %p6/z -> %z23.b -a41b5f59 : ld1b z25.b, p7/Z, [x26, x27] : ld1b (%x26,%x27)[32byte] %p7/z -> %z25.b -a41d5f9b : ld1b z27.b, p7/Z, [x28, x29] : ld1b (%x28,%x29)[32byte] %p7/z -> %z27.b -a41e5fff : ld1b z31.b, p7/Z, [sp, x30] : ld1b (%sp,%x30)[32byte] %p7/z -> %z31.b +a4004000 : ld1b z0.b, p0/Z, [x0, x0] : ld1b (%x0,%x0)[1byte] %p0/z -> %z0.b +a4054482 : ld1b z2.b, p1/Z, [x4, x5] : ld1b (%x4,%x5)[1byte] %p1/z -> %z2.b +a40748c4 : ld1b z4.b, p2/Z, [x6, x7] : ld1b (%x6,%x7)[1byte] %p2/z -> %z4.b +a4094906 : ld1b z6.b, p2/Z, [x8, x9] : ld1b (%x8,%x9)[1byte] %p2/z -> %z6.b +a40b4d48 : ld1b z8.b, p3/Z, [x10, x11] : ld1b (%x10,%x11)[1byte] %p3/z -> %z8.b +a40c4d6a : ld1b z10.b, p3/Z, [x11, x12] : ld1b (%x11,%x12)[1byte] %p3/z -> %z10.b +a40e51ac : ld1b z12.b, p4/Z, [x13, x14] : ld1b (%x13,%x14)[1byte] %p4/z -> %z12.b +a41051ee : ld1b z14.b, p4/Z, [x15, x16] : ld1b (%x15,%x16)[1byte] %p4/z -> %z14.b +a4125630 : ld1b z16.b, p5/Z, [x17, x18] : ld1b (%x17,%x18)[1byte] %p5/z -> %z16.b +a4145671 : ld1b z17.b, p5/Z, [x19, x20] : ld1b (%x19,%x20)[1byte] %p5/z -> %z17.b +a41656b3 : ld1b z19.b, p5/Z, [x21, x22] : ld1b (%x21,%x22)[1byte] %p5/z -> %z19.b +a4185af5 : ld1b z21.b, p6/Z, [x23, x24] : ld1b (%x23,%x24)[1byte] %p6/z -> %z21.b +a4195b17 : ld1b z23.b, p6/Z, [x24, x25] : ld1b (%x24,%x25)[1byte] %p6/z -> %z23.b +a41b5f59 : ld1b z25.b, p7/Z, [x26, x27] : ld1b (%x26,%x27)[1byte] %p7/z -> %z25.b +a41d5f9b : ld1b z27.b, p7/Z, [x28, x29] : ld1b (%x28,%x29)[1byte] %p7/z -> %z27.b +a41e5fff : ld1b z31.b, p7/Z, [sp, x30] : ld1b (%sp,%x30)[1byte] %p7/z -> %z31.b # LD1B { .B }, /Z, [{, #, MUL VL}] (LD1B-Z.P.BI-U8) -a408a000 : ld1b z0.b, p0/Z, [x0, #-8, MUL VL] : ld1b -0x0100(%x0)[32byte] 
%p0/z -> %z0.b -a409a482 : ld1b z2.b, p1/Z, [x4, #-7, MUL VL] : ld1b -0xe0(%x4)[32byte] %p1/z -> %z2.b -a40aa8c4 : ld1b z4.b, p2/Z, [x6, #-6, MUL VL] : ld1b -0xc0(%x6)[32byte] %p2/z -> %z4.b -a40ba906 : ld1b z6.b, p2/Z, [x8, #-5, MUL VL] : ld1b -0xa0(%x8)[32byte] %p2/z -> %z6.b -a40cad48 : ld1b z8.b, p3/Z, [x10, #-4, MUL VL] : ld1b -0x80(%x10)[32byte] %p3/z -> %z8.b -a40dad6a : ld1b z10.b, p3/Z, [x11, #-3, MUL VL] : ld1b -0x60(%x11)[32byte] %p3/z -> %z10.b -a40eb1ac : ld1b z12.b, p4/Z, [x13, #-2, MUL VL] : ld1b -0x40(%x13)[32byte] %p4/z -> %z12.b -a40fb1ee : ld1b z14.b, p4/Z, [x15, #-1, MUL VL] : ld1b -0x20(%x15)[32byte] %p4/z -> %z14.b -a400b630 : ld1b z16.b, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[32byte] %p5/z -> %z16.b -a400b671 : ld1b z17.b, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[32byte] %p5/z -> %z17.b -a401b6b3 : ld1b z19.b, p5/Z, [x21, #1, MUL VL] : ld1b +0x20(%x21)[32byte] %p5/z -> %z19.b -a402baf5 : ld1b z21.b, p6/Z, [x23, #2, MUL VL] : ld1b +0x40(%x23)[32byte] %p6/z -> %z21.b -a403bb17 : ld1b z23.b, p6/Z, [x24, #3, MUL VL] : ld1b +0x60(%x24)[32byte] %p6/z -> %z23.b -a404bf59 : ld1b z25.b, p7/Z, [x26, #4, MUL VL] : ld1b +0x80(%x26)[32byte] %p7/z -> %z25.b -a405bf9b : ld1b z27.b, p7/Z, [x28, #5, MUL VL] : ld1b +0xa0(%x28)[32byte] %p7/z -> %z27.b -a407bfff : ld1b z31.b, p7/Z, [sp, #7, MUL VL] : ld1b +0xe0(%sp)[32byte] %p7/z -> %z31.b +a408a000 : ld1b z0.b, p0/Z, [x0, #-8, MUL VL] : ld1b -0x0100(%x0)[1byte] %p0/z -> %z0.b +a409a482 : ld1b z2.b, p1/Z, [x4, #-7, MUL VL] : ld1b -0xe0(%x4)[1byte] %p1/z -> %z2.b +a40aa8c4 : ld1b z4.b, p2/Z, [x6, #-6, MUL VL] : ld1b -0xc0(%x6)[1byte] %p2/z -> %z4.b +a40ba906 : ld1b z6.b, p2/Z, [x8, #-5, MUL VL] : ld1b -0xa0(%x8)[1byte] %p2/z -> %z6.b +a40cad48 : ld1b z8.b, p3/Z, [x10, #-4, MUL VL] : ld1b -0x80(%x10)[1byte] %p3/z -> %z8.b +a40dad6a : ld1b z10.b, p3/Z, [x11, #-3, MUL VL] : ld1b -0x60(%x11)[1byte] %p3/z -> %z10.b +a40eb1ac : ld1b z12.b, p4/Z, [x13, #-2, MUL VL] : ld1b -0x40(%x13)[1byte] %p4/z -> %z12.b +a40fb1ee : ld1b 
z14.b, p4/Z, [x15, #-1, MUL VL] : ld1b -0x20(%x15)[1byte] %p4/z -> %z14.b +a400b630 : ld1b z16.b, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[1byte] %p5/z -> %z16.b +a400b671 : ld1b z17.b, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[1byte] %p5/z -> %z17.b +a401b6b3 : ld1b z19.b, p5/Z, [x21, #1, MUL VL] : ld1b +0x20(%x21)[1byte] %p5/z -> %z19.b +a402baf5 : ld1b z21.b, p6/Z, [x23, #2, MUL VL] : ld1b +0x40(%x23)[1byte] %p6/z -> %z21.b +a403bb17 : ld1b z23.b, p6/Z, [x24, #3, MUL VL] : ld1b +0x60(%x24)[1byte] %p6/z -> %z23.b +a404bf59 : ld1b z25.b, p7/Z, [x26, #4, MUL VL] : ld1b +0x80(%x26)[1byte] %p7/z -> %z25.b +a405bf9b : ld1b z27.b, p7/Z, [x28, #5, MUL VL] : ld1b +0xa0(%x28)[1byte] %p7/z -> %z27.b +a407bfff : ld1b z31.b, p7/Z, [sp, #7, MUL VL] : ld1b +0xe0(%sp)[1byte] %p7/z -> %z31.b # LD1B { .H }, /Z, [, ] (LD1B-Z.P.BR-U16) -a4204000 : ld1b z0.h, p0/Z, [x0, x0] : ld1b (%x0,%x0)[16byte] %p0/z -> %z0.h -a4254482 : ld1b z2.h, p1/Z, [x4, x5] : ld1b (%x4,%x5)[16byte] %p1/z -> %z2.h -a42748c4 : ld1b z4.h, p2/Z, [x6, x7] : ld1b (%x6,%x7)[16byte] %p2/z -> %z4.h -a4294906 : ld1b z6.h, p2/Z, [x8, x9] : ld1b (%x8,%x9)[16byte] %p2/z -> %z6.h -a42b4d48 : ld1b z8.h, p3/Z, [x10, x11] : ld1b (%x10,%x11)[16byte] %p3/z -> %z8.h -a42c4d6a : ld1b z10.h, p3/Z, [x11, x12] : ld1b (%x11,%x12)[16byte] %p3/z -> %z10.h -a42e51ac : ld1b z12.h, p4/Z, [x13, x14] : ld1b (%x13,%x14)[16byte] %p4/z -> %z12.h -a43051ee : ld1b z14.h, p4/Z, [x15, x16] : ld1b (%x15,%x16)[16byte] %p4/z -> %z14.h -a4325630 : ld1b z16.h, p5/Z, [x17, x18] : ld1b (%x17,%x18)[16byte] %p5/z -> %z16.h -a4345671 : ld1b z17.h, p5/Z, [x19, x20] : ld1b (%x19,%x20)[16byte] %p5/z -> %z17.h -a43656b3 : ld1b z19.h, p5/Z, [x21, x22] : ld1b (%x21,%x22)[16byte] %p5/z -> %z19.h -a4385af5 : ld1b z21.h, p6/Z, [x23, x24] : ld1b (%x23,%x24)[16byte] %p6/z -> %z21.h -a4395b17 : ld1b z23.h, p6/Z, [x24, x25] : ld1b (%x24,%x25)[16byte] %p6/z -> %z23.h -a43b5f59 : ld1b z25.h, p7/Z, [x26, x27] : ld1b (%x26,%x27)[16byte] %p7/z -> %z25.h -a43d5f9b : ld1b z27.h, 
p7/Z, [x28, x29] : ld1b (%x28,%x29)[16byte] %p7/z -> %z27.h -a43e5fff : ld1b z31.h, p7/Z, [sp, x30] : ld1b (%sp,%x30)[16byte] %p7/z -> %z31.h +a4204000 : ld1b z0.h, p0/Z, [x0, x0] : ld1b (%x0,%x0)[1byte] %p0/z -> %z0.h +a4254482 : ld1b z2.h, p1/Z, [x4, x5] : ld1b (%x4,%x5)[1byte] %p1/z -> %z2.h +a42748c4 : ld1b z4.h, p2/Z, [x6, x7] : ld1b (%x6,%x7)[1byte] %p2/z -> %z4.h +a4294906 : ld1b z6.h, p2/Z, [x8, x9] : ld1b (%x8,%x9)[1byte] %p2/z -> %z6.h +a42b4d48 : ld1b z8.h, p3/Z, [x10, x11] : ld1b (%x10,%x11)[1byte] %p3/z -> %z8.h +a42c4d6a : ld1b z10.h, p3/Z, [x11, x12] : ld1b (%x11,%x12)[1byte] %p3/z -> %z10.h +a42e51ac : ld1b z12.h, p4/Z, [x13, x14] : ld1b (%x13,%x14)[1byte] %p4/z -> %z12.h +a43051ee : ld1b z14.h, p4/Z, [x15, x16] : ld1b (%x15,%x16)[1byte] %p4/z -> %z14.h +a4325630 : ld1b z16.h, p5/Z, [x17, x18] : ld1b (%x17,%x18)[1byte] %p5/z -> %z16.h +a4345671 : ld1b z17.h, p5/Z, [x19, x20] : ld1b (%x19,%x20)[1byte] %p5/z -> %z17.h +a43656b3 : ld1b z19.h, p5/Z, [x21, x22] : ld1b (%x21,%x22)[1byte] %p5/z -> %z19.h +a4385af5 : ld1b z21.h, p6/Z, [x23, x24] : ld1b (%x23,%x24)[1byte] %p6/z -> %z21.h +a4395b17 : ld1b z23.h, p6/Z, [x24, x25] : ld1b (%x24,%x25)[1byte] %p6/z -> %z23.h +a43b5f59 : ld1b z25.h, p7/Z, [x26, x27] : ld1b (%x26,%x27)[1byte] %p7/z -> %z25.h +a43d5f9b : ld1b z27.h, p7/Z, [x28, x29] : ld1b (%x28,%x29)[1byte] %p7/z -> %z27.h +a43e5fff : ld1b z31.h, p7/Z, [sp, x30] : ld1b (%sp,%x30)[1byte] %p7/z -> %z31.h # LD1B { .H }, /Z, [{, #, MUL VL}] (LD1B-Z.P.BI-U16) -a428a000 : ld1b z0.h, p0/Z, [x0, #-8, MUL VL] : ld1b -0x80(%x0)[16byte] %p0/z -> %z0.h -a429a482 : ld1b z2.h, p1/Z, [x4, #-7, MUL VL] : ld1b -0x70(%x4)[16byte] %p1/z -> %z2.h -a42aa8c4 : ld1b z4.h, p2/Z, [x6, #-6, MUL VL] : ld1b -0x60(%x6)[16byte] %p2/z -> %z4.h -a42ba906 : ld1b z6.h, p2/Z, [x8, #-5, MUL VL] : ld1b -0x50(%x8)[16byte] %p2/z -> %z6.h -a42cad48 : ld1b z8.h, p3/Z, [x10, #-4, MUL VL] : ld1b -0x40(%x10)[16byte] %p3/z -> %z8.h -a42dad6a : ld1b z10.h, p3/Z, [x11, #-3, MUL VL] : ld1b 
-0x30(%x11)[16byte] %p3/z -> %z10.h -a42eb1ac : ld1b z12.h, p4/Z, [x13, #-2, MUL VL] : ld1b -0x20(%x13)[16byte] %p4/z -> %z12.h -a42fb1ee : ld1b z14.h, p4/Z, [x15, #-1, MUL VL] : ld1b -0x10(%x15)[16byte] %p4/z -> %z14.h -a420b630 : ld1b z16.h, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[16byte] %p5/z -> %z16.h -a420b671 : ld1b z17.h, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[16byte] %p5/z -> %z17.h -a421b6b3 : ld1b z19.h, p5/Z, [x21, #1, MUL VL] : ld1b +0x10(%x21)[16byte] %p5/z -> %z19.h -a422baf5 : ld1b z21.h, p6/Z, [x23, #2, MUL VL] : ld1b +0x20(%x23)[16byte] %p6/z -> %z21.h -a423bb17 : ld1b z23.h, p6/Z, [x24, #3, MUL VL] : ld1b +0x30(%x24)[16byte] %p6/z -> %z23.h -a424bf59 : ld1b z25.h, p7/Z, [x26, #4, MUL VL] : ld1b +0x40(%x26)[16byte] %p7/z -> %z25.h -a425bf9b : ld1b z27.h, p7/Z, [x28, #5, MUL VL] : ld1b +0x50(%x28)[16byte] %p7/z -> %z27.h -a427bfff : ld1b z31.h, p7/Z, [sp, #7, MUL VL] : ld1b +0x70(%sp)[16byte] %p7/z -> %z31.h +a428a000 : ld1b z0.h, p0/Z, [x0, #-8, MUL VL] : ld1b -0x80(%x0)[1byte] %p0/z -> %z0.h +a429a482 : ld1b z2.h, p1/Z, [x4, #-7, MUL VL] : ld1b -0x70(%x4)[1byte] %p1/z -> %z2.h +a42aa8c4 : ld1b z4.h, p2/Z, [x6, #-6, MUL VL] : ld1b -0x60(%x6)[1byte] %p2/z -> %z4.h +a42ba906 : ld1b z6.h, p2/Z, [x8, #-5, MUL VL] : ld1b -0x50(%x8)[1byte] %p2/z -> %z6.h +a42cad48 : ld1b z8.h, p3/Z, [x10, #-4, MUL VL] : ld1b -0x40(%x10)[1byte] %p3/z -> %z8.h +a42dad6a : ld1b z10.h, p3/Z, [x11, #-3, MUL VL] : ld1b -0x30(%x11)[1byte] %p3/z -> %z10.h +a42eb1ac : ld1b z12.h, p4/Z, [x13, #-2, MUL VL] : ld1b -0x20(%x13)[1byte] %p4/z -> %z12.h +a42fb1ee : ld1b z14.h, p4/Z, [x15, #-1, MUL VL] : ld1b -0x10(%x15)[1byte] %p4/z -> %z14.h +a420b630 : ld1b z16.h, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[1byte] %p5/z -> %z16.h +a420b671 : ld1b z17.h, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[1byte] %p5/z -> %z17.h +a421b6b3 : ld1b z19.h, p5/Z, [x21, #1, MUL VL] : ld1b +0x10(%x21)[1byte] %p5/z -> %z19.h +a422baf5 : ld1b z21.h, p6/Z, [x23, #2, MUL VL] : ld1b +0x20(%x23)[1byte] %p6/z -> %z21.h 
+a423bb17 : ld1b z23.h, p6/Z, [x24, #3, MUL VL] : ld1b +0x30(%x24)[1byte] %p6/z -> %z23.h +a424bf59 : ld1b z25.h, p7/Z, [x26, #4, MUL VL] : ld1b +0x40(%x26)[1byte] %p7/z -> %z25.h +a425bf9b : ld1b z27.h, p7/Z, [x28, #5, MUL VL] : ld1b +0x50(%x28)[1byte] %p7/z -> %z27.h +a427bfff : ld1b z31.h, p7/Z, [sp, #7, MUL VL] : ld1b +0x70(%sp)[1byte] %p7/z -> %z31.h # LD1B { .S }, /Z, [, ] (LD1B-Z.P.BR-U32) -a4404000 : ld1b z0.s, p0/Z, [x0, x0] : ld1b (%x0,%x0)[8byte] %p0/z -> %z0.s -a4454482 : ld1b z2.s, p1/Z, [x4, x5] : ld1b (%x4,%x5)[8byte] %p1/z -> %z2.s -a44748c4 : ld1b z4.s, p2/Z, [x6, x7] : ld1b (%x6,%x7)[8byte] %p2/z -> %z4.s -a4494906 : ld1b z6.s, p2/Z, [x8, x9] : ld1b (%x8,%x9)[8byte] %p2/z -> %z6.s -a44b4d48 : ld1b z8.s, p3/Z, [x10, x11] : ld1b (%x10,%x11)[8byte] %p3/z -> %z8.s -a44c4d6a : ld1b z10.s, p3/Z, [x11, x12] : ld1b (%x11,%x12)[8byte] %p3/z -> %z10.s -a44e51ac : ld1b z12.s, p4/Z, [x13, x14] : ld1b (%x13,%x14)[8byte] %p4/z -> %z12.s -a45051ee : ld1b z14.s, p4/Z, [x15, x16] : ld1b (%x15,%x16)[8byte] %p4/z -> %z14.s -a4525630 : ld1b z16.s, p5/Z, [x17, x18] : ld1b (%x17,%x18)[8byte] %p5/z -> %z16.s -a4545671 : ld1b z17.s, p5/Z, [x19, x20] : ld1b (%x19,%x20)[8byte] %p5/z -> %z17.s -a45656b3 : ld1b z19.s, p5/Z, [x21, x22] : ld1b (%x21,%x22)[8byte] %p5/z -> %z19.s -a4585af5 : ld1b z21.s, p6/Z, [x23, x24] : ld1b (%x23,%x24)[8byte] %p6/z -> %z21.s -a4595b17 : ld1b z23.s, p6/Z, [x24, x25] : ld1b (%x24,%x25)[8byte] %p6/z -> %z23.s -a45b5f59 : ld1b z25.s, p7/Z, [x26, x27] : ld1b (%x26,%x27)[8byte] %p7/z -> %z25.s -a45d5f9b : ld1b z27.s, p7/Z, [x28, x29] : ld1b (%x28,%x29)[8byte] %p7/z -> %z27.s -a45e5fff : ld1b z31.s, p7/Z, [sp, x30] : ld1b (%sp,%x30)[8byte] %p7/z -> %z31.s +a4404000 : ld1b z0.s, p0/Z, [x0, x0] : ld1b (%x0,%x0)[1byte] %p0/z -> %z0.s +a4454482 : ld1b z2.s, p1/Z, [x4, x5] : ld1b (%x4,%x5)[1byte] %p1/z -> %z2.s +a44748c4 : ld1b z4.s, p2/Z, [x6, x7] : ld1b (%x6,%x7)[1byte] %p2/z -> %z4.s +a4494906 : ld1b z6.s, p2/Z, [x8, x9] : ld1b (%x8,%x9)[1byte] %p2/z 
-> %z6.s +a44b4d48 : ld1b z8.s, p3/Z, [x10, x11] : ld1b (%x10,%x11)[1byte] %p3/z -> %z8.s +a44c4d6a : ld1b z10.s, p3/Z, [x11, x12] : ld1b (%x11,%x12)[1byte] %p3/z -> %z10.s +a44e51ac : ld1b z12.s, p4/Z, [x13, x14] : ld1b (%x13,%x14)[1byte] %p4/z -> %z12.s +a45051ee : ld1b z14.s, p4/Z, [x15, x16] : ld1b (%x15,%x16)[1byte] %p4/z -> %z14.s +a4525630 : ld1b z16.s, p5/Z, [x17, x18] : ld1b (%x17,%x18)[1byte] %p5/z -> %z16.s +a4545671 : ld1b z17.s, p5/Z, [x19, x20] : ld1b (%x19,%x20)[1byte] %p5/z -> %z17.s +a45656b3 : ld1b z19.s, p5/Z, [x21, x22] : ld1b (%x21,%x22)[1byte] %p5/z -> %z19.s +a4585af5 : ld1b z21.s, p6/Z, [x23, x24] : ld1b (%x23,%x24)[1byte] %p6/z -> %z21.s +a4595b17 : ld1b z23.s, p6/Z, [x24, x25] : ld1b (%x24,%x25)[1byte] %p6/z -> %z23.s +a45b5f59 : ld1b z25.s, p7/Z, [x26, x27] : ld1b (%x26,%x27)[1byte] %p7/z -> %z25.s +a45d5f9b : ld1b z27.s, p7/Z, [x28, x29] : ld1b (%x28,%x29)[1byte] %p7/z -> %z27.s +a45e5fff : ld1b z31.s, p7/Z, [sp, x30] : ld1b (%sp,%x30)[1byte] %p7/z -> %z31.s # LD1B { .S }, /Z, [{, #, MUL VL}] (LD1B-Z.P.BI-U32) -a448a000 : ld1b z0.s, p0/Z, [x0, #-8, MUL VL] : ld1b -0x40(%x0)[8byte] %p0/z -> %z0.s -a449a482 : ld1b z2.s, p1/Z, [x4, #-7, MUL VL] : ld1b -0x38(%x4)[8byte] %p1/z -> %z2.s -a44aa8c4 : ld1b z4.s, p2/Z, [x6, #-6, MUL VL] : ld1b -0x30(%x6)[8byte] %p2/z -> %z4.s -a44ba906 : ld1b z6.s, p2/Z, [x8, #-5, MUL VL] : ld1b -0x28(%x8)[8byte] %p2/z -> %z6.s -a44cad48 : ld1b z8.s, p3/Z, [x10, #-4, MUL VL] : ld1b -0x20(%x10)[8byte] %p3/z -> %z8.s -a44dad6a : ld1b z10.s, p3/Z, [x11, #-3, MUL VL] : ld1b -0x18(%x11)[8byte] %p3/z -> %z10.s -a44eb1ac : ld1b z12.s, p4/Z, [x13, #-2, MUL VL] : ld1b -0x10(%x13)[8byte] %p4/z -> %z12.s -a44fb1ee : ld1b z14.s, p4/Z, [x15, #-1, MUL VL] : ld1b -0x08(%x15)[8byte] %p4/z -> %z14.s -a440b630 : ld1b z16.s, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[8byte] %p5/z -> %z16.s -a440b671 : ld1b z17.s, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[8byte] %p5/z -> %z17.s -a441b6b3 : ld1b z19.s, p5/Z, [x21, #1, MUL VL] : ld1b 
+0x08(%x21)[8byte] %p5/z -> %z19.s -a442baf5 : ld1b z21.s, p6/Z, [x23, #2, MUL VL] : ld1b +0x10(%x23)[8byte] %p6/z -> %z21.s -a443bb17 : ld1b z23.s, p6/Z, [x24, #3, MUL VL] : ld1b +0x18(%x24)[8byte] %p6/z -> %z23.s -a444bf59 : ld1b z25.s, p7/Z, [x26, #4, MUL VL] : ld1b +0x20(%x26)[8byte] %p7/z -> %z25.s -a445bf9b : ld1b z27.s, p7/Z, [x28, #5, MUL VL] : ld1b +0x28(%x28)[8byte] %p7/z -> %z27.s -a447bfff : ld1b z31.s, p7/Z, [sp, #7, MUL VL] : ld1b +0x38(%sp)[8byte] %p7/z -> %z31.s +a448a000 : ld1b z0.s, p0/Z, [x0, #-8, MUL VL] : ld1b -0x40(%x0)[1byte] %p0/z -> %z0.s +a449a482 : ld1b z2.s, p1/Z, [x4, #-7, MUL VL] : ld1b -0x38(%x4)[1byte] %p1/z -> %z2.s +a44aa8c4 : ld1b z4.s, p2/Z, [x6, #-6, MUL VL] : ld1b -0x30(%x6)[1byte] %p2/z -> %z4.s +a44ba906 : ld1b z6.s, p2/Z, [x8, #-5, MUL VL] : ld1b -0x28(%x8)[1byte] %p2/z -> %z6.s +a44cad48 : ld1b z8.s, p3/Z, [x10, #-4, MUL VL] : ld1b -0x20(%x10)[1byte] %p3/z -> %z8.s +a44dad6a : ld1b z10.s, p3/Z, [x11, #-3, MUL VL] : ld1b -0x18(%x11)[1byte] %p3/z -> %z10.s +a44eb1ac : ld1b z12.s, p4/Z, [x13, #-2, MUL VL] : ld1b -0x10(%x13)[1byte] %p4/z -> %z12.s +a44fb1ee : ld1b z14.s, p4/Z, [x15, #-1, MUL VL] : ld1b -0x08(%x15)[1byte] %p4/z -> %z14.s +a440b630 : ld1b z16.s, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[1byte] %p5/z -> %z16.s +a440b671 : ld1b z17.s, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[1byte] %p5/z -> %z17.s +a441b6b3 : ld1b z19.s, p5/Z, [x21, #1, MUL VL] : ld1b +0x08(%x21)[1byte] %p5/z -> %z19.s +a442baf5 : ld1b z21.s, p6/Z, [x23, #2, MUL VL] : ld1b +0x10(%x23)[1byte] %p6/z -> %z21.s +a443bb17 : ld1b z23.s, p6/Z, [x24, #3, MUL VL] : ld1b +0x18(%x24)[1byte] %p6/z -> %z23.s +a444bf59 : ld1b z25.s, p7/Z, [x26, #4, MUL VL] : ld1b +0x20(%x26)[1byte] %p7/z -> %z25.s +a445bf9b : ld1b z27.s, p7/Z, [x28, #5, MUL VL] : ld1b +0x28(%x28)[1byte] %p7/z -> %z27.s +a447bfff : ld1b z31.s, p7/Z, [sp, #7, MUL VL] : ld1b +0x38(%sp)[1byte] %p7/z -> %z31.s # LD1B { .D }, /Z, [, ] (LD1B-Z.P.BR-U64) -a4604000 : ld1b z0.d, p0/Z, [x0, x0] : ld1b 
(%x0,%x0)[4byte] %p0/z -> %z0.d -a4654482 : ld1b z2.d, p1/Z, [x4, x5] : ld1b (%x4,%x5)[4byte] %p1/z -> %z2.d -a46748c4 : ld1b z4.d, p2/Z, [x6, x7] : ld1b (%x6,%x7)[4byte] %p2/z -> %z4.d -a4694906 : ld1b z6.d, p2/Z, [x8, x9] : ld1b (%x8,%x9)[4byte] %p2/z -> %z6.d -a46b4d48 : ld1b z8.d, p3/Z, [x10, x11] : ld1b (%x10,%x11)[4byte] %p3/z -> %z8.d -a46c4d6a : ld1b z10.d, p3/Z, [x11, x12] : ld1b (%x11,%x12)[4byte] %p3/z -> %z10.d -a46e51ac : ld1b z12.d, p4/Z, [x13, x14] : ld1b (%x13,%x14)[4byte] %p4/z -> %z12.d -a47051ee : ld1b z14.d, p4/Z, [x15, x16] : ld1b (%x15,%x16)[4byte] %p4/z -> %z14.d -a4725630 : ld1b z16.d, p5/Z, [x17, x18] : ld1b (%x17,%x18)[4byte] %p5/z -> %z16.d -a4745671 : ld1b z17.d, p5/Z, [x19, x20] : ld1b (%x19,%x20)[4byte] %p5/z -> %z17.d -a47656b3 : ld1b z19.d, p5/Z, [x21, x22] : ld1b (%x21,%x22)[4byte] %p5/z -> %z19.d -a4785af5 : ld1b z21.d, p6/Z, [x23, x24] : ld1b (%x23,%x24)[4byte] %p6/z -> %z21.d -a4795b17 : ld1b z23.d, p6/Z, [x24, x25] : ld1b (%x24,%x25)[4byte] %p6/z -> %z23.d -a47b5f59 : ld1b z25.d, p7/Z, [x26, x27] : ld1b (%x26,%x27)[4byte] %p7/z -> %z25.d -a47d5f9b : ld1b z27.d, p7/Z, [x28, x29] : ld1b (%x28,%x29)[4byte] %p7/z -> %z27.d -a47e5fff : ld1b z31.d, p7/Z, [sp, x30] : ld1b (%sp,%x30)[4byte] %p7/z -> %z31.d +a4604000 : ld1b z0.d, p0/Z, [x0, x0] : ld1b (%x0,%x0)[1byte] %p0/z -> %z0.d +a4654482 : ld1b z2.d, p1/Z, [x4, x5] : ld1b (%x4,%x5)[1byte] %p1/z -> %z2.d +a46748c4 : ld1b z4.d, p2/Z, [x6, x7] : ld1b (%x6,%x7)[1byte] %p2/z -> %z4.d +a4694906 : ld1b z6.d, p2/Z, [x8, x9] : ld1b (%x8,%x9)[1byte] %p2/z -> %z6.d +a46b4d48 : ld1b z8.d, p3/Z, [x10, x11] : ld1b (%x10,%x11)[1byte] %p3/z -> %z8.d +a46c4d6a : ld1b z10.d, p3/Z, [x11, x12] : ld1b (%x11,%x12)[1byte] %p3/z -> %z10.d +a46e51ac : ld1b z12.d, p4/Z, [x13, x14] : ld1b (%x13,%x14)[1byte] %p4/z -> %z12.d +a47051ee : ld1b z14.d, p4/Z, [x15, x16] : ld1b (%x15,%x16)[1byte] %p4/z -> %z14.d +a4725630 : ld1b z16.d, p5/Z, [x17, x18] : ld1b (%x17,%x18)[1byte] %p5/z -> %z16.d +a4745671 : ld1b z17.d, 
p5/Z, [x19, x20] : ld1b (%x19,%x20)[1byte] %p5/z -> %z17.d +a47656b3 : ld1b z19.d, p5/Z, [x21, x22] : ld1b (%x21,%x22)[1byte] %p5/z -> %z19.d +a4785af5 : ld1b z21.d, p6/Z, [x23, x24] : ld1b (%x23,%x24)[1byte] %p6/z -> %z21.d +a4795b17 : ld1b z23.d, p6/Z, [x24, x25] : ld1b (%x24,%x25)[1byte] %p6/z -> %z23.d +a47b5f59 : ld1b z25.d, p7/Z, [x26, x27] : ld1b (%x26,%x27)[1byte] %p7/z -> %z25.d +a47d5f9b : ld1b z27.d, p7/Z, [x28, x29] : ld1b (%x28,%x29)[1byte] %p7/z -> %z27.d +a47e5fff : ld1b z31.d, p7/Z, [sp, x30] : ld1b (%sp,%x30)[1byte] %p7/z -> %z31.d # LD1B { .D }, /Z, [{, #, MUL VL}] (LD1B-Z.P.BI-U64) -a468a000 : ld1b z0.d, p0/Z, [x0, #-8, MUL VL] : ld1b -0x20(%x0)[4byte] %p0/z -> %z0.d -a469a482 : ld1b z2.d, p1/Z, [x4, #-7, MUL VL] : ld1b -0x1c(%x4)[4byte] %p1/z -> %z2.d -a46aa8c4 : ld1b z4.d, p2/Z, [x6, #-6, MUL VL] : ld1b -0x18(%x6)[4byte] %p2/z -> %z4.d -a46ba906 : ld1b z6.d, p2/Z, [x8, #-5, MUL VL] : ld1b -0x14(%x8)[4byte] %p2/z -> %z6.d -a46cad48 : ld1b z8.d, p3/Z, [x10, #-4, MUL VL] : ld1b -0x10(%x10)[4byte] %p3/z -> %z8.d -a46dad6a : ld1b z10.d, p3/Z, [x11, #-3, MUL VL] : ld1b -0x0c(%x11)[4byte] %p3/z -> %z10.d -a46eb1ac : ld1b z12.d, p4/Z, [x13, #-2, MUL VL] : ld1b -0x08(%x13)[4byte] %p4/z -> %z12.d -a46fb1ee : ld1b z14.d, p4/Z, [x15, #-1, MUL VL] : ld1b -0x04(%x15)[4byte] %p4/z -> %z14.d -a460b630 : ld1b z16.d, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[4byte] %p5/z -> %z16.d -a460b671 : ld1b z17.d, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[4byte] %p5/z -> %z17.d -a461b6b3 : ld1b z19.d, p5/Z, [x21, #1, MUL VL] : ld1b +0x04(%x21)[4byte] %p5/z -> %z19.d -a462baf5 : ld1b z21.d, p6/Z, [x23, #2, MUL VL] : ld1b +0x08(%x23)[4byte] %p6/z -> %z21.d -a463bb17 : ld1b z23.d, p6/Z, [x24, #3, MUL VL] : ld1b +0x0c(%x24)[4byte] %p6/z -> %z23.d -a464bf59 : ld1b z25.d, p7/Z, [x26, #4, MUL VL] : ld1b +0x10(%x26)[4byte] %p7/z -> %z25.d -a465bf9b : ld1b z27.d, p7/Z, [x28, #5, MUL VL] : ld1b +0x14(%x28)[4byte] %p7/z -> %z27.d -a467bfff : ld1b z31.d, p7/Z, [sp, #7, MUL VL] : ld1b 
+0x1c(%sp)[4byte] %p7/z -> %z31.d +a468a000 : ld1b z0.d, p0/Z, [x0, #-8, MUL VL] : ld1b -0x20(%x0)[1byte] %p0/z -> %z0.d +a469a482 : ld1b z2.d, p1/Z, [x4, #-7, MUL VL] : ld1b -0x1c(%x4)[1byte] %p1/z -> %z2.d +a46aa8c4 : ld1b z4.d, p2/Z, [x6, #-6, MUL VL] : ld1b -0x18(%x6)[1byte] %p2/z -> %z4.d +a46ba906 : ld1b z6.d, p2/Z, [x8, #-5, MUL VL] : ld1b -0x14(%x8)[1byte] %p2/z -> %z6.d +a46cad48 : ld1b z8.d, p3/Z, [x10, #-4, MUL VL] : ld1b -0x10(%x10)[1byte] %p3/z -> %z8.d +a46dad6a : ld1b z10.d, p3/Z, [x11, #-3, MUL VL] : ld1b -0x0c(%x11)[1byte] %p3/z -> %z10.d +a46eb1ac : ld1b z12.d, p4/Z, [x13, #-2, MUL VL] : ld1b -0x08(%x13)[1byte] %p4/z -> %z12.d +a46fb1ee : ld1b z14.d, p4/Z, [x15, #-1, MUL VL] : ld1b -0x04(%x15)[1byte] %p4/z -> %z14.d +a460b630 : ld1b z16.d, p5/Z, [x17, #0, MUL VL] : ld1b (%x17)[1byte] %p5/z -> %z16.d +a460b671 : ld1b z17.d, p5/Z, [x19, #0, MUL VL] : ld1b (%x19)[1byte] %p5/z -> %z17.d +a461b6b3 : ld1b z19.d, p5/Z, [x21, #1, MUL VL] : ld1b +0x04(%x21)[1byte] %p5/z -> %z19.d +a462baf5 : ld1b z21.d, p6/Z, [x23, #2, MUL VL] : ld1b +0x08(%x23)[1byte] %p6/z -> %z21.d +a463bb17 : ld1b z23.d, p6/Z, [x24, #3, MUL VL] : ld1b +0x0c(%x24)[1byte] %p6/z -> %z23.d +a464bf59 : ld1b z25.d, p7/Z, [x26, #4, MUL VL] : ld1b +0x10(%x26)[1byte] %p7/z -> %z25.d +a465bf9b : ld1b z27.d, p7/Z, [x28, #5, MUL VL] : ld1b +0x14(%x28)[1byte] %p7/z -> %z27.d +a467bfff : ld1b z31.d, p7/Z, [sp, #7, MUL VL] : ld1b +0x1c(%sp)[1byte] %p7/z -> %z31.d # LD1B { .D }, /Z, [, .D, ] (LD1B-Z.P.BZ-D.x32.unscaled) -c4004000 : ld1b z0.d, p0/Z, [x0, z0.d, UXTW] : ld1b (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d -c4054482 : ld1b z2.d, p1/Z, [x4, z5.d, UXTW] : ld1b (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d -c40748c4 : ld1b z4.d, p2/Z, [x6, z7.d, UXTW] : ld1b (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d -c4094906 : ld1b z6.d, p2/Z, [x8, z9.d, UXTW] : ld1b (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d -c40b4d48 : ld1b z8.d, p3/Z, [x10, z11.d, UXTW] : ld1b (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d -c40d4d6a : ld1b z10.d, 
p3/Z, [x11, z13.d, UXTW] : ld1b (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d -c40f51ac : ld1b z12.d, p4/Z, [x13, z15.d, UXTW] : ld1b (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d -c41151ee : ld1b z14.d, p4/Z, [x15, z17.d, UXTW] : ld1b (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d -c4135630 : ld1b z16.d, p5/Z, [x17, z19.d, UXTW] : ld1b (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d -c4145671 : ld1b z17.d, p5/Z, [x19, z20.d, UXTW] : ld1b (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d -c41656b3 : ld1b z19.d, p5/Z, [x21, z22.d, UXTW] : ld1b (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d -c4185af5 : ld1b z21.d, p6/Z, [x23, z24.d, UXTW] : ld1b (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d -c41a5b17 : ld1b z23.d, p6/Z, [x24, z26.d, UXTW] : ld1b (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d -c41c5f59 : ld1b z25.d, p7/Z, [x26, z28.d, UXTW] : ld1b (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d -c41e5f9b : ld1b z27.d, p7/Z, [x28, z30.d, UXTW] : ld1b (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d -c41f5fff : ld1b z31.d, p7/Z, [sp, z31.d, UXTW] : ld1b (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d -c4404000 : ld1b z0.d, p0/Z, [x0, z0.d, SXTW] : ld1b (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d -c4454482 : ld1b z2.d, p1/Z, [x4, z5.d, SXTW] : ld1b (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d -c44748c4 : ld1b z4.d, p2/Z, [x6, z7.d, SXTW] : ld1b (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d -c4494906 : ld1b z6.d, p2/Z, [x8, z9.d, SXTW] : ld1b (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d -c44b4d48 : ld1b z8.d, p3/Z, [x10, z11.d, SXTW] : ld1b (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d -c44d4d6a : ld1b z10.d, p3/Z, [x11, z13.d, SXTW] : ld1b (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d -c44f51ac : ld1b z12.d, p4/Z, [x13, z15.d, SXTW] : ld1b (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d -c45151ee : ld1b z14.d, p4/Z, [x15, z17.d, SXTW] : ld1b (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d -c4535630 : ld1b z16.d, p5/Z, [x17, z19.d, SXTW] : ld1b (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d -c4545671 : ld1b z17.d, p5/Z, [x19, z20.d, SXTW] : ld1b (%x19,%z20.d,sxtw)[4byte] %p5/z -> 
%z17.d -c45656b3 : ld1b z19.d, p5/Z, [x21, z22.d, SXTW] : ld1b (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d -c4585af5 : ld1b z21.d, p6/Z, [x23, z24.d, SXTW] : ld1b (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d -c45a5b17 : ld1b z23.d, p6/Z, [x24, z26.d, SXTW] : ld1b (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d -c45c5f59 : ld1b z25.d, p7/Z, [x26, z28.d, SXTW] : ld1b (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d -c45e5f9b : ld1b z27.d, p7/Z, [x28, z30.d, SXTW] : ld1b (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d -c45f5fff : ld1b z31.d, p7/Z, [sp, z31.d, SXTW] : ld1b (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d +c4004000 : ld1b z0.d, p0/Z, [x0, z0.d, UXTW] : ld1b (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d +c4054482 : ld1b z2.d, p1/Z, [x4, z5.d, UXTW] : ld1b (%x4,%z5.d,uxtw)[1byte] %p1/z -> %z2.d +c40748c4 : ld1b z4.d, p2/Z, [x6, z7.d, UXTW] : ld1b (%x6,%z7.d,uxtw)[1byte] %p2/z -> %z4.d +c4094906 : ld1b z6.d, p2/Z, [x8, z9.d, UXTW] : ld1b (%x8,%z9.d,uxtw)[1byte] %p2/z -> %z6.d +c40b4d48 : ld1b z8.d, p3/Z, [x10, z11.d, UXTW] : ld1b (%x10,%z11.d,uxtw)[1byte] %p3/z -> %z8.d +c40d4d6a : ld1b z10.d, p3/Z, [x11, z13.d, UXTW] : ld1b (%x11,%z13.d,uxtw)[1byte] %p3/z -> %z10.d +c40f51ac : ld1b z12.d, p4/Z, [x13, z15.d, UXTW] : ld1b (%x13,%z15.d,uxtw)[1byte] %p4/z -> %z12.d +c41151ee : ld1b z14.d, p4/Z, [x15, z17.d, UXTW] : ld1b (%x15,%z17.d,uxtw)[1byte] %p4/z -> %z14.d +c4135630 : ld1b z16.d, p5/Z, [x17, z19.d, UXTW] : ld1b (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d +c4145671 : ld1b z17.d, p5/Z, [x19, z20.d, UXTW] : ld1b (%x19,%z20.d,uxtw)[1byte] %p5/z -> %z17.d +c41656b3 : ld1b z19.d, p5/Z, [x21, z22.d, UXTW] : ld1b (%x21,%z22.d,uxtw)[1byte] %p5/z -> %z19.d +c4185af5 : ld1b z21.d, p6/Z, [x23, z24.d, UXTW] : ld1b (%x23,%z24.d,uxtw)[1byte] %p6/z -> %z21.d +c41a5b17 : ld1b z23.d, p6/Z, [x24, z26.d, UXTW] : ld1b (%x24,%z26.d,uxtw)[1byte] %p6/z -> %z23.d +c41c5f59 : ld1b z25.d, p7/Z, [x26, z28.d, UXTW] : ld1b (%x26,%z28.d,uxtw)[1byte] %p7/z -> %z25.d +c41e5f9b : ld1b z27.d, p7/Z, [x28, z30.d, UXTW] : ld1b 
(%x28,%z30.d,uxtw)[1byte] %p7/z -> %z27.d +c41f5fff : ld1b z31.d, p7/Z, [sp, z31.d, UXTW] : ld1b (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d +c4404000 : ld1b z0.d, p0/Z, [x0, z0.d, SXTW] : ld1b (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d +c4454482 : ld1b z2.d, p1/Z, [x4, z5.d, SXTW] : ld1b (%x4,%z5.d,sxtw)[1byte] %p1/z -> %z2.d +c44748c4 : ld1b z4.d, p2/Z, [x6, z7.d, SXTW] : ld1b (%x6,%z7.d,sxtw)[1byte] %p2/z -> %z4.d +c4494906 : ld1b z6.d, p2/Z, [x8, z9.d, SXTW] : ld1b (%x8,%z9.d,sxtw)[1byte] %p2/z -> %z6.d +c44b4d48 : ld1b z8.d, p3/Z, [x10, z11.d, SXTW] : ld1b (%x10,%z11.d,sxtw)[1byte] %p3/z -> %z8.d +c44d4d6a : ld1b z10.d, p3/Z, [x11, z13.d, SXTW] : ld1b (%x11,%z13.d,sxtw)[1byte] %p3/z -> %z10.d +c44f51ac : ld1b z12.d, p4/Z, [x13, z15.d, SXTW] : ld1b (%x13,%z15.d,sxtw)[1byte] %p4/z -> %z12.d +c45151ee : ld1b z14.d, p4/Z, [x15, z17.d, SXTW] : ld1b (%x15,%z17.d,sxtw)[1byte] %p4/z -> %z14.d +c4535630 : ld1b z16.d, p5/Z, [x17, z19.d, SXTW] : ld1b (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d +c4545671 : ld1b z17.d, p5/Z, [x19, z20.d, SXTW] : ld1b (%x19,%z20.d,sxtw)[1byte] %p5/z -> %z17.d +c45656b3 : ld1b z19.d, p5/Z, [x21, z22.d, SXTW] : ld1b (%x21,%z22.d,sxtw)[1byte] %p5/z -> %z19.d +c4585af5 : ld1b z21.d, p6/Z, [x23, z24.d, SXTW] : ld1b (%x23,%z24.d,sxtw)[1byte] %p6/z -> %z21.d +c45a5b17 : ld1b z23.d, p6/Z, [x24, z26.d, SXTW] : ld1b (%x24,%z26.d,sxtw)[1byte] %p6/z -> %z23.d +c45c5f59 : ld1b z25.d, p7/Z, [x26, z28.d, SXTW] : ld1b (%x26,%z28.d,sxtw)[1byte] %p7/z -> %z25.d +c45e5f9b : ld1b z27.d, p7/Z, [x28, z30.d, SXTW] : ld1b (%x28,%z30.d,sxtw)[1byte] %p7/z -> %z27.d +c45f5fff : ld1b z31.d, p7/Z, [sp, z31.d, SXTW] : ld1b (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d # LD1B { .D }, /Z, [.D{, #}] (LD1B-Z.P.AI-D) -c420c000 : ld1b z0.d, p0/Z, [z0.d, #0] : ld1b (%z0.d)[4byte] %p0/z -> %z0.d -c422c482 : ld1b z2.d, p1/Z, [z4.d, #2] : ld1b +0x02(%z4.d)[4byte] %p1/z -> %z2.d -c424c8c4 : ld1b z4.d, p2/Z, [z6.d, #4] : ld1b +0x04(%z6.d)[4byte] %p2/z -> %z4.d -c426c906 : ld1b z6.d, p2/Z, [z8.d, 
#6] : ld1b +0x06(%z8.d)[4byte] %p2/z -> %z6.d -c428cd48 : ld1b z8.d, p3/Z, [z10.d, #8] : ld1b +0x08(%z10.d)[4byte] %p3/z -> %z8.d -c42acd8a : ld1b z10.d, p3/Z, [z12.d, #10] : ld1b +0x0a(%z12.d)[4byte] %p3/z -> %z10.d -c42cd1cc : ld1b z12.d, p4/Z, [z14.d, #12] : ld1b +0x0c(%z14.d)[4byte] %p4/z -> %z12.d -c42ed20e : ld1b z14.d, p4/Z, [z16.d, #14] : ld1b +0x0e(%z16.d)[4byte] %p4/z -> %z14.d -c430d650 : ld1b z16.d, p5/Z, [z18.d, #16] : ld1b +0x10(%z18.d)[4byte] %p5/z -> %z16.d -c431d671 : ld1b z17.d, p5/Z, [z19.d, #17] : ld1b +0x11(%z19.d)[4byte] %p5/z -> %z17.d -c433d6b3 : ld1b z19.d, p5/Z, [z21.d, #19] : ld1b +0x13(%z21.d)[4byte] %p5/z -> %z19.d -c435daf5 : ld1b z21.d, p6/Z, [z23.d, #21] : ld1b +0x15(%z23.d)[4byte] %p6/z -> %z21.d -c437db37 : ld1b z23.d, p6/Z, [z25.d, #23] : ld1b +0x17(%z25.d)[4byte] %p6/z -> %z23.d -c439df79 : ld1b z25.d, p7/Z, [z27.d, #25] : ld1b +0x19(%z27.d)[4byte] %p7/z -> %z25.d -c43bdfbb : ld1b z27.d, p7/Z, [z29.d, #27] : ld1b +0x1b(%z29.d)[4byte] %p7/z -> %z27.d -c43fdfff : ld1b z31.d, p7/Z, [z31.d, #31] : ld1b +0x1f(%z31.d)[4byte] %p7/z -> %z31.d +c420c000 : ld1b z0.d, p0/Z, [z0.d, #0] : ld1b (%z0.d)[1byte] %p0/z -> %z0.d +c422c482 : ld1b z2.d, p1/Z, [z4.d, #2] : ld1b +0x02(%z4.d)[1byte] %p1/z -> %z2.d +c424c8c4 : ld1b z4.d, p2/Z, [z6.d, #4] : ld1b +0x04(%z6.d)[1byte] %p2/z -> %z4.d +c426c906 : ld1b z6.d, p2/Z, [z8.d, #6] : ld1b +0x06(%z8.d)[1byte] %p2/z -> %z6.d +c428cd48 : ld1b z8.d, p3/Z, [z10.d, #8] : ld1b +0x08(%z10.d)[1byte] %p3/z -> %z8.d +c42acd8a : ld1b z10.d, p3/Z, [z12.d, #10] : ld1b +0x0a(%z12.d)[1byte] %p3/z -> %z10.d +c42cd1cc : ld1b z12.d, p4/Z, [z14.d, #12] : ld1b +0x0c(%z14.d)[1byte] %p4/z -> %z12.d +c42ed20e : ld1b z14.d, p4/Z, [z16.d, #14] : ld1b +0x0e(%z16.d)[1byte] %p4/z -> %z14.d +c430d650 : ld1b z16.d, p5/Z, [z18.d, #16] : ld1b +0x10(%z18.d)[1byte] %p5/z -> %z16.d +c431d671 : ld1b z17.d, p5/Z, [z19.d, #17] : ld1b +0x11(%z19.d)[1byte] %p5/z -> %z17.d +c433d6b3 : ld1b z19.d, p5/Z, [z21.d, #19] : ld1b +0x13(%z21.d)[1byte] 
%p5/z -> %z19.d +c435daf5 : ld1b z21.d, p6/Z, [z23.d, #21] : ld1b +0x15(%z23.d)[1byte] %p6/z -> %z21.d +c437db37 : ld1b z23.d, p6/Z, [z25.d, #23] : ld1b +0x17(%z25.d)[1byte] %p6/z -> %z23.d +c439df79 : ld1b z25.d, p7/Z, [z27.d, #25] : ld1b +0x19(%z27.d)[1byte] %p7/z -> %z25.d +c43bdfbb : ld1b z27.d, p7/Z, [z29.d, #27] : ld1b +0x1b(%z29.d)[1byte] %p7/z -> %z27.d +c43fdfff : ld1b z31.d, p7/Z, [z31.d, #31] : ld1b +0x1f(%z31.d)[1byte] %p7/z -> %z31.d # LD1B { .D }, /Z, [, .D] (LD1B-Z.P.BZ-D.64.unscaled) -c440c000 : ld1b z0.d, p0/Z, [x0, z0.d] : ld1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d -c445c482 : ld1b z2.d, p1/Z, [x4, z5.d] : ld1b (%x4,%z5.d)[4byte] %p1/z -> %z2.d -c447c8c4 : ld1b z4.d, p2/Z, [x6, z7.d] : ld1b (%x6,%z7.d)[4byte] %p2/z -> %z4.d -c449c906 : ld1b z6.d, p2/Z, [x8, z9.d] : ld1b (%x8,%z9.d)[4byte] %p2/z -> %z6.d -c44bcd48 : ld1b z8.d, p3/Z, [x10, z11.d] : ld1b (%x10,%z11.d)[4byte] %p3/z -> %z8.d -c44dcd6a : ld1b z10.d, p3/Z, [x11, z13.d] : ld1b (%x11,%z13.d)[4byte] %p3/z -> %z10.d -c44fd1ac : ld1b z12.d, p4/Z, [x13, z15.d] : ld1b (%x13,%z15.d)[4byte] %p4/z -> %z12.d -c451d1ee : ld1b z14.d, p4/Z, [x15, z17.d] : ld1b (%x15,%z17.d)[4byte] %p4/z -> %z14.d -c453d630 : ld1b z16.d, p5/Z, [x17, z19.d] : ld1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d -c454d671 : ld1b z17.d, p5/Z, [x19, z20.d] : ld1b (%x19,%z20.d)[4byte] %p5/z -> %z17.d -c456d6b3 : ld1b z19.d, p5/Z, [x21, z22.d] : ld1b (%x21,%z22.d)[4byte] %p5/z -> %z19.d -c458daf5 : ld1b z21.d, p6/Z, [x23, z24.d] : ld1b (%x23,%z24.d)[4byte] %p6/z -> %z21.d -c45adb17 : ld1b z23.d, p6/Z, [x24, z26.d] : ld1b (%x24,%z26.d)[4byte] %p6/z -> %z23.d -c45cdf59 : ld1b z25.d, p7/Z, [x26, z28.d] : ld1b (%x26,%z28.d)[4byte] %p7/z -> %z25.d -c45edf9b : ld1b z27.d, p7/Z, [x28, z30.d] : ld1b (%x28,%z30.d)[4byte] %p7/z -> %z27.d -c45fdfff : ld1b z31.d, p7/Z, [sp, z31.d] : ld1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d +c440c000 : ld1b z0.d, p0/Z, [x0, z0.d] : ld1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d +c445c482 : ld1b z2.d, p1/Z, [x4, z5.d] : ld1b 
(%x4,%z5.d)[1byte] %p1/z -> %z2.d +c447c8c4 : ld1b z4.d, p2/Z, [x6, z7.d] : ld1b (%x6,%z7.d)[1byte] %p2/z -> %z4.d +c449c906 : ld1b z6.d, p2/Z, [x8, z9.d] : ld1b (%x8,%z9.d)[1byte] %p2/z -> %z6.d +c44bcd48 : ld1b z8.d, p3/Z, [x10, z11.d] : ld1b (%x10,%z11.d)[1byte] %p3/z -> %z8.d +c44dcd6a : ld1b z10.d, p3/Z, [x11, z13.d] : ld1b (%x11,%z13.d)[1byte] %p3/z -> %z10.d +c44fd1ac : ld1b z12.d, p4/Z, [x13, z15.d] : ld1b (%x13,%z15.d)[1byte] %p4/z -> %z12.d +c451d1ee : ld1b z14.d, p4/Z, [x15, z17.d] : ld1b (%x15,%z17.d)[1byte] %p4/z -> %z14.d +c453d630 : ld1b z16.d, p5/Z, [x17, z19.d] : ld1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d +c454d671 : ld1b z17.d, p5/Z, [x19, z20.d] : ld1b (%x19,%z20.d)[1byte] %p5/z -> %z17.d +c456d6b3 : ld1b z19.d, p5/Z, [x21, z22.d] : ld1b (%x21,%z22.d)[1byte] %p5/z -> %z19.d +c458daf5 : ld1b z21.d, p6/Z, [x23, z24.d] : ld1b (%x23,%z24.d)[1byte] %p6/z -> %z21.d +c45adb17 : ld1b z23.d, p6/Z, [x24, z26.d] : ld1b (%x24,%z26.d)[1byte] %p6/z -> %z23.d +c45cdf59 : ld1b z25.d, p7/Z, [x26, z28.d] : ld1b (%x26,%z28.d)[1byte] %p7/z -> %z25.d +c45edf9b : ld1b z27.d, p7/Z, [x28, z30.d] : ld1b (%x28,%z30.d)[1byte] %p7/z -> %z27.d +c45fdfff : ld1b z31.d, p7/Z, [sp, z31.d] : ld1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [, , LSL #3] (LD1D-Z.P.BR-U64) -a5e04000 : ld1d z0.d, p0/Z, [x0, x0, LSL #3] : ld1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d -a5e54482 : ld1d z2.d, p1/Z, [x4, x5, LSL #3] : ld1d (%x4,%x5,lsl #3)[32byte] %p1/z -> %z2.d -a5e748c4 : ld1d z4.d, p2/Z, [x6, x7, LSL #3] : ld1d (%x6,%x7,lsl #3)[32byte] %p2/z -> %z4.d -a5e94906 : ld1d z6.d, p2/Z, [x8, x9, LSL #3] : ld1d (%x8,%x9,lsl #3)[32byte] %p2/z -> %z6.d -a5eb4d48 : ld1d z8.d, p3/Z, [x10, x11, LSL #3] : ld1d (%x10,%x11,lsl #3)[32byte] %p3/z -> %z8.d -a5ec4d6a : ld1d z10.d, p3/Z, [x11, x12, LSL #3] : ld1d (%x11,%x12,lsl #3)[32byte] %p3/z -> %z10.d -a5ee51ac : ld1d z12.d, p4/Z, [x13, x14, LSL #3] : ld1d (%x13,%x14,lsl #3)[32byte] %p4/z -> %z12.d -a5f051ee : ld1d z14.d, p4/Z, [x15, x16, LSL 
#3] : ld1d (%x15,%x16,lsl #3)[32byte] %p4/z -> %z14.d -a5f25630 : ld1d z16.d, p5/Z, [x17, x18, LSL #3] : ld1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d -a5f45671 : ld1d z17.d, p5/Z, [x19, x20, LSL #3] : ld1d (%x19,%x20,lsl #3)[32byte] %p5/z -> %z17.d -a5f656b3 : ld1d z19.d, p5/Z, [x21, x22, LSL #3] : ld1d (%x21,%x22,lsl #3)[32byte] %p5/z -> %z19.d -a5f85af5 : ld1d z21.d, p6/Z, [x23, x24, LSL #3] : ld1d (%x23,%x24,lsl #3)[32byte] %p6/z -> %z21.d -a5f95b17 : ld1d z23.d, p6/Z, [x24, x25, LSL #3] : ld1d (%x24,%x25,lsl #3)[32byte] %p6/z -> %z23.d -a5fb5f59 : ld1d z25.d, p7/Z, [x26, x27, LSL #3] : ld1d (%x26,%x27,lsl #3)[32byte] %p7/z -> %z25.d -a5fd5f9b : ld1d z27.d, p7/Z, [x28, x29, LSL #3] : ld1d (%x28,%x29,lsl #3)[32byte] %p7/z -> %z27.d -a5fe5fff : ld1d z31.d, p7/Z, [sp, x30, LSL #3] : ld1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d +a5e04000 : ld1d z0.d, p0/Z, [x0, x0, LSL #3] : ld1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d +a5e54482 : ld1d z2.d, p1/Z, [x4, x5, LSL #3] : ld1d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d +a5e748c4 : ld1d z4.d, p2/Z, [x6, x7, LSL #3] : ld1d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d +a5e94906 : ld1d z6.d, p2/Z, [x8, x9, LSL #3] : ld1d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d +a5eb4d48 : ld1d z8.d, p3/Z, [x10, x11, LSL #3] : ld1d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d +a5ec4d6a : ld1d z10.d, p3/Z, [x11, x12, LSL #3] : ld1d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d +a5ee51ac : ld1d z12.d, p4/Z, [x13, x14, LSL #3] : ld1d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d +a5f051ee : ld1d z14.d, p4/Z, [x15, x16, LSL #3] : ld1d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d +a5f25630 : ld1d z16.d, p5/Z, [x17, x18, LSL #3] : ld1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d +a5f45671 : ld1d z17.d, p5/Z, [x19, x20, LSL #3] : ld1d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d +a5f656b3 : ld1d z19.d, p5/Z, [x21, x22, LSL #3] : ld1d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d +a5f85af5 : ld1d z21.d, p6/Z, [x23, x24, LSL #3] : ld1d (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d 
+a5f95b17 : ld1d z23.d, p6/Z, [x24, x25, LSL #3] : ld1d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d +a5fb5f59 : ld1d z25.d, p7/Z, [x26, x27, LSL #3] : ld1d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d +a5fd5f9b : ld1d z27.d, p7/Z, [x28, x29, LSL #3] : ld1d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d +a5fe5fff : ld1d z31.d, p7/Z, [sp, x30, LSL #3] : ld1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [{, #, MUL VL}] (LD1D-Z.P.BI-U64) -a5e8a000 : ld1d z0.d, p0/Z, [x0, #-8, MUL VL] : ld1d -0x0100(%x0)[32byte] %p0/z -> %z0.d -a5e9a482 : ld1d z2.d, p1/Z, [x4, #-7, MUL VL] : ld1d -0xe0(%x4)[32byte] %p1/z -> %z2.d -a5eaa8c4 : ld1d z4.d, p2/Z, [x6, #-6, MUL VL] : ld1d -0xc0(%x6)[32byte] %p2/z -> %z4.d -a5eba906 : ld1d z6.d, p2/Z, [x8, #-5, MUL VL] : ld1d -0xa0(%x8)[32byte] %p2/z -> %z6.d -a5ecad48 : ld1d z8.d, p3/Z, [x10, #-4, MUL VL] : ld1d -0x80(%x10)[32byte] %p3/z -> %z8.d -a5edad6a : ld1d z10.d, p3/Z, [x11, #-3, MUL VL] : ld1d -0x60(%x11)[32byte] %p3/z -> %z10.d -a5eeb1ac : ld1d z12.d, p4/Z, [x13, #-2, MUL VL] : ld1d -0x40(%x13)[32byte] %p4/z -> %z12.d -a5efb1ee : ld1d z14.d, p4/Z, [x15, #-1, MUL VL] : ld1d -0x20(%x15)[32byte] %p4/z -> %z14.d -a5e0b630 : ld1d z16.d, p5/Z, [x17, #0, MUL VL] : ld1d (%x17)[32byte] %p5/z -> %z16.d -a5e0b671 : ld1d z17.d, p5/Z, [x19, #0, MUL VL] : ld1d (%x19)[32byte] %p5/z -> %z17.d -a5e1b6b3 : ld1d z19.d, p5/Z, [x21, #1, MUL VL] : ld1d +0x20(%x21)[32byte] %p5/z -> %z19.d -a5e2baf5 : ld1d z21.d, p6/Z, [x23, #2, MUL VL] : ld1d +0x40(%x23)[32byte] %p6/z -> %z21.d -a5e3bb17 : ld1d z23.d, p6/Z, [x24, #3, MUL VL] : ld1d +0x60(%x24)[32byte] %p6/z -> %z23.d -a5e4bf59 : ld1d z25.d, p7/Z, [x26, #4, MUL VL] : ld1d +0x80(%x26)[32byte] %p7/z -> %z25.d -a5e5bf9b : ld1d z27.d, p7/Z, [x28, #5, MUL VL] : ld1d +0xa0(%x28)[32byte] %p7/z -> %z27.d -a5e7bfff : ld1d z31.d, p7/Z, [sp, #7, MUL VL] : ld1d +0xe0(%sp)[32byte] %p7/z -> %z31.d +a5e8a000 : ld1d z0.d, p0/Z, [x0, #-8, MUL VL] : ld1d -0x0100(%x0)[8byte] %p0/z -> %z0.d +a5e9a482 : ld1d z2.d, p1/Z, 
[x4, #-7, MUL VL] : ld1d -0xe0(%x4)[8byte] %p1/z -> %z2.d +a5eaa8c4 : ld1d z4.d, p2/Z, [x6, #-6, MUL VL] : ld1d -0xc0(%x6)[8byte] %p2/z -> %z4.d +a5eba906 : ld1d z6.d, p2/Z, [x8, #-5, MUL VL] : ld1d -0xa0(%x8)[8byte] %p2/z -> %z6.d +a5ecad48 : ld1d z8.d, p3/Z, [x10, #-4, MUL VL] : ld1d -0x80(%x10)[8byte] %p3/z -> %z8.d +a5edad6a : ld1d z10.d, p3/Z, [x11, #-3, MUL VL] : ld1d -0x60(%x11)[8byte] %p3/z -> %z10.d +a5eeb1ac : ld1d z12.d, p4/Z, [x13, #-2, MUL VL] : ld1d -0x40(%x13)[8byte] %p4/z -> %z12.d +a5efb1ee : ld1d z14.d, p4/Z, [x15, #-1, MUL VL] : ld1d -0x20(%x15)[8byte] %p4/z -> %z14.d +a5e0b630 : ld1d z16.d, p5/Z, [x17, #0, MUL VL] : ld1d (%x17)[8byte] %p5/z -> %z16.d +a5e0b671 : ld1d z17.d, p5/Z, [x19, #0, MUL VL] : ld1d (%x19)[8byte] %p5/z -> %z17.d +a5e1b6b3 : ld1d z19.d, p5/Z, [x21, #1, MUL VL] : ld1d +0x20(%x21)[8byte] %p5/z -> %z19.d +a5e2baf5 : ld1d z21.d, p6/Z, [x23, #2, MUL VL] : ld1d +0x40(%x23)[8byte] %p6/z -> %z21.d +a5e3bb17 : ld1d z23.d, p6/Z, [x24, #3, MUL VL] : ld1d +0x60(%x24)[8byte] %p6/z -> %z23.d +a5e4bf59 : ld1d z25.d, p7/Z, [x26, #4, MUL VL] : ld1d +0x80(%x26)[8byte] %p7/z -> %z25.d +a5e5bf9b : ld1d z27.d, p7/Z, [x28, #5, MUL VL] : ld1d +0xa0(%x28)[8byte] %p7/z -> %z27.d +a5e7bfff : ld1d z31.d, p7/Z, [sp, #7, MUL VL] : ld1d +0xe0(%sp)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [, .D, ] (LD1D-Z.P.BZ-D.x32.unscaled) -c5804000 : ld1d z0.d, p0/Z, [x0, z0.d, UXTW] : ld1d (%x0,%z0.d,uxtw)[32byte] %p0/z -> %z0.d -c5854482 : ld1d z2.d, p1/Z, [x4, z5.d, UXTW] : ld1d (%x4,%z5.d,uxtw)[32byte] %p1/z -> %z2.d -c58748c4 : ld1d z4.d, p2/Z, [x6, z7.d, UXTW] : ld1d (%x6,%z7.d,uxtw)[32byte] %p2/z -> %z4.d -c5894906 : ld1d z6.d, p2/Z, [x8, z9.d, UXTW] : ld1d (%x8,%z9.d,uxtw)[32byte] %p2/z -> %z6.d -c58b4d48 : ld1d z8.d, p3/Z, [x10, z11.d, UXTW] : ld1d (%x10,%z11.d,uxtw)[32byte] %p3/z -> %z8.d -c58d4d6a : ld1d z10.d, p3/Z, [x11, z13.d, UXTW] : ld1d (%x11,%z13.d,uxtw)[32byte] %p3/z -> %z10.d -c58f51ac : ld1d z12.d, p4/Z, [x13, z15.d, UXTW] : ld1d 
(%x13,%z15.d,uxtw)[32byte] %p4/z -> %z12.d -c59151ee : ld1d z14.d, p4/Z, [x15, z17.d, UXTW] : ld1d (%x15,%z17.d,uxtw)[32byte] %p4/z -> %z14.d -c5935630 : ld1d z16.d, p5/Z, [x17, z19.d, UXTW] : ld1d (%x17,%z19.d,uxtw)[32byte] %p5/z -> %z16.d -c5945671 : ld1d z17.d, p5/Z, [x19, z20.d, UXTW] : ld1d (%x19,%z20.d,uxtw)[32byte] %p5/z -> %z17.d -c59656b3 : ld1d z19.d, p5/Z, [x21, z22.d, UXTW] : ld1d (%x21,%z22.d,uxtw)[32byte] %p5/z -> %z19.d -c5985af5 : ld1d z21.d, p6/Z, [x23, z24.d, UXTW] : ld1d (%x23,%z24.d,uxtw)[32byte] %p6/z -> %z21.d -c59a5b17 : ld1d z23.d, p6/Z, [x24, z26.d, UXTW] : ld1d (%x24,%z26.d,uxtw)[32byte] %p6/z -> %z23.d -c59c5f59 : ld1d z25.d, p7/Z, [x26, z28.d, UXTW] : ld1d (%x26,%z28.d,uxtw)[32byte] %p7/z -> %z25.d -c59e5f9b : ld1d z27.d, p7/Z, [x28, z30.d, UXTW] : ld1d (%x28,%z30.d,uxtw)[32byte] %p7/z -> %z27.d -c59f5fff : ld1d z31.d, p7/Z, [sp, z31.d, UXTW] : ld1d (%sp,%z31.d,uxtw)[32byte] %p7/z -> %z31.d -c5c04000 : ld1d z0.d, p0/Z, [x0, z0.d, SXTW] : ld1d (%x0,%z0.d,sxtw)[32byte] %p0/z -> %z0.d -c5c54482 : ld1d z2.d, p1/Z, [x4, z5.d, SXTW] : ld1d (%x4,%z5.d,sxtw)[32byte] %p1/z -> %z2.d -c5c748c4 : ld1d z4.d, p2/Z, [x6, z7.d, SXTW] : ld1d (%x6,%z7.d,sxtw)[32byte] %p2/z -> %z4.d -c5c94906 : ld1d z6.d, p2/Z, [x8, z9.d, SXTW] : ld1d (%x8,%z9.d,sxtw)[32byte] %p2/z -> %z6.d -c5cb4d48 : ld1d z8.d, p3/Z, [x10, z11.d, SXTW] : ld1d (%x10,%z11.d,sxtw)[32byte] %p3/z -> %z8.d -c5cd4d6a : ld1d z10.d, p3/Z, [x11, z13.d, SXTW] : ld1d (%x11,%z13.d,sxtw)[32byte] %p3/z -> %z10.d -c5cf51ac : ld1d z12.d, p4/Z, [x13, z15.d, SXTW] : ld1d (%x13,%z15.d,sxtw)[32byte] %p4/z -> %z12.d -c5d151ee : ld1d z14.d, p4/Z, [x15, z17.d, SXTW] : ld1d (%x15,%z17.d,sxtw)[32byte] %p4/z -> %z14.d -c5d35630 : ld1d z16.d, p5/Z, [x17, z19.d, SXTW] : ld1d (%x17,%z19.d,sxtw)[32byte] %p5/z -> %z16.d -c5d45671 : ld1d z17.d, p5/Z, [x19, z20.d, SXTW] : ld1d (%x19,%z20.d,sxtw)[32byte] %p5/z -> %z17.d -c5d656b3 : ld1d z19.d, p5/Z, [x21, z22.d, SXTW] : ld1d (%x21,%z22.d,sxtw)[32byte] %p5/z -> %z19.d 
-c5d85af5 : ld1d z21.d, p6/Z, [x23, z24.d, SXTW] : ld1d (%x23,%z24.d,sxtw)[32byte] %p6/z -> %z21.d -c5da5b17 : ld1d z23.d, p6/Z, [x24, z26.d, SXTW] : ld1d (%x24,%z26.d,sxtw)[32byte] %p6/z -> %z23.d -c5dc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, SXTW] : ld1d (%x26,%z28.d,sxtw)[32byte] %p7/z -> %z25.d -c5de5f9b : ld1d z27.d, p7/Z, [x28, z30.d, SXTW] : ld1d (%x28,%z30.d,sxtw)[32byte] %p7/z -> %z27.d -c5df5fff : ld1d z31.d, p7/Z, [sp, z31.d, SXTW] : ld1d (%sp,%z31.d,sxtw)[32byte] %p7/z -> %z31.d +c5804000 : ld1d z0.d, p0/Z, [x0, z0.d, UXTW] : ld1d (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d +c5854482 : ld1d z2.d, p1/Z, [x4, z5.d, UXTW] : ld1d (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d +c58748c4 : ld1d z4.d, p2/Z, [x6, z7.d, UXTW] : ld1d (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d +c5894906 : ld1d z6.d, p2/Z, [x8, z9.d, UXTW] : ld1d (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d +c58b4d48 : ld1d z8.d, p3/Z, [x10, z11.d, UXTW] : ld1d (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d +c58d4d6a : ld1d z10.d, p3/Z, [x11, z13.d, UXTW] : ld1d (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d +c58f51ac : ld1d z12.d, p4/Z, [x13, z15.d, UXTW] : ld1d (%x13,%z15.d,uxtw)[8byte] %p4/z -> %z12.d +c59151ee : ld1d z14.d, p4/Z, [x15, z17.d, UXTW] : ld1d (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d +c5935630 : ld1d z16.d, p5/Z, [x17, z19.d, UXTW] : ld1d (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d +c5945671 : ld1d z17.d, p5/Z, [x19, z20.d, UXTW] : ld1d (%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d +c59656b3 : ld1d z19.d, p5/Z, [x21, z22.d, UXTW] : ld1d (%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d +c5985af5 : ld1d z21.d, p6/Z, [x23, z24.d, UXTW] : ld1d (%x23,%z24.d,uxtw)[8byte] %p6/z -> %z21.d +c59a5b17 : ld1d z23.d, p6/Z, [x24, z26.d, UXTW] : ld1d (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d +c59c5f59 : ld1d z25.d, p7/Z, [x26, z28.d, UXTW] : ld1d (%x26,%z28.d,uxtw)[8byte] %p7/z -> %z25.d +c59e5f9b : ld1d z27.d, p7/Z, [x28, z30.d, UXTW] : ld1d (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d +c59f5fff : ld1d z31.d, p7/Z, [sp, z31.d, UXTW] : ld1d 
(%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d +c5c04000 : ld1d z0.d, p0/Z, [x0, z0.d, SXTW] : ld1d (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d +c5c54482 : ld1d z2.d, p1/Z, [x4, z5.d, SXTW] : ld1d (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d +c5c748c4 : ld1d z4.d, p2/Z, [x6, z7.d, SXTW] : ld1d (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d +c5c94906 : ld1d z6.d, p2/Z, [x8, z9.d, SXTW] : ld1d (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d +c5cb4d48 : ld1d z8.d, p3/Z, [x10, z11.d, SXTW] : ld1d (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d +c5cd4d6a : ld1d z10.d, p3/Z, [x11, z13.d, SXTW] : ld1d (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d +c5cf51ac : ld1d z12.d, p4/Z, [x13, z15.d, SXTW] : ld1d (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d +c5d151ee : ld1d z14.d, p4/Z, [x15, z17.d, SXTW] : ld1d (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d +c5d35630 : ld1d z16.d, p5/Z, [x17, z19.d, SXTW] : ld1d (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d +c5d45671 : ld1d z17.d, p5/Z, [x19, z20.d, SXTW] : ld1d (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d +c5d656b3 : ld1d z19.d, p5/Z, [x21, z22.d, SXTW] : ld1d (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d +c5d85af5 : ld1d z21.d, p6/Z, [x23, z24.d, SXTW] : ld1d (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d +c5da5b17 : ld1d z23.d, p6/Z, [x24, z26.d, SXTW] : ld1d (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d +c5dc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, SXTW] : ld1d (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d +c5de5f9b : ld1d z27.d, p7/Z, [x28, z30.d, SXTW] : ld1d (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d +c5df5fff : ld1d z31.d, p7/Z, [sp, z31.d, SXTW] : ld1d (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [, .D, #3] (LD1D-Z.P.BZ-D.x32.scaled) -c5a04000 : ld1d z0.d, p0/Z, [x0, z0.d, UXTW #3] : ld1d (%x0,%z0.d,uxtw #3)[32byte] %p0/z -> %z0.d -c5a54482 : ld1d z2.d, p1/Z, [x4, z5.d, UXTW #3] : ld1d (%x4,%z5.d,uxtw #3)[32byte] %p1/z -> %z2.d -c5a748c4 : ld1d z4.d, p2/Z, [x6, z7.d, UXTW #3] : ld1d (%x6,%z7.d,uxtw #3)[32byte] %p2/z -> %z4.d -c5a94906 : ld1d z6.d, p2/Z, [x8, z9.d, UXTW #3] : ld1d 
(%x8,%z9.d,uxtw #3)[32byte] %p2/z -> %z6.d -c5ab4d48 : ld1d z8.d, p3/Z, [x10, z11.d, UXTW #3] : ld1d (%x10,%z11.d,uxtw #3)[32byte] %p3/z -> %z8.d -c5ad4d6a : ld1d z10.d, p3/Z, [x11, z13.d, UXTW #3] : ld1d (%x11,%z13.d,uxtw #3)[32byte] %p3/z -> %z10.d -c5af51ac : ld1d z12.d, p4/Z, [x13, z15.d, UXTW #3] : ld1d (%x13,%z15.d,uxtw #3)[32byte] %p4/z -> %z12.d -c5b151ee : ld1d z14.d, p4/Z, [x15, z17.d, UXTW #3] : ld1d (%x15,%z17.d,uxtw #3)[32byte] %p4/z -> %z14.d -c5b35630 : ld1d z16.d, p5/Z, [x17, z19.d, UXTW #3] : ld1d (%x17,%z19.d,uxtw #3)[32byte] %p5/z -> %z16.d -c5b45671 : ld1d z17.d, p5/Z, [x19, z20.d, UXTW #3] : ld1d (%x19,%z20.d,uxtw #3)[32byte] %p5/z -> %z17.d -c5b656b3 : ld1d z19.d, p5/Z, [x21, z22.d, UXTW #3] : ld1d (%x21,%z22.d,uxtw #3)[32byte] %p5/z -> %z19.d -c5b85af5 : ld1d z21.d, p6/Z, [x23, z24.d, UXTW #3] : ld1d (%x23,%z24.d,uxtw #3)[32byte] %p6/z -> %z21.d -c5ba5b17 : ld1d z23.d, p6/Z, [x24, z26.d, UXTW #3] : ld1d (%x24,%z26.d,uxtw #3)[32byte] %p6/z -> %z23.d -c5bc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, UXTW #3] : ld1d (%x26,%z28.d,uxtw #3)[32byte] %p7/z -> %z25.d -c5be5f9b : ld1d z27.d, p7/Z, [x28, z30.d, UXTW #3] : ld1d (%x28,%z30.d,uxtw #3)[32byte] %p7/z -> %z27.d -c5bf5fff : ld1d z31.d, p7/Z, [sp, z31.d, UXTW #3] : ld1d (%sp,%z31.d,uxtw #3)[32byte] %p7/z -> %z31.d -c5e04000 : ld1d z0.d, p0/Z, [x0, z0.d, SXTW #3] : ld1d (%x0,%z0.d,sxtw #3)[32byte] %p0/z -> %z0.d -c5e54482 : ld1d z2.d, p1/Z, [x4, z5.d, SXTW #3] : ld1d (%x4,%z5.d,sxtw #3)[32byte] %p1/z -> %z2.d -c5e748c4 : ld1d z4.d, p2/Z, [x6, z7.d, SXTW #3] : ld1d (%x6,%z7.d,sxtw #3)[32byte] %p2/z -> %z4.d -c5e94906 : ld1d z6.d, p2/Z, [x8, z9.d, SXTW #3] : ld1d (%x8,%z9.d,sxtw #3)[32byte] %p2/z -> %z6.d -c5eb4d48 : ld1d z8.d, p3/Z, [x10, z11.d, SXTW #3] : ld1d (%x10,%z11.d,sxtw #3)[32byte] %p3/z -> %z8.d -c5ed4d6a : ld1d z10.d, p3/Z, [x11, z13.d, SXTW #3] : ld1d (%x11,%z13.d,sxtw #3)[32byte] %p3/z -> %z10.d -c5ef51ac : ld1d z12.d, p4/Z, [x13, z15.d, SXTW #3] : ld1d (%x13,%z15.d,sxtw #3)[32byte] %p4/z 
-> %z12.d -c5f151ee : ld1d z14.d, p4/Z, [x15, z17.d, SXTW #3] : ld1d (%x15,%z17.d,sxtw #3)[32byte] %p4/z -> %z14.d -c5f35630 : ld1d z16.d, p5/Z, [x17, z19.d, SXTW #3] : ld1d (%x17,%z19.d,sxtw #3)[32byte] %p5/z -> %z16.d -c5f45671 : ld1d z17.d, p5/Z, [x19, z20.d, SXTW #3] : ld1d (%x19,%z20.d,sxtw #3)[32byte] %p5/z -> %z17.d -c5f656b3 : ld1d z19.d, p5/Z, [x21, z22.d, SXTW #3] : ld1d (%x21,%z22.d,sxtw #3)[32byte] %p5/z -> %z19.d -c5f85af5 : ld1d z21.d, p6/Z, [x23, z24.d, SXTW #3] : ld1d (%x23,%z24.d,sxtw #3)[32byte] %p6/z -> %z21.d -c5fa5b17 : ld1d z23.d, p6/Z, [x24, z26.d, SXTW #3] : ld1d (%x24,%z26.d,sxtw #3)[32byte] %p6/z -> %z23.d -c5fc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, SXTW #3] : ld1d (%x26,%z28.d,sxtw #3)[32byte] %p7/z -> %z25.d -c5fe5f9b : ld1d z27.d, p7/Z, [x28, z30.d, SXTW #3] : ld1d (%x28,%z30.d,sxtw #3)[32byte] %p7/z -> %z27.d -c5ff5fff : ld1d z31.d, p7/Z, [sp, z31.d, SXTW #3] : ld1d (%sp,%z31.d,sxtw #3)[32byte] %p7/z -> %z31.d +c5a04000 : ld1d z0.d, p0/Z, [x0, z0.d, UXTW #3] : ld1d (%x0,%z0.d,uxtw #3)[8byte] %p0/z -> %z0.d +c5a54482 : ld1d z2.d, p1/Z, [x4, z5.d, UXTW #3] : ld1d (%x4,%z5.d,uxtw #3)[8byte] %p1/z -> %z2.d +c5a748c4 : ld1d z4.d, p2/Z, [x6, z7.d, UXTW #3] : ld1d (%x6,%z7.d,uxtw #3)[8byte] %p2/z -> %z4.d +c5a94906 : ld1d z6.d, p2/Z, [x8, z9.d, UXTW #3] : ld1d (%x8,%z9.d,uxtw #3)[8byte] %p2/z -> %z6.d +c5ab4d48 : ld1d z8.d, p3/Z, [x10, z11.d, UXTW #3] : ld1d (%x10,%z11.d,uxtw #3)[8byte] %p3/z -> %z8.d +c5ad4d6a : ld1d z10.d, p3/Z, [x11, z13.d, UXTW #3] : ld1d (%x11,%z13.d,uxtw #3)[8byte] %p3/z -> %z10.d +c5af51ac : ld1d z12.d, p4/Z, [x13, z15.d, UXTW #3] : ld1d (%x13,%z15.d,uxtw #3)[8byte] %p4/z -> %z12.d +c5b151ee : ld1d z14.d, p4/Z, [x15, z17.d, UXTW #3] : ld1d (%x15,%z17.d,uxtw #3)[8byte] %p4/z -> %z14.d +c5b35630 : ld1d z16.d, p5/Z, [x17, z19.d, UXTW #3] : ld1d (%x17,%z19.d,uxtw #3)[8byte] %p5/z -> %z16.d +c5b45671 : ld1d z17.d, p5/Z, [x19, z20.d, UXTW #3] : ld1d (%x19,%z20.d,uxtw #3)[8byte] %p5/z -> %z17.d +c5b656b3 : ld1d z19.d, p5/Z, 
[x21, z22.d, UXTW #3] : ld1d (%x21,%z22.d,uxtw #3)[8byte] %p5/z -> %z19.d +c5b85af5 : ld1d z21.d, p6/Z, [x23, z24.d, UXTW #3] : ld1d (%x23,%z24.d,uxtw #3)[8byte] %p6/z -> %z21.d +c5ba5b17 : ld1d z23.d, p6/Z, [x24, z26.d, UXTW #3] : ld1d (%x24,%z26.d,uxtw #3)[8byte] %p6/z -> %z23.d +c5bc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, UXTW #3] : ld1d (%x26,%z28.d,uxtw #3)[8byte] %p7/z -> %z25.d +c5be5f9b : ld1d z27.d, p7/Z, [x28, z30.d, UXTW #3] : ld1d (%x28,%z30.d,uxtw #3)[8byte] %p7/z -> %z27.d +c5bf5fff : ld1d z31.d, p7/Z, [sp, z31.d, UXTW #3] : ld1d (%sp,%z31.d,uxtw #3)[8byte] %p7/z -> %z31.d +c5e04000 : ld1d z0.d, p0/Z, [x0, z0.d, SXTW #3] : ld1d (%x0,%z0.d,sxtw #3)[8byte] %p0/z -> %z0.d +c5e54482 : ld1d z2.d, p1/Z, [x4, z5.d, SXTW #3] : ld1d (%x4,%z5.d,sxtw #3)[8byte] %p1/z -> %z2.d +c5e748c4 : ld1d z4.d, p2/Z, [x6, z7.d, SXTW #3] : ld1d (%x6,%z7.d,sxtw #3)[8byte] %p2/z -> %z4.d +c5e94906 : ld1d z6.d, p2/Z, [x8, z9.d, SXTW #3] : ld1d (%x8,%z9.d,sxtw #3)[8byte] %p2/z -> %z6.d +c5eb4d48 : ld1d z8.d, p3/Z, [x10, z11.d, SXTW #3] : ld1d (%x10,%z11.d,sxtw #3)[8byte] %p3/z -> %z8.d +c5ed4d6a : ld1d z10.d, p3/Z, [x11, z13.d, SXTW #3] : ld1d (%x11,%z13.d,sxtw #3)[8byte] %p3/z -> %z10.d +c5ef51ac : ld1d z12.d, p4/Z, [x13, z15.d, SXTW #3] : ld1d (%x13,%z15.d,sxtw #3)[8byte] %p4/z -> %z12.d +c5f151ee : ld1d z14.d, p4/Z, [x15, z17.d, SXTW #3] : ld1d (%x15,%z17.d,sxtw #3)[8byte] %p4/z -> %z14.d +c5f35630 : ld1d z16.d, p5/Z, [x17, z19.d, SXTW #3] : ld1d (%x17,%z19.d,sxtw #3)[8byte] %p5/z -> %z16.d +c5f45671 : ld1d z17.d, p5/Z, [x19, z20.d, SXTW #3] : ld1d (%x19,%z20.d,sxtw #3)[8byte] %p5/z -> %z17.d +c5f656b3 : ld1d z19.d, p5/Z, [x21, z22.d, SXTW #3] : ld1d (%x21,%z22.d,sxtw #3)[8byte] %p5/z -> %z19.d +c5f85af5 : ld1d z21.d, p6/Z, [x23, z24.d, SXTW #3] : ld1d (%x23,%z24.d,sxtw #3)[8byte] %p6/z -> %z21.d +c5fa5b17 : ld1d z23.d, p6/Z, [x24, z26.d, SXTW #3] : ld1d (%x24,%z26.d,sxtw #3)[8byte] %p6/z -> %z23.d +c5fc5f59 : ld1d z25.d, p7/Z, [x26, z28.d, SXTW #3] : ld1d (%x26,%z28.d,sxtw 
#3)[8byte] %p7/z -> %z25.d +c5fe5f9b : ld1d z27.d, p7/Z, [x28, z30.d, SXTW #3] : ld1d (%x28,%z30.d,sxtw #3)[8byte] %p7/z -> %z27.d +c5ff5fff : ld1d z31.d, p7/Z, [sp, z31.d, SXTW #3] : ld1d (%sp,%z31.d,sxtw #3)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [.D{, #}] (LD1D-Z.P.AI-D) -c5a0c000 : ld1d z0.d, p0/Z, [z0.d, #0] : ld1d (%z0.d)[32byte] %p0/z -> %z0.d -c5a2c482 : ld1d z2.d, p1/Z, [z4.d, #16] : ld1d +0x10(%z4.d)[32byte] %p1/z -> %z2.d -c5a4c8c4 : ld1d z4.d, p2/Z, [z6.d, #32] : ld1d +0x20(%z6.d)[32byte] %p2/z -> %z4.d -c5a6c906 : ld1d z6.d, p2/Z, [z8.d, #48] : ld1d +0x30(%z8.d)[32byte] %p2/z -> %z6.d -c5a8cd48 : ld1d z8.d, p3/Z, [z10.d, #64] : ld1d +0x40(%z10.d)[32byte] %p3/z -> %z8.d -c5aacd8a : ld1d z10.d, p3/Z, [z12.d, #80] : ld1d +0x50(%z12.d)[32byte] %p3/z -> %z10.d -c5acd1cc : ld1d z12.d, p4/Z, [z14.d, #96] : ld1d +0x60(%z14.d)[32byte] %p4/z -> %z12.d -c5aed20e : ld1d z14.d, p4/Z, [z16.d, #112] : ld1d +0x70(%z16.d)[32byte] %p4/z -> %z14.d -c5b0d650 : ld1d z16.d, p5/Z, [z18.d, #128] : ld1d +0x80(%z18.d)[32byte] %p5/z -> %z16.d -c5b1d671 : ld1d z17.d, p5/Z, [z19.d, #136] : ld1d +0x88(%z19.d)[32byte] %p5/z -> %z17.d -c5b3d6b3 : ld1d z19.d, p5/Z, [z21.d, #152] : ld1d +0x98(%z21.d)[32byte] %p5/z -> %z19.d -c5b5daf5 : ld1d z21.d, p6/Z, [z23.d, #168] : ld1d +0xa8(%z23.d)[32byte] %p6/z -> %z21.d -c5b7db37 : ld1d z23.d, p6/Z, [z25.d, #184] : ld1d +0xb8(%z25.d)[32byte] %p6/z -> %z23.d -c5b9df79 : ld1d z25.d, p7/Z, [z27.d, #200] : ld1d +0xc8(%z27.d)[32byte] %p7/z -> %z25.d -c5bbdfbb : ld1d z27.d, p7/Z, [z29.d, #216] : ld1d +0xd8(%z29.d)[32byte] %p7/z -> %z27.d -c5bfdfff : ld1d z31.d, p7/Z, [z31.d, #248] : ld1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d +c5a0c000 : ld1d z0.d, p0/Z, [z0.d, #0] : ld1d (%z0.d)[8byte] %p0/z -> %z0.d +c5a2c482 : ld1d z2.d, p1/Z, [z4.d, #16] : ld1d +0x10(%z4.d)[8byte] %p1/z -> %z2.d +c5a4c8c4 : ld1d z4.d, p2/Z, [z6.d, #32] : ld1d +0x20(%z6.d)[8byte] %p2/z -> %z4.d +c5a6c906 : ld1d z6.d, p2/Z, [z8.d, #48] : ld1d +0x30(%z8.d)[8byte] %p2/z -> %z6.d 
+c5a8cd48 : ld1d z8.d, p3/Z, [z10.d, #64] : ld1d +0x40(%z10.d)[8byte] %p3/z -> %z8.d +c5aacd8a : ld1d z10.d, p3/Z, [z12.d, #80] : ld1d +0x50(%z12.d)[8byte] %p3/z -> %z10.d +c5acd1cc : ld1d z12.d, p4/Z, [z14.d, #96] : ld1d +0x60(%z14.d)[8byte] %p4/z -> %z12.d +c5aed20e : ld1d z14.d, p4/Z, [z16.d, #112] : ld1d +0x70(%z16.d)[8byte] %p4/z -> %z14.d +c5b0d650 : ld1d z16.d, p5/Z, [z18.d, #128] : ld1d +0x80(%z18.d)[8byte] %p5/z -> %z16.d +c5b1d671 : ld1d z17.d, p5/Z, [z19.d, #136] : ld1d +0x88(%z19.d)[8byte] %p5/z -> %z17.d +c5b3d6b3 : ld1d z19.d, p5/Z, [z21.d, #152] : ld1d +0x98(%z21.d)[8byte] %p5/z -> %z19.d +c5b5daf5 : ld1d z21.d, p6/Z, [z23.d, #168] : ld1d +0xa8(%z23.d)[8byte] %p6/z -> %z21.d +c5b7db37 : ld1d z23.d, p6/Z, [z25.d, #184] : ld1d +0xb8(%z25.d)[8byte] %p6/z -> %z23.d +c5b9df79 : ld1d z25.d, p7/Z, [z27.d, #200] : ld1d +0xc8(%z27.d)[8byte] %p7/z -> %z25.d +c5bbdfbb : ld1d z27.d, p7/Z, [z29.d, #216] : ld1d +0xd8(%z29.d)[8byte] %p7/z -> %z27.d +c5bfdfff : ld1d z31.d, p7/Z, [z31.d, #248] : ld1d +0xf8(%z31.d)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [, .D] (LD1D-Z.P.BZ-D.64.unscaled) -c5c0c000 : ld1d z0.d, p0/Z, [x0, z0.d] : ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d -c5c5c482 : ld1d z2.d, p1/Z, [x4, z5.d] : ld1d (%x4,%z5.d)[32byte] %p1/z -> %z2.d -c5c7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d] : ld1d (%x6,%z7.d)[32byte] %p2/z -> %z4.d -c5c9c906 : ld1d z6.d, p2/Z, [x8, z9.d] : ld1d (%x8,%z9.d)[32byte] %p2/z -> %z6.d -c5cbcd48 : ld1d z8.d, p3/Z, [x10, z11.d] : ld1d (%x10,%z11.d)[32byte] %p3/z -> %z8.d -c5cdcd6a : ld1d z10.d, p3/Z, [x11, z13.d] : ld1d (%x11,%z13.d)[32byte] %p3/z -> %z10.d -c5cfd1ac : ld1d z12.d, p4/Z, [x13, z15.d] : ld1d (%x13,%z15.d)[32byte] %p4/z -> %z12.d -c5d1d1ee : ld1d z14.d, p4/Z, [x15, z17.d] : ld1d (%x15,%z17.d)[32byte] %p4/z -> %z14.d -c5d3d630 : ld1d z16.d, p5/Z, [x17, z19.d] : ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d -c5d4d671 : ld1d z17.d, p5/Z, [x19, z20.d] : ld1d (%x19,%z20.d)[32byte] %p5/z -> %z17.d -c5d6d6b3 : ld1d z19.d, p5/Z, [x21, 
z22.d] : ld1d (%x21,%z22.d)[32byte] %p5/z -> %z19.d -c5d8daf5 : ld1d z21.d, p6/Z, [x23, z24.d] : ld1d (%x23,%z24.d)[32byte] %p6/z -> %z21.d -c5dadb17 : ld1d z23.d, p6/Z, [x24, z26.d] : ld1d (%x24,%z26.d)[32byte] %p6/z -> %z23.d -c5dcdf59 : ld1d z25.d, p7/Z, [x26, z28.d] : ld1d (%x26,%z28.d)[32byte] %p7/z -> %z25.d -c5dedf9b : ld1d z27.d, p7/Z, [x28, z30.d] : ld1d (%x28,%z30.d)[32byte] %p7/z -> %z27.d -c5dfdfff : ld1d z31.d, p7/Z, [sp, z31.d] : ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d +c5c0c000 : ld1d z0.d, p0/Z, [x0, z0.d] : ld1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c5c5c482 : ld1d z2.d, p1/Z, [x4, z5.d] : ld1d (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c5c7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d] : ld1d (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c5c9c906 : ld1d z6.d, p2/Z, [x8, z9.d] : ld1d (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c5cbcd48 : ld1d z8.d, p3/Z, [x10, z11.d] : ld1d (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c5cdcd6a : ld1d z10.d, p3/Z, [x11, z13.d] : ld1d (%x11,%z13.d)[8byte] %p3/z -> %z10.d +c5cfd1ac : ld1d z12.d, p4/Z, [x13, z15.d] : ld1d (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c5d1d1ee : ld1d z14.d, p4/Z, [x15, z17.d] : ld1d (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c5d3d630 : ld1d z16.d, p5/Z, [x17, z19.d] : ld1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c5d4d671 : ld1d z17.d, p5/Z, [x19, z20.d] : ld1d (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c5d6d6b3 : ld1d z19.d, p5/Z, [x21, z22.d] : ld1d (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c5d8daf5 : ld1d z21.d, p6/Z, [x23, z24.d] : ld1d (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c5dadb17 : ld1d z23.d, p6/Z, [x24, z26.d] : ld1d (%x24,%z26.d)[8byte] %p6/z -> %z23.d +c5dcdf59 : ld1d z25.d, p7/Z, [x26, z28.d] : ld1d (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c5dedf9b : ld1d z27.d, p7/Z, [x28, z30.d] : ld1d (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c5dfdfff : ld1d z31.d, p7/Z, [sp, z31.d] : ld1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d # LD1D { .D }, /Z, [, .D, LSL #3] (LD1D-Z.P.BZ-D.64.scaled) -c5e0c000 : ld1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ld1d (%x0,%z0.d,lsl #3)[32byte] 
%p0/z -> %z0.d -c5e5c482 : ld1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ld1d (%x4,%z5.d,lsl #3)[32byte] %p1/z -> %z2.d -c5e7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ld1d (%x6,%z7.d,lsl #3)[32byte] %p2/z -> %z4.d -c5e9c906 : ld1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ld1d (%x8,%z9.d,lsl #3)[32byte] %p2/z -> %z6.d -c5ebcd48 : ld1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ld1d (%x10,%z11.d,lsl #3)[32byte] %p3/z -> %z8.d -c5edcd6a : ld1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ld1d (%x11,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d -c5efd1ac : ld1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ld1d (%x13,%z15.d,lsl #3)[32byte] %p4/z -> %z12.d -c5f1d1ee : ld1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ld1d (%x15,%z17.d,lsl #3)[32byte] %p4/z -> %z14.d -c5f3d630 : ld1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ld1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d -c5f4d671 : ld1d z17.d, p5/Z, [x19, z20.d, LSL #3] : ld1d (%x19,%z20.d,lsl #3)[32byte] %p5/z -> %z17.d -c5f6d6b3 : ld1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ld1d (%x21,%z22.d,lsl #3)[32byte] %p5/z -> %z19.d -c5f8daf5 : ld1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ld1d (%x23,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d -c5fadb17 : ld1d z23.d, p6/Z, [x24, z26.d, LSL #3] : ld1d (%x24,%z26.d,lsl #3)[32byte] %p6/z -> %z23.d -c5fcdf59 : ld1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ld1d (%x26,%z28.d,lsl #3)[32byte] %p7/z -> %z25.d -c5fedf9b : ld1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ld1d (%x28,%z30.d,lsl #3)[32byte] %p7/z -> %z27.d -c5ffdfff : ld1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ld1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d +c5e0c000 : ld1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ld1d (%x0,%z0.d,lsl #3)[8byte] %p0/z -> %z0.d +c5e5c482 : ld1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ld1d (%x4,%z5.d,lsl #3)[8byte] %p1/z -> %z2.d +c5e7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ld1d (%x6,%z7.d,lsl #3)[8byte] %p2/z -> %z4.d +c5e9c906 : ld1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ld1d (%x8,%z9.d,lsl #3)[8byte] %p2/z -> %z6.d +c5ebcd48 : ld1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ld1d (%x10,%z11.d,lsl 
#3)[8byte] %p3/z -> %z8.d +c5edcd6a : ld1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ld1d (%x11,%z13.d,lsl #3)[8byte] %p3/z -> %z10.d +c5efd1ac : ld1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ld1d (%x13,%z15.d,lsl #3)[8byte] %p4/z -> %z12.d +c5f1d1ee : ld1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ld1d (%x15,%z17.d,lsl #3)[8byte] %p4/z -> %z14.d +c5f3d630 : ld1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ld1d (%x17,%z19.d,lsl #3)[8byte] %p5/z -> %z16.d +c5f4d671 : ld1d z17.d, p5/Z, [x19, z20.d, LSL #3] : ld1d (%x19,%z20.d,lsl #3)[8byte] %p5/z -> %z17.d +c5f6d6b3 : ld1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ld1d (%x21,%z22.d,lsl #3)[8byte] %p5/z -> %z19.d +c5f8daf5 : ld1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ld1d (%x23,%z24.d,lsl #3)[8byte] %p6/z -> %z21.d +c5fadb17 : ld1d z23.d, p6/Z, [x24, z26.d, LSL #3] : ld1d (%x24,%z26.d,lsl #3)[8byte] %p6/z -> %z23.d +c5fcdf59 : ld1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ld1d (%x26,%z28.d,lsl #3)[8byte] %p7/z -> %z25.d +c5fedf9b : ld1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ld1d (%x28,%z30.d,lsl #3)[8byte] %p7/z -> %z27.d +c5ffdfff : ld1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ld1d (%sp,%z31.d,lsl #3)[8byte] %p7/z -> %z31.d # LD1H { .S }, /Z, [, .S, ] (LD1H-Z.P.BZ-S.x32.unscaled) -84804000 : ld1h z0.s, p0/Z, [x0, z0.s, UXTW] : ld1h (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s -84854482 : ld1h z2.s, p1/Z, [x4, z5.s, UXTW] : ld1h (%x4,%z5.s,uxtw)[16byte] %p1/z -> %z2.s -848748c4 : ld1h z4.s, p2/Z, [x6, z7.s, UXTW] : ld1h (%x6,%z7.s,uxtw)[16byte] %p2/z -> %z4.s -84894906 : ld1h z6.s, p2/Z, [x8, z9.s, UXTW] : ld1h (%x8,%z9.s,uxtw)[16byte] %p2/z -> %z6.s -848b4d48 : ld1h z8.s, p3/Z, [x10, z11.s, UXTW] : ld1h (%x10,%z11.s,uxtw)[16byte] %p3/z -> %z8.s -848d4d6a : ld1h z10.s, p3/Z, [x11, z13.s, UXTW] : ld1h (%x11,%z13.s,uxtw)[16byte] %p3/z -> %z10.s -848f51ac : ld1h z12.s, p4/Z, [x13, z15.s, UXTW] : ld1h (%x13,%z15.s,uxtw)[16byte] %p4/z -> %z12.s -849151ee : ld1h z14.s, p4/Z, [x15, z17.s, UXTW] : ld1h (%x15,%z17.s,uxtw)[16byte] %p4/z -> %z14.s -84935630 : ld1h z16.s, p5/Z, 
[x17, z19.s, UXTW] : ld1h (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s -84945671 : ld1h z17.s, p5/Z, [x19, z20.s, UXTW] : ld1h (%x19,%z20.s,uxtw)[16byte] %p5/z -> %z17.s -849656b3 : ld1h z19.s, p5/Z, [x21, z22.s, UXTW] : ld1h (%x21,%z22.s,uxtw)[16byte] %p5/z -> %z19.s -84985af5 : ld1h z21.s, p6/Z, [x23, z24.s, UXTW] : ld1h (%x23,%z24.s,uxtw)[16byte] %p6/z -> %z21.s -849a5b17 : ld1h z23.s, p6/Z, [x24, z26.s, UXTW] : ld1h (%x24,%z26.s,uxtw)[16byte] %p6/z -> %z23.s -849c5f59 : ld1h z25.s, p7/Z, [x26, z28.s, UXTW] : ld1h (%x26,%z28.s,uxtw)[16byte] %p7/z -> %z25.s -849e5f9b : ld1h z27.s, p7/Z, [x28, z30.s, UXTW] : ld1h (%x28,%z30.s,uxtw)[16byte] %p7/z -> %z27.s -849f5fff : ld1h z31.s, p7/Z, [sp, z31.s, UXTW] : ld1h (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s -84c04000 : ld1h z0.s, p0/Z, [x0, z0.s, SXTW] : ld1h (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s -84c54482 : ld1h z2.s, p1/Z, [x4, z5.s, SXTW] : ld1h (%x4,%z5.s,sxtw)[16byte] %p1/z -> %z2.s -84c748c4 : ld1h z4.s, p2/Z, [x6, z7.s, SXTW] : ld1h (%x6,%z7.s,sxtw)[16byte] %p2/z -> %z4.s -84c94906 : ld1h z6.s, p2/Z, [x8, z9.s, SXTW] : ld1h (%x8,%z9.s,sxtw)[16byte] %p2/z -> %z6.s -84cb4d48 : ld1h z8.s, p3/Z, [x10, z11.s, SXTW] : ld1h (%x10,%z11.s,sxtw)[16byte] %p3/z -> %z8.s -84cd4d6a : ld1h z10.s, p3/Z, [x11, z13.s, SXTW] : ld1h (%x11,%z13.s,sxtw)[16byte] %p3/z -> %z10.s -84cf51ac : ld1h z12.s, p4/Z, [x13, z15.s, SXTW] : ld1h (%x13,%z15.s,sxtw)[16byte] %p4/z -> %z12.s -84d151ee : ld1h z14.s, p4/Z, [x15, z17.s, SXTW] : ld1h (%x15,%z17.s,sxtw)[16byte] %p4/z -> %z14.s -84d35630 : ld1h z16.s, p5/Z, [x17, z19.s, SXTW] : ld1h (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s -84d45671 : ld1h z17.s, p5/Z, [x19, z20.s, SXTW] : ld1h (%x19,%z20.s,sxtw)[16byte] %p5/z -> %z17.s -84d656b3 : ld1h z19.s, p5/Z, [x21, z22.s, SXTW] : ld1h (%x21,%z22.s,sxtw)[16byte] %p5/z -> %z19.s -84d85af5 : ld1h z21.s, p6/Z, [x23, z24.s, SXTW] : ld1h (%x23,%z24.s,sxtw)[16byte] %p6/z -> %z21.s -84da5b17 : ld1h z23.s, p6/Z, [x24, z26.s, SXTW] : ld1h 
(%x24,%z26.s,sxtw)[16byte] %p6/z -> %z23.s -84dc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, SXTW] : ld1h (%x26,%z28.s,sxtw)[16byte] %p7/z -> %z25.s -84de5f9b : ld1h z27.s, p7/Z, [x28, z30.s, SXTW] : ld1h (%x28,%z30.s,sxtw)[16byte] %p7/z -> %z27.s -84df5fff : ld1h z31.s, p7/Z, [sp, z31.s, SXTW] : ld1h (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s +84804000 : ld1h z0.s, p0/Z, [x0, z0.s, UXTW] : ld1h (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s +84854482 : ld1h z2.s, p1/Z, [x4, z5.s, UXTW] : ld1h (%x4,%z5.s,uxtw)[2byte] %p1/z -> %z2.s +848748c4 : ld1h z4.s, p2/Z, [x6, z7.s, UXTW] : ld1h (%x6,%z7.s,uxtw)[2byte] %p2/z -> %z4.s +84894906 : ld1h z6.s, p2/Z, [x8, z9.s, UXTW] : ld1h (%x8,%z9.s,uxtw)[2byte] %p2/z -> %z6.s +848b4d48 : ld1h z8.s, p3/Z, [x10, z11.s, UXTW] : ld1h (%x10,%z11.s,uxtw)[2byte] %p3/z -> %z8.s +848d4d6a : ld1h z10.s, p3/Z, [x11, z13.s, UXTW] : ld1h (%x11,%z13.s,uxtw)[2byte] %p3/z -> %z10.s +848f51ac : ld1h z12.s, p4/Z, [x13, z15.s, UXTW] : ld1h (%x13,%z15.s,uxtw)[2byte] %p4/z -> %z12.s +849151ee : ld1h z14.s, p4/Z, [x15, z17.s, UXTW] : ld1h (%x15,%z17.s,uxtw)[2byte] %p4/z -> %z14.s +84935630 : ld1h z16.s, p5/Z, [x17, z19.s, UXTW] : ld1h (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s +84945671 : ld1h z17.s, p5/Z, [x19, z20.s, UXTW] : ld1h (%x19,%z20.s,uxtw)[2byte] %p5/z -> %z17.s +849656b3 : ld1h z19.s, p5/Z, [x21, z22.s, UXTW] : ld1h (%x21,%z22.s,uxtw)[2byte] %p5/z -> %z19.s +84985af5 : ld1h z21.s, p6/Z, [x23, z24.s, UXTW] : ld1h (%x23,%z24.s,uxtw)[2byte] %p6/z -> %z21.s +849a5b17 : ld1h z23.s, p6/Z, [x24, z26.s, UXTW] : ld1h (%x24,%z26.s,uxtw)[2byte] %p6/z -> %z23.s +849c5f59 : ld1h z25.s, p7/Z, [x26, z28.s, UXTW] : ld1h (%x26,%z28.s,uxtw)[2byte] %p7/z -> %z25.s +849e5f9b : ld1h z27.s, p7/Z, [x28, z30.s, UXTW] : ld1h (%x28,%z30.s,uxtw)[2byte] %p7/z -> %z27.s +849f5fff : ld1h z31.s, p7/Z, [sp, z31.s, UXTW] : ld1h (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s +84c04000 : ld1h z0.s, p0/Z, [x0, z0.s, SXTW] : ld1h (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s +84c54482 : ld1h z2.s, p1/Z, 
[x4, z5.s, SXTW] : ld1h (%x4,%z5.s,sxtw)[2byte] %p1/z -> %z2.s +84c748c4 : ld1h z4.s, p2/Z, [x6, z7.s, SXTW] : ld1h (%x6,%z7.s,sxtw)[2byte] %p2/z -> %z4.s +84c94906 : ld1h z6.s, p2/Z, [x8, z9.s, SXTW] : ld1h (%x8,%z9.s,sxtw)[2byte] %p2/z -> %z6.s +84cb4d48 : ld1h z8.s, p3/Z, [x10, z11.s, SXTW] : ld1h (%x10,%z11.s,sxtw)[2byte] %p3/z -> %z8.s +84cd4d6a : ld1h z10.s, p3/Z, [x11, z13.s, SXTW] : ld1h (%x11,%z13.s,sxtw)[2byte] %p3/z -> %z10.s +84cf51ac : ld1h z12.s, p4/Z, [x13, z15.s, SXTW] : ld1h (%x13,%z15.s,sxtw)[2byte] %p4/z -> %z12.s +84d151ee : ld1h z14.s, p4/Z, [x15, z17.s, SXTW] : ld1h (%x15,%z17.s,sxtw)[2byte] %p4/z -> %z14.s +84d35630 : ld1h z16.s, p5/Z, [x17, z19.s, SXTW] : ld1h (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s +84d45671 : ld1h z17.s, p5/Z, [x19, z20.s, SXTW] : ld1h (%x19,%z20.s,sxtw)[2byte] %p5/z -> %z17.s +84d656b3 : ld1h z19.s, p5/Z, [x21, z22.s, SXTW] : ld1h (%x21,%z22.s,sxtw)[2byte] %p5/z -> %z19.s +84d85af5 : ld1h z21.s, p6/Z, [x23, z24.s, SXTW] : ld1h (%x23,%z24.s,sxtw)[2byte] %p6/z -> %z21.s +84da5b17 : ld1h z23.s, p6/Z, [x24, z26.s, SXTW] : ld1h (%x24,%z26.s,sxtw)[2byte] %p6/z -> %z23.s +84dc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, SXTW] : ld1h (%x26,%z28.s,sxtw)[2byte] %p7/z -> %z25.s +84de5f9b : ld1h z27.s, p7/Z, [x28, z30.s, SXTW] : ld1h (%x28,%z30.s,sxtw)[2byte] %p7/z -> %z27.s +84df5fff : ld1h z31.s, p7/Z, [sp, z31.s, SXTW] : ld1h (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s # LD1H { .S }, /Z, [, .S, #1] (LD1H-Z.P.BZ-S.x32.scaled) -84a04000 : ld1h z0.s, p0/Z, [x0, z0.s, UXTW #1] : ld1h (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s -84a54482 : ld1h z2.s, p1/Z, [x4, z5.s, UXTW #1] : ld1h (%x4,%z5.s,uxtw #1)[16byte] %p1/z -> %z2.s -84a748c4 : ld1h z4.s, p2/Z, [x6, z7.s, UXTW #1] : ld1h (%x6,%z7.s,uxtw #1)[16byte] %p2/z -> %z4.s -84a94906 : ld1h z6.s, p2/Z, [x8, z9.s, UXTW #1] : ld1h (%x8,%z9.s,uxtw #1)[16byte] %p2/z -> %z6.s -84ab4d48 : ld1h z8.s, p3/Z, [x10, z11.s, UXTW #1] : ld1h (%x10,%z11.s,uxtw #1)[16byte] %p3/z -> %z8.s -84ad4d6a : ld1h z10.s, 
p3/Z, [x11, z13.s, UXTW #1] : ld1h (%x11,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s -84af51ac : ld1h z12.s, p4/Z, [x13, z15.s, UXTW #1] : ld1h (%x13,%z15.s,uxtw #1)[16byte] %p4/z -> %z12.s -84b151ee : ld1h z14.s, p4/Z, [x15, z17.s, UXTW #1] : ld1h (%x15,%z17.s,uxtw #1)[16byte] %p4/z -> %z14.s -84b35630 : ld1h z16.s, p5/Z, [x17, z19.s, UXTW #1] : ld1h (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s -84b45671 : ld1h z17.s, p5/Z, [x19, z20.s, UXTW #1] : ld1h (%x19,%z20.s,uxtw #1)[16byte] %p5/z -> %z17.s -84b656b3 : ld1h z19.s, p5/Z, [x21, z22.s, UXTW #1] : ld1h (%x21,%z22.s,uxtw #1)[16byte] %p5/z -> %z19.s -84b85af5 : ld1h z21.s, p6/Z, [x23, z24.s, UXTW #1] : ld1h (%x23,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s -84ba5b17 : ld1h z23.s, p6/Z, [x24, z26.s, UXTW #1] : ld1h (%x24,%z26.s,uxtw #1)[16byte] %p6/z -> %z23.s -84bc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, UXTW #1] : ld1h (%x26,%z28.s,uxtw #1)[16byte] %p7/z -> %z25.s -84be5f9b : ld1h z27.s, p7/Z, [x28, z30.s, UXTW #1] : ld1h (%x28,%z30.s,uxtw #1)[16byte] %p7/z -> %z27.s -84bf5fff : ld1h z31.s, p7/Z, [sp, z31.s, UXTW #1] : ld1h (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s -84e04000 : ld1h z0.s, p0/Z, [x0, z0.s, SXTW #1] : ld1h (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s -84e54482 : ld1h z2.s, p1/Z, [x4, z5.s, SXTW #1] : ld1h (%x4,%z5.s,sxtw #1)[16byte] %p1/z -> %z2.s -84e748c4 : ld1h z4.s, p2/Z, [x6, z7.s, SXTW #1] : ld1h (%x6,%z7.s,sxtw #1)[16byte] %p2/z -> %z4.s -84e94906 : ld1h z6.s, p2/Z, [x8, z9.s, SXTW #1] : ld1h (%x8,%z9.s,sxtw #1)[16byte] %p2/z -> %z6.s -84eb4d48 : ld1h z8.s, p3/Z, [x10, z11.s, SXTW #1] : ld1h (%x10,%z11.s,sxtw #1)[16byte] %p3/z -> %z8.s -84ed4d6a : ld1h z10.s, p3/Z, [x11, z13.s, SXTW #1] : ld1h (%x11,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s -84ef51ac : ld1h z12.s, p4/Z, [x13, z15.s, SXTW #1] : ld1h (%x13,%z15.s,sxtw #1)[16byte] %p4/z -> %z12.s -84f151ee : ld1h z14.s, p4/Z, [x15, z17.s, SXTW #1] : ld1h (%x15,%z17.s,sxtw #1)[16byte] %p4/z -> %z14.s -84f35630 : ld1h z16.s, p5/Z, [x17, z19.s, SXTW #1] : 
ld1h (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s -84f45671 : ld1h z17.s, p5/Z, [x19, z20.s, SXTW #1] : ld1h (%x19,%z20.s,sxtw #1)[16byte] %p5/z -> %z17.s -84f656b3 : ld1h z19.s, p5/Z, [x21, z22.s, SXTW #1] : ld1h (%x21,%z22.s,sxtw #1)[16byte] %p5/z -> %z19.s -84f85af5 : ld1h z21.s, p6/Z, [x23, z24.s, SXTW #1] : ld1h (%x23,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s -84fa5b17 : ld1h z23.s, p6/Z, [x24, z26.s, SXTW #1] : ld1h (%x24,%z26.s,sxtw #1)[16byte] %p6/z -> %z23.s -84fc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, SXTW #1] : ld1h (%x26,%z28.s,sxtw #1)[16byte] %p7/z -> %z25.s -84fe5f9b : ld1h z27.s, p7/Z, [x28, z30.s, SXTW #1] : ld1h (%x28,%z30.s,sxtw #1)[16byte] %p7/z -> %z27.s -84ff5fff : ld1h z31.s, p7/Z, [sp, z31.s, SXTW #1] : ld1h (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s +84a04000 : ld1h z0.s, p0/Z, [x0, z0.s, UXTW #1] : ld1h (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s +84a54482 : ld1h z2.s, p1/Z, [x4, z5.s, UXTW #1] : ld1h (%x4,%z5.s,uxtw #1)[2byte] %p1/z -> %z2.s +84a748c4 : ld1h z4.s, p2/Z, [x6, z7.s, UXTW #1] : ld1h (%x6,%z7.s,uxtw #1)[2byte] %p2/z -> %z4.s +84a94906 : ld1h z6.s, p2/Z, [x8, z9.s, UXTW #1] : ld1h (%x8,%z9.s,uxtw #1)[2byte] %p2/z -> %z6.s +84ab4d48 : ld1h z8.s, p3/Z, [x10, z11.s, UXTW #1] : ld1h (%x10,%z11.s,uxtw #1)[2byte] %p3/z -> %z8.s +84ad4d6a : ld1h z10.s, p3/Z, [x11, z13.s, UXTW #1] : ld1h (%x11,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s +84af51ac : ld1h z12.s, p4/Z, [x13, z15.s, UXTW #1] : ld1h (%x13,%z15.s,uxtw #1)[2byte] %p4/z -> %z12.s +84b151ee : ld1h z14.s, p4/Z, [x15, z17.s, UXTW #1] : ld1h (%x15,%z17.s,uxtw #1)[2byte] %p4/z -> %z14.s +84b35630 : ld1h z16.s, p5/Z, [x17, z19.s, UXTW #1] : ld1h (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s +84b45671 : ld1h z17.s, p5/Z, [x19, z20.s, UXTW #1] : ld1h (%x19,%z20.s,uxtw #1)[2byte] %p5/z -> %z17.s +84b656b3 : ld1h z19.s, p5/Z, [x21, z22.s, UXTW #1] : ld1h (%x21,%z22.s,uxtw #1)[2byte] %p5/z -> %z19.s +84b85af5 : ld1h z21.s, p6/Z, [x23, z24.s, UXTW #1] : ld1h (%x23,%z24.s,uxtw #1)[2byte] %p6/z -> 
%z21.s +84ba5b17 : ld1h z23.s, p6/Z, [x24, z26.s, UXTW #1] : ld1h (%x24,%z26.s,uxtw #1)[2byte] %p6/z -> %z23.s +84bc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, UXTW #1] : ld1h (%x26,%z28.s,uxtw #1)[2byte] %p7/z -> %z25.s +84be5f9b : ld1h z27.s, p7/Z, [x28, z30.s, UXTW #1] : ld1h (%x28,%z30.s,uxtw #1)[2byte] %p7/z -> %z27.s +84bf5fff : ld1h z31.s, p7/Z, [sp, z31.s, UXTW #1] : ld1h (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s +84e04000 : ld1h z0.s, p0/Z, [x0, z0.s, SXTW #1] : ld1h (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s +84e54482 : ld1h z2.s, p1/Z, [x4, z5.s, SXTW #1] : ld1h (%x4,%z5.s,sxtw #1)[2byte] %p1/z -> %z2.s +84e748c4 : ld1h z4.s, p2/Z, [x6, z7.s, SXTW #1] : ld1h (%x6,%z7.s,sxtw #1)[2byte] %p2/z -> %z4.s +84e94906 : ld1h z6.s, p2/Z, [x8, z9.s, SXTW #1] : ld1h (%x8,%z9.s,sxtw #1)[2byte] %p2/z -> %z6.s +84eb4d48 : ld1h z8.s, p3/Z, [x10, z11.s, SXTW #1] : ld1h (%x10,%z11.s,sxtw #1)[2byte] %p3/z -> %z8.s +84ed4d6a : ld1h z10.s, p3/Z, [x11, z13.s, SXTW #1] : ld1h (%x11,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s +84ef51ac : ld1h z12.s, p4/Z, [x13, z15.s, SXTW #1] : ld1h (%x13,%z15.s,sxtw #1)[2byte] %p4/z -> %z12.s +84f151ee : ld1h z14.s, p4/Z, [x15, z17.s, SXTW #1] : ld1h (%x15,%z17.s,sxtw #1)[2byte] %p4/z -> %z14.s +84f35630 : ld1h z16.s, p5/Z, [x17, z19.s, SXTW #1] : ld1h (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s +84f45671 : ld1h z17.s, p5/Z, [x19, z20.s, SXTW #1] : ld1h (%x19,%z20.s,sxtw #1)[2byte] %p5/z -> %z17.s +84f656b3 : ld1h z19.s, p5/Z, [x21, z22.s, SXTW #1] : ld1h (%x21,%z22.s,sxtw #1)[2byte] %p5/z -> %z19.s +84f85af5 : ld1h z21.s, p6/Z, [x23, z24.s, SXTW #1] : ld1h (%x23,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s +84fa5b17 : ld1h z23.s, p6/Z, [x24, z26.s, SXTW #1] : ld1h (%x24,%z26.s,sxtw #1)[2byte] %p6/z -> %z23.s +84fc5f59 : ld1h z25.s, p7/Z, [x26, z28.s, SXTW #1] : ld1h (%x26,%z28.s,sxtw #1)[2byte] %p7/z -> %z25.s +84fe5f9b : ld1h z27.s, p7/Z, [x28, z30.s, SXTW #1] : ld1h (%x28,%z30.s,sxtw #1)[2byte] %p7/z -> %z27.s +84ff5fff : ld1h z31.s, p7/Z, [sp, z31.s, 
SXTW #1] : ld1h (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s # LD1H { .S }, /Z, [.S{, #}] (LD1H-Z.P.AI-S) -84a0c000 : ld1h z0.s, p0/Z, [z0.s, #0] : ld1h (%z0.s)[16byte] %p0/z -> %z0.s -84a2c482 : ld1h z2.s, p1/Z, [z4.s, #4] : ld1h +0x04(%z4.s)[16byte] %p1/z -> %z2.s -84a4c8c4 : ld1h z4.s, p2/Z, [z6.s, #8] : ld1h +0x08(%z6.s)[16byte] %p2/z -> %z4.s -84a6c906 : ld1h z6.s, p2/Z, [z8.s, #12] : ld1h +0x0c(%z8.s)[16byte] %p2/z -> %z6.s -84a8cd48 : ld1h z8.s, p3/Z, [z10.s, #16] : ld1h +0x10(%z10.s)[16byte] %p3/z -> %z8.s -84aacd8a : ld1h z10.s, p3/Z, [z12.s, #20] : ld1h +0x14(%z12.s)[16byte] %p3/z -> %z10.s -84acd1cc : ld1h z12.s, p4/Z, [z14.s, #24] : ld1h +0x18(%z14.s)[16byte] %p4/z -> %z12.s -84aed20e : ld1h z14.s, p4/Z, [z16.s, #28] : ld1h +0x1c(%z16.s)[16byte] %p4/z -> %z14.s -84b0d650 : ld1h z16.s, p5/Z, [z18.s, #32] : ld1h +0x20(%z18.s)[16byte] %p5/z -> %z16.s -84b1d671 : ld1h z17.s, p5/Z, [z19.s, #34] : ld1h +0x22(%z19.s)[16byte] %p5/z -> %z17.s -84b3d6b3 : ld1h z19.s, p5/Z, [z21.s, #38] : ld1h +0x26(%z21.s)[16byte] %p5/z -> %z19.s -84b5daf5 : ld1h z21.s, p6/Z, [z23.s, #42] : ld1h +0x2a(%z23.s)[16byte] %p6/z -> %z21.s -84b7db37 : ld1h z23.s, p6/Z, [z25.s, #46] : ld1h +0x2e(%z25.s)[16byte] %p6/z -> %z23.s -84b9df79 : ld1h z25.s, p7/Z, [z27.s, #50] : ld1h +0x32(%z27.s)[16byte] %p7/z -> %z25.s -84bbdfbb : ld1h z27.s, p7/Z, [z29.s, #54] : ld1h +0x36(%z29.s)[16byte] %p7/z -> %z27.s -84bfdfff : ld1h z31.s, p7/Z, [z31.s, #62] : ld1h +0x3e(%z31.s)[16byte] %p7/z -> %z31.s +84a0c000 : ld1h z0.s, p0/Z, [z0.s, #0] : ld1h (%z0.s)[2byte] %p0/z -> %z0.s +84a2c482 : ld1h z2.s, p1/Z, [z4.s, #4] : ld1h +0x04(%z4.s)[2byte] %p1/z -> %z2.s +84a4c8c4 : ld1h z4.s, p2/Z, [z6.s, #8] : ld1h +0x08(%z6.s)[2byte] %p2/z -> %z4.s +84a6c906 : ld1h z6.s, p2/Z, [z8.s, #12] : ld1h +0x0c(%z8.s)[2byte] %p2/z -> %z6.s +84a8cd48 : ld1h z8.s, p3/Z, [z10.s, #16] : ld1h +0x10(%z10.s)[2byte] %p3/z -> %z8.s +84aacd8a : ld1h z10.s, p3/Z, [z12.s, #20] : ld1h +0x14(%z12.s)[2byte] %p3/z -> %z10.s +84acd1cc : ld1h 
z12.s, p4/Z, [z14.s, #24] : ld1h +0x18(%z14.s)[2byte] %p4/z -> %z12.s +84aed20e : ld1h z14.s, p4/Z, [z16.s, #28] : ld1h +0x1c(%z16.s)[2byte] %p4/z -> %z14.s +84b0d650 : ld1h z16.s, p5/Z, [z18.s, #32] : ld1h +0x20(%z18.s)[2byte] %p5/z -> %z16.s +84b1d671 : ld1h z17.s, p5/Z, [z19.s, #34] : ld1h +0x22(%z19.s)[2byte] %p5/z -> %z17.s +84b3d6b3 : ld1h z19.s, p5/Z, [z21.s, #38] : ld1h +0x26(%z21.s)[2byte] %p5/z -> %z19.s +84b5daf5 : ld1h z21.s, p6/Z, [z23.s, #42] : ld1h +0x2a(%z23.s)[2byte] %p6/z -> %z21.s +84b7db37 : ld1h z23.s, p6/Z, [z25.s, #46] : ld1h +0x2e(%z25.s)[2byte] %p6/z -> %z23.s +84b9df79 : ld1h z25.s, p7/Z, [z27.s, #50] : ld1h +0x32(%z27.s)[2byte] %p7/z -> %z25.s +84bbdfbb : ld1h z27.s, p7/Z, [z29.s, #54] : ld1h +0x36(%z29.s)[2byte] %p7/z -> %z27.s +84bfdfff : ld1h z31.s, p7/Z, [z31.s, #62] : ld1h +0x3e(%z31.s)[2byte] %p7/z -> %z31.s # LD1H { .H }, /Z, [, , LSL #1] (LD1H-Z.P.BR-U16) -a4a04000 : ld1h z0.h, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h -a4a54482 : ld1h z2.h, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[32byte] %p1/z -> %z2.h -a4a748c4 : ld1h z4.h, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[32byte] %p2/z -> %z4.h -a4a94906 : ld1h z6.h, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[32byte] %p2/z -> %z6.h -a4ab4d48 : ld1h z8.h, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[32byte] %p3/z -> %z8.h -a4ac4d6a : ld1h z10.h, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[32byte] %p3/z -> %z10.h -a4ae51ac : ld1h z12.h, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[32byte] %p4/z -> %z12.h -a4b051ee : ld1h z14.h, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[32byte] %p4/z -> %z14.h -a4b25630 : ld1h z16.h, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h -a4b45671 : ld1h z17.h, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[32byte] %p5/z -> %z17.h -a4b656b3 : ld1h z19.h, p5/Z, [x21, x22, LSL #1] : ld1h (%x21,%x22,lsl #1)[32byte] %p5/z -> %z19.h -a4b85af5 : ld1h z21.h, p6/Z, 
[x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[32byte] %p6/z -> %z21.h -a4b95b17 : ld1h z23.h, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[32byte] %p6/z -> %z23.h -a4bb5f59 : ld1h z25.h, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[32byte] %p7/z -> %z25.h -a4bd5f9b : ld1h z27.h, p7/Z, [x28, x29, LSL #1] : ld1h (%x28,%x29,lsl #1)[32byte] %p7/z -> %z27.h -a4be5fff : ld1h z31.h, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h +a4a04000 : ld1h z0.h, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h +a4a54482 : ld1h z2.h, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h +a4a748c4 : ld1h z4.h, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h +a4a94906 : ld1h z6.h, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h +a4ab4d48 : ld1h z8.h, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h +a4ac4d6a : ld1h z10.h, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h +a4ae51ac : ld1h z12.h, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h +a4b051ee : ld1h z14.h, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h +a4b25630 : ld1h z16.h, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h +a4b45671 : ld1h z17.h, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h +a4b656b3 : ld1h z19.h, p5/Z, [x21, x22, LSL #1] : ld1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h +a4b85af5 : ld1h z21.h, p6/Z, [x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h +a4b95b17 : ld1h z23.h, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h +a4bb5f59 : ld1h z25.h, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h +a4bd5f9b : ld1h z27.h, p7/Z, [x28, x29, LSL #1] : ld1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h +a4be5fff : ld1h z31.h, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> 
%z31.h # LD1H { .H }, /Z, [{, #, MUL VL}] (LD1H-Z.P.BI-U16) -a4a8a000 : ld1h z0.h, p0/Z, [x0, #-8, MUL VL] : ld1h -0x0100(%x0)[32byte] %p0/z -> %z0.h -a4a9a482 : ld1h z2.h, p1/Z, [x4, #-7, MUL VL] : ld1h -0xe0(%x4)[32byte] %p1/z -> %z2.h -a4aaa8c4 : ld1h z4.h, p2/Z, [x6, #-6, MUL VL] : ld1h -0xc0(%x6)[32byte] %p2/z -> %z4.h -a4aba906 : ld1h z6.h, p2/Z, [x8, #-5, MUL VL] : ld1h -0xa0(%x8)[32byte] %p2/z -> %z6.h -a4acad48 : ld1h z8.h, p3/Z, [x10, #-4, MUL VL] : ld1h -0x80(%x10)[32byte] %p3/z -> %z8.h -a4adad6a : ld1h z10.h, p3/Z, [x11, #-3, MUL VL] : ld1h -0x60(%x11)[32byte] %p3/z -> %z10.h -a4aeb1ac : ld1h z12.h, p4/Z, [x13, #-2, MUL VL] : ld1h -0x40(%x13)[32byte] %p4/z -> %z12.h -a4afb1ee : ld1h z14.h, p4/Z, [x15, #-1, MUL VL] : ld1h -0x20(%x15)[32byte] %p4/z -> %z14.h -a4a0b630 : ld1h z16.h, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[32byte] %p5/z -> %z16.h -a4a0b671 : ld1h z17.h, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[32byte] %p5/z -> %z17.h -a4a1b6b3 : ld1h z19.h, p5/Z, [x21, #1, MUL VL] : ld1h +0x20(%x21)[32byte] %p5/z -> %z19.h -a4a2baf5 : ld1h z21.h, p6/Z, [x23, #2, MUL VL] : ld1h +0x40(%x23)[32byte] %p6/z -> %z21.h -a4a3bb17 : ld1h z23.h, p6/Z, [x24, #3, MUL VL] : ld1h +0x60(%x24)[32byte] %p6/z -> %z23.h -a4a4bf59 : ld1h z25.h, p7/Z, [x26, #4, MUL VL] : ld1h +0x80(%x26)[32byte] %p7/z -> %z25.h -a4a5bf9b : ld1h z27.h, p7/Z, [x28, #5, MUL VL] : ld1h +0xa0(%x28)[32byte] %p7/z -> %z27.h -a4a7bfff : ld1h z31.h, p7/Z, [sp, #7, MUL VL] : ld1h +0xe0(%sp)[32byte] %p7/z -> %z31.h +a4a8a000 : ld1h z0.h, p0/Z, [x0, #-8, MUL VL] : ld1h -0x0100(%x0)[2byte] %p0/z -> %z0.h +a4a9a482 : ld1h z2.h, p1/Z, [x4, #-7, MUL VL] : ld1h -0xe0(%x4)[2byte] %p1/z -> %z2.h +a4aaa8c4 : ld1h z4.h, p2/Z, [x6, #-6, MUL VL] : ld1h -0xc0(%x6)[2byte] %p2/z -> %z4.h +a4aba906 : ld1h z6.h, p2/Z, [x8, #-5, MUL VL] : ld1h -0xa0(%x8)[2byte] %p2/z -> %z6.h +a4acad48 : ld1h z8.h, p3/Z, [x10, #-4, MUL VL] : ld1h -0x80(%x10)[2byte] %p3/z -> %z8.h +a4adad6a : ld1h z10.h, p3/Z, [x11, #-3, MUL VL] : ld1h 
-0x60(%x11)[2byte] %p3/z -> %z10.h +a4aeb1ac : ld1h z12.h, p4/Z, [x13, #-2, MUL VL] : ld1h -0x40(%x13)[2byte] %p4/z -> %z12.h +a4afb1ee : ld1h z14.h, p4/Z, [x15, #-1, MUL VL] : ld1h -0x20(%x15)[2byte] %p4/z -> %z14.h +a4a0b630 : ld1h z16.h, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[2byte] %p5/z -> %z16.h +a4a0b671 : ld1h z17.h, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[2byte] %p5/z -> %z17.h +a4a1b6b3 : ld1h z19.h, p5/Z, [x21, #1, MUL VL] : ld1h +0x20(%x21)[2byte] %p5/z -> %z19.h +a4a2baf5 : ld1h z21.h, p6/Z, [x23, #2, MUL VL] : ld1h +0x40(%x23)[2byte] %p6/z -> %z21.h +a4a3bb17 : ld1h z23.h, p6/Z, [x24, #3, MUL VL] : ld1h +0x60(%x24)[2byte] %p6/z -> %z23.h +a4a4bf59 : ld1h z25.h, p7/Z, [x26, #4, MUL VL] : ld1h +0x80(%x26)[2byte] %p7/z -> %z25.h +a4a5bf9b : ld1h z27.h, p7/Z, [x28, #5, MUL VL] : ld1h +0xa0(%x28)[2byte] %p7/z -> %z27.h +a4a7bfff : ld1h z31.h, p7/Z, [sp, #7, MUL VL] : ld1h +0xe0(%sp)[2byte] %p7/z -> %z31.h # LD1H { .S }, /Z, [, , LSL #1] (LD1H-Z.P.BR-U32) -a4c04000 : ld1h z0.s, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s -a4c54482 : ld1h z2.s, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s -a4c748c4 : ld1h z4.s, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.s -a4c94906 : ld1h z6.s, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s -a4cb4d48 : ld1h z8.s, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s -a4cc4d6a : ld1h z10.s, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s -a4ce51ac : ld1h z12.s, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s -a4d051ee : ld1h z14.s, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s -a4d25630 : ld1h z16.s, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s -a4d45671 : ld1h z17.s, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s -a4d656b3 : ld1h z19.s, p5/Z, [x21, x22, LSL #1] : ld1h 
(%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s -a4d85af5 : ld1h z21.s, p6/Z, [x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s -a4d95b17 : ld1h z23.s, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s -a4db5f59 : ld1h z25.s, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.s -a4dd5f9b : ld1h z27.s, p7/Z, [x28, x29, LSL #1] : ld1h (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s -a4de5fff : ld1h z31.s, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s +a4c04000 : ld1h z0.s, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s +a4c54482 : ld1h z2.s, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.s +a4c748c4 : ld1h z4.s, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.s +a4c94906 : ld1h z6.s, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.s +a4cb4d48 : ld1h z8.s, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.s +a4cc4d6a : ld1h z10.s, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.s +a4ce51ac : ld1h z12.s, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.s +a4d051ee : ld1h z14.s, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.s +a4d25630 : ld1h z16.s, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s +a4d45671 : ld1h z17.s, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.s +a4d656b3 : ld1h z19.s, p5/Z, [x21, x22, LSL #1] : ld1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.s +a4d85af5 : ld1h z21.s, p6/Z, [x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.s +a4d95b17 : ld1h z23.s, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.s +a4db5f59 : ld1h z25.s, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.s +a4dd5f9b : ld1h z27.s, p7/Z, [x28, x29, LSL #1] : ld1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.s +a4de5fff : ld1h 
z31.s, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s # LD1H { .S }, /Z, [{, #, MUL VL}] (LD1H-Z.P.BI-U32) -a4c8a000 : ld1h z0.s, p0/Z, [x0, #-8, MUL VL] : ld1h -0x80(%x0)[16byte] %p0/z -> %z0.s -a4c9a482 : ld1h z2.s, p1/Z, [x4, #-7, MUL VL] : ld1h -0x70(%x4)[16byte] %p1/z -> %z2.s -a4caa8c4 : ld1h z4.s, p2/Z, [x6, #-6, MUL VL] : ld1h -0x60(%x6)[16byte] %p2/z -> %z4.s -a4cba906 : ld1h z6.s, p2/Z, [x8, #-5, MUL VL] : ld1h -0x50(%x8)[16byte] %p2/z -> %z6.s -a4ccad48 : ld1h z8.s, p3/Z, [x10, #-4, MUL VL] : ld1h -0x40(%x10)[16byte] %p3/z -> %z8.s -a4cdad6a : ld1h z10.s, p3/Z, [x11, #-3, MUL VL] : ld1h -0x30(%x11)[16byte] %p3/z -> %z10.s -a4ceb1ac : ld1h z12.s, p4/Z, [x13, #-2, MUL VL] : ld1h -0x20(%x13)[16byte] %p4/z -> %z12.s -a4cfb1ee : ld1h z14.s, p4/Z, [x15, #-1, MUL VL] : ld1h -0x10(%x15)[16byte] %p4/z -> %z14.s -a4c0b630 : ld1h z16.s, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[16byte] %p5/z -> %z16.s -a4c0b671 : ld1h z17.s, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[16byte] %p5/z -> %z17.s -a4c1b6b3 : ld1h z19.s, p5/Z, [x21, #1, MUL VL] : ld1h +0x10(%x21)[16byte] %p5/z -> %z19.s -a4c2baf5 : ld1h z21.s, p6/Z, [x23, #2, MUL VL] : ld1h +0x20(%x23)[16byte] %p6/z -> %z21.s -a4c3bb17 : ld1h z23.s, p6/Z, [x24, #3, MUL VL] : ld1h +0x30(%x24)[16byte] %p6/z -> %z23.s -a4c4bf59 : ld1h z25.s, p7/Z, [x26, #4, MUL VL] : ld1h +0x40(%x26)[16byte] %p7/z -> %z25.s -a4c5bf9b : ld1h z27.s, p7/Z, [x28, #5, MUL VL] : ld1h +0x50(%x28)[16byte] %p7/z -> %z27.s -a4c7bfff : ld1h z31.s, p7/Z, [sp, #7, MUL VL] : ld1h +0x70(%sp)[16byte] %p7/z -> %z31.s +a4c8a000 : ld1h z0.s, p0/Z, [x0, #-8, MUL VL] : ld1h -0x80(%x0)[2byte] %p0/z -> %z0.s +a4c9a482 : ld1h z2.s, p1/Z, [x4, #-7, MUL VL] : ld1h -0x70(%x4)[2byte] %p1/z -> %z2.s +a4caa8c4 : ld1h z4.s, p2/Z, [x6, #-6, MUL VL] : ld1h -0x60(%x6)[2byte] %p2/z -> %z4.s +a4cba906 : ld1h z6.s, p2/Z, [x8, #-5, MUL VL] : ld1h -0x50(%x8)[2byte] %p2/z -> %z6.s +a4ccad48 : ld1h z8.s, p3/Z, [x10, #-4, MUL VL] : ld1h -0x40(%x10)[2byte] %p3/z -> 
%z8.s +a4cdad6a : ld1h z10.s, p3/Z, [x11, #-3, MUL VL] : ld1h -0x30(%x11)[2byte] %p3/z -> %z10.s +a4ceb1ac : ld1h z12.s, p4/Z, [x13, #-2, MUL VL] : ld1h -0x20(%x13)[2byte] %p4/z -> %z12.s +a4cfb1ee : ld1h z14.s, p4/Z, [x15, #-1, MUL VL] : ld1h -0x10(%x15)[2byte] %p4/z -> %z14.s +a4c0b630 : ld1h z16.s, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[2byte] %p5/z -> %z16.s +a4c0b671 : ld1h z17.s, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[2byte] %p5/z -> %z17.s +a4c1b6b3 : ld1h z19.s, p5/Z, [x21, #1, MUL VL] : ld1h +0x10(%x21)[2byte] %p5/z -> %z19.s +a4c2baf5 : ld1h z21.s, p6/Z, [x23, #2, MUL VL] : ld1h +0x20(%x23)[2byte] %p6/z -> %z21.s +a4c3bb17 : ld1h z23.s, p6/Z, [x24, #3, MUL VL] : ld1h +0x30(%x24)[2byte] %p6/z -> %z23.s +a4c4bf59 : ld1h z25.s, p7/Z, [x26, #4, MUL VL] : ld1h +0x40(%x26)[2byte] %p7/z -> %z25.s +a4c5bf9b : ld1h z27.s, p7/Z, [x28, #5, MUL VL] : ld1h +0x50(%x28)[2byte] %p7/z -> %z27.s +a4c7bfff : ld1h z31.s, p7/Z, [sp, #7, MUL VL] : ld1h +0x70(%sp)[2byte] %p7/z -> %z31.s # LD1H { .D }, /Z, [, , LSL #1] (LD1H-Z.P.BR-U64) -a4e04000 : ld1h z0.d, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d -a4e54482 : ld1h z2.d, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d -a4e748c4 : ld1h z4.d, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d -a4e94906 : ld1h z6.d, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d -a4eb4d48 : ld1h z8.d, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d -a4ec4d6a : ld1h z10.d, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d -a4ee51ac : ld1h z12.d, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[8byte] %p4/z -> %z12.d -a4f051ee : ld1h z14.d, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d -a4f25630 : ld1h z16.d, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d -a4f45671 : ld1h z17.d, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d 
-a4f656b3 : ld1h z19.d, p5/Z, [x21, x22, LSL #1] : ld1h (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d -a4f85af5 : ld1h z21.d, p6/Z, [x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d -a4f95b17 : ld1h z23.d, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d -a4fb5f59 : ld1h z25.d, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d -a4fd5f9b : ld1h z27.d, p7/Z, [x28, x29, LSL #1] : ld1h (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d -a4fe5fff : ld1h z31.d, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d +a4e04000 : ld1h z0.d, p0/Z, [x0, x0, LSL #1] : ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d +a4e54482 : ld1h z2.d, p1/Z, [x4, x5, LSL #1] : ld1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.d +a4e748c4 : ld1h z4.d, p2/Z, [x6, x7, LSL #1] : ld1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.d +a4e94906 : ld1h z6.d, p2/Z, [x8, x9, LSL #1] : ld1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.d +a4eb4d48 : ld1h z8.d, p3/Z, [x10, x11, LSL #1] : ld1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.d +a4ec4d6a : ld1h z10.d, p3/Z, [x11, x12, LSL #1] : ld1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.d +a4ee51ac : ld1h z12.d, p4/Z, [x13, x14, LSL #1] : ld1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.d +a4f051ee : ld1h z14.d, p4/Z, [x15, x16, LSL #1] : ld1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.d +a4f25630 : ld1h z16.d, p5/Z, [x17, x18, LSL #1] : ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d +a4f45671 : ld1h z17.d, p5/Z, [x19, x20, LSL #1] : ld1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.d +a4f656b3 : ld1h z19.d, p5/Z, [x21, x22, LSL #1] : ld1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.d +a4f85af5 : ld1h z21.d, p6/Z, [x23, x24, LSL #1] : ld1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.d +a4f95b17 : ld1h z23.d, p6/Z, [x24, x25, LSL #1] : ld1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.d +a4fb5f59 : ld1h z25.d, p7/Z, [x26, x27, LSL #1] : ld1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.d +a4fd5f9b : ld1h z27.d, p7/Z, [x28, x29, LSL #1] : ld1h 
(%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.d +a4fe5fff : ld1h z31.d, p7/Z, [sp, x30, LSL #1] : ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [{, #, MUL VL}] (LD1H-Z.P.BI-U64) -a4e8a000 : ld1h z0.d, p0/Z, [x0, #-8, MUL VL] : ld1h -0x40(%x0)[8byte] %p0/z -> %z0.d -a4e9a482 : ld1h z2.d, p1/Z, [x4, #-7, MUL VL] : ld1h -0x38(%x4)[8byte] %p1/z -> %z2.d -a4eaa8c4 : ld1h z4.d, p2/Z, [x6, #-6, MUL VL] : ld1h -0x30(%x6)[8byte] %p2/z -> %z4.d -a4eba906 : ld1h z6.d, p2/Z, [x8, #-5, MUL VL] : ld1h -0x28(%x8)[8byte] %p2/z -> %z6.d -a4ecad48 : ld1h z8.d, p3/Z, [x10, #-4, MUL VL] : ld1h -0x20(%x10)[8byte] %p3/z -> %z8.d -a4edad6a : ld1h z10.d, p3/Z, [x11, #-3, MUL VL] : ld1h -0x18(%x11)[8byte] %p3/z -> %z10.d -a4eeb1ac : ld1h z12.d, p4/Z, [x13, #-2, MUL VL] : ld1h -0x10(%x13)[8byte] %p4/z -> %z12.d -a4efb1ee : ld1h z14.d, p4/Z, [x15, #-1, MUL VL] : ld1h -0x08(%x15)[8byte] %p4/z -> %z14.d -a4e0b630 : ld1h z16.d, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[8byte] %p5/z -> %z16.d -a4e0b671 : ld1h z17.d, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[8byte] %p5/z -> %z17.d -a4e1b6b3 : ld1h z19.d, p5/Z, [x21, #1, MUL VL] : ld1h +0x08(%x21)[8byte] %p5/z -> %z19.d -a4e2baf5 : ld1h z21.d, p6/Z, [x23, #2, MUL VL] : ld1h +0x10(%x23)[8byte] %p6/z -> %z21.d -a4e3bb17 : ld1h z23.d, p6/Z, [x24, #3, MUL VL] : ld1h +0x18(%x24)[8byte] %p6/z -> %z23.d -a4e4bf59 : ld1h z25.d, p7/Z, [x26, #4, MUL VL] : ld1h +0x20(%x26)[8byte] %p7/z -> %z25.d -a4e5bf9b : ld1h z27.d, p7/Z, [x28, #5, MUL VL] : ld1h +0x28(%x28)[8byte] %p7/z -> %z27.d -a4e7bfff : ld1h z31.d, p7/Z, [sp, #7, MUL VL] : ld1h +0x38(%sp)[8byte] %p7/z -> %z31.d +a4e8a000 : ld1h z0.d, p0/Z, [x0, #-8, MUL VL] : ld1h -0x40(%x0)[2byte] %p0/z -> %z0.d +a4e9a482 : ld1h z2.d, p1/Z, [x4, #-7, MUL VL] : ld1h -0x38(%x4)[2byte] %p1/z -> %z2.d +a4eaa8c4 : ld1h z4.d, p2/Z, [x6, #-6, MUL VL] : ld1h -0x30(%x6)[2byte] %p2/z -> %z4.d +a4eba906 : ld1h z6.d, p2/Z, [x8, #-5, MUL VL] : ld1h -0x28(%x8)[2byte] %p2/z -> %z6.d +a4ecad48 : ld1h z8.d, p3/Z, [x10, #-4, 
MUL VL] : ld1h -0x20(%x10)[2byte] %p3/z -> %z8.d +a4edad6a : ld1h z10.d, p3/Z, [x11, #-3, MUL VL] : ld1h -0x18(%x11)[2byte] %p3/z -> %z10.d +a4eeb1ac : ld1h z12.d, p4/Z, [x13, #-2, MUL VL] : ld1h -0x10(%x13)[2byte] %p4/z -> %z12.d +a4efb1ee : ld1h z14.d, p4/Z, [x15, #-1, MUL VL] : ld1h -0x08(%x15)[2byte] %p4/z -> %z14.d +a4e0b630 : ld1h z16.d, p5/Z, [x17, #0, MUL VL] : ld1h (%x17)[2byte] %p5/z -> %z16.d +a4e0b671 : ld1h z17.d, p5/Z, [x19, #0, MUL VL] : ld1h (%x19)[2byte] %p5/z -> %z17.d +a4e1b6b3 : ld1h z19.d, p5/Z, [x21, #1, MUL VL] : ld1h +0x08(%x21)[2byte] %p5/z -> %z19.d +a4e2baf5 : ld1h z21.d, p6/Z, [x23, #2, MUL VL] : ld1h +0x10(%x23)[2byte] %p6/z -> %z21.d +a4e3bb17 : ld1h z23.d, p6/Z, [x24, #3, MUL VL] : ld1h +0x18(%x24)[2byte] %p6/z -> %z23.d +a4e4bf59 : ld1h z25.d, p7/Z, [x26, #4, MUL VL] : ld1h +0x20(%x26)[2byte] %p7/z -> %z25.d +a4e5bf9b : ld1h z27.d, p7/Z, [x28, #5, MUL VL] : ld1h +0x28(%x28)[2byte] %p7/z -> %z27.d +a4e7bfff : ld1h z31.d, p7/Z, [sp, #7, MUL VL] : ld1h +0x38(%sp)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [, .D, ] (LD1H-Z.P.BZ-D.x32.unscaled) -c4804000 : ld1h z0.d, p0/Z, [x0, z0.d, UXTW] : ld1h (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d -c4854482 : ld1h z2.d, p1/Z, [x4, z5.d, UXTW] : ld1h (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d -c48748c4 : ld1h z4.d, p2/Z, [x6, z7.d, UXTW] : ld1h (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d -c4894906 : ld1h z6.d, p2/Z, [x8, z9.d, UXTW] : ld1h (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d -c48b4d48 : ld1h z8.d, p3/Z, [x10, z11.d, UXTW] : ld1h (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d -c48d4d6a : ld1h z10.d, p3/Z, [x11, z13.d, UXTW] : ld1h (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d -c48f51ac : ld1h z12.d, p4/Z, [x13, z15.d, UXTW] : ld1h (%x13,%z15.d,uxtw)[8byte] %p4/z -> %z12.d -c49151ee : ld1h z14.d, p4/Z, [x15, z17.d, UXTW] : ld1h (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d -c4935630 : ld1h z16.d, p5/Z, [x17, z19.d, UXTW] : ld1h (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d -c4945671 : ld1h z17.d, p5/Z, [x19, z20.d, UXTW] : ld1h 
(%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d -c49656b3 : ld1h z19.d, p5/Z, [x21, z22.d, UXTW] : ld1h (%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d -c4985af5 : ld1h z21.d, p6/Z, [x23, z24.d, UXTW] : ld1h (%x23,%z24.d,uxtw)[8byte] %p6/z -> %z21.d -c49a5b17 : ld1h z23.d, p6/Z, [x24, z26.d, UXTW] : ld1h (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d -c49c5f59 : ld1h z25.d, p7/Z, [x26, z28.d, UXTW] : ld1h (%x26,%z28.d,uxtw)[8byte] %p7/z -> %z25.d -c49e5f9b : ld1h z27.d, p7/Z, [x28, z30.d, UXTW] : ld1h (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d -c49f5fff : ld1h z31.d, p7/Z, [sp, z31.d, UXTW] : ld1h (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d -c4c04000 : ld1h z0.d, p0/Z, [x0, z0.d, SXTW] : ld1h (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d -c4c54482 : ld1h z2.d, p1/Z, [x4, z5.d, SXTW] : ld1h (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d -c4c748c4 : ld1h z4.d, p2/Z, [x6, z7.d, SXTW] : ld1h (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d -c4c94906 : ld1h z6.d, p2/Z, [x8, z9.d, SXTW] : ld1h (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d -c4cb4d48 : ld1h z8.d, p3/Z, [x10, z11.d, SXTW] : ld1h (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d -c4cd4d6a : ld1h z10.d, p3/Z, [x11, z13.d, SXTW] : ld1h (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d -c4cf51ac : ld1h z12.d, p4/Z, [x13, z15.d, SXTW] : ld1h (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d -c4d151ee : ld1h z14.d, p4/Z, [x15, z17.d, SXTW] : ld1h (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d -c4d35630 : ld1h z16.d, p5/Z, [x17, z19.d, SXTW] : ld1h (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d -c4d45671 : ld1h z17.d, p5/Z, [x19, z20.d, SXTW] : ld1h (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d -c4d656b3 : ld1h z19.d, p5/Z, [x21, z22.d, SXTW] : ld1h (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d -c4d85af5 : ld1h z21.d, p6/Z, [x23, z24.d, SXTW] : ld1h (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d -c4da5b17 : ld1h z23.d, p6/Z, [x24, z26.d, SXTW] : ld1h (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d -c4dc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, SXTW] : ld1h (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d -c4de5f9b : ld1h z27.d, 
p7/Z, [x28, z30.d, SXTW] : ld1h (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d -c4df5fff : ld1h z31.d, p7/Z, [sp, z31.d, SXTW] : ld1h (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d +c4804000 : ld1h z0.d, p0/Z, [x0, z0.d, UXTW] : ld1h (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d +c4854482 : ld1h z2.d, p1/Z, [x4, z5.d, UXTW] : ld1h (%x4,%z5.d,uxtw)[2byte] %p1/z -> %z2.d +c48748c4 : ld1h z4.d, p2/Z, [x6, z7.d, UXTW] : ld1h (%x6,%z7.d,uxtw)[2byte] %p2/z -> %z4.d +c4894906 : ld1h z6.d, p2/Z, [x8, z9.d, UXTW] : ld1h (%x8,%z9.d,uxtw)[2byte] %p2/z -> %z6.d +c48b4d48 : ld1h z8.d, p3/Z, [x10, z11.d, UXTW] : ld1h (%x10,%z11.d,uxtw)[2byte] %p3/z -> %z8.d +c48d4d6a : ld1h z10.d, p3/Z, [x11, z13.d, UXTW] : ld1h (%x11,%z13.d,uxtw)[2byte] %p3/z -> %z10.d +c48f51ac : ld1h z12.d, p4/Z, [x13, z15.d, UXTW] : ld1h (%x13,%z15.d,uxtw)[2byte] %p4/z -> %z12.d +c49151ee : ld1h z14.d, p4/Z, [x15, z17.d, UXTW] : ld1h (%x15,%z17.d,uxtw)[2byte] %p4/z -> %z14.d +c4935630 : ld1h z16.d, p5/Z, [x17, z19.d, UXTW] : ld1h (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d +c4945671 : ld1h z17.d, p5/Z, [x19, z20.d, UXTW] : ld1h (%x19,%z20.d,uxtw)[2byte] %p5/z -> %z17.d +c49656b3 : ld1h z19.d, p5/Z, [x21, z22.d, UXTW] : ld1h (%x21,%z22.d,uxtw)[2byte] %p5/z -> %z19.d +c4985af5 : ld1h z21.d, p6/Z, [x23, z24.d, UXTW] : ld1h (%x23,%z24.d,uxtw)[2byte] %p6/z -> %z21.d +c49a5b17 : ld1h z23.d, p6/Z, [x24, z26.d, UXTW] : ld1h (%x24,%z26.d,uxtw)[2byte] %p6/z -> %z23.d +c49c5f59 : ld1h z25.d, p7/Z, [x26, z28.d, UXTW] : ld1h (%x26,%z28.d,uxtw)[2byte] %p7/z -> %z25.d +c49e5f9b : ld1h z27.d, p7/Z, [x28, z30.d, UXTW] : ld1h (%x28,%z30.d,uxtw)[2byte] %p7/z -> %z27.d +c49f5fff : ld1h z31.d, p7/Z, [sp, z31.d, UXTW] : ld1h (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d +c4c04000 : ld1h z0.d, p0/Z, [x0, z0.d, SXTW] : ld1h (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d +c4c54482 : ld1h z2.d, p1/Z, [x4, z5.d, SXTW] : ld1h (%x4,%z5.d,sxtw)[2byte] %p1/z -> %z2.d +c4c748c4 : ld1h z4.d, p2/Z, [x6, z7.d, SXTW] : ld1h (%x6,%z7.d,sxtw)[2byte] %p2/z -> %z4.d +c4c94906 : 
ld1h z6.d, p2/Z, [x8, z9.d, SXTW] : ld1h (%x8,%z9.d,sxtw)[2byte] %p2/z -> %z6.d +c4cb4d48 : ld1h z8.d, p3/Z, [x10, z11.d, SXTW] : ld1h (%x10,%z11.d,sxtw)[2byte] %p3/z -> %z8.d +c4cd4d6a : ld1h z10.d, p3/Z, [x11, z13.d, SXTW] : ld1h (%x11,%z13.d,sxtw)[2byte] %p3/z -> %z10.d +c4cf51ac : ld1h z12.d, p4/Z, [x13, z15.d, SXTW] : ld1h (%x13,%z15.d,sxtw)[2byte] %p4/z -> %z12.d +c4d151ee : ld1h z14.d, p4/Z, [x15, z17.d, SXTW] : ld1h (%x15,%z17.d,sxtw)[2byte] %p4/z -> %z14.d +c4d35630 : ld1h z16.d, p5/Z, [x17, z19.d, SXTW] : ld1h (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d +c4d45671 : ld1h z17.d, p5/Z, [x19, z20.d, SXTW] : ld1h (%x19,%z20.d,sxtw)[2byte] %p5/z -> %z17.d +c4d656b3 : ld1h z19.d, p5/Z, [x21, z22.d, SXTW] : ld1h (%x21,%z22.d,sxtw)[2byte] %p5/z -> %z19.d +c4d85af5 : ld1h z21.d, p6/Z, [x23, z24.d, SXTW] : ld1h (%x23,%z24.d,sxtw)[2byte] %p6/z -> %z21.d +c4da5b17 : ld1h z23.d, p6/Z, [x24, z26.d, SXTW] : ld1h (%x24,%z26.d,sxtw)[2byte] %p6/z -> %z23.d +c4dc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, SXTW] : ld1h (%x26,%z28.d,sxtw)[2byte] %p7/z -> %z25.d +c4de5f9b : ld1h z27.d, p7/Z, [x28, z30.d, SXTW] : ld1h (%x28,%z30.d,sxtw)[2byte] %p7/z -> %z27.d +c4df5fff : ld1h z31.d, p7/Z, [sp, z31.d, SXTW] : ld1h (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [, .D, #1] (LD1H-Z.P.BZ-D.x32.scaled) -c4a04000 : ld1h z0.d, p0/Z, [x0, z0.d, UXTW #1] : ld1h (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d -c4a54482 : ld1h z2.d, p1/Z, [x4, z5.d, UXTW #1] : ld1h (%x4,%z5.d,uxtw #1)[8byte] %p1/z -> %z2.d -c4a748c4 : ld1h z4.d, p2/Z, [x6, z7.d, UXTW #1] : ld1h (%x6,%z7.d,uxtw #1)[8byte] %p2/z -> %z4.d -c4a94906 : ld1h z6.d, p2/Z, [x8, z9.d, UXTW #1] : ld1h (%x8,%z9.d,uxtw #1)[8byte] %p2/z -> %z6.d -c4ab4d48 : ld1h z8.d, p3/Z, [x10, z11.d, UXTW #1] : ld1h (%x10,%z11.d,uxtw #1)[8byte] %p3/z -> %z8.d -c4ad4d6a : ld1h z10.d, p3/Z, [x11, z13.d, UXTW #1] : ld1h (%x11,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d -c4af51ac : ld1h z12.d, p4/Z, [x13, z15.d, UXTW #1] : ld1h (%x13,%z15.d,uxtw #1)[8byte] 
%p4/z -> %z12.d -c4b151ee : ld1h z14.d, p4/Z, [x15, z17.d, UXTW #1] : ld1h (%x15,%z17.d,uxtw #1)[8byte] %p4/z -> %z14.d -c4b35630 : ld1h z16.d, p5/Z, [x17, z19.d, UXTW #1] : ld1h (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d -c4b45671 : ld1h z17.d, p5/Z, [x19, z20.d, UXTW #1] : ld1h (%x19,%z20.d,uxtw #1)[8byte] %p5/z -> %z17.d -c4b656b3 : ld1h z19.d, p5/Z, [x21, z22.d, UXTW #1] : ld1h (%x21,%z22.d,uxtw #1)[8byte] %p5/z -> %z19.d -c4b85af5 : ld1h z21.d, p6/Z, [x23, z24.d, UXTW #1] : ld1h (%x23,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d -c4ba5b17 : ld1h z23.d, p6/Z, [x24, z26.d, UXTW #1] : ld1h (%x24,%z26.d,uxtw #1)[8byte] %p6/z -> %z23.d -c4bc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, UXTW #1] : ld1h (%x26,%z28.d,uxtw #1)[8byte] %p7/z -> %z25.d -c4be5f9b : ld1h z27.d, p7/Z, [x28, z30.d, UXTW #1] : ld1h (%x28,%z30.d,uxtw #1)[8byte] %p7/z -> %z27.d -c4bf5fff : ld1h z31.d, p7/Z, [sp, z31.d, UXTW #1] : ld1h (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d -c4e04000 : ld1h z0.d, p0/Z, [x0, z0.d, SXTW #1] : ld1h (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d -c4e54482 : ld1h z2.d, p1/Z, [x4, z5.d, SXTW #1] : ld1h (%x4,%z5.d,sxtw #1)[8byte] %p1/z -> %z2.d -c4e748c4 : ld1h z4.d, p2/Z, [x6, z7.d, SXTW #1] : ld1h (%x6,%z7.d,sxtw #1)[8byte] %p2/z -> %z4.d -c4e94906 : ld1h z6.d, p2/Z, [x8, z9.d, SXTW #1] : ld1h (%x8,%z9.d,sxtw #1)[8byte] %p2/z -> %z6.d -c4eb4d48 : ld1h z8.d, p3/Z, [x10, z11.d, SXTW #1] : ld1h (%x10,%z11.d,sxtw #1)[8byte] %p3/z -> %z8.d -c4ed4d6a : ld1h z10.d, p3/Z, [x11, z13.d, SXTW #1] : ld1h (%x11,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d -c4ef51ac : ld1h z12.d, p4/Z, [x13, z15.d, SXTW #1] : ld1h (%x13,%z15.d,sxtw #1)[8byte] %p4/z -> %z12.d -c4f151ee : ld1h z14.d, p4/Z, [x15, z17.d, SXTW #1] : ld1h (%x15,%z17.d,sxtw #1)[8byte] %p4/z -> %z14.d -c4f35630 : ld1h z16.d, p5/Z, [x17, z19.d, SXTW #1] : ld1h (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d -c4f45671 : ld1h z17.d, p5/Z, [x19, z20.d, SXTW #1] : ld1h (%x19,%z20.d,sxtw #1)[8byte] %p5/z -> %z17.d -c4f656b3 : ld1h z19.d, p5/Z, [x21, 
z22.d, SXTW #1] : ld1h (%x21,%z22.d,sxtw #1)[8byte] %p5/z -> %z19.d -c4f85af5 : ld1h z21.d, p6/Z, [x23, z24.d, SXTW #1] : ld1h (%x23,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d -c4fa5b17 : ld1h z23.d, p6/Z, [x24, z26.d, SXTW #1] : ld1h (%x24,%z26.d,sxtw #1)[8byte] %p6/z -> %z23.d -c4fc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, SXTW #1] : ld1h (%x26,%z28.d,sxtw #1)[8byte] %p7/z -> %z25.d -c4fe5f9b : ld1h z27.d, p7/Z, [x28, z30.d, SXTW #1] : ld1h (%x28,%z30.d,sxtw #1)[8byte] %p7/z -> %z27.d -c4ff5fff : ld1h z31.d, p7/Z, [sp, z31.d, SXTW #1] : ld1h (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d +c4a04000 : ld1h z0.d, p0/Z, [x0, z0.d, UXTW #1] : ld1h (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d +c4a54482 : ld1h z2.d, p1/Z, [x4, z5.d, UXTW #1] : ld1h (%x4,%z5.d,uxtw #1)[2byte] %p1/z -> %z2.d +c4a748c4 : ld1h z4.d, p2/Z, [x6, z7.d, UXTW #1] : ld1h (%x6,%z7.d,uxtw #1)[2byte] %p2/z -> %z4.d +c4a94906 : ld1h z6.d, p2/Z, [x8, z9.d, UXTW #1] : ld1h (%x8,%z9.d,uxtw #1)[2byte] %p2/z -> %z6.d +c4ab4d48 : ld1h z8.d, p3/Z, [x10, z11.d, UXTW #1] : ld1h (%x10,%z11.d,uxtw #1)[2byte] %p3/z -> %z8.d +c4ad4d6a : ld1h z10.d, p3/Z, [x11, z13.d, UXTW #1] : ld1h (%x11,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d +c4af51ac : ld1h z12.d, p4/Z, [x13, z15.d, UXTW #1] : ld1h (%x13,%z15.d,uxtw #1)[2byte] %p4/z -> %z12.d +c4b151ee : ld1h z14.d, p4/Z, [x15, z17.d, UXTW #1] : ld1h (%x15,%z17.d,uxtw #1)[2byte] %p4/z -> %z14.d +c4b35630 : ld1h z16.d, p5/Z, [x17, z19.d, UXTW #1] : ld1h (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d +c4b45671 : ld1h z17.d, p5/Z, [x19, z20.d, UXTW #1] : ld1h (%x19,%z20.d,uxtw #1)[2byte] %p5/z -> %z17.d +c4b656b3 : ld1h z19.d, p5/Z, [x21, z22.d, UXTW #1] : ld1h (%x21,%z22.d,uxtw #1)[2byte] %p5/z -> %z19.d +c4b85af5 : ld1h z21.d, p6/Z, [x23, z24.d, UXTW #1] : ld1h (%x23,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d +c4ba5b17 : ld1h z23.d, p6/Z, [x24, z26.d, UXTW #1] : ld1h (%x24,%z26.d,uxtw #1)[2byte] %p6/z -> %z23.d +c4bc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, UXTW #1] : ld1h (%x26,%z28.d,uxtw #1)[2byte] 
%p7/z -> %z25.d +c4be5f9b : ld1h z27.d, p7/Z, [x28, z30.d, UXTW #1] : ld1h (%x28,%z30.d,uxtw #1)[2byte] %p7/z -> %z27.d +c4bf5fff : ld1h z31.d, p7/Z, [sp, z31.d, UXTW #1] : ld1h (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d +c4e04000 : ld1h z0.d, p0/Z, [x0, z0.d, SXTW #1] : ld1h (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d +c4e54482 : ld1h z2.d, p1/Z, [x4, z5.d, SXTW #1] : ld1h (%x4,%z5.d,sxtw #1)[2byte] %p1/z -> %z2.d +c4e748c4 : ld1h z4.d, p2/Z, [x6, z7.d, SXTW #1] : ld1h (%x6,%z7.d,sxtw #1)[2byte] %p2/z -> %z4.d +c4e94906 : ld1h z6.d, p2/Z, [x8, z9.d, SXTW #1] : ld1h (%x8,%z9.d,sxtw #1)[2byte] %p2/z -> %z6.d +c4eb4d48 : ld1h z8.d, p3/Z, [x10, z11.d, SXTW #1] : ld1h (%x10,%z11.d,sxtw #1)[2byte] %p3/z -> %z8.d +c4ed4d6a : ld1h z10.d, p3/Z, [x11, z13.d, SXTW #1] : ld1h (%x11,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d +c4ef51ac : ld1h z12.d, p4/Z, [x13, z15.d, SXTW #1] : ld1h (%x13,%z15.d,sxtw #1)[2byte] %p4/z -> %z12.d +c4f151ee : ld1h z14.d, p4/Z, [x15, z17.d, SXTW #1] : ld1h (%x15,%z17.d,sxtw #1)[2byte] %p4/z -> %z14.d +c4f35630 : ld1h z16.d, p5/Z, [x17, z19.d, SXTW #1] : ld1h (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d +c4f45671 : ld1h z17.d, p5/Z, [x19, z20.d, SXTW #1] : ld1h (%x19,%z20.d,sxtw #1)[2byte] %p5/z -> %z17.d +c4f656b3 : ld1h z19.d, p5/Z, [x21, z22.d, SXTW #1] : ld1h (%x21,%z22.d,sxtw #1)[2byte] %p5/z -> %z19.d +c4f85af5 : ld1h z21.d, p6/Z, [x23, z24.d, SXTW #1] : ld1h (%x23,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d +c4fa5b17 : ld1h z23.d, p6/Z, [x24, z26.d, SXTW #1] : ld1h (%x24,%z26.d,sxtw #1)[2byte] %p6/z -> %z23.d +c4fc5f59 : ld1h z25.d, p7/Z, [x26, z28.d, SXTW #1] : ld1h (%x26,%z28.d,sxtw #1)[2byte] %p7/z -> %z25.d +c4fe5f9b : ld1h z27.d, p7/Z, [x28, z30.d, SXTW #1] : ld1h (%x28,%z30.d,sxtw #1)[2byte] %p7/z -> %z27.d +c4ff5fff : ld1h z31.d, p7/Z, [sp, z31.d, SXTW #1] : ld1h (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [.D{, #}] (LD1H-Z.P.AI-D) -c4a0c000 : ld1h z0.d, p0/Z, [z0.d, #0] : ld1h (%z0.d)[8byte] %p0/z -> %z0.d -c4a2c482 : ld1h 
z2.d, p1/Z, [z4.d, #4] : ld1h +0x04(%z4.d)[8byte] %p1/z -> %z2.d -c4a4c8c4 : ld1h z4.d, p2/Z, [z6.d, #8] : ld1h +0x08(%z6.d)[8byte] %p2/z -> %z4.d -c4a6c906 : ld1h z6.d, p2/Z, [z8.d, #12] : ld1h +0x0c(%z8.d)[8byte] %p2/z -> %z6.d -c4a8cd48 : ld1h z8.d, p3/Z, [z10.d, #16] : ld1h +0x10(%z10.d)[8byte] %p3/z -> %z8.d -c4aacd8a : ld1h z10.d, p3/Z, [z12.d, #20] : ld1h +0x14(%z12.d)[8byte] %p3/z -> %z10.d -c4acd1cc : ld1h z12.d, p4/Z, [z14.d, #24] : ld1h +0x18(%z14.d)[8byte] %p4/z -> %z12.d -c4aed20e : ld1h z14.d, p4/Z, [z16.d, #28] : ld1h +0x1c(%z16.d)[8byte] %p4/z -> %z14.d -c4b0d650 : ld1h z16.d, p5/Z, [z18.d, #32] : ld1h +0x20(%z18.d)[8byte] %p5/z -> %z16.d -c4b1d671 : ld1h z17.d, p5/Z, [z19.d, #34] : ld1h +0x22(%z19.d)[8byte] %p5/z -> %z17.d -c4b3d6b3 : ld1h z19.d, p5/Z, [z21.d, #38] : ld1h +0x26(%z21.d)[8byte] %p5/z -> %z19.d -c4b5daf5 : ld1h z21.d, p6/Z, [z23.d, #42] : ld1h +0x2a(%z23.d)[8byte] %p6/z -> %z21.d -c4b7db37 : ld1h z23.d, p6/Z, [z25.d, #46] : ld1h +0x2e(%z25.d)[8byte] %p6/z -> %z23.d -c4b9df79 : ld1h z25.d, p7/Z, [z27.d, #50] : ld1h +0x32(%z27.d)[8byte] %p7/z -> %z25.d -c4bbdfbb : ld1h z27.d, p7/Z, [z29.d, #54] : ld1h +0x36(%z29.d)[8byte] %p7/z -> %z27.d -c4bfdfff : ld1h z31.d, p7/Z, [z31.d, #62] : ld1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +c4a0c000 : ld1h z0.d, p0/Z, [z0.d, #0] : ld1h (%z0.d)[2byte] %p0/z -> %z0.d +c4a2c482 : ld1h z2.d, p1/Z, [z4.d, #4] : ld1h +0x04(%z4.d)[2byte] %p1/z -> %z2.d +c4a4c8c4 : ld1h z4.d, p2/Z, [z6.d, #8] : ld1h +0x08(%z6.d)[2byte] %p2/z -> %z4.d +c4a6c906 : ld1h z6.d, p2/Z, [z8.d, #12] : ld1h +0x0c(%z8.d)[2byte] %p2/z -> %z6.d +c4a8cd48 : ld1h z8.d, p3/Z, [z10.d, #16] : ld1h +0x10(%z10.d)[2byte] %p3/z -> %z8.d +c4aacd8a : ld1h z10.d, p3/Z, [z12.d, #20] : ld1h +0x14(%z12.d)[2byte] %p3/z -> %z10.d +c4acd1cc : ld1h z12.d, p4/Z, [z14.d, #24] : ld1h +0x18(%z14.d)[2byte] %p4/z -> %z12.d +c4aed20e : ld1h z14.d, p4/Z, [z16.d, #28] : ld1h +0x1c(%z16.d)[2byte] %p4/z -> %z14.d +c4b0d650 : ld1h z16.d, p5/Z, [z18.d, #32] : ld1h 
+0x20(%z18.d)[2byte] %p5/z -> %z16.d +c4b1d671 : ld1h z17.d, p5/Z, [z19.d, #34] : ld1h +0x22(%z19.d)[2byte] %p5/z -> %z17.d +c4b3d6b3 : ld1h z19.d, p5/Z, [z21.d, #38] : ld1h +0x26(%z21.d)[2byte] %p5/z -> %z19.d +c4b5daf5 : ld1h z21.d, p6/Z, [z23.d, #42] : ld1h +0x2a(%z23.d)[2byte] %p6/z -> %z21.d +c4b7db37 : ld1h z23.d, p6/Z, [z25.d, #46] : ld1h +0x2e(%z25.d)[2byte] %p6/z -> %z23.d +c4b9df79 : ld1h z25.d, p7/Z, [z27.d, #50] : ld1h +0x32(%z27.d)[2byte] %p7/z -> %z25.d +c4bbdfbb : ld1h z27.d, p7/Z, [z29.d, #54] : ld1h +0x36(%z29.d)[2byte] %p7/z -> %z27.d +c4bfdfff : ld1h z31.d, p7/Z, [z31.d, #62] : ld1h +0x3e(%z31.d)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [, .D] (LD1H-Z.P.BZ-D.64.unscaled) -c4c0c000 : ld1h z0.d, p0/Z, [x0, z0.d] : ld1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d -c4c5c482 : ld1h z2.d, p1/Z, [x4, z5.d] : ld1h (%x4,%z5.d)[8byte] %p1/z -> %z2.d -c4c7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d] : ld1h (%x6,%z7.d)[8byte] %p2/z -> %z4.d -c4c9c906 : ld1h z6.d, p2/Z, [x8, z9.d] : ld1h (%x8,%z9.d)[8byte] %p2/z -> %z6.d -c4cbcd48 : ld1h z8.d, p3/Z, [x10, z11.d] : ld1h (%x10,%z11.d)[8byte] %p3/z -> %z8.d -c4cdcd6a : ld1h z10.d, p3/Z, [x11, z13.d] : ld1h (%x11,%z13.d)[8byte] %p3/z -> %z10.d -c4cfd1ac : ld1h z12.d, p4/Z, [x13, z15.d] : ld1h (%x13,%z15.d)[8byte] %p4/z -> %z12.d -c4d1d1ee : ld1h z14.d, p4/Z, [x15, z17.d] : ld1h (%x15,%z17.d)[8byte] %p4/z -> %z14.d -c4d3d630 : ld1h z16.d, p5/Z, [x17, z19.d] : ld1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d -c4d4d671 : ld1h z17.d, p5/Z, [x19, z20.d] : ld1h (%x19,%z20.d)[8byte] %p5/z -> %z17.d -c4d6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d] : ld1h (%x21,%z22.d)[8byte] %p5/z -> %z19.d -c4d8daf5 : ld1h z21.d, p6/Z, [x23, z24.d] : ld1h (%x23,%z24.d)[8byte] %p6/z -> %z21.d -c4dadb17 : ld1h z23.d, p6/Z, [x24, z26.d] : ld1h (%x24,%z26.d)[8byte] %p6/z -> %z23.d -c4dcdf59 : ld1h z25.d, p7/Z, [x26, z28.d] : ld1h (%x26,%z28.d)[8byte] %p7/z -> %z25.d -c4dedf9b : ld1h z27.d, p7/Z, [x28, z30.d] : ld1h (%x28,%z30.d)[8byte] %p7/z -> %z27.d -c4dfdfff : ld1h 
z31.d, p7/Z, [sp, z31.d] : ld1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d +c4c0c000 : ld1h z0.d, p0/Z, [x0, z0.d] : ld1h (%x0,%z0.d)[2byte] %p0/z -> %z0.d +c4c5c482 : ld1h z2.d, p1/Z, [x4, z5.d] : ld1h (%x4,%z5.d)[2byte] %p1/z -> %z2.d +c4c7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d] : ld1h (%x6,%z7.d)[2byte] %p2/z -> %z4.d +c4c9c906 : ld1h z6.d, p2/Z, [x8, z9.d] : ld1h (%x8,%z9.d)[2byte] %p2/z -> %z6.d +c4cbcd48 : ld1h z8.d, p3/Z, [x10, z11.d] : ld1h (%x10,%z11.d)[2byte] %p3/z -> %z8.d +c4cdcd6a : ld1h z10.d, p3/Z, [x11, z13.d] : ld1h (%x11,%z13.d)[2byte] %p3/z -> %z10.d +c4cfd1ac : ld1h z12.d, p4/Z, [x13, z15.d] : ld1h (%x13,%z15.d)[2byte] %p4/z -> %z12.d +c4d1d1ee : ld1h z14.d, p4/Z, [x15, z17.d] : ld1h (%x15,%z17.d)[2byte] %p4/z -> %z14.d +c4d3d630 : ld1h z16.d, p5/Z, [x17, z19.d] : ld1h (%x17,%z19.d)[2byte] %p5/z -> %z16.d +c4d4d671 : ld1h z17.d, p5/Z, [x19, z20.d] : ld1h (%x19,%z20.d)[2byte] %p5/z -> %z17.d +c4d6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d] : ld1h (%x21,%z22.d)[2byte] %p5/z -> %z19.d +c4d8daf5 : ld1h z21.d, p6/Z, [x23, z24.d] : ld1h (%x23,%z24.d)[2byte] %p6/z -> %z21.d +c4dadb17 : ld1h z23.d, p6/Z, [x24, z26.d] : ld1h (%x24,%z26.d)[2byte] %p6/z -> %z23.d +c4dcdf59 : ld1h z25.d, p7/Z, [x26, z28.d] : ld1h (%x26,%z28.d)[2byte] %p7/z -> %z25.d +c4dedf9b : ld1h z27.d, p7/Z, [x28, z30.d] : ld1h (%x28,%z30.d)[2byte] %p7/z -> %z27.d +c4dfdfff : ld1h z31.d, p7/Z, [sp, z31.d] : ld1h (%sp,%z31.d)[2byte] %p7/z -> %z31.d # LD1H { .D }, /Z, [, .D, LSL #1] (LD1H-Z.P.BZ-D.64.scaled) -c4e0c000 : ld1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d -c4e5c482 : ld1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1h (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d -c4e7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1h (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d -c4e9c906 : ld1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ld1h (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d -c4ebcd48 : ld1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1h (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d -c4edcd6a : ld1h z10.d, 
p3/Z, [x11, z13.d, LSL #1] : ld1h (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d -c4efd1ac : ld1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1h (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d -c4f1d1ee : ld1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1h (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d -c4f3d630 : ld1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d -c4f4d671 : ld1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1h (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d -c4f6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1h (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d -c4f8daf5 : ld1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1h (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d -c4fadb17 : ld1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1h (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d -c4fcdf59 : ld1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1h (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d -c4fedf9b : ld1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1h (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d -c4ffdfff : ld1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d +c4e0c000 : ld1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1h (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d +c4e5c482 : ld1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1h (%x4,%z5.d,lsl #1)[2byte] %p1/z -> %z2.d +c4e7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1h (%x6,%z7.d,lsl #1)[2byte] %p2/z -> %z4.d +c4e9c906 : ld1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ld1h (%x8,%z9.d,lsl #1)[2byte] %p2/z -> %z6.d +c4ebcd48 : ld1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1h (%x10,%z11.d,lsl #1)[2byte] %p3/z -> %z8.d +c4edcd6a : ld1h z10.d, p3/Z, [x11, z13.d, LSL #1] : ld1h (%x11,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d +c4efd1ac : ld1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1h (%x13,%z15.d,lsl #1)[2byte] %p4/z -> %z12.d +c4f1d1ee : ld1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1h (%x15,%z17.d,lsl #1)[2byte] %p4/z -> %z14.d +c4f3d630 : ld1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1h (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d +c4f4d671 : 
ld1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1h (%x19,%z20.d,lsl #1)[2byte] %p5/z -> %z17.d +c4f6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1h (%x21,%z22.d,lsl #1)[2byte] %p5/z -> %z19.d +c4f8daf5 : ld1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1h (%x23,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d +c4fadb17 : ld1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1h (%x24,%z26.d,lsl #1)[2byte] %p6/z -> %z23.d +c4fcdf59 : ld1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1h (%x26,%z28.d,lsl #1)[2byte] %p7/z -> %z25.d +c4fedf9b : ld1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1h (%x28,%z30.d,lsl #1)[2byte] %p7/z -> %z27.d +c4ffdfff : ld1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1h (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d # LD1RB { .B }, /Z, [{, #}] (LD1RB-Z.P.BI-U8) 84408000 : ld1rb z0.b, p0/Z, [x0, #0] : ld1rb (%x0)[1byte] %p0/z -> %z0.b @@ -11686,166 +11686,166 @@ c4ffdfff : ld1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1h (%sp,%z31.d,lsl #1 84ffffff : ld1rh z31.d, p7/Z, [sp, #126] : ld1rh +0x7e(%sp)[2byte] %p7/z -> %z31.d # LD1ROB { .B }, /Z, [, ] (LD1ROB-Z.P.BR-Contiguous) -a4200000 : ld1rob z0.b, p0/Z, [x0, x0] : ld1rob (%x0,%x0)[32byte] %p0/z -> %z0.b -a4250482 : ld1rob z2.b, p1/Z, [x4, x5] : ld1rob (%x4,%x5)[32byte] %p1/z -> %z2.b -a42708c4 : ld1rob z4.b, p2/Z, [x6, x7] : ld1rob (%x6,%x7)[32byte] %p2/z -> %z4.b -a4290906 : ld1rob z6.b, p2/Z, [x8, x9] : ld1rob (%x8,%x9)[32byte] %p2/z -> %z6.b -a42b0d48 : ld1rob z8.b, p3/Z, [x10, x11] : ld1rob (%x10,%x11)[32byte] %p3/z -> %z8.b -a42c0d6a : ld1rob z10.b, p3/Z, [x11, x12] : ld1rob (%x11,%x12)[32byte] %p3/z -> %z10.b -a42e11ac : ld1rob z12.b, p4/Z, [x13, x14] : ld1rob (%x13,%x14)[32byte] %p4/z -> %z12.b -a43011ee : ld1rob z14.b, p4/Z, [x15, x16] : ld1rob (%x15,%x16)[32byte] %p4/z -> %z14.b -a4321630 : ld1rob z16.b, p5/Z, [x17, x18] : ld1rob (%x17,%x18)[32byte] %p5/z -> %z16.b -a4341671 : ld1rob z17.b, p5/Z, [x19, x20] : ld1rob (%x19,%x20)[32byte] %p5/z -> %z17.b -a43616b3 : ld1rob z19.b, p5/Z, [x21, x22] : ld1rob (%x21,%x22)[32byte] %p5/z -> 
%z19.b -a4381af5 : ld1rob z21.b, p6/Z, [x23, x24] : ld1rob (%x23,%x24)[32byte] %p6/z -> %z21.b -a4391b17 : ld1rob z23.b, p6/Z, [x24, x25] : ld1rob (%x24,%x25)[32byte] %p6/z -> %z23.b -a43b1f59 : ld1rob z25.b, p7/Z, [x26, x27] : ld1rob (%x26,%x27)[32byte] %p7/z -> %z25.b -a43d1f9b : ld1rob z27.b, p7/Z, [x28, x29] : ld1rob (%x28,%x29)[32byte] %p7/z -> %z27.b -a43e1fff : ld1rob z31.b, p7/Z, [sp, x30] : ld1rob (%sp,%x30)[32byte] %p7/z -> %z31.b +a4200000 : ld1rob z0.b, p0/Z, [x0, x0] : ld1rob (%x0,%x0)[1byte] %p0/z -> %z0.b +a4250482 : ld1rob z2.b, p1/Z, [x4, x5] : ld1rob (%x4,%x5)[1byte] %p1/z -> %z2.b +a42708c4 : ld1rob z4.b, p2/Z, [x6, x7] : ld1rob (%x6,%x7)[1byte] %p2/z -> %z4.b +a4290906 : ld1rob z6.b, p2/Z, [x8, x9] : ld1rob (%x8,%x9)[1byte] %p2/z -> %z6.b +a42b0d48 : ld1rob z8.b, p3/Z, [x10, x11] : ld1rob (%x10,%x11)[1byte] %p3/z -> %z8.b +a42c0d6a : ld1rob z10.b, p3/Z, [x11, x12] : ld1rob (%x11,%x12)[1byte] %p3/z -> %z10.b +a42e11ac : ld1rob z12.b, p4/Z, [x13, x14] : ld1rob (%x13,%x14)[1byte] %p4/z -> %z12.b +a43011ee : ld1rob z14.b, p4/Z, [x15, x16] : ld1rob (%x15,%x16)[1byte] %p4/z -> %z14.b +a4321630 : ld1rob z16.b, p5/Z, [x17, x18] : ld1rob (%x17,%x18)[1byte] %p5/z -> %z16.b +a4341671 : ld1rob z17.b, p5/Z, [x19, x20] : ld1rob (%x19,%x20)[1byte] %p5/z -> %z17.b +a43616b3 : ld1rob z19.b, p5/Z, [x21, x22] : ld1rob (%x21,%x22)[1byte] %p5/z -> %z19.b +a4381af5 : ld1rob z21.b, p6/Z, [x23, x24] : ld1rob (%x23,%x24)[1byte] %p6/z -> %z21.b +a4391b17 : ld1rob z23.b, p6/Z, [x24, x25] : ld1rob (%x24,%x25)[1byte] %p6/z -> %z23.b +a43b1f59 : ld1rob z25.b, p7/Z, [x26, x27] : ld1rob (%x26,%x27)[1byte] %p7/z -> %z25.b +a43d1f9b : ld1rob z27.b, p7/Z, [x28, x29] : ld1rob (%x28,%x29)[1byte] %p7/z -> %z27.b +a43e1fff : ld1rob z31.b, p7/Z, [sp, x30] : ld1rob (%sp,%x30)[1byte] %p7/z -> %z31.b # LD1RQB { .B }, /Z, [, ] (LD1RQB-Z.P.BR-Contiguous) -a4000000 : ld1rqb z0.b, p0/Z, [x0, x0] : ld1rqb (%x0,%x0)[16byte] %p0/z -> %z0.b -a4050482 : ld1rqb z2.b, p1/Z, [x4, x5] : ld1rqb 
(%x4,%x5)[16byte] %p1/z -> %z2.b -a40708c4 : ld1rqb z4.b, p2/Z, [x6, x7] : ld1rqb (%x6,%x7)[16byte] %p2/z -> %z4.b -a4090906 : ld1rqb z6.b, p2/Z, [x8, x9] : ld1rqb (%x8,%x9)[16byte] %p2/z -> %z6.b -a40b0d48 : ld1rqb z8.b, p3/Z, [x10, x11] : ld1rqb (%x10,%x11)[16byte] %p3/z -> %z8.b -a40c0d6a : ld1rqb z10.b, p3/Z, [x11, x12] : ld1rqb (%x11,%x12)[16byte] %p3/z -> %z10.b -a40e11ac : ld1rqb z12.b, p4/Z, [x13, x14] : ld1rqb (%x13,%x14)[16byte] %p4/z -> %z12.b -a41011ee : ld1rqb z14.b, p4/Z, [x15, x16] : ld1rqb (%x15,%x16)[16byte] %p4/z -> %z14.b -a4121630 : ld1rqb z16.b, p5/Z, [x17, x18] : ld1rqb (%x17,%x18)[16byte] %p5/z -> %z16.b -a4141671 : ld1rqb z17.b, p5/Z, [x19, x20] : ld1rqb (%x19,%x20)[16byte] %p5/z -> %z17.b -a41616b3 : ld1rqb z19.b, p5/Z, [x21, x22] : ld1rqb (%x21,%x22)[16byte] %p5/z -> %z19.b -a4181af5 : ld1rqb z21.b, p6/Z, [x23, x24] : ld1rqb (%x23,%x24)[16byte] %p6/z -> %z21.b -a4191b17 : ld1rqb z23.b, p6/Z, [x24, x25] : ld1rqb (%x24,%x25)[16byte] %p6/z -> %z23.b -a41b1f59 : ld1rqb z25.b, p7/Z, [x26, x27] : ld1rqb (%x26,%x27)[16byte] %p7/z -> %z25.b -a41d1f9b : ld1rqb z27.b, p7/Z, [x28, x29] : ld1rqb (%x28,%x29)[16byte] %p7/z -> %z27.b -a41e1fff : ld1rqb z31.b, p7/Z, [sp, x30] : ld1rqb (%sp,%x30)[16byte] %p7/z -> %z31.b +a4000000 : ld1rqb z0.b, p0/Z, [x0, x0] : ld1rqb (%x0,%x0)[1byte] %p0/z -> %z0.b +a4050482 : ld1rqb z2.b, p1/Z, [x4, x5] : ld1rqb (%x4,%x5)[1byte] %p1/z -> %z2.b +a40708c4 : ld1rqb z4.b, p2/Z, [x6, x7] : ld1rqb (%x6,%x7)[1byte] %p2/z -> %z4.b +a4090906 : ld1rqb z6.b, p2/Z, [x8, x9] : ld1rqb (%x8,%x9)[1byte] %p2/z -> %z6.b +a40b0d48 : ld1rqb z8.b, p3/Z, [x10, x11] : ld1rqb (%x10,%x11)[1byte] %p3/z -> %z8.b +a40c0d6a : ld1rqb z10.b, p3/Z, [x11, x12] : ld1rqb (%x11,%x12)[1byte] %p3/z -> %z10.b +a40e11ac : ld1rqb z12.b, p4/Z, [x13, x14] : ld1rqb (%x13,%x14)[1byte] %p4/z -> %z12.b +a41011ee : ld1rqb z14.b, p4/Z, [x15, x16] : ld1rqb (%x15,%x16)[1byte] %p4/z -> %z14.b +a4121630 : ld1rqb z16.b, p5/Z, [x17, x18] : ld1rqb (%x17,%x18)[1byte] %p5/z -> 
%z16.b +a4141671 : ld1rqb z17.b, p5/Z, [x19, x20] : ld1rqb (%x19,%x20)[1byte] %p5/z -> %z17.b +a41616b3 : ld1rqb z19.b, p5/Z, [x21, x22] : ld1rqb (%x21,%x22)[1byte] %p5/z -> %z19.b +a4181af5 : ld1rqb z21.b, p6/Z, [x23, x24] : ld1rqb (%x23,%x24)[1byte] %p6/z -> %z21.b +a4191b17 : ld1rqb z23.b, p6/Z, [x24, x25] : ld1rqb (%x24,%x25)[1byte] %p6/z -> %z23.b +a41b1f59 : ld1rqb z25.b, p7/Z, [x26, x27] : ld1rqb (%x26,%x27)[1byte] %p7/z -> %z25.b +a41d1f9b : ld1rqb z27.b, p7/Z, [x28, x29] : ld1rqb (%x28,%x29)[1byte] %p7/z -> %z27.b +a41e1fff : ld1rqb z31.b, p7/Z, [sp, x30] : ld1rqb (%sp,%x30)[1byte] %p7/z -> %z31.b # LD1RQB { .B }, /Z, [{, #}] (LD1RQB-Z.P.BI-U8) -a4082000 : ld1rqb z0.b, p0/Z, [x0, #-128] : ld1rqb -0x80(%x0)[16byte] %p0/z -> %z0.b -a4092482 : ld1rqb z2.b, p1/Z, [x4, #-112] : ld1rqb -0x70(%x4)[16byte] %p1/z -> %z2.b -a40a28c4 : ld1rqb z4.b, p2/Z, [x6, #-96] : ld1rqb -0x60(%x6)[16byte] %p2/z -> %z4.b -a40b2906 : ld1rqb z6.b, p2/Z, [x8, #-80] : ld1rqb -0x50(%x8)[16byte] %p2/z -> %z6.b -a40c2d48 : ld1rqb z8.b, p3/Z, [x10, #-64] : ld1rqb -0x40(%x10)[16byte] %p3/z -> %z8.b -a40d2d6a : ld1rqb z10.b, p3/Z, [x11, #-48] : ld1rqb -0x30(%x11)[16byte] %p3/z -> %z10.b -a40e31ac : ld1rqb z12.b, p4/Z, [x13, #-32] : ld1rqb -0x20(%x13)[16byte] %p4/z -> %z12.b -a40f31ee : ld1rqb z14.b, p4/Z, [x15, #-16] : ld1rqb -0x10(%x15)[16byte] %p4/z -> %z14.b -a4003630 : ld1rqb z16.b, p5/Z, [x17, #0] : ld1rqb (%x17)[16byte] %p5/z -> %z16.b -a4003671 : ld1rqb z17.b, p5/Z, [x19, #0] : ld1rqb (%x19)[16byte] %p5/z -> %z17.b -a40136b3 : ld1rqb z19.b, p5/Z, [x21, #16] : ld1rqb +0x10(%x21)[16byte] %p5/z -> %z19.b -a4023af5 : ld1rqb z21.b, p6/Z, [x23, #32] : ld1rqb +0x20(%x23)[16byte] %p6/z -> %z21.b -a4033b17 : ld1rqb z23.b, p6/Z, [x24, #48] : ld1rqb +0x30(%x24)[16byte] %p6/z -> %z23.b -a4043f59 : ld1rqb z25.b, p7/Z, [x26, #64] : ld1rqb +0x40(%x26)[16byte] %p7/z -> %z25.b -a4053f9b : ld1rqb z27.b, p7/Z, [x28, #80] : ld1rqb +0x50(%x28)[16byte] %p7/z -> %z27.b -a4073fff : ld1rqb z31.b, p7/Z, [sp, 
#112] : ld1rqb +0x70(%sp)[16byte] %p7/z -> %z31.b +a4082000 : ld1rqb z0.b, p0/Z, [x0, #-128] : ld1rqb -0x80(%x0)[1byte] %p0/z -> %z0.b +a4092482 : ld1rqb z2.b, p1/Z, [x4, #-112] : ld1rqb -0x70(%x4)[1byte] %p1/z -> %z2.b +a40a28c4 : ld1rqb z4.b, p2/Z, [x6, #-96] : ld1rqb -0x60(%x6)[1byte] %p2/z -> %z4.b +a40b2906 : ld1rqb z6.b, p2/Z, [x8, #-80] : ld1rqb -0x50(%x8)[1byte] %p2/z -> %z6.b +a40c2d48 : ld1rqb z8.b, p3/Z, [x10, #-64] : ld1rqb -0x40(%x10)[1byte] %p3/z -> %z8.b +a40d2d6a : ld1rqb z10.b, p3/Z, [x11, #-48] : ld1rqb -0x30(%x11)[1byte] %p3/z -> %z10.b +a40e31ac : ld1rqb z12.b, p4/Z, [x13, #-32] : ld1rqb -0x20(%x13)[1byte] %p4/z -> %z12.b +a40f31ee : ld1rqb z14.b, p4/Z, [x15, #-16] : ld1rqb -0x10(%x15)[1byte] %p4/z -> %z14.b +a4003630 : ld1rqb z16.b, p5/Z, [x17, #0] : ld1rqb (%x17)[1byte] %p5/z -> %z16.b +a4003671 : ld1rqb z17.b, p5/Z, [x19, #0] : ld1rqb (%x19)[1byte] %p5/z -> %z17.b +a40136b3 : ld1rqb z19.b, p5/Z, [x21, #16] : ld1rqb +0x10(%x21)[1byte] %p5/z -> %z19.b +a4023af5 : ld1rqb z21.b, p6/Z, [x23, #32] : ld1rqb +0x20(%x23)[1byte] %p6/z -> %z21.b +a4033b17 : ld1rqb z23.b, p6/Z, [x24, #48] : ld1rqb +0x30(%x24)[1byte] %p6/z -> %z23.b +a4043f59 : ld1rqb z25.b, p7/Z, [x26, #64] : ld1rqb +0x40(%x26)[1byte] %p7/z -> %z25.b +a4053f9b : ld1rqb z27.b, p7/Z, [x28, #80] : ld1rqb +0x50(%x28)[1byte] %p7/z -> %z27.b +a4073fff : ld1rqb z31.b, p7/Z, [sp, #112] : ld1rqb +0x70(%sp)[1byte] %p7/z -> %z31.b # LD1RQD { .D }, /Z, [, , LSL #3] (LD1RQD-Z.P.BR-Contiguous) -a5800000 : ld1rqd z0.d, p0/Z, [x0, x0, lsl #3] : ld1rqd (%x0,%x0,lsl #3)[16byte] %p0/z -> %z0.d -a5850482 : ld1rqd z2.d, p1/Z, [x4, x5, lsl #3] : ld1rqd (%x4,%x5,lsl #3)[16byte] %p1/z -> %z2.d -a58708c4 : ld1rqd z4.d, p2/Z, [x6, x7, lsl #3] : ld1rqd (%x6,%x7,lsl #3)[16byte] %p2/z -> %z4.d -a5890906 : ld1rqd z6.d, p2/Z, [x8, x9, lsl #3] : ld1rqd (%x8,%x9,lsl #3)[16byte] %p2/z -> %z6.d -a58b0d48 : ld1rqd z8.d, p3/Z, [x10, x11, lsl #3] : ld1rqd (%x10,%x11,lsl #3)[16byte] %p3/z -> %z8.d -a58c0d6a : ld1rqd z10.d, 
p3/Z, [x11, x12, lsl #3] : ld1rqd (%x11,%x12,lsl #3)[16byte] %p3/z -> %z10.d -a58e11ac : ld1rqd z12.d, p4/Z, [x13, x14, lsl #3] : ld1rqd (%x13,%x14,lsl #3)[16byte] %p4/z -> %z12.d -a59011ee : ld1rqd z14.d, p4/Z, [x15, x16, lsl #3] : ld1rqd (%x15,%x16,lsl #3)[16byte] %p4/z -> %z14.d -a5921630 : ld1rqd z16.d, p5/Z, [x17, x18, lsl #3] : ld1rqd (%x17,%x18,lsl #3)[16byte] %p5/z -> %z16.d -a5941671 : ld1rqd z17.d, p5/Z, [x19, x20, lsl #3] : ld1rqd (%x19,%x20,lsl #3)[16byte] %p5/z -> %z17.d -a59616b3 : ld1rqd z19.d, p5/Z, [x21, x22, lsl #3] : ld1rqd (%x21,%x22,lsl #3)[16byte] %p5/z -> %z19.d -a5981af5 : ld1rqd z21.d, p6/Z, [x23, x24, lsl #3] : ld1rqd (%x23,%x24,lsl #3)[16byte] %p6/z -> %z21.d -a5991b17 : ld1rqd z23.d, p6/Z, [x24, x25, lsl #3] : ld1rqd (%x24,%x25,lsl #3)[16byte] %p6/z -> %z23.d -a59b1f59 : ld1rqd z25.d, p7/Z, [x26, x27, lsl #3] : ld1rqd (%x26,%x27,lsl #3)[16byte] %p7/z -> %z25.d -a59d1f9b : ld1rqd z27.d, p7/Z, [x28, x29, lsl #3] : ld1rqd (%x28,%x29,lsl #3)[16byte] %p7/z -> %z27.d -a59e1fff : ld1rqd z31.d, p7/Z, [sp, x30, lsl #3] : ld1rqd (%sp,%x30,lsl #3)[16byte] %p7/z -> %z31.d +a5800000 : ld1rqd z0.d, p0/Z, [x0, x0, lsl #3] : ld1rqd (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d +a5850482 : ld1rqd z2.d, p1/Z, [x4, x5, lsl #3] : ld1rqd (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d +a58708c4 : ld1rqd z4.d, p2/Z, [x6, x7, lsl #3] : ld1rqd (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d +a5890906 : ld1rqd z6.d, p2/Z, [x8, x9, lsl #3] : ld1rqd (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d +a58b0d48 : ld1rqd z8.d, p3/Z, [x10, x11, lsl #3] : ld1rqd (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d +a58c0d6a : ld1rqd z10.d, p3/Z, [x11, x12, lsl #3] : ld1rqd (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d +a58e11ac : ld1rqd z12.d, p4/Z, [x13, x14, lsl #3] : ld1rqd (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d +a59011ee : ld1rqd z14.d, p4/Z, [x15, x16, lsl #3] : ld1rqd (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d +a5921630 : ld1rqd z16.d, p5/Z, [x17, x18, lsl #3] : ld1rqd (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d 
+a5941671 : ld1rqd z17.d, p5/Z, [x19, x20, lsl #3] : ld1rqd (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d +a59616b3 : ld1rqd z19.d, p5/Z, [x21, x22, lsl #3] : ld1rqd (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d +a5981af5 : ld1rqd z21.d, p6/Z, [x23, x24, lsl #3] : ld1rqd (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d +a5991b17 : ld1rqd z23.d, p6/Z, [x24, x25, lsl #3] : ld1rqd (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d +a59b1f59 : ld1rqd z25.d, p7/Z, [x26, x27, lsl #3] : ld1rqd (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d +a59d1f9b : ld1rqd z27.d, p7/Z, [x28, x29, lsl #3] : ld1rqd (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d +a59e1fff : ld1rqd z31.d, p7/Z, [sp, x30, lsl #3] : ld1rqd (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d # LD1RQD { .D }, /Z, [{, #}] (LD1RQD-Z.P.BI-U64) -a5882000 : ld1rqd z0.d, p0/Z, [x0, #-128] : ld1rqd -0x80(%x0)[16byte] %p0/z -> %z0.d -a5892482 : ld1rqd z2.d, p1/Z, [x4, #-112] : ld1rqd -0x70(%x4)[16byte] %p1/z -> %z2.d -a58a28c4 : ld1rqd z4.d, p2/Z, [x6, #-96] : ld1rqd -0x60(%x6)[16byte] %p2/z -> %z4.d -a58b2906 : ld1rqd z6.d, p2/Z, [x8, #-80] : ld1rqd -0x50(%x8)[16byte] %p2/z -> %z6.d -a58c2d48 : ld1rqd z8.d, p3/Z, [x10, #-64] : ld1rqd -0x40(%x10)[16byte] %p3/z -> %z8.d -a58d2d6a : ld1rqd z10.d, p3/Z, [x11, #-48] : ld1rqd -0x30(%x11)[16byte] %p3/z -> %z10.d -a58e31ac : ld1rqd z12.d, p4/Z, [x13, #-32] : ld1rqd -0x20(%x13)[16byte] %p4/z -> %z12.d -a58f31ee : ld1rqd z14.d, p4/Z, [x15, #-16] : ld1rqd -0x10(%x15)[16byte] %p4/z -> %z14.d -a5803630 : ld1rqd z16.d, p5/Z, [x17, #0] : ld1rqd (%x17)[16byte] %p5/z -> %z16.d -a5803671 : ld1rqd z17.d, p5/Z, [x19, #0] : ld1rqd (%x19)[16byte] %p5/z -> %z17.d -a58136b3 : ld1rqd z19.d, p5/Z, [x21, #16] : ld1rqd +0x10(%x21)[16byte] %p5/z -> %z19.d -a5823af5 : ld1rqd z21.d, p6/Z, [x23, #32] : ld1rqd +0x20(%x23)[16byte] %p6/z -> %z21.d -a5833b17 : ld1rqd z23.d, p6/Z, [x24, #48] : ld1rqd +0x30(%x24)[16byte] %p6/z -> %z23.d -a5843f59 : ld1rqd z25.d, p7/Z, [x26, #64] : ld1rqd +0x40(%x26)[16byte] %p7/z -> %z25.d -a5853f9b : ld1rqd z27.d, 
p7/Z, [x28, #80] : ld1rqd +0x50(%x28)[16byte] %p7/z -> %z27.d -a5873fff : ld1rqd z31.d, p7/Z, [sp, #112] : ld1rqd +0x70(%sp)[16byte] %p7/z -> %z31.d +a5882000 : ld1rqd z0.d, p0/Z, [x0, #-128] : ld1rqd -0x80(%x0)[8byte] %p0/z -> %z0.d +a5892482 : ld1rqd z2.d, p1/Z, [x4, #-112] : ld1rqd -0x70(%x4)[8byte] %p1/z -> %z2.d +a58a28c4 : ld1rqd z4.d, p2/Z, [x6, #-96] : ld1rqd -0x60(%x6)[8byte] %p2/z -> %z4.d +a58b2906 : ld1rqd z6.d, p2/Z, [x8, #-80] : ld1rqd -0x50(%x8)[8byte] %p2/z -> %z6.d +a58c2d48 : ld1rqd z8.d, p3/Z, [x10, #-64] : ld1rqd -0x40(%x10)[8byte] %p3/z -> %z8.d +a58d2d6a : ld1rqd z10.d, p3/Z, [x11, #-48] : ld1rqd -0x30(%x11)[8byte] %p3/z -> %z10.d +a58e31ac : ld1rqd z12.d, p4/Z, [x13, #-32] : ld1rqd -0x20(%x13)[8byte] %p4/z -> %z12.d +a58f31ee : ld1rqd z14.d, p4/Z, [x15, #-16] : ld1rqd -0x10(%x15)[8byte] %p4/z -> %z14.d +a5803630 : ld1rqd z16.d, p5/Z, [x17, #0] : ld1rqd (%x17)[8byte] %p5/z -> %z16.d +a5803671 : ld1rqd z17.d, p5/Z, [x19, #0] : ld1rqd (%x19)[8byte] %p5/z -> %z17.d +a58136b3 : ld1rqd z19.d, p5/Z, [x21, #16] : ld1rqd +0x10(%x21)[8byte] %p5/z -> %z19.d +a5823af5 : ld1rqd z21.d, p6/Z, [x23, #32] : ld1rqd +0x20(%x23)[8byte] %p6/z -> %z21.d +a5833b17 : ld1rqd z23.d, p6/Z, [x24, #48] : ld1rqd +0x30(%x24)[8byte] %p6/z -> %z23.d +a5843f59 : ld1rqd z25.d, p7/Z, [x26, #64] : ld1rqd +0x40(%x26)[8byte] %p7/z -> %z25.d +a5853f9b : ld1rqd z27.d, p7/Z, [x28, #80] : ld1rqd +0x50(%x28)[8byte] %p7/z -> %z27.d +a5873fff : ld1rqd z31.d, p7/Z, [sp, #112] : ld1rqd +0x70(%sp)[8byte] %p7/z -> %z31.d # LD1RQH { .H }, /Z, [, , LSL #1] (LD1RQH-Z.P.BR-Contiguous) -a4800000 : ld1rqh z0.h, p0/Z, [x0, x01, LSL #1] : ld1rqh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.h -a4850482 : ld1rqh z2.h, p1/Z, [x4, x51, LSL #1] : ld1rqh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.h -a48708c4 : ld1rqh z4.h, p2/Z, [x6, x71, LSL #1] : ld1rqh (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.h -a4890906 : ld1rqh z6.h, p2/Z, [x8, x91, LSL #1] : ld1rqh (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.h -a48b0d48 : ld1rqh z8.h, 
p3/Z, [x10, x111, LSL #1] : ld1rqh (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.h -a48c0d6a : ld1rqh z10.h, p3/Z, [x11, x121, LSL #1] : ld1rqh (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.h -a48e11ac : ld1rqh z12.h, p4/Z, [x13, x141, LSL #1] : ld1rqh (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.h -a49011ee : ld1rqh z14.h, p4/Z, [x15, x161, LSL #1] : ld1rqh (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.h -a4921630 : ld1rqh z16.h, p5/Z, [x17, x181, LSL #1] : ld1rqh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.h -a4941671 : ld1rqh z17.h, p5/Z, [x19, x201, LSL #1] : ld1rqh (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.h -a49616b3 : ld1rqh z19.h, p5/Z, [x21, x221, LSL #1] : ld1rqh (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.h -a4981af5 : ld1rqh z21.h, p6/Z, [x23, x241, LSL #1] : ld1rqh (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.h -a4991b17 : ld1rqh z23.h, p6/Z, [x24, x251, LSL #1] : ld1rqh (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.h -a49b1f59 : ld1rqh z25.h, p7/Z, [x26, x271, LSL #1] : ld1rqh (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.h -a49d1f9b : ld1rqh z27.h, p7/Z, [x28, x291, LSL #1] : ld1rqh (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.h -a49e1fff : ld1rqh z31.h, p7/Z, [sp, x301, LSL #1] : ld1rqh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.h +a4800000 : ld1rqh z0.h, p0/Z, [x0, x01, LSL #1] : ld1rqh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h +a4850482 : ld1rqh z2.h, p1/Z, [x4, x51, LSL #1] : ld1rqh (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h +a48708c4 : ld1rqh z4.h, p2/Z, [x6, x71, LSL #1] : ld1rqh (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h +a4890906 : ld1rqh z6.h, p2/Z, [x8, x91, LSL #1] : ld1rqh (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h +a48b0d48 : ld1rqh z8.h, p3/Z, [x10, x111, LSL #1] : ld1rqh (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h +a48c0d6a : ld1rqh z10.h, p3/Z, [x11, x121, LSL #1] : ld1rqh (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h +a48e11ac : ld1rqh z12.h, p4/Z, [x13, x141, LSL #1] : ld1rqh (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h +a49011ee : ld1rqh z14.h, p4/Z, [x15, x161, LSL #1] : ld1rqh (%x15,%x16,lsl 
#1)[2byte] %p4/z -> %z14.h +a4921630 : ld1rqh z16.h, p5/Z, [x17, x181, LSL #1] : ld1rqh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h +a4941671 : ld1rqh z17.h, p5/Z, [x19, x201, LSL #1] : ld1rqh (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h +a49616b3 : ld1rqh z19.h, p5/Z, [x21, x221, LSL #1] : ld1rqh (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h +a4981af5 : ld1rqh z21.h, p6/Z, [x23, x241, LSL #1] : ld1rqh (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h +a4991b17 : ld1rqh z23.h, p6/Z, [x24, x251, LSL #1] : ld1rqh (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h +a49b1f59 : ld1rqh z25.h, p7/Z, [x26, x271, LSL #1] : ld1rqh (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h +a49d1f9b : ld1rqh z27.h, p7/Z, [x28, x291, LSL #1] : ld1rqh (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h +a49e1fff : ld1rqh z31.h, p7/Z, [sp, x301, LSL #1] : ld1rqh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h # LD1RQH { .H }, /Z, [{, #}] (LD1RQH-Z.P.BI-U16) -a4882000 : ld1rqh z0.h, p0/Z, [x0, #-128] : ld1rqh -0x80(%x0)[16byte] %p0/z -> %z0.h -a4892482 : ld1rqh z2.h, p1/Z, [x4, #-112] : ld1rqh -0x70(%x4)[16byte] %p1/z -> %z2.h -a48a28c4 : ld1rqh z4.h, p2/Z, [x6, #-96] : ld1rqh -0x60(%x6)[16byte] %p2/z -> %z4.h -a48b2906 : ld1rqh z6.h, p2/Z, [x8, #-80] : ld1rqh -0x50(%x8)[16byte] %p2/z -> %z6.h -a48c2d48 : ld1rqh z8.h, p3/Z, [x10, #-64] : ld1rqh -0x40(%x10)[16byte] %p3/z -> %z8.h -a48d2d6a : ld1rqh z10.h, p3/Z, [x11, #-48] : ld1rqh -0x30(%x11)[16byte] %p3/z -> %z10.h -a48e31ac : ld1rqh z12.h, p4/Z, [x13, #-32] : ld1rqh -0x20(%x13)[16byte] %p4/z -> %z12.h -a48f31ee : ld1rqh z14.h, p4/Z, [x15, #-16] : ld1rqh -0x10(%x15)[16byte] %p4/z -> %z14.h -a4803630 : ld1rqh z16.h, p5/Z, [x17, #0] : ld1rqh (%x17)[16byte] %p5/z -> %z16.h -a4803671 : ld1rqh z17.h, p5/Z, [x19, #0] : ld1rqh (%x19)[16byte] %p5/z -> %z17.h -a48136b3 : ld1rqh z19.h, p5/Z, [x21, #16] : ld1rqh +0x10(%x21)[16byte] %p5/z -> %z19.h -a4823af5 : ld1rqh z21.h, p6/Z, [x23, #32] : ld1rqh +0x20(%x23)[16byte] %p6/z -> %z21.h -a4833b17 : ld1rqh z23.h, p6/Z, [x24, #48] : ld1rqh 
+0x30(%x24)[16byte] %p6/z -> %z23.h -a4843f59 : ld1rqh z25.h, p7/Z, [x26, #64] : ld1rqh +0x40(%x26)[16byte] %p7/z -> %z25.h -a4853f9b : ld1rqh z27.h, p7/Z, [x28, #80] : ld1rqh +0x50(%x28)[16byte] %p7/z -> %z27.h -a4873fff : ld1rqh z31.h, p7/Z, [sp, #112] : ld1rqh +0x70(%sp)[16byte] %p7/z -> %z31.h +a4882000 : ld1rqh z0.h, p0/Z, [x0, #-128] : ld1rqh -0x80(%x0)[2byte] %p0/z -> %z0.h +a4892482 : ld1rqh z2.h, p1/Z, [x4, #-112] : ld1rqh -0x70(%x4)[2byte] %p1/z -> %z2.h +a48a28c4 : ld1rqh z4.h, p2/Z, [x6, #-96] : ld1rqh -0x60(%x6)[2byte] %p2/z -> %z4.h +a48b2906 : ld1rqh z6.h, p2/Z, [x8, #-80] : ld1rqh -0x50(%x8)[2byte] %p2/z -> %z6.h +a48c2d48 : ld1rqh z8.h, p3/Z, [x10, #-64] : ld1rqh -0x40(%x10)[2byte] %p3/z -> %z8.h +a48d2d6a : ld1rqh z10.h, p3/Z, [x11, #-48] : ld1rqh -0x30(%x11)[2byte] %p3/z -> %z10.h +a48e31ac : ld1rqh z12.h, p4/Z, [x13, #-32] : ld1rqh -0x20(%x13)[2byte] %p4/z -> %z12.h +a48f31ee : ld1rqh z14.h, p4/Z, [x15, #-16] : ld1rqh -0x10(%x15)[2byte] %p4/z -> %z14.h +a4803630 : ld1rqh z16.h, p5/Z, [x17, #0] : ld1rqh (%x17)[2byte] %p5/z -> %z16.h +a4803671 : ld1rqh z17.h, p5/Z, [x19, #0] : ld1rqh (%x19)[2byte] %p5/z -> %z17.h +a48136b3 : ld1rqh z19.h, p5/Z, [x21, #16] : ld1rqh +0x10(%x21)[2byte] %p5/z -> %z19.h +a4823af5 : ld1rqh z21.h, p6/Z, [x23, #32] : ld1rqh +0x20(%x23)[2byte] %p6/z -> %z21.h +a4833b17 : ld1rqh z23.h, p6/Z, [x24, #48] : ld1rqh +0x30(%x24)[2byte] %p6/z -> %z23.h +a4843f59 : ld1rqh z25.h, p7/Z, [x26, #64] : ld1rqh +0x40(%x26)[2byte] %p7/z -> %z25.h +a4853f9b : ld1rqh z27.h, p7/Z, [x28, #80] : ld1rqh +0x50(%x28)[2byte] %p7/z -> %z27.h +a4873fff : ld1rqh z31.h, p7/Z, [sp, #112] : ld1rqh +0x70(%sp)[2byte] %p7/z -> %z31.h # LD1RQW { .S }, /Z, [, , LSL #2] (LD1RQW-Z.P.BR-Contiguous) -a5000000 : ld1rqw z0.s, p0/Z, [x0, x0, LSL #2] : ld1rqw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.s -a5050482 : ld1rqw z2.s, p1/Z, [x4, x5, LSL #2] : ld1rqw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.s -a50708c4 : ld1rqw z4.s, p2/Z, [x6, x7, LSL #2] : ld1rqw (%x6,%x7,lsl 
#2)[16byte] %p2/z -> %z4.s -a5090906 : ld1rqw z6.s, p2/Z, [x8, x9, LSL #2] : ld1rqw (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.s -a50b0d48 : ld1rqw z8.s, p3/Z, [x10, x11, LSL #2] : ld1rqw (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.s -a50c0d6a : ld1rqw z10.s, p3/Z, [x11, x12, LSL #2] : ld1rqw (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.s -a50e11ac : ld1rqw z12.s, p4/Z, [x13, x14, LSL #2] : ld1rqw (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.s -a51011ee : ld1rqw z14.s, p4/Z, [x15, x16, LSL #2] : ld1rqw (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.s -a5121630 : ld1rqw z16.s, p5/Z, [x17, x18, LSL #2] : ld1rqw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.s -a5141671 : ld1rqw z17.s, p5/Z, [x19, x20, LSL #2] : ld1rqw (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.s -a51616b3 : ld1rqw z19.s, p5/Z, [x21, x22, LSL #2] : ld1rqw (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.s -a5181af5 : ld1rqw z21.s, p6/Z, [x23, x24, LSL #2] : ld1rqw (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.s -a5191b17 : ld1rqw z23.s, p6/Z, [x24, x25, LSL #2] : ld1rqw (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.s -a51b1f59 : ld1rqw z25.s, p7/Z, [x26, x27, LSL #2] : ld1rqw (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.s -a51d1f9b : ld1rqw z27.s, p7/Z, [x28, x29, LSL #2] : ld1rqw (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.s -a51e1fff : ld1rqw z31.s, p7/Z, [sp, x30, LSL #2] : ld1rqw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.s +a5000000 : ld1rqw z0.s, p0/Z, [x0, x0, LSL #2] : ld1rqw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s +a5050482 : ld1rqw z2.s, p1/Z, [x4, x5, LSL #2] : ld1rqw (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s +a50708c4 : ld1rqw z4.s, p2/Z, [x6, x7, LSL #2] : ld1rqw (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s +a5090906 : ld1rqw z6.s, p2/Z, [x8, x9, LSL #2] : ld1rqw (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s +a50b0d48 : ld1rqw z8.s, p3/Z, [x10, x11, LSL #2] : ld1rqw (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s +a50c0d6a : ld1rqw z10.s, p3/Z, [x11, x12, LSL #2] : ld1rqw (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s +a50e11ac : ld1rqw z12.s, p4/Z, [x13, x14, LSL #2] : 
ld1rqw (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s +a51011ee : ld1rqw z14.s, p4/Z, [x15, x16, LSL #2] : ld1rqw (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s +a5121630 : ld1rqw z16.s, p5/Z, [x17, x18, LSL #2] : ld1rqw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s +a5141671 : ld1rqw z17.s, p5/Z, [x19, x20, LSL #2] : ld1rqw (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s +a51616b3 : ld1rqw z19.s, p5/Z, [x21, x22, LSL #2] : ld1rqw (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s +a5181af5 : ld1rqw z21.s, p6/Z, [x23, x24, LSL #2] : ld1rqw (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s +a5191b17 : ld1rqw z23.s, p6/Z, [x24, x25, LSL #2] : ld1rqw (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s +a51b1f59 : ld1rqw z25.s, p7/Z, [x26, x27, LSL #2] : ld1rqw (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s +a51d1f9b : ld1rqw z27.s, p7/Z, [x28, x29, LSL #2] : ld1rqw (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s +a51e1fff : ld1rqw z31.s, p7/Z, [sp, x30, LSL #2] : ld1rqw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s # LD1RQW { .S }, /Z, [{, #}] (LD1RQW-Z.P.BI-U32) -a5082000 : ld1rqw z0.s, p0/Z, [x0, #-128] : ld1rqw -0x80(%x0)[16byte] %p0/z -> %z0.s -a5092482 : ld1rqw z2.s, p1/Z, [x4, #-112] : ld1rqw -0x70(%x4)[16byte] %p1/z -> %z2.s -a50a28c4 : ld1rqw z4.s, p2/Z, [x6, #-96] : ld1rqw -0x60(%x6)[16byte] %p2/z -> %z4.s -a50b2906 : ld1rqw z6.s, p2/Z, [x8, #-80] : ld1rqw -0x50(%x8)[16byte] %p2/z -> %z6.s -a50c2d48 : ld1rqw z8.s, p3/Z, [x10, #-64] : ld1rqw -0x40(%x10)[16byte] %p3/z -> %z8.s -a50d2d6a : ld1rqw z10.s, p3/Z, [x11, #-48] : ld1rqw -0x30(%x11)[16byte] %p3/z -> %z10.s -a50e31ac : ld1rqw z12.s, p4/Z, [x13, #-32] : ld1rqw -0x20(%x13)[16byte] %p4/z -> %z12.s -a50f31ee : ld1rqw z14.s, p4/Z, [x15, #-16] : ld1rqw -0x10(%x15)[16byte] %p4/z -> %z14.s -a5003630 : ld1rqw z16.s, p5/Z, [x17, #0] : ld1rqw (%x17)[16byte] %p5/z -> %z16.s -a5003671 : ld1rqw z17.s, p5/Z, [x19, #0] : ld1rqw (%x19)[16byte] %p5/z -> %z17.s -a50136b3 : ld1rqw z19.s, p5/Z, [x21, #16] : ld1rqw +0x10(%x21)[16byte] %p5/z -> %z19.s -a5023af5 : ld1rqw z21.s, p6/Z, [x23, 
#32] : ld1rqw +0x20(%x23)[16byte] %p6/z -> %z21.s -a5033b17 : ld1rqw z23.s, p6/Z, [x24, #48] : ld1rqw +0x30(%x24)[16byte] %p6/z -> %z23.s -a5043f59 : ld1rqw z25.s, p7/Z, [x26, #64] : ld1rqw +0x40(%x26)[16byte] %p7/z -> %z25.s -a5053f9b : ld1rqw z27.s, p7/Z, [x28, #80] : ld1rqw +0x50(%x28)[16byte] %p7/z -> %z27.s -a5073fff : ld1rqw z31.s, p7/Z, [sp, #112] : ld1rqw +0x70(%sp)[16byte] %p7/z -> %z31.s +a5082000 : ld1rqw z0.s, p0/Z, [x0, #-128] : ld1rqw -0x80(%x0)[4byte] %p0/z -> %z0.s +a5092482 : ld1rqw z2.s, p1/Z, [x4, #-112] : ld1rqw -0x70(%x4)[4byte] %p1/z -> %z2.s +a50a28c4 : ld1rqw z4.s, p2/Z, [x6, #-96] : ld1rqw -0x60(%x6)[4byte] %p2/z -> %z4.s +a50b2906 : ld1rqw z6.s, p2/Z, [x8, #-80] : ld1rqw -0x50(%x8)[4byte] %p2/z -> %z6.s +a50c2d48 : ld1rqw z8.s, p3/Z, [x10, #-64] : ld1rqw -0x40(%x10)[4byte] %p3/z -> %z8.s +a50d2d6a : ld1rqw z10.s, p3/Z, [x11, #-48] : ld1rqw -0x30(%x11)[4byte] %p3/z -> %z10.s +a50e31ac : ld1rqw z12.s, p4/Z, [x13, #-32] : ld1rqw -0x20(%x13)[4byte] %p4/z -> %z12.s +a50f31ee : ld1rqw z14.s, p4/Z, [x15, #-16] : ld1rqw -0x10(%x15)[4byte] %p4/z -> %z14.s +a5003630 : ld1rqw z16.s, p5/Z, [x17, #0] : ld1rqw (%x17)[4byte] %p5/z -> %z16.s +a5003671 : ld1rqw z17.s, p5/Z, [x19, #0] : ld1rqw (%x19)[4byte] %p5/z -> %z17.s +a50136b3 : ld1rqw z19.s, p5/Z, [x21, #16] : ld1rqw +0x10(%x21)[4byte] %p5/z -> %z19.s +a5023af5 : ld1rqw z21.s, p6/Z, [x23, #32] : ld1rqw +0x20(%x23)[4byte] %p6/z -> %z21.s +a5033b17 : ld1rqw z23.s, p6/Z, [x24, #48] : ld1rqw +0x30(%x24)[4byte] %p6/z -> %z23.s +a5043f59 : ld1rqw z25.s, p7/Z, [x26, #64] : ld1rqw +0x40(%x26)[4byte] %p7/z -> %z25.s +a5053f9b : ld1rqw z27.s, p7/Z, [x28, #80] : ld1rqw +0x50(%x28)[4byte] %p7/z -> %z27.s +a5073fff : ld1rqw z31.s, p7/Z, [sp, #112] : ld1rqw +0x70(%sp)[4byte] %p7/z -> %z31.s # LD1RSB { .D }, /Z, [{, #}] (LD1RSB-Z.P.BI-S64) 85c08000 : ld1rsb z0.d, p0/Z, [x0, #0] : ld1rsb (%x0)[1byte] %p0/z -> %z0.d @@ -11992,3216 +11992,3216 @@ a5073fff : ld1rqw z31.s, p7/Z, [sp, #112] : ld1rqw +0x70(%sp)[16byte] 
857fffff : ld1rw z31.d, p7/Z, [sp, #252] : ld1rw +0xfc(%sp)[4byte] %p7/z -> %z31.d # LD1SB { .S }, /Z, [, .S, ] (LD1SB-Z.P.BZ-S.x32.unscaled) -84000000 : ld1sb z0.s, p0/Z, [x0, z0.s, UXTW] : ld1sb (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s -84050482 : ld1sb z2.s, p1/Z, [x4, z5.s, UXTW] : ld1sb (%x4,%z5.s,uxtw)[8byte] %p1/z -> %z2.s -840708c4 : ld1sb z4.s, p2/Z, [x6, z7.s, UXTW] : ld1sb (%x6,%z7.s,uxtw)[8byte] %p2/z -> %z4.s -84090906 : ld1sb z6.s, p2/Z, [x8, z9.s, UXTW] : ld1sb (%x8,%z9.s,uxtw)[8byte] %p2/z -> %z6.s -840b0d48 : ld1sb z8.s, p3/Z, [x10, z11.s, UXTW] : ld1sb (%x10,%z11.s,uxtw)[8byte] %p3/z -> %z8.s -840d0d6a : ld1sb z10.s, p3/Z, [x11, z13.s, UXTW] : ld1sb (%x11,%z13.s,uxtw)[8byte] %p3/z -> %z10.s -840f11ac : ld1sb z12.s, p4/Z, [x13, z15.s, UXTW] : ld1sb (%x13,%z15.s,uxtw)[8byte] %p4/z -> %z12.s -841111ee : ld1sb z14.s, p4/Z, [x15, z17.s, UXTW] : ld1sb (%x15,%z17.s,uxtw)[8byte] %p4/z -> %z14.s -84131630 : ld1sb z16.s, p5/Z, [x17, z19.s, UXTW] : ld1sb (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s -84141671 : ld1sb z17.s, p5/Z, [x19, z20.s, UXTW] : ld1sb (%x19,%z20.s,uxtw)[8byte] %p5/z -> %z17.s -841616b3 : ld1sb z19.s, p5/Z, [x21, z22.s, UXTW] : ld1sb (%x21,%z22.s,uxtw)[8byte] %p5/z -> %z19.s -84181af5 : ld1sb z21.s, p6/Z, [x23, z24.s, UXTW] : ld1sb (%x23,%z24.s,uxtw)[8byte] %p6/z -> %z21.s -841a1b17 : ld1sb z23.s, p6/Z, [x24, z26.s, UXTW] : ld1sb (%x24,%z26.s,uxtw)[8byte] %p6/z -> %z23.s -841c1f59 : ld1sb z25.s, p7/Z, [x26, z28.s, UXTW] : ld1sb (%x26,%z28.s,uxtw)[8byte] %p7/z -> %z25.s -841e1f9b : ld1sb z27.s, p7/Z, [x28, z30.s, UXTW] : ld1sb (%x28,%z30.s,uxtw)[8byte] %p7/z -> %z27.s -841f1fff : ld1sb z31.s, p7/Z, [sp, z31.s, UXTW] : ld1sb (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s -84400000 : ld1sb z0.s, p0/Z, [x0, z0.s, SXTW] : ld1sb (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s -84450482 : ld1sb z2.s, p1/Z, [x4, z5.s, SXTW] : ld1sb (%x4,%z5.s,sxtw)[8byte] %p1/z -> %z2.s -844708c4 : ld1sb z4.s, p2/Z, [x6, z7.s, SXTW] : ld1sb (%x6,%z7.s,sxtw)[8byte] %p2/z -> %z4.s 
-84490906 : ld1sb z6.s, p2/Z, [x8, z9.s, SXTW] : ld1sb (%x8,%z9.s,sxtw)[8byte] %p2/z -> %z6.s -844b0d48 : ld1sb z8.s, p3/Z, [x10, z11.s, SXTW] : ld1sb (%x10,%z11.s,sxtw)[8byte] %p3/z -> %z8.s -844d0d6a : ld1sb z10.s, p3/Z, [x11, z13.s, SXTW] : ld1sb (%x11,%z13.s,sxtw)[8byte] %p3/z -> %z10.s -844f11ac : ld1sb z12.s, p4/Z, [x13, z15.s, SXTW] : ld1sb (%x13,%z15.s,sxtw)[8byte] %p4/z -> %z12.s -845111ee : ld1sb z14.s, p4/Z, [x15, z17.s, SXTW] : ld1sb (%x15,%z17.s,sxtw)[8byte] %p4/z -> %z14.s -84531630 : ld1sb z16.s, p5/Z, [x17, z19.s, SXTW] : ld1sb (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s -84541671 : ld1sb z17.s, p5/Z, [x19, z20.s, SXTW] : ld1sb (%x19,%z20.s,sxtw)[8byte] %p5/z -> %z17.s -845616b3 : ld1sb z19.s, p5/Z, [x21, z22.s, SXTW] : ld1sb (%x21,%z22.s,sxtw)[8byte] %p5/z -> %z19.s -84581af5 : ld1sb z21.s, p6/Z, [x23, z24.s, SXTW] : ld1sb (%x23,%z24.s,sxtw)[8byte] %p6/z -> %z21.s -845a1b17 : ld1sb z23.s, p6/Z, [x24, z26.s, SXTW] : ld1sb (%x24,%z26.s,sxtw)[8byte] %p6/z -> %z23.s -845c1f59 : ld1sb z25.s, p7/Z, [x26, z28.s, SXTW] : ld1sb (%x26,%z28.s,sxtw)[8byte] %p7/z -> %z25.s -845e1f9b : ld1sb z27.s, p7/Z, [x28, z30.s, SXTW] : ld1sb (%x28,%z30.s,sxtw)[8byte] %p7/z -> %z27.s -845f1fff : ld1sb z31.s, p7/Z, [sp, z31.s, SXTW] : ld1sb (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s +84000000 : ld1sb z0.s, p0/Z, [x0, z0.s, UXTW] : ld1sb (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s +84050482 : ld1sb z2.s, p1/Z, [x4, z5.s, UXTW] : ld1sb (%x4,%z5.s,uxtw)[1byte] %p1/z -> %z2.s +840708c4 : ld1sb z4.s, p2/Z, [x6, z7.s, UXTW] : ld1sb (%x6,%z7.s,uxtw)[1byte] %p2/z -> %z4.s +84090906 : ld1sb z6.s, p2/Z, [x8, z9.s, UXTW] : ld1sb (%x8,%z9.s,uxtw)[1byte] %p2/z -> %z6.s +840b0d48 : ld1sb z8.s, p3/Z, [x10, z11.s, UXTW] : ld1sb (%x10,%z11.s,uxtw)[1byte] %p3/z -> %z8.s +840d0d6a : ld1sb z10.s, p3/Z, [x11, z13.s, UXTW] : ld1sb (%x11,%z13.s,uxtw)[1byte] %p3/z -> %z10.s +840f11ac : ld1sb z12.s, p4/Z, [x13, z15.s, UXTW] : ld1sb (%x13,%z15.s,uxtw)[1byte] %p4/z -> %z12.s +841111ee : ld1sb z14.s, p4/Z, 
[x15, z17.s, UXTW] : ld1sb (%x15,%z17.s,uxtw)[1byte] %p4/z -> %z14.s +84131630 : ld1sb z16.s, p5/Z, [x17, z19.s, UXTW] : ld1sb (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s +84141671 : ld1sb z17.s, p5/Z, [x19, z20.s, UXTW] : ld1sb (%x19,%z20.s,uxtw)[1byte] %p5/z -> %z17.s +841616b3 : ld1sb z19.s, p5/Z, [x21, z22.s, UXTW] : ld1sb (%x21,%z22.s,uxtw)[1byte] %p5/z -> %z19.s +84181af5 : ld1sb z21.s, p6/Z, [x23, z24.s, UXTW] : ld1sb (%x23,%z24.s,uxtw)[1byte] %p6/z -> %z21.s +841a1b17 : ld1sb z23.s, p6/Z, [x24, z26.s, UXTW] : ld1sb (%x24,%z26.s,uxtw)[1byte] %p6/z -> %z23.s +841c1f59 : ld1sb z25.s, p7/Z, [x26, z28.s, UXTW] : ld1sb (%x26,%z28.s,uxtw)[1byte] %p7/z -> %z25.s +841e1f9b : ld1sb z27.s, p7/Z, [x28, z30.s, UXTW] : ld1sb (%x28,%z30.s,uxtw)[1byte] %p7/z -> %z27.s +841f1fff : ld1sb z31.s, p7/Z, [sp, z31.s, UXTW] : ld1sb (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s +84400000 : ld1sb z0.s, p0/Z, [x0, z0.s, SXTW] : ld1sb (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s +84450482 : ld1sb z2.s, p1/Z, [x4, z5.s, SXTW] : ld1sb (%x4,%z5.s,sxtw)[1byte] %p1/z -> %z2.s +844708c4 : ld1sb z4.s, p2/Z, [x6, z7.s, SXTW] : ld1sb (%x6,%z7.s,sxtw)[1byte] %p2/z -> %z4.s +84490906 : ld1sb z6.s, p2/Z, [x8, z9.s, SXTW] : ld1sb (%x8,%z9.s,sxtw)[1byte] %p2/z -> %z6.s +844b0d48 : ld1sb z8.s, p3/Z, [x10, z11.s, SXTW] : ld1sb (%x10,%z11.s,sxtw)[1byte] %p3/z -> %z8.s +844d0d6a : ld1sb z10.s, p3/Z, [x11, z13.s, SXTW] : ld1sb (%x11,%z13.s,sxtw)[1byte] %p3/z -> %z10.s +844f11ac : ld1sb z12.s, p4/Z, [x13, z15.s, SXTW] : ld1sb (%x13,%z15.s,sxtw)[1byte] %p4/z -> %z12.s +845111ee : ld1sb z14.s, p4/Z, [x15, z17.s, SXTW] : ld1sb (%x15,%z17.s,sxtw)[1byte] %p4/z -> %z14.s +84531630 : ld1sb z16.s, p5/Z, [x17, z19.s, SXTW] : ld1sb (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s +84541671 : ld1sb z17.s, p5/Z, [x19, z20.s, SXTW] : ld1sb (%x19,%z20.s,sxtw)[1byte] %p5/z -> %z17.s +845616b3 : ld1sb z19.s, p5/Z, [x21, z22.s, SXTW] : ld1sb (%x21,%z22.s,sxtw)[1byte] %p5/z -> %z19.s +84581af5 : ld1sb z21.s, p6/Z, [x23, z24.s, SXTW] : ld1sb 
(%x23,%z24.s,sxtw)[1byte] %p6/z -> %z21.s +845a1b17 : ld1sb z23.s, p6/Z, [x24, z26.s, SXTW] : ld1sb (%x24,%z26.s,sxtw)[1byte] %p6/z -> %z23.s +845c1f59 : ld1sb z25.s, p7/Z, [x26, z28.s, SXTW] : ld1sb (%x26,%z28.s,sxtw)[1byte] %p7/z -> %z25.s +845e1f9b : ld1sb z27.s, p7/Z, [x28, z30.s, SXTW] : ld1sb (%x28,%z30.s,sxtw)[1byte] %p7/z -> %z27.s +845f1fff : ld1sb z31.s, p7/Z, [sp, z31.s, SXTW] : ld1sb (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s # LD1SB { .S }, /Z, [.S{, #}] (LD1SB-Z.P.AI-S) -84208000 : ld1sb z0.s, p0/Z, [z0.s, #0] : ld1sb (%z0.s)[8byte] %p0/z -> %z0.s -84228482 : ld1sb z2.s, p1/Z, [z4.s, #2] : ld1sb +0x02(%z4.s)[8byte] %p1/z -> %z2.s -842488c4 : ld1sb z4.s, p2/Z, [z6.s, #4] : ld1sb +0x04(%z6.s)[8byte] %p2/z -> %z4.s -84268906 : ld1sb z6.s, p2/Z, [z8.s, #6] : ld1sb +0x06(%z8.s)[8byte] %p2/z -> %z6.s -84288d48 : ld1sb z8.s, p3/Z, [z10.s, #8] : ld1sb +0x08(%z10.s)[8byte] %p3/z -> %z8.s -842a8d8a : ld1sb z10.s, p3/Z, [z12.s, #10] : ld1sb +0x0a(%z12.s)[8byte] %p3/z -> %z10.s -842c91cc : ld1sb z12.s, p4/Z, [z14.s, #12] : ld1sb +0x0c(%z14.s)[8byte] %p4/z -> %z12.s -842e920e : ld1sb z14.s, p4/Z, [z16.s, #14] : ld1sb +0x0e(%z16.s)[8byte] %p4/z -> %z14.s -84309650 : ld1sb z16.s, p5/Z, [z18.s, #16] : ld1sb +0x10(%z18.s)[8byte] %p5/z -> %z16.s -84319671 : ld1sb z17.s, p5/Z, [z19.s, #17] : ld1sb +0x11(%z19.s)[8byte] %p5/z -> %z17.s -843396b3 : ld1sb z19.s, p5/Z, [z21.s, #19] : ld1sb +0x13(%z21.s)[8byte] %p5/z -> %z19.s -84359af5 : ld1sb z21.s, p6/Z, [z23.s, #21] : ld1sb +0x15(%z23.s)[8byte] %p6/z -> %z21.s -84379b37 : ld1sb z23.s, p6/Z, [z25.s, #23] : ld1sb +0x17(%z25.s)[8byte] %p6/z -> %z23.s -84399f79 : ld1sb z25.s, p7/Z, [z27.s, #25] : ld1sb +0x19(%z27.s)[8byte] %p7/z -> %z25.s -843b9fbb : ld1sb z27.s, p7/Z, [z29.s, #27] : ld1sb +0x1b(%z29.s)[8byte] %p7/z -> %z27.s -843f9fff : ld1sb z31.s, p7/Z, [z31.s, #31] : ld1sb +0x1f(%z31.s)[8byte] %p7/z -> %z31.s +84208000 : ld1sb z0.s, p0/Z, [z0.s, #0] : ld1sb (%z0.s)[1byte] %p0/z -> %z0.s +84228482 : ld1sb z2.s, p1/Z, [z4.s, 
#2] : ld1sb +0x02(%z4.s)[1byte] %p1/z -> %z2.s +842488c4 : ld1sb z4.s, p2/Z, [z6.s, #4] : ld1sb +0x04(%z6.s)[1byte] %p2/z -> %z4.s +84268906 : ld1sb z6.s, p2/Z, [z8.s, #6] : ld1sb +0x06(%z8.s)[1byte] %p2/z -> %z6.s +84288d48 : ld1sb z8.s, p3/Z, [z10.s, #8] : ld1sb +0x08(%z10.s)[1byte] %p3/z -> %z8.s +842a8d8a : ld1sb z10.s, p3/Z, [z12.s, #10] : ld1sb +0x0a(%z12.s)[1byte] %p3/z -> %z10.s +842c91cc : ld1sb z12.s, p4/Z, [z14.s, #12] : ld1sb +0x0c(%z14.s)[1byte] %p4/z -> %z12.s +842e920e : ld1sb z14.s, p4/Z, [z16.s, #14] : ld1sb +0x0e(%z16.s)[1byte] %p4/z -> %z14.s +84309650 : ld1sb z16.s, p5/Z, [z18.s, #16] : ld1sb +0x10(%z18.s)[1byte] %p5/z -> %z16.s +84319671 : ld1sb z17.s, p5/Z, [z19.s, #17] : ld1sb +0x11(%z19.s)[1byte] %p5/z -> %z17.s +843396b3 : ld1sb z19.s, p5/Z, [z21.s, #19] : ld1sb +0x13(%z21.s)[1byte] %p5/z -> %z19.s +84359af5 : ld1sb z21.s, p6/Z, [z23.s, #21] : ld1sb +0x15(%z23.s)[1byte] %p6/z -> %z21.s +84379b37 : ld1sb z23.s, p6/Z, [z25.s, #23] : ld1sb +0x17(%z25.s)[1byte] %p6/z -> %z23.s +84399f79 : ld1sb z25.s, p7/Z, [z27.s, #25] : ld1sb +0x19(%z27.s)[1byte] %p7/z -> %z25.s +843b9fbb : ld1sb z27.s, p7/Z, [z29.s, #27] : ld1sb +0x1b(%z29.s)[1byte] %p7/z -> %z27.s +843f9fff : ld1sb z31.s, p7/Z, [z31.s, #31] : ld1sb +0x1f(%z31.s)[1byte] %p7/z -> %z31.s # LD1SB { .D }, /Z, [, ] (LD1SB-Z.P.BR-S64) -a5804000 : ld1sb z0.d, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[4byte] %p0/z -> %z0.d -a5854482 : ld1sb z2.d, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[4byte] %p1/z -> %z2.d -a58748c4 : ld1sb z4.d, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[4byte] %p2/z -> %z4.d -a5894906 : ld1sb z6.d, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[4byte] %p2/z -> %z6.d -a58b4d48 : ld1sb z8.d, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[4byte] %p3/z -> %z8.d -a58c4d6a : ld1sb z10.d, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[4byte] %p3/z -> %z10.d -a58e51ac : ld1sb z12.d, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[4byte] %p4/z -> %z12.d -a59051ee : ld1sb z14.d, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[4byte] %p4/z -> %z14.d -a5925630 : ld1sb 
z16.d, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[4byte] %p5/z -> %z16.d -a5945671 : ld1sb z17.d, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[4byte] %p5/z -> %z17.d -a59656b3 : ld1sb z19.d, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[4byte] %p5/z -> %z19.d -a5985af5 : ld1sb z21.d, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[4byte] %p6/z -> %z21.d -a5995b17 : ld1sb z23.d, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[4byte] %p6/z -> %z23.d -a59b5f59 : ld1sb z25.d, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[4byte] %p7/z -> %z25.d -a59d5f9b : ld1sb z27.d, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[4byte] %p7/z -> %z27.d -a59e5fff : ld1sb z31.d, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[4byte] %p7/z -> %z31.d +a5804000 : ld1sb z0.d, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.d +a5854482 : ld1sb z2.d, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[1byte] %p1/z -> %z2.d +a58748c4 : ld1sb z4.d, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[1byte] %p2/z -> %z4.d +a5894906 : ld1sb z6.d, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[1byte] %p2/z -> %z6.d +a58b4d48 : ld1sb z8.d, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[1byte] %p3/z -> %z8.d +a58c4d6a : ld1sb z10.d, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[1byte] %p3/z -> %z10.d +a58e51ac : ld1sb z12.d, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[1byte] %p4/z -> %z12.d +a59051ee : ld1sb z14.d, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[1byte] %p4/z -> %z14.d +a5925630 : ld1sb z16.d, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.d +a5945671 : ld1sb z17.d, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[1byte] %p5/z -> %z17.d +a59656b3 : ld1sb z19.d, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[1byte] %p5/z -> %z19.d +a5985af5 : ld1sb z21.d, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[1byte] %p6/z -> %z21.d +a5995b17 : ld1sb z23.d, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[1byte] %p6/z -> %z23.d +a59b5f59 : ld1sb z25.d, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[1byte] %p7/z -> %z25.d +a59d5f9b : ld1sb z27.d, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[1byte] %p7/z -> %z27.d +a59e5fff : ld1sb z31.d, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.d # LD1SB 
{ .D }, /Z, [{, #, MUL VL}] (LD1SB-Z.P.BI-S64) -a588a000 : ld1sb z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x20(%x0)[4byte] %p0/z -> %z0.d -a589a482 : ld1sb z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x1c(%x4)[4byte] %p1/z -> %z2.d -a58aa8c4 : ld1sb z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x18(%x6)[4byte] %p2/z -> %z4.d -a58ba906 : ld1sb z6.d, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x14(%x8)[4byte] %p2/z -> %z6.d -a58cad48 : ld1sb z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x10(%x10)[4byte] %p3/z -> %z8.d -a58dad6a : ld1sb z10.d, p3/Z, [x11, #-3, MUL VL] : ld1sb -0x0c(%x11)[4byte] %p3/z -> %z10.d -a58eb1ac : ld1sb z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sb -0x08(%x13)[4byte] %p4/z -> %z12.d -a58fb1ee : ld1sb z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x04(%x15)[4byte] %p4/z -> %z14.d -a580b630 : ld1sb z16.d, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[4byte] %p5/z -> %z16.d -a580b671 : ld1sb z17.d, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[4byte] %p5/z -> %z17.d -a581b6b3 : ld1sb z19.d, p5/Z, [x21, #1, MUL VL] : ld1sb +0x04(%x21)[4byte] %p5/z -> %z19.d -a582baf5 : ld1sb z21.d, p6/Z, [x23, #2, MUL VL] : ld1sb +0x08(%x23)[4byte] %p6/z -> %z21.d -a583bb17 : ld1sb z23.d, p6/Z, [x24, #3, MUL VL] : ld1sb +0x0c(%x24)[4byte] %p6/z -> %z23.d -a584bf59 : ld1sb z25.d, p7/Z, [x26, #4, MUL VL] : ld1sb +0x10(%x26)[4byte] %p7/z -> %z25.d -a585bf9b : ld1sb z27.d, p7/Z, [x28, #5, MUL VL] : ld1sb +0x14(%x28)[4byte] %p7/z -> %z27.d -a587bfff : ld1sb z31.d, p7/Z, [sp, #7, MUL VL] : ld1sb +0x1c(%sp)[4byte] %p7/z -> %z31.d +a588a000 : ld1sb z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x20(%x0)[1byte] %p0/z -> %z0.d +a589a482 : ld1sb z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x1c(%x4)[1byte] %p1/z -> %z2.d +a58aa8c4 : ld1sb z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x18(%x6)[1byte] %p2/z -> %z4.d +a58ba906 : ld1sb z6.d, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x14(%x8)[1byte] %p2/z -> %z6.d +a58cad48 : ld1sb z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x10(%x10)[1byte] %p3/z -> %z8.d +a58dad6a : ld1sb z10.d, p3/Z, [x11, #-3, MUL VL] : 
ld1sb -0x0c(%x11)[1byte] %p3/z -> %z10.d +a58eb1ac : ld1sb z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sb -0x08(%x13)[1byte] %p4/z -> %z12.d +a58fb1ee : ld1sb z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x04(%x15)[1byte] %p4/z -> %z14.d +a580b630 : ld1sb z16.d, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[1byte] %p5/z -> %z16.d +a580b671 : ld1sb z17.d, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[1byte] %p5/z -> %z17.d +a581b6b3 : ld1sb z19.d, p5/Z, [x21, #1, MUL VL] : ld1sb +0x04(%x21)[1byte] %p5/z -> %z19.d +a582baf5 : ld1sb z21.d, p6/Z, [x23, #2, MUL VL] : ld1sb +0x08(%x23)[1byte] %p6/z -> %z21.d +a583bb17 : ld1sb z23.d, p6/Z, [x24, #3, MUL VL] : ld1sb +0x0c(%x24)[1byte] %p6/z -> %z23.d +a584bf59 : ld1sb z25.d, p7/Z, [x26, #4, MUL VL] : ld1sb +0x10(%x26)[1byte] %p7/z -> %z25.d +a585bf9b : ld1sb z27.d, p7/Z, [x28, #5, MUL VL] : ld1sb +0x14(%x28)[1byte] %p7/z -> %z27.d +a587bfff : ld1sb z31.d, p7/Z, [sp, #7, MUL VL] : ld1sb +0x1c(%sp)[1byte] %p7/z -> %z31.d # LD1SB { .S }, /Z, [, ] (LD1SB-Z.P.BR-S32) -a5a04000 : ld1sb z0.s, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[8byte] %p0/z -> %z0.s -a5a54482 : ld1sb z2.s, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[8byte] %p1/z -> %z2.s -a5a748c4 : ld1sb z4.s, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[8byte] %p2/z -> %z4.s -a5a94906 : ld1sb z6.s, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[8byte] %p2/z -> %z6.s -a5ab4d48 : ld1sb z8.s, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[8byte] %p3/z -> %z8.s -a5ac4d6a : ld1sb z10.s, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[8byte] %p3/z -> %z10.s -a5ae51ac : ld1sb z12.s, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[8byte] %p4/z -> %z12.s -a5b051ee : ld1sb z14.s, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[8byte] %p4/z -> %z14.s -a5b25630 : ld1sb z16.s, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[8byte] %p5/z -> %z16.s -a5b45671 : ld1sb z17.s, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[8byte] %p5/z -> %z17.s -a5b656b3 : ld1sb z19.s, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[8byte] %p5/z -> %z19.s -a5b85af5 : ld1sb z21.s, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[8byte] %p6/z -> %z21.s -a5b95b17 
: ld1sb z23.s, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[8byte] %p6/z -> %z23.s -a5bb5f59 : ld1sb z25.s, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[8byte] %p7/z -> %z25.s -a5bd5f9b : ld1sb z27.s, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[8byte] %p7/z -> %z27.s -a5be5fff : ld1sb z31.s, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[8byte] %p7/z -> %z31.s +a5a04000 : ld1sb z0.s, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.s +a5a54482 : ld1sb z2.s, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[1byte] %p1/z -> %z2.s +a5a748c4 : ld1sb z4.s, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[1byte] %p2/z -> %z4.s +a5a94906 : ld1sb z6.s, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[1byte] %p2/z -> %z6.s +a5ab4d48 : ld1sb z8.s, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[1byte] %p3/z -> %z8.s +a5ac4d6a : ld1sb z10.s, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[1byte] %p3/z -> %z10.s +a5ae51ac : ld1sb z12.s, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[1byte] %p4/z -> %z12.s +a5b051ee : ld1sb z14.s, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[1byte] %p4/z -> %z14.s +a5b25630 : ld1sb z16.s, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.s +a5b45671 : ld1sb z17.s, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[1byte] %p5/z -> %z17.s +a5b656b3 : ld1sb z19.s, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[1byte] %p5/z -> %z19.s +a5b85af5 : ld1sb z21.s, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[1byte] %p6/z -> %z21.s +a5b95b17 : ld1sb z23.s, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[1byte] %p6/z -> %z23.s +a5bb5f59 : ld1sb z25.s, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[1byte] %p7/z -> %z25.s +a5bd5f9b : ld1sb z27.s, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[1byte] %p7/z -> %z27.s +a5be5fff : ld1sb z31.s, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.s # LD1SB { .S }, /Z, [{, #, MUL VL}] (LD1SB-Z.P.BI-S32) -a5a8a000 : ld1sb z0.s, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x40(%x0)[8byte] %p0/z -> %z0.s -a5a9a482 : ld1sb z2.s, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x38(%x4)[8byte] %p1/z -> %z2.s -a5aaa8c4 : ld1sb z4.s, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x30(%x6)[8byte] %p2/z -> %z4.s -a5aba906 : ld1sb 
z6.s, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x28(%x8)[8byte] %p2/z -> %z6.s -a5acad48 : ld1sb z8.s, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x20(%x10)[8byte] %p3/z -> %z8.s -a5adad6a : ld1sb z10.s, p3/Z, [x11, #-3, MUL VL] : ld1sb -0x18(%x11)[8byte] %p3/z -> %z10.s -a5aeb1ac : ld1sb z12.s, p4/Z, [x13, #-2, MUL VL] : ld1sb -0x10(%x13)[8byte] %p4/z -> %z12.s -a5afb1ee : ld1sb z14.s, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x08(%x15)[8byte] %p4/z -> %z14.s -a5a0b630 : ld1sb z16.s, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[8byte] %p5/z -> %z16.s -a5a0b671 : ld1sb z17.s, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[8byte] %p5/z -> %z17.s -a5a1b6b3 : ld1sb z19.s, p5/Z, [x21, #1, MUL VL] : ld1sb +0x08(%x21)[8byte] %p5/z -> %z19.s -a5a2baf5 : ld1sb z21.s, p6/Z, [x23, #2, MUL VL] : ld1sb +0x10(%x23)[8byte] %p6/z -> %z21.s -a5a3bb17 : ld1sb z23.s, p6/Z, [x24, #3, MUL VL] : ld1sb +0x18(%x24)[8byte] %p6/z -> %z23.s -a5a4bf59 : ld1sb z25.s, p7/Z, [x26, #4, MUL VL] : ld1sb +0x20(%x26)[8byte] %p7/z -> %z25.s -a5a5bf9b : ld1sb z27.s, p7/Z, [x28, #5, MUL VL] : ld1sb +0x28(%x28)[8byte] %p7/z -> %z27.s -a5a7bfff : ld1sb z31.s, p7/Z, [sp, #7, MUL VL] : ld1sb +0x38(%sp)[8byte] %p7/z -> %z31.s +a5a8a000 : ld1sb z0.s, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x40(%x0)[1byte] %p0/z -> %z0.s +a5a9a482 : ld1sb z2.s, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x38(%x4)[1byte] %p1/z -> %z2.s +a5aaa8c4 : ld1sb z4.s, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x30(%x6)[1byte] %p2/z -> %z4.s +a5aba906 : ld1sb z6.s, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x28(%x8)[1byte] %p2/z -> %z6.s +a5acad48 : ld1sb z8.s, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x20(%x10)[1byte] %p3/z -> %z8.s +a5adad6a : ld1sb z10.s, p3/Z, [x11, #-3, MUL VL] : ld1sb -0x18(%x11)[1byte] %p3/z -> %z10.s +a5aeb1ac : ld1sb z12.s, p4/Z, [x13, #-2, MUL VL] : ld1sb -0x10(%x13)[1byte] %p4/z -> %z12.s +a5afb1ee : ld1sb z14.s, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x08(%x15)[1byte] %p4/z -> %z14.s +a5a0b630 : ld1sb z16.s, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[1byte] %p5/z -> %z16.s +a5a0b671 : ld1sb 
z17.s, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[1byte] %p5/z -> %z17.s +a5a1b6b3 : ld1sb z19.s, p5/Z, [x21, #1, MUL VL] : ld1sb +0x08(%x21)[1byte] %p5/z -> %z19.s +a5a2baf5 : ld1sb z21.s, p6/Z, [x23, #2, MUL VL] : ld1sb +0x10(%x23)[1byte] %p6/z -> %z21.s +a5a3bb17 : ld1sb z23.s, p6/Z, [x24, #3, MUL VL] : ld1sb +0x18(%x24)[1byte] %p6/z -> %z23.s +a5a4bf59 : ld1sb z25.s, p7/Z, [x26, #4, MUL VL] : ld1sb +0x20(%x26)[1byte] %p7/z -> %z25.s +a5a5bf9b : ld1sb z27.s, p7/Z, [x28, #5, MUL VL] : ld1sb +0x28(%x28)[1byte] %p7/z -> %z27.s +a5a7bfff : ld1sb z31.s, p7/Z, [sp, #7, MUL VL] : ld1sb +0x38(%sp)[1byte] %p7/z -> %z31.s # LD1SB { .H }, /Z, [, ] (LD1SB-Z.P.BR-S16) -a5c04000 : ld1sb z0.h, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[16byte] %p0/z -> %z0.h -a5c54482 : ld1sb z2.h, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[16byte] %p1/z -> %z2.h -a5c748c4 : ld1sb z4.h, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[16byte] %p2/z -> %z4.h -a5c94906 : ld1sb z6.h, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[16byte] %p2/z -> %z6.h -a5cb4d48 : ld1sb z8.h, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[16byte] %p3/z -> %z8.h -a5cc4d6a : ld1sb z10.h, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[16byte] %p3/z -> %z10.h -a5ce51ac : ld1sb z12.h, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[16byte] %p4/z -> %z12.h -a5d051ee : ld1sb z14.h, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[16byte] %p4/z -> %z14.h -a5d25630 : ld1sb z16.h, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[16byte] %p5/z -> %z16.h -a5d45671 : ld1sb z17.h, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[16byte] %p5/z -> %z17.h -a5d656b3 : ld1sb z19.h, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[16byte] %p5/z -> %z19.h -a5d85af5 : ld1sb z21.h, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[16byte] %p6/z -> %z21.h -a5d95b17 : ld1sb z23.h, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[16byte] %p6/z -> %z23.h -a5db5f59 : ld1sb z25.h, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[16byte] %p7/z -> %z25.h -a5dd5f9b : ld1sb z27.h, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[16byte] %p7/z -> %z27.h -a5de5fff : ld1sb z31.h, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[16byte] 
%p7/z -> %z31.h +a5c04000 : ld1sb z0.h, p0/Z, [x0, x0] : ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.h +a5c54482 : ld1sb z2.h, p1/Z, [x4, x5] : ld1sb (%x4,%x5)[1byte] %p1/z -> %z2.h +a5c748c4 : ld1sb z4.h, p2/Z, [x6, x7] : ld1sb (%x6,%x7)[1byte] %p2/z -> %z4.h +a5c94906 : ld1sb z6.h, p2/Z, [x8, x9] : ld1sb (%x8,%x9)[1byte] %p2/z -> %z6.h +a5cb4d48 : ld1sb z8.h, p3/Z, [x10, x11] : ld1sb (%x10,%x11)[1byte] %p3/z -> %z8.h +a5cc4d6a : ld1sb z10.h, p3/Z, [x11, x12] : ld1sb (%x11,%x12)[1byte] %p3/z -> %z10.h +a5ce51ac : ld1sb z12.h, p4/Z, [x13, x14] : ld1sb (%x13,%x14)[1byte] %p4/z -> %z12.h +a5d051ee : ld1sb z14.h, p4/Z, [x15, x16] : ld1sb (%x15,%x16)[1byte] %p4/z -> %z14.h +a5d25630 : ld1sb z16.h, p5/Z, [x17, x18] : ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.h +a5d45671 : ld1sb z17.h, p5/Z, [x19, x20] : ld1sb (%x19,%x20)[1byte] %p5/z -> %z17.h +a5d656b3 : ld1sb z19.h, p5/Z, [x21, x22] : ld1sb (%x21,%x22)[1byte] %p5/z -> %z19.h +a5d85af5 : ld1sb z21.h, p6/Z, [x23, x24] : ld1sb (%x23,%x24)[1byte] %p6/z -> %z21.h +a5d95b17 : ld1sb z23.h, p6/Z, [x24, x25] : ld1sb (%x24,%x25)[1byte] %p6/z -> %z23.h +a5db5f59 : ld1sb z25.h, p7/Z, [x26, x27] : ld1sb (%x26,%x27)[1byte] %p7/z -> %z25.h +a5dd5f9b : ld1sb z27.h, p7/Z, [x28, x29] : ld1sb (%x28,%x29)[1byte] %p7/z -> %z27.h +a5de5fff : ld1sb z31.h, p7/Z, [sp, x30] : ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.h # LD1SB { .H }, /Z, [{, #, MUL VL}] (LD1SB-Z.P.BI-S16) -a5c8a000 : ld1sb z0.h, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x80(%x0)[16byte] %p0/z -> %z0.h -a5c9a482 : ld1sb z2.h, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x70(%x4)[16byte] %p1/z -> %z2.h -a5caa8c4 : ld1sb z4.h, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x60(%x6)[16byte] %p2/z -> %z4.h -a5cba906 : ld1sb z6.h, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x50(%x8)[16byte] %p2/z -> %z6.h -a5ccad48 : ld1sb z8.h, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x40(%x10)[16byte] %p3/z -> %z8.h -a5cdad6a : ld1sb z10.h, p3/Z, [x11, #-3, MUL VL] : ld1sb -0x30(%x11)[16byte] %p3/z -> %z10.h -a5ceb1ac : ld1sb z12.h, p4/Z, [x13, #-2, MUL VL] 
: ld1sb -0x20(%x13)[16byte] %p4/z -> %z12.h -a5cfb1ee : ld1sb z14.h, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x10(%x15)[16byte] %p4/z -> %z14.h -a5c0b630 : ld1sb z16.h, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[16byte] %p5/z -> %z16.h -a5c0b671 : ld1sb z17.h, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[16byte] %p5/z -> %z17.h -a5c1b6b3 : ld1sb z19.h, p5/Z, [x21, #1, MUL VL] : ld1sb +0x10(%x21)[16byte] %p5/z -> %z19.h -a5c2baf5 : ld1sb z21.h, p6/Z, [x23, #2, MUL VL] : ld1sb +0x20(%x23)[16byte] %p6/z -> %z21.h -a5c3bb17 : ld1sb z23.h, p6/Z, [x24, #3, MUL VL] : ld1sb +0x30(%x24)[16byte] %p6/z -> %z23.h -a5c4bf59 : ld1sb z25.h, p7/Z, [x26, #4, MUL VL] : ld1sb +0x40(%x26)[16byte] %p7/z -> %z25.h -a5c5bf9b : ld1sb z27.h, p7/Z, [x28, #5, MUL VL] : ld1sb +0x50(%x28)[16byte] %p7/z -> %z27.h -a5c7bfff : ld1sb z31.h, p7/Z, [sp, #7, MUL VL] : ld1sb +0x70(%sp)[16byte] %p7/z -> %z31.h +a5c8a000 : ld1sb z0.h, p0/Z, [x0, #-8, MUL VL] : ld1sb -0x80(%x0)[1byte] %p0/z -> %z0.h +a5c9a482 : ld1sb z2.h, p1/Z, [x4, #-7, MUL VL] : ld1sb -0x70(%x4)[1byte] %p1/z -> %z2.h +a5caa8c4 : ld1sb z4.h, p2/Z, [x6, #-6, MUL VL] : ld1sb -0x60(%x6)[1byte] %p2/z -> %z4.h +a5cba906 : ld1sb z6.h, p2/Z, [x8, #-5, MUL VL] : ld1sb -0x50(%x8)[1byte] %p2/z -> %z6.h +a5ccad48 : ld1sb z8.h, p3/Z, [x10, #-4, MUL VL] : ld1sb -0x40(%x10)[1byte] %p3/z -> %z8.h +a5cdad6a : ld1sb z10.h, p3/Z, [x11, #-3, MUL VL] : ld1sb -0x30(%x11)[1byte] %p3/z -> %z10.h +a5ceb1ac : ld1sb z12.h, p4/Z, [x13, #-2, MUL VL] : ld1sb -0x20(%x13)[1byte] %p4/z -> %z12.h +a5cfb1ee : ld1sb z14.h, p4/Z, [x15, #-1, MUL VL] : ld1sb -0x10(%x15)[1byte] %p4/z -> %z14.h +a5c0b630 : ld1sb z16.h, p5/Z, [x17, #0, MUL VL] : ld1sb (%x17)[1byte] %p5/z -> %z16.h +a5c0b671 : ld1sb z17.h, p5/Z, [x19, #0, MUL VL] : ld1sb (%x19)[1byte] %p5/z -> %z17.h +a5c1b6b3 : ld1sb z19.h, p5/Z, [x21, #1, MUL VL] : ld1sb +0x10(%x21)[1byte] %p5/z -> %z19.h +a5c2baf5 : ld1sb z21.h, p6/Z, [x23, #2, MUL VL] : ld1sb +0x20(%x23)[1byte] %p6/z -> %z21.h +a5c3bb17 : ld1sb z23.h, p6/Z, [x24, #3, MUL 
VL] : ld1sb +0x30(%x24)[1byte] %p6/z -> %z23.h +a5c4bf59 : ld1sb z25.h, p7/Z, [x26, #4, MUL VL] : ld1sb +0x40(%x26)[1byte] %p7/z -> %z25.h +a5c5bf9b : ld1sb z27.h, p7/Z, [x28, #5, MUL VL] : ld1sb +0x50(%x28)[1byte] %p7/z -> %z27.h +a5c7bfff : ld1sb z31.h, p7/Z, [sp, #7, MUL VL] : ld1sb +0x70(%sp)[1byte] %p7/z -> %z31.h # LD1SB { .D }, /Z, [, .D, ] (LD1SB-Z.P.BZ-D.x32.unscaled) -c4000000 : ld1sb z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sb (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d -c4050482 : ld1sb z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sb (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d -c40708c4 : ld1sb z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sb (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d -c4090906 : ld1sb z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sb (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d -c40b0d48 : ld1sb z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sb (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d -c40d0d6a : ld1sb z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sb (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d -c40f11ac : ld1sb z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sb (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d -c41111ee : ld1sb z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sb (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d -c4131630 : ld1sb z16.d, p5/Z, [x17, z19.d, UXTW] : ld1sb (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d -c4141671 : ld1sb z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sb (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d -c41616b3 : ld1sb z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sb (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d -c4181af5 : ld1sb z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sb (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d -c41a1b17 : ld1sb z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sb (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d -c41c1f59 : ld1sb z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sb (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d -c41e1f9b : ld1sb z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sb (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d -c41f1fff : ld1sb z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sb (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d -c4400000 : ld1sb z0.d, p0/Z, [x0, z0.d, SXTW] 
: ld1sb (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d -c4450482 : ld1sb z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sb (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d -c44708c4 : ld1sb z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sb (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d -c4490906 : ld1sb z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sb (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d -c44b0d48 : ld1sb z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sb (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d -c44d0d6a : ld1sb z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sb (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d -c44f11ac : ld1sb z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sb (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d -c45111ee : ld1sb z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sb (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d -c4531630 : ld1sb z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sb (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d -c4541671 : ld1sb z17.d, p5/Z, [x19, z20.d, SXTW] : ld1sb (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d -c45616b3 : ld1sb z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sb (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d -c4581af5 : ld1sb z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sb (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d -c45a1b17 : ld1sb z23.d, p6/Z, [x24, z26.d, SXTW] : ld1sb (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d -c45c1f59 : ld1sb z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sb (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d -c45e1f9b : ld1sb z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sb (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d -c45f1fff : ld1sb z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sb (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d +c4000000 : ld1sb z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sb (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d +c4050482 : ld1sb z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sb (%x4,%z5.d,uxtw)[1byte] %p1/z -> %z2.d +c40708c4 : ld1sb z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sb (%x6,%z7.d,uxtw)[1byte] %p2/z -> %z4.d +c4090906 : ld1sb z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sb (%x8,%z9.d,uxtw)[1byte] %p2/z -> %z6.d +c40b0d48 : ld1sb z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sb (%x10,%z11.d,uxtw)[1byte] %p3/z -> %z8.d 
+c40d0d6a : ld1sb z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sb (%x11,%z13.d,uxtw)[1byte] %p3/z -> %z10.d +c40f11ac : ld1sb z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sb (%x13,%z15.d,uxtw)[1byte] %p4/z -> %z12.d +c41111ee : ld1sb z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sb (%x15,%z17.d,uxtw)[1byte] %p4/z -> %z14.d +c4131630 : ld1sb z16.d, p5/Z, [x17, z19.d, UXTW] : ld1sb (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d +c4141671 : ld1sb z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sb (%x19,%z20.d,uxtw)[1byte] %p5/z -> %z17.d +c41616b3 : ld1sb z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sb (%x21,%z22.d,uxtw)[1byte] %p5/z -> %z19.d +c4181af5 : ld1sb z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sb (%x23,%z24.d,uxtw)[1byte] %p6/z -> %z21.d +c41a1b17 : ld1sb z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sb (%x24,%z26.d,uxtw)[1byte] %p6/z -> %z23.d +c41c1f59 : ld1sb z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sb (%x26,%z28.d,uxtw)[1byte] %p7/z -> %z25.d +c41e1f9b : ld1sb z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sb (%x28,%z30.d,uxtw)[1byte] %p7/z -> %z27.d +c41f1fff : ld1sb z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sb (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d +c4400000 : ld1sb z0.d, p0/Z, [x0, z0.d, SXTW] : ld1sb (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d +c4450482 : ld1sb z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sb (%x4,%z5.d,sxtw)[1byte] %p1/z -> %z2.d +c44708c4 : ld1sb z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sb (%x6,%z7.d,sxtw)[1byte] %p2/z -> %z4.d +c4490906 : ld1sb z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sb (%x8,%z9.d,sxtw)[1byte] %p2/z -> %z6.d +c44b0d48 : ld1sb z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sb (%x10,%z11.d,sxtw)[1byte] %p3/z -> %z8.d +c44d0d6a : ld1sb z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sb (%x11,%z13.d,sxtw)[1byte] %p3/z -> %z10.d +c44f11ac : ld1sb z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sb (%x13,%z15.d,sxtw)[1byte] %p4/z -> %z12.d +c45111ee : ld1sb z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sb (%x15,%z17.d,sxtw)[1byte] %p4/z -> %z14.d +c4531630 : ld1sb z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sb (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d +c4541671 : ld1sb z17.d, 
p5/Z, [x19, z20.d, SXTW] : ld1sb (%x19,%z20.d,sxtw)[1byte] %p5/z -> %z17.d +c45616b3 : ld1sb z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sb (%x21,%z22.d,sxtw)[1byte] %p5/z -> %z19.d +c4581af5 : ld1sb z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sb (%x23,%z24.d,sxtw)[1byte] %p6/z -> %z21.d +c45a1b17 : ld1sb z23.d, p6/Z, [x24, z26.d, SXTW] : ld1sb (%x24,%z26.d,sxtw)[1byte] %p6/z -> %z23.d +c45c1f59 : ld1sb z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sb (%x26,%z28.d,sxtw)[1byte] %p7/z -> %z25.d +c45e1f9b : ld1sb z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sb (%x28,%z30.d,sxtw)[1byte] %p7/z -> %z27.d +c45f1fff : ld1sb z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sb (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d # LD1SB { .D }, /Z, [.D{, #}] (LD1SB-Z.P.AI-D) -c4208000 : ld1sb z0.d, p0/Z, [z0.d, #0] : ld1sb (%z0.d)[4byte] %p0/z -> %z0.d -c4228482 : ld1sb z2.d, p1/Z, [z4.d, #2] : ld1sb +0x02(%z4.d)[4byte] %p1/z -> %z2.d -c42488c4 : ld1sb z4.d, p2/Z, [z6.d, #4] : ld1sb +0x04(%z6.d)[4byte] %p2/z -> %z4.d -c4268906 : ld1sb z6.d, p2/Z, [z8.d, #6] : ld1sb +0x06(%z8.d)[4byte] %p2/z -> %z6.d -c4288d48 : ld1sb z8.d, p3/Z, [z10.d, #8] : ld1sb +0x08(%z10.d)[4byte] %p3/z -> %z8.d -c42a8d8a : ld1sb z10.d, p3/Z, [z12.d, #10] : ld1sb +0x0a(%z12.d)[4byte] %p3/z -> %z10.d -c42c91cc : ld1sb z12.d, p4/Z, [z14.d, #12] : ld1sb +0x0c(%z14.d)[4byte] %p4/z -> %z12.d -c42e920e : ld1sb z14.d, p4/Z, [z16.d, #14] : ld1sb +0x0e(%z16.d)[4byte] %p4/z -> %z14.d -c4309650 : ld1sb z16.d, p5/Z, [z18.d, #16] : ld1sb +0x10(%z18.d)[4byte] %p5/z -> %z16.d -c4319671 : ld1sb z17.d, p5/Z, [z19.d, #17] : ld1sb +0x11(%z19.d)[4byte] %p5/z -> %z17.d -c43396b3 : ld1sb z19.d, p5/Z, [z21.d, #19] : ld1sb +0x13(%z21.d)[4byte] %p5/z -> %z19.d -c4359af5 : ld1sb z21.d, p6/Z, [z23.d, #21] : ld1sb +0x15(%z23.d)[4byte] %p6/z -> %z21.d -c4379b37 : ld1sb z23.d, p6/Z, [z25.d, #23] : ld1sb +0x17(%z25.d)[4byte] %p6/z -> %z23.d -c4399f79 : ld1sb z25.d, p7/Z, [z27.d, #25] : ld1sb +0x19(%z27.d)[4byte] %p7/z -> %z25.d -c43b9fbb : ld1sb z27.d, p7/Z, [z29.d, #27] : ld1sb 
+0x1b(%z29.d)[4byte] %p7/z -> %z27.d -c43f9fff : ld1sb z31.d, p7/Z, [z31.d, #31] : ld1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d +c4208000 : ld1sb z0.d, p0/Z, [z0.d, #0] : ld1sb (%z0.d)[1byte] %p0/z -> %z0.d +c4228482 : ld1sb z2.d, p1/Z, [z4.d, #2] : ld1sb +0x02(%z4.d)[1byte] %p1/z -> %z2.d +c42488c4 : ld1sb z4.d, p2/Z, [z6.d, #4] : ld1sb +0x04(%z6.d)[1byte] %p2/z -> %z4.d +c4268906 : ld1sb z6.d, p2/Z, [z8.d, #6] : ld1sb +0x06(%z8.d)[1byte] %p2/z -> %z6.d +c4288d48 : ld1sb z8.d, p3/Z, [z10.d, #8] : ld1sb +0x08(%z10.d)[1byte] %p3/z -> %z8.d +c42a8d8a : ld1sb z10.d, p3/Z, [z12.d, #10] : ld1sb +0x0a(%z12.d)[1byte] %p3/z -> %z10.d +c42c91cc : ld1sb z12.d, p4/Z, [z14.d, #12] : ld1sb +0x0c(%z14.d)[1byte] %p4/z -> %z12.d +c42e920e : ld1sb z14.d, p4/Z, [z16.d, #14] : ld1sb +0x0e(%z16.d)[1byte] %p4/z -> %z14.d +c4309650 : ld1sb z16.d, p5/Z, [z18.d, #16] : ld1sb +0x10(%z18.d)[1byte] %p5/z -> %z16.d +c4319671 : ld1sb z17.d, p5/Z, [z19.d, #17] : ld1sb +0x11(%z19.d)[1byte] %p5/z -> %z17.d +c43396b3 : ld1sb z19.d, p5/Z, [z21.d, #19] : ld1sb +0x13(%z21.d)[1byte] %p5/z -> %z19.d +c4359af5 : ld1sb z21.d, p6/Z, [z23.d, #21] : ld1sb +0x15(%z23.d)[1byte] %p6/z -> %z21.d +c4379b37 : ld1sb z23.d, p6/Z, [z25.d, #23] : ld1sb +0x17(%z25.d)[1byte] %p6/z -> %z23.d +c4399f79 : ld1sb z25.d, p7/Z, [z27.d, #25] : ld1sb +0x19(%z27.d)[1byte] %p7/z -> %z25.d +c43b9fbb : ld1sb z27.d, p7/Z, [z29.d, #27] : ld1sb +0x1b(%z29.d)[1byte] %p7/z -> %z27.d +c43f9fff : ld1sb z31.d, p7/Z, [z31.d, #31] : ld1sb +0x1f(%z31.d)[1byte] %p7/z -> %z31.d # LD1SB { .D }, /Z, [, .D] (LD1SB-Z.P.BZ-D.64.unscaled) -c4408000 : ld1sb z0.d, p0/Z, [x0, z0.d] : ld1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d -c4458482 : ld1sb z2.d, p1/Z, [x4, z5.d] : ld1sb (%x4,%z5.d)[4byte] %p1/z -> %z2.d -c44788c4 : ld1sb z4.d, p2/Z, [x6, z7.d] : ld1sb (%x6,%z7.d)[4byte] %p2/z -> %z4.d -c4498906 : ld1sb z6.d, p2/Z, [x8, z9.d] : ld1sb (%x8,%z9.d)[4byte] %p2/z -> %z6.d -c44b8d48 : ld1sb z8.d, p3/Z, [x10, z11.d] : ld1sb (%x10,%z11.d)[4byte] %p3/z -> %z8.d 
-c44d8d6a : ld1sb z10.d, p3/Z, [x11, z13.d] : ld1sb (%x11,%z13.d)[4byte] %p3/z -> %z10.d -c44f91ac : ld1sb z12.d, p4/Z, [x13, z15.d] : ld1sb (%x13,%z15.d)[4byte] %p4/z -> %z12.d -c45191ee : ld1sb z14.d, p4/Z, [x15, z17.d] : ld1sb (%x15,%z17.d)[4byte] %p4/z -> %z14.d -c4539630 : ld1sb z16.d, p5/Z, [x17, z19.d] : ld1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d -c4549671 : ld1sb z17.d, p5/Z, [x19, z20.d] : ld1sb (%x19,%z20.d)[4byte] %p5/z -> %z17.d -c45696b3 : ld1sb z19.d, p5/Z, [x21, z22.d] : ld1sb (%x21,%z22.d)[4byte] %p5/z -> %z19.d -c4589af5 : ld1sb z21.d, p6/Z, [x23, z24.d] : ld1sb (%x23,%z24.d)[4byte] %p6/z -> %z21.d -c45a9b17 : ld1sb z23.d, p6/Z, [x24, z26.d] : ld1sb (%x24,%z26.d)[4byte] %p6/z -> %z23.d -c45c9f59 : ld1sb z25.d, p7/Z, [x26, z28.d] : ld1sb (%x26,%z28.d)[4byte] %p7/z -> %z25.d -c45e9f9b : ld1sb z27.d, p7/Z, [x28, z30.d] : ld1sb (%x28,%z30.d)[4byte] %p7/z -> %z27.d -c45f9fff : ld1sb z31.d, p7/Z, [sp, z31.d] : ld1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d +c4408000 : ld1sb z0.d, p0/Z, [x0, z0.d] : ld1sb (%x0,%z0.d)[1byte] %p0/z -> %z0.d +c4458482 : ld1sb z2.d, p1/Z, [x4, z5.d] : ld1sb (%x4,%z5.d)[1byte] %p1/z -> %z2.d +c44788c4 : ld1sb z4.d, p2/Z, [x6, z7.d] : ld1sb (%x6,%z7.d)[1byte] %p2/z -> %z4.d +c4498906 : ld1sb z6.d, p2/Z, [x8, z9.d] : ld1sb (%x8,%z9.d)[1byte] %p2/z -> %z6.d +c44b8d48 : ld1sb z8.d, p3/Z, [x10, z11.d] : ld1sb (%x10,%z11.d)[1byte] %p3/z -> %z8.d +c44d8d6a : ld1sb z10.d, p3/Z, [x11, z13.d] : ld1sb (%x11,%z13.d)[1byte] %p3/z -> %z10.d +c44f91ac : ld1sb z12.d, p4/Z, [x13, z15.d] : ld1sb (%x13,%z15.d)[1byte] %p4/z -> %z12.d +c45191ee : ld1sb z14.d, p4/Z, [x15, z17.d] : ld1sb (%x15,%z17.d)[1byte] %p4/z -> %z14.d +c4539630 : ld1sb z16.d, p5/Z, [x17, z19.d] : ld1sb (%x17,%z19.d)[1byte] %p5/z -> %z16.d +c4549671 : ld1sb z17.d, p5/Z, [x19, z20.d] : ld1sb (%x19,%z20.d)[1byte] %p5/z -> %z17.d +c45696b3 : ld1sb z19.d, p5/Z, [x21, z22.d] : ld1sb (%x21,%z22.d)[1byte] %p5/z -> %z19.d +c4589af5 : ld1sb z21.d, p6/Z, [x23, z24.d] : ld1sb 
(%x23,%z24.d)[1byte] %p6/z -> %z21.d +c45a9b17 : ld1sb z23.d, p6/Z, [x24, z26.d] : ld1sb (%x24,%z26.d)[1byte] %p6/z -> %z23.d +c45c9f59 : ld1sb z25.d, p7/Z, [x26, z28.d] : ld1sb (%x26,%z28.d)[1byte] %p7/z -> %z25.d +c45e9f9b : ld1sb z27.d, p7/Z, [x28, z30.d] : ld1sb (%x28,%z30.d)[1byte] %p7/z -> %z27.d +c45f9fff : ld1sb z31.d, p7/Z, [sp, z31.d] : ld1sb (%sp,%z31.d)[1byte] %p7/z -> %z31.d # LD1SH { .S }, /Z, [, .S, ] (LD1SH-Z.P.BZ-S.x32.unscaled) -84800000 : ld1sh z0.s, p0/Z, [x0, z0.s, UXTW] : ld1sh (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s -84850482 : ld1sh z2.s, p1/Z, [x4, z5.s, UXTW] : ld1sh (%x4,%z5.s,uxtw)[16byte] %p1/z -> %z2.s -848708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, UXTW] : ld1sh (%x6,%z7.s,uxtw)[16byte] %p2/z -> %z4.s -84890906 : ld1sh z6.s, p2/Z, [x8, z9.s, UXTW] : ld1sh (%x8,%z9.s,uxtw)[16byte] %p2/z -> %z6.s -848b0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, UXTW] : ld1sh (%x10,%z11.s,uxtw)[16byte] %p3/z -> %z8.s -848d0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, UXTW] : ld1sh (%x11,%z13.s,uxtw)[16byte] %p3/z -> %z10.s -848f11ac : ld1sh z12.s, p4/Z, [x13, z15.s, UXTW] : ld1sh (%x13,%z15.s,uxtw)[16byte] %p4/z -> %z12.s -849111ee : ld1sh z14.s, p4/Z, [x15, z17.s, UXTW] : ld1sh (%x15,%z17.s,uxtw)[16byte] %p4/z -> %z14.s -84931630 : ld1sh z16.s, p5/Z, [x17, z19.s, UXTW] : ld1sh (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s -84941671 : ld1sh z17.s, p5/Z, [x19, z20.s, UXTW] : ld1sh (%x19,%z20.s,uxtw)[16byte] %p5/z -> %z17.s -849616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, UXTW] : ld1sh (%x21,%z22.s,uxtw)[16byte] %p5/z -> %z19.s -84981af5 : ld1sh z21.s, p6/Z, [x23, z24.s, UXTW] : ld1sh (%x23,%z24.s,uxtw)[16byte] %p6/z -> %z21.s -849a1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, UXTW] : ld1sh (%x24,%z26.s,uxtw)[16byte] %p6/z -> %z23.s -849c1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, UXTW] : ld1sh (%x26,%z28.s,uxtw)[16byte] %p7/z -> %z25.s -849e1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, UXTW] : ld1sh (%x28,%z30.s,uxtw)[16byte] %p7/z -> %z27.s -849f1fff : ld1sh z31.s, p7/Z, [sp, z31.s, UXTW] : ld1sh 
(%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s -84c00000 : ld1sh z0.s, p0/Z, [x0, z0.s, SXTW] : ld1sh (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s -84c50482 : ld1sh z2.s, p1/Z, [x4, z5.s, SXTW] : ld1sh (%x4,%z5.s,sxtw)[16byte] %p1/z -> %z2.s -84c708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, SXTW] : ld1sh (%x6,%z7.s,sxtw)[16byte] %p2/z -> %z4.s -84c90906 : ld1sh z6.s, p2/Z, [x8, z9.s, SXTW] : ld1sh (%x8,%z9.s,sxtw)[16byte] %p2/z -> %z6.s -84cb0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, SXTW] : ld1sh (%x10,%z11.s,sxtw)[16byte] %p3/z -> %z8.s -84cd0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, SXTW] : ld1sh (%x11,%z13.s,sxtw)[16byte] %p3/z -> %z10.s -84cf11ac : ld1sh z12.s, p4/Z, [x13, z15.s, SXTW] : ld1sh (%x13,%z15.s,sxtw)[16byte] %p4/z -> %z12.s -84d111ee : ld1sh z14.s, p4/Z, [x15, z17.s, SXTW] : ld1sh (%x15,%z17.s,sxtw)[16byte] %p4/z -> %z14.s -84d31630 : ld1sh z16.s, p5/Z, [x17, z19.s, SXTW] : ld1sh (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s -84d41671 : ld1sh z17.s, p5/Z, [x19, z20.s, SXTW] : ld1sh (%x19,%z20.s,sxtw)[16byte] %p5/z -> %z17.s -84d616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, SXTW] : ld1sh (%x21,%z22.s,sxtw)[16byte] %p5/z -> %z19.s -84d81af5 : ld1sh z21.s, p6/Z, [x23, z24.s, SXTW] : ld1sh (%x23,%z24.s,sxtw)[16byte] %p6/z -> %z21.s -84da1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, SXTW] : ld1sh (%x24,%z26.s,sxtw)[16byte] %p6/z -> %z23.s -84dc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, SXTW] : ld1sh (%x26,%z28.s,sxtw)[16byte] %p7/z -> %z25.s -84de1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, SXTW] : ld1sh (%x28,%z30.s,sxtw)[16byte] %p7/z -> %z27.s -84df1fff : ld1sh z31.s, p7/Z, [sp, z31.s, SXTW] : ld1sh (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s +84800000 : ld1sh z0.s, p0/Z, [x0, z0.s, UXTW] : ld1sh (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s +84850482 : ld1sh z2.s, p1/Z, [x4, z5.s, UXTW] : ld1sh (%x4,%z5.s,uxtw)[2byte] %p1/z -> %z2.s +848708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, UXTW] : ld1sh (%x6,%z7.s,uxtw)[2byte] %p2/z -> %z4.s +84890906 : ld1sh z6.s, p2/Z, [x8, z9.s, UXTW] : ld1sh (%x8,%z9.s,uxtw)[2byte] %p2/z -> 
%z6.s +848b0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, UXTW] : ld1sh (%x10,%z11.s,uxtw)[2byte] %p3/z -> %z8.s +848d0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, UXTW] : ld1sh (%x11,%z13.s,uxtw)[2byte] %p3/z -> %z10.s +848f11ac : ld1sh z12.s, p4/Z, [x13, z15.s, UXTW] : ld1sh (%x13,%z15.s,uxtw)[2byte] %p4/z -> %z12.s +849111ee : ld1sh z14.s, p4/Z, [x15, z17.s, UXTW] : ld1sh (%x15,%z17.s,uxtw)[2byte] %p4/z -> %z14.s +84931630 : ld1sh z16.s, p5/Z, [x17, z19.s, UXTW] : ld1sh (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s +84941671 : ld1sh z17.s, p5/Z, [x19, z20.s, UXTW] : ld1sh (%x19,%z20.s,uxtw)[2byte] %p5/z -> %z17.s +849616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, UXTW] : ld1sh (%x21,%z22.s,uxtw)[2byte] %p5/z -> %z19.s +84981af5 : ld1sh z21.s, p6/Z, [x23, z24.s, UXTW] : ld1sh (%x23,%z24.s,uxtw)[2byte] %p6/z -> %z21.s +849a1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, UXTW] : ld1sh (%x24,%z26.s,uxtw)[2byte] %p6/z -> %z23.s +849c1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, UXTW] : ld1sh (%x26,%z28.s,uxtw)[2byte] %p7/z -> %z25.s +849e1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, UXTW] : ld1sh (%x28,%z30.s,uxtw)[2byte] %p7/z -> %z27.s +849f1fff : ld1sh z31.s, p7/Z, [sp, z31.s, UXTW] : ld1sh (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s +84c00000 : ld1sh z0.s, p0/Z, [x0, z0.s, SXTW] : ld1sh (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s +84c50482 : ld1sh z2.s, p1/Z, [x4, z5.s, SXTW] : ld1sh (%x4,%z5.s,sxtw)[2byte] %p1/z -> %z2.s +84c708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, SXTW] : ld1sh (%x6,%z7.s,sxtw)[2byte] %p2/z -> %z4.s +84c90906 : ld1sh z6.s, p2/Z, [x8, z9.s, SXTW] : ld1sh (%x8,%z9.s,sxtw)[2byte] %p2/z -> %z6.s +84cb0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, SXTW] : ld1sh (%x10,%z11.s,sxtw)[2byte] %p3/z -> %z8.s +84cd0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, SXTW] : ld1sh (%x11,%z13.s,sxtw)[2byte] %p3/z -> %z10.s +84cf11ac : ld1sh z12.s, p4/Z, [x13, z15.s, SXTW] : ld1sh (%x13,%z15.s,sxtw)[2byte] %p4/z -> %z12.s +84d111ee : ld1sh z14.s, p4/Z, [x15, z17.s, SXTW] : ld1sh (%x15,%z17.s,sxtw)[2byte] %p4/z -> %z14.s +84d31630 : ld1sh 
z16.s, p5/Z, [x17, z19.s, SXTW] : ld1sh (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s +84d41671 : ld1sh z17.s, p5/Z, [x19, z20.s, SXTW] : ld1sh (%x19,%z20.s,sxtw)[2byte] %p5/z -> %z17.s +84d616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, SXTW] : ld1sh (%x21,%z22.s,sxtw)[2byte] %p5/z -> %z19.s +84d81af5 : ld1sh z21.s, p6/Z, [x23, z24.s, SXTW] : ld1sh (%x23,%z24.s,sxtw)[2byte] %p6/z -> %z21.s +84da1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, SXTW] : ld1sh (%x24,%z26.s,sxtw)[2byte] %p6/z -> %z23.s +84dc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, SXTW] : ld1sh (%x26,%z28.s,sxtw)[2byte] %p7/z -> %z25.s +84de1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, SXTW] : ld1sh (%x28,%z30.s,sxtw)[2byte] %p7/z -> %z27.s +84df1fff : ld1sh z31.s, p7/Z, [sp, z31.s, SXTW] : ld1sh (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s # LD1SH { .S }, /Z, [, .S, #1] (LD1SH-Z.P.BZ-S.x32.scaled) -84a00000 : ld1sh z0.s, p0/Z, [x0, z0.s, UXTW #1] : ld1sh (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s -84a50482 : ld1sh z2.s, p1/Z, [x4, z5.s, UXTW #1] : ld1sh (%x4,%z5.s,uxtw #1)[16byte] %p1/z -> %z2.s -84a708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, UXTW #1] : ld1sh (%x6,%z7.s,uxtw #1)[16byte] %p2/z -> %z4.s -84a90906 : ld1sh z6.s, p2/Z, [x8, z9.s, UXTW #1] : ld1sh (%x8,%z9.s,uxtw #1)[16byte] %p2/z -> %z6.s -84ab0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, UXTW #1] : ld1sh (%x10,%z11.s,uxtw #1)[16byte] %p3/z -> %z8.s -84ad0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, UXTW #1] : ld1sh (%x11,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s -84af11ac : ld1sh z12.s, p4/Z, [x13, z15.s, UXTW #1] : ld1sh (%x13,%z15.s,uxtw #1)[16byte] %p4/z -> %z12.s -84b111ee : ld1sh z14.s, p4/Z, [x15, z17.s, UXTW #1] : ld1sh (%x15,%z17.s,uxtw #1)[16byte] %p4/z -> %z14.s -84b31630 : ld1sh z16.s, p5/Z, [x17, z19.s, UXTW #1] : ld1sh (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s -84b41671 : ld1sh z17.s, p5/Z, [x19, z20.s, UXTW #1] : ld1sh (%x19,%z20.s,uxtw #1)[16byte] %p5/z -> %z17.s -84b616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, UXTW #1] : ld1sh (%x21,%z22.s,uxtw #1)[16byte] %p5/z -> %z19.s -84b81af5 
: ld1sh z21.s, p6/Z, [x23, z24.s, UXTW #1] : ld1sh (%x23,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s -84ba1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, UXTW #1] : ld1sh (%x24,%z26.s,uxtw #1)[16byte] %p6/z -> %z23.s -84bc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, UXTW #1] : ld1sh (%x26,%z28.s,uxtw #1)[16byte] %p7/z -> %z25.s -84be1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, UXTW #1] : ld1sh (%x28,%z30.s,uxtw #1)[16byte] %p7/z -> %z27.s -84bf1fff : ld1sh z31.s, p7/Z, [sp, z31.s, UXTW #1] : ld1sh (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s -84e00000 : ld1sh z0.s, p0/Z, [x0, z0.s, SXTW #1] : ld1sh (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s -84e50482 : ld1sh z2.s, p1/Z, [x4, z5.s, SXTW #1] : ld1sh (%x4,%z5.s,sxtw #1)[16byte] %p1/z -> %z2.s -84e708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, SXTW #1] : ld1sh (%x6,%z7.s,sxtw #1)[16byte] %p2/z -> %z4.s -84e90906 : ld1sh z6.s, p2/Z, [x8, z9.s, SXTW #1] : ld1sh (%x8,%z9.s,sxtw #1)[16byte] %p2/z -> %z6.s -84eb0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, SXTW #1] : ld1sh (%x10,%z11.s,sxtw #1)[16byte] %p3/z -> %z8.s -84ed0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, SXTW #1] : ld1sh (%x11,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s -84ef11ac : ld1sh z12.s, p4/Z, [x13, z15.s, SXTW #1] : ld1sh (%x13,%z15.s,sxtw #1)[16byte] %p4/z -> %z12.s -84f111ee : ld1sh z14.s, p4/Z, [x15, z17.s, SXTW #1] : ld1sh (%x15,%z17.s,sxtw #1)[16byte] %p4/z -> %z14.s -84f31630 : ld1sh z16.s, p5/Z, [x17, z19.s, SXTW #1] : ld1sh (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s -84f41671 : ld1sh z17.s, p5/Z, [x19, z20.s, SXTW #1] : ld1sh (%x19,%z20.s,sxtw #1)[16byte] %p5/z -> %z17.s -84f616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, SXTW #1] : ld1sh (%x21,%z22.s,sxtw #1)[16byte] %p5/z -> %z19.s -84f81af5 : ld1sh z21.s, p6/Z, [x23, z24.s, SXTW #1] : ld1sh (%x23,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s -84fa1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, SXTW #1] : ld1sh (%x24,%z26.s,sxtw #1)[16byte] %p6/z -> %z23.s -84fc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, SXTW #1] : ld1sh (%x26,%z28.s,sxtw #1)[16byte] %p7/z -> %z25.s 
-84fe1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, SXTW #1] : ld1sh (%x28,%z30.s,sxtw #1)[16byte] %p7/z -> %z27.s -84ff1fff : ld1sh z31.s, p7/Z, [sp, z31.s, SXTW #1] : ld1sh (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s +84a00000 : ld1sh z0.s, p0/Z, [x0, z0.s, UXTW #1] : ld1sh (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s +84a50482 : ld1sh z2.s, p1/Z, [x4, z5.s, UXTW #1] : ld1sh (%x4,%z5.s,uxtw #1)[2byte] %p1/z -> %z2.s +84a708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, UXTW #1] : ld1sh (%x6,%z7.s,uxtw #1)[2byte] %p2/z -> %z4.s +84a90906 : ld1sh z6.s, p2/Z, [x8, z9.s, UXTW #1] : ld1sh (%x8,%z9.s,uxtw #1)[2byte] %p2/z -> %z6.s +84ab0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, UXTW #1] : ld1sh (%x10,%z11.s,uxtw #1)[2byte] %p3/z -> %z8.s +84ad0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, UXTW #1] : ld1sh (%x11,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s +84af11ac : ld1sh z12.s, p4/Z, [x13, z15.s, UXTW #1] : ld1sh (%x13,%z15.s,uxtw #1)[2byte] %p4/z -> %z12.s +84b111ee : ld1sh z14.s, p4/Z, [x15, z17.s, UXTW #1] : ld1sh (%x15,%z17.s,uxtw #1)[2byte] %p4/z -> %z14.s +84b31630 : ld1sh z16.s, p5/Z, [x17, z19.s, UXTW #1] : ld1sh (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s +84b41671 : ld1sh z17.s, p5/Z, [x19, z20.s, UXTW #1] : ld1sh (%x19,%z20.s,uxtw #1)[2byte] %p5/z -> %z17.s +84b616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, UXTW #1] : ld1sh (%x21,%z22.s,uxtw #1)[2byte] %p5/z -> %z19.s +84b81af5 : ld1sh z21.s, p6/Z, [x23, z24.s, UXTW #1] : ld1sh (%x23,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s +84ba1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, UXTW #1] : ld1sh (%x24,%z26.s,uxtw #1)[2byte] %p6/z -> %z23.s +84bc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, UXTW #1] : ld1sh (%x26,%z28.s,uxtw #1)[2byte] %p7/z -> %z25.s +84be1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, UXTW #1] : ld1sh (%x28,%z30.s,uxtw #1)[2byte] %p7/z -> %z27.s +84bf1fff : ld1sh z31.s, p7/Z, [sp, z31.s, UXTW #1] : ld1sh (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s +84e00000 : ld1sh z0.s, p0/Z, [x0, z0.s, SXTW #1] : ld1sh (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s +84e50482 : ld1sh 
z2.s, p1/Z, [x4, z5.s, SXTW #1] : ld1sh (%x4,%z5.s,sxtw #1)[2byte] %p1/z -> %z2.s +84e708c4 : ld1sh z4.s, p2/Z, [x6, z7.s, SXTW #1] : ld1sh (%x6,%z7.s,sxtw #1)[2byte] %p2/z -> %z4.s +84e90906 : ld1sh z6.s, p2/Z, [x8, z9.s, SXTW #1] : ld1sh (%x8,%z9.s,sxtw #1)[2byte] %p2/z -> %z6.s +84eb0d48 : ld1sh z8.s, p3/Z, [x10, z11.s, SXTW #1] : ld1sh (%x10,%z11.s,sxtw #1)[2byte] %p3/z -> %z8.s +84ed0d6a : ld1sh z10.s, p3/Z, [x11, z13.s, SXTW #1] : ld1sh (%x11,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s +84ef11ac : ld1sh z12.s, p4/Z, [x13, z15.s, SXTW #1] : ld1sh (%x13,%z15.s,sxtw #1)[2byte] %p4/z -> %z12.s +84f111ee : ld1sh z14.s, p4/Z, [x15, z17.s, SXTW #1] : ld1sh (%x15,%z17.s,sxtw #1)[2byte] %p4/z -> %z14.s +84f31630 : ld1sh z16.s, p5/Z, [x17, z19.s, SXTW #1] : ld1sh (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s +84f41671 : ld1sh z17.s, p5/Z, [x19, z20.s, SXTW #1] : ld1sh (%x19,%z20.s,sxtw #1)[2byte] %p5/z -> %z17.s +84f616b3 : ld1sh z19.s, p5/Z, [x21, z22.s, SXTW #1] : ld1sh (%x21,%z22.s,sxtw #1)[2byte] %p5/z -> %z19.s +84f81af5 : ld1sh z21.s, p6/Z, [x23, z24.s, SXTW #1] : ld1sh (%x23,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s +84fa1b17 : ld1sh z23.s, p6/Z, [x24, z26.s, SXTW #1] : ld1sh (%x24,%z26.s,sxtw #1)[2byte] %p6/z -> %z23.s +84fc1f59 : ld1sh z25.s, p7/Z, [x26, z28.s, SXTW #1] : ld1sh (%x26,%z28.s,sxtw #1)[2byte] %p7/z -> %z25.s +84fe1f9b : ld1sh z27.s, p7/Z, [x28, z30.s, SXTW #1] : ld1sh (%x28,%z30.s,sxtw #1)[2byte] %p7/z -> %z27.s +84ff1fff : ld1sh z31.s, p7/Z, [sp, z31.s, SXTW #1] : ld1sh (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s # LD1SH { .S }, /Z, [.S{, #}] (LD1SH-Z.P.AI-S) -84a08000 : ld1sh z0.s, p0/Z, [z0.s, #0] : ld1sh (%z0.s)[16byte] %p0/z -> %z0.s -84a28482 : ld1sh z2.s, p1/Z, [z4.s, #4] : ld1sh +0x04(%z4.s)[16byte] %p1/z -> %z2.s -84a488c4 : ld1sh z4.s, p2/Z, [z6.s, #8] : ld1sh +0x08(%z6.s)[16byte] %p2/z -> %z4.s -84a68906 : ld1sh z6.s, p2/Z, [z8.s, #12] : ld1sh +0x0c(%z8.s)[16byte] %p2/z -> %z6.s -84a88d48 : ld1sh z8.s, p3/Z, [z10.s, #16] : ld1sh 
+0x10(%z10.s)[16byte] %p3/z -> %z8.s -84aa8d8a : ld1sh z10.s, p3/Z, [z12.s, #20] : ld1sh +0x14(%z12.s)[16byte] %p3/z -> %z10.s -84ac91cc : ld1sh z12.s, p4/Z, [z14.s, #24] : ld1sh +0x18(%z14.s)[16byte] %p4/z -> %z12.s -84ae920e : ld1sh z14.s, p4/Z, [z16.s, #28] : ld1sh +0x1c(%z16.s)[16byte] %p4/z -> %z14.s -84b09650 : ld1sh z16.s, p5/Z, [z18.s, #32] : ld1sh +0x20(%z18.s)[16byte] %p5/z -> %z16.s -84b19671 : ld1sh z17.s, p5/Z, [z19.s, #34] : ld1sh +0x22(%z19.s)[16byte] %p5/z -> %z17.s -84b396b3 : ld1sh z19.s, p5/Z, [z21.s, #38] : ld1sh +0x26(%z21.s)[16byte] %p5/z -> %z19.s -84b59af5 : ld1sh z21.s, p6/Z, [z23.s, #42] : ld1sh +0x2a(%z23.s)[16byte] %p6/z -> %z21.s -84b79b37 : ld1sh z23.s, p6/Z, [z25.s, #46] : ld1sh +0x2e(%z25.s)[16byte] %p6/z -> %z23.s -84b99f79 : ld1sh z25.s, p7/Z, [z27.s, #50] : ld1sh +0x32(%z27.s)[16byte] %p7/z -> %z25.s -84bb9fbb : ld1sh z27.s, p7/Z, [z29.s, #54] : ld1sh +0x36(%z29.s)[16byte] %p7/z -> %z27.s -84bf9fff : ld1sh z31.s, p7/Z, [z31.s, #62] : ld1sh +0x3e(%z31.s)[16byte] %p7/z -> %z31.s +84a08000 : ld1sh z0.s, p0/Z, [z0.s, #0] : ld1sh (%z0.s)[2byte] %p0/z -> %z0.s +84a28482 : ld1sh z2.s, p1/Z, [z4.s, #4] : ld1sh +0x04(%z4.s)[2byte] %p1/z -> %z2.s +84a488c4 : ld1sh z4.s, p2/Z, [z6.s, #8] : ld1sh +0x08(%z6.s)[2byte] %p2/z -> %z4.s +84a68906 : ld1sh z6.s, p2/Z, [z8.s, #12] : ld1sh +0x0c(%z8.s)[2byte] %p2/z -> %z6.s +84a88d48 : ld1sh z8.s, p3/Z, [z10.s, #16] : ld1sh +0x10(%z10.s)[2byte] %p3/z -> %z8.s +84aa8d8a : ld1sh z10.s, p3/Z, [z12.s, #20] : ld1sh +0x14(%z12.s)[2byte] %p3/z -> %z10.s +84ac91cc : ld1sh z12.s, p4/Z, [z14.s, #24] : ld1sh +0x18(%z14.s)[2byte] %p4/z -> %z12.s +84ae920e : ld1sh z14.s, p4/Z, [z16.s, #28] : ld1sh +0x1c(%z16.s)[2byte] %p4/z -> %z14.s +84b09650 : ld1sh z16.s, p5/Z, [z18.s, #32] : ld1sh +0x20(%z18.s)[2byte] %p5/z -> %z16.s +84b19671 : ld1sh z17.s, p5/Z, [z19.s, #34] : ld1sh +0x22(%z19.s)[2byte] %p5/z -> %z17.s +84b396b3 : ld1sh z19.s, p5/Z, [z21.s, #38] : ld1sh +0x26(%z21.s)[2byte] %p5/z -> %z19.s +84b59af5 : ld1sh 
z21.s, p6/Z, [z23.s, #42] : ld1sh +0x2a(%z23.s)[2byte] %p6/z -> %z21.s +84b79b37 : ld1sh z23.s, p6/Z, [z25.s, #46] : ld1sh +0x2e(%z25.s)[2byte] %p6/z -> %z23.s +84b99f79 : ld1sh z25.s, p7/Z, [z27.s, #50] : ld1sh +0x32(%z27.s)[2byte] %p7/z -> %z25.s +84bb9fbb : ld1sh z27.s, p7/Z, [z29.s, #54] : ld1sh +0x36(%z29.s)[2byte] %p7/z -> %z27.s +84bf9fff : ld1sh z31.s, p7/Z, [z31.s, #62] : ld1sh +0x3e(%z31.s)[2byte] %p7/z -> %z31.s # LD1SH { .D }, /Z, [, , LSL #1] (LD1SH-Z.P.BR-S64) -a5004000 : ld1sh z0.d, p0/Z, [x0, x0, LSL #1] : ld1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d -a5054482 : ld1sh z2.d, p1/Z, [x4, x5, LSL #1] : ld1sh (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d -a50748c4 : ld1sh z4.d, p2/Z, [x6, x7, LSL #1] : ld1sh (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d -a5094906 : ld1sh z6.d, p2/Z, [x8, x9, LSL #1] : ld1sh (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d -a50b4d48 : ld1sh z8.d, p3/Z, [x10, x11, LSL #1] : ld1sh (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d -a50c4d6a : ld1sh z10.d, p3/Z, [x11, x12, LSL #1] : ld1sh (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d -a50e51ac : ld1sh z12.d, p4/Z, [x13, x14, LSL #1] : ld1sh (%x13,%x14,lsl #1)[8byte] %p4/z -> %z12.d -a51051ee : ld1sh z14.d, p4/Z, [x15, x16, LSL #1] : ld1sh (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d -a5125630 : ld1sh z16.d, p5/Z, [x17, x18, LSL #1] : ld1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d -a5145671 : ld1sh z17.d, p5/Z, [x19, x20, LSL #1] : ld1sh (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d -a51656b3 : ld1sh z19.d, p5/Z, [x21, x22, LSL #1] : ld1sh (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d -a5185af5 : ld1sh z21.d, p6/Z, [x23, x24, LSL #1] : ld1sh (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d -a5195b17 : ld1sh z23.d, p6/Z, [x24, x25, LSL #1] : ld1sh (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d -a51b5f59 : ld1sh z25.d, p7/Z, [x26, x27, LSL #1] : ld1sh (%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d -a51d5f9b : ld1sh z27.d, p7/Z, [x28, x29, LSL #1] : ld1sh (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d -a51e5fff : ld1sh z31.d, p7/Z, [sp, x30, LSL 
#1] : ld1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d +a5004000 : ld1sh z0.d, p0/Z, [x0, x0, LSL #1] : ld1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d +a5054482 : ld1sh z2.d, p1/Z, [x4, x5, LSL #1] : ld1sh (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.d +a50748c4 : ld1sh z4.d, p2/Z, [x6, x7, LSL #1] : ld1sh (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.d +a5094906 : ld1sh z6.d, p2/Z, [x8, x9, LSL #1] : ld1sh (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.d +a50b4d48 : ld1sh z8.d, p3/Z, [x10, x11, LSL #1] : ld1sh (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.d +a50c4d6a : ld1sh z10.d, p3/Z, [x11, x12, LSL #1] : ld1sh (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.d +a50e51ac : ld1sh z12.d, p4/Z, [x13, x14, LSL #1] : ld1sh (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.d +a51051ee : ld1sh z14.d, p4/Z, [x15, x16, LSL #1] : ld1sh (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.d +a5125630 : ld1sh z16.d, p5/Z, [x17, x18, LSL #1] : ld1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d +a5145671 : ld1sh z17.d, p5/Z, [x19, x20, LSL #1] : ld1sh (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.d +a51656b3 : ld1sh z19.d, p5/Z, [x21, x22, LSL #1] : ld1sh (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.d +a5185af5 : ld1sh z21.d, p6/Z, [x23, x24, LSL #1] : ld1sh (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.d +a5195b17 : ld1sh z23.d, p6/Z, [x24, x25, LSL #1] : ld1sh (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.d +a51b5f59 : ld1sh z25.d, p7/Z, [x26, x27, LSL #1] : ld1sh (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.d +a51d5f9b : ld1sh z27.d, p7/Z, [x28, x29, LSL #1] : ld1sh (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.d +a51e5fff : ld1sh z31.d, p7/Z, [sp, x30, LSL #1] : ld1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d # LD1SH { .D }, /Z, [{, #, MUL VL}] (LD1SH-Z.P.BI-S64) -a508a000 : ld1sh z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sh -0x40(%x0)[8byte] %p0/z -> %z0.d -a509a482 : ld1sh z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sh -0x38(%x4)[8byte] %p1/z -> %z2.d -a50aa8c4 : ld1sh z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sh -0x30(%x6)[8byte] %p2/z -> %z4.d -a50ba906 : ld1sh z6.d, p2/Z, [x8, #-5, MUL VL] : 
ld1sh -0x28(%x8)[8byte] %p2/z -> %z6.d -a50cad48 : ld1sh z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sh -0x20(%x10)[8byte] %p3/z -> %z8.d -a50dad6a : ld1sh z10.d, p3/Z, [x11, #-3, MUL VL] : ld1sh -0x18(%x11)[8byte] %p3/z -> %z10.d -a50eb1ac : ld1sh z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sh -0x10(%x13)[8byte] %p4/z -> %z12.d -a50fb1ee : ld1sh z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sh -0x08(%x15)[8byte] %p4/z -> %z14.d -a500b630 : ld1sh z16.d, p5/Z, [x17, #0, MUL VL] : ld1sh (%x17)[8byte] %p5/z -> %z16.d -a500b671 : ld1sh z17.d, p5/Z, [x19, #0, MUL VL] : ld1sh (%x19)[8byte] %p5/z -> %z17.d -a501b6b3 : ld1sh z19.d, p5/Z, [x21, #1, MUL VL] : ld1sh +0x08(%x21)[8byte] %p5/z -> %z19.d -a502baf5 : ld1sh z21.d, p6/Z, [x23, #2, MUL VL] : ld1sh +0x10(%x23)[8byte] %p6/z -> %z21.d -a503bb17 : ld1sh z23.d, p6/Z, [x24, #3, MUL VL] : ld1sh +0x18(%x24)[8byte] %p6/z -> %z23.d -a504bf59 : ld1sh z25.d, p7/Z, [x26, #4, MUL VL] : ld1sh +0x20(%x26)[8byte] %p7/z -> %z25.d -a505bf9b : ld1sh z27.d, p7/Z, [x28, #5, MUL VL] : ld1sh +0x28(%x28)[8byte] %p7/z -> %z27.d -a507bfff : ld1sh z31.d, p7/Z, [sp, #7, MUL VL] : ld1sh +0x38(%sp)[8byte] %p7/z -> %z31.d +a508a000 : ld1sh z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sh -0x40(%x0)[2byte] %p0/z -> %z0.d +a509a482 : ld1sh z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sh -0x38(%x4)[2byte] %p1/z -> %z2.d +a50aa8c4 : ld1sh z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sh -0x30(%x6)[2byte] %p2/z -> %z4.d +a50ba906 : ld1sh z6.d, p2/Z, [x8, #-5, MUL VL] : ld1sh -0x28(%x8)[2byte] %p2/z -> %z6.d +a50cad48 : ld1sh z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sh -0x20(%x10)[2byte] %p3/z -> %z8.d +a50dad6a : ld1sh z10.d, p3/Z, [x11, #-3, MUL VL] : ld1sh -0x18(%x11)[2byte] %p3/z -> %z10.d +a50eb1ac : ld1sh z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sh -0x10(%x13)[2byte] %p4/z -> %z12.d +a50fb1ee : ld1sh z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sh -0x08(%x15)[2byte] %p4/z -> %z14.d +a500b630 : ld1sh z16.d, p5/Z, [x17, #0, MUL VL] : ld1sh (%x17)[2byte] %p5/z -> %z16.d +a500b671 : ld1sh z17.d, p5/Z, [x19, #0, MUL VL] : 
ld1sh (%x19)[2byte] %p5/z -> %z17.d +a501b6b3 : ld1sh z19.d, p5/Z, [x21, #1, MUL VL] : ld1sh +0x08(%x21)[2byte] %p5/z -> %z19.d +a502baf5 : ld1sh z21.d, p6/Z, [x23, #2, MUL VL] : ld1sh +0x10(%x23)[2byte] %p6/z -> %z21.d +a503bb17 : ld1sh z23.d, p6/Z, [x24, #3, MUL VL] : ld1sh +0x18(%x24)[2byte] %p6/z -> %z23.d +a504bf59 : ld1sh z25.d, p7/Z, [x26, #4, MUL VL] : ld1sh +0x20(%x26)[2byte] %p7/z -> %z25.d +a505bf9b : ld1sh z27.d, p7/Z, [x28, #5, MUL VL] : ld1sh +0x28(%x28)[2byte] %p7/z -> %z27.d +a507bfff : ld1sh z31.d, p7/Z, [sp, #7, MUL VL] : ld1sh +0x38(%sp)[2byte] %p7/z -> %z31.d # LD1SH { .S }, /Z, [, , LSL #1] (LD1SH-Z.P.BR-S32) -a5204000 : ld1sh z0.s, p0/Z, [x0, x0, LSL #1] : ld1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s -a5254482 : ld1sh z2.s, p1/Z, [x4, x5, LSL #1] : ld1sh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s -a52748c4 : ld1sh z4.s, p2/Z, [x6, x7, LSL #1] : ld1sh (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.s -a5294906 : ld1sh z6.s, p2/Z, [x8, x9, LSL #1] : ld1sh (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s -a52b4d48 : ld1sh z8.s, p3/Z, [x10, x11, LSL #1] : ld1sh (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s -a52c4d6a : ld1sh z10.s, p3/Z, [x11, x12, LSL #1] : ld1sh (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s -a52e51ac : ld1sh z12.s, p4/Z, [x13, x14, LSL #1] : ld1sh (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s -a53051ee : ld1sh z14.s, p4/Z, [x15, x16, LSL #1] : ld1sh (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s -a5325630 : ld1sh z16.s, p5/Z, [x17, x18, LSL #1] : ld1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s -a5345671 : ld1sh z17.s, p5/Z, [x19, x20, LSL #1] : ld1sh (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s -a53656b3 : ld1sh z19.s, p5/Z, [x21, x22, LSL #1] : ld1sh (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s -a5385af5 : ld1sh z21.s, p6/Z, [x23, x24, LSL #1] : ld1sh (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s -a5395b17 : ld1sh z23.s, p6/Z, [x24, x25, LSL #1] : ld1sh (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s -a53b5f59 : ld1sh z25.s, p7/Z, [x26, x27, LSL #1] : ld1sh (%x26,%x27,lsl 
#1)[16byte] %p7/z -> %z25.s -a53d5f9b : ld1sh z27.s, p7/Z, [x28, x29, LSL #1] : ld1sh (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s -a53e5fff : ld1sh z31.s, p7/Z, [sp, x30, LSL #1] : ld1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s +a5204000 : ld1sh z0.s, p0/Z, [x0, x0, LSL #1] : ld1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s +a5254482 : ld1sh z2.s, p1/Z, [x4, x5, LSL #1] : ld1sh (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.s +a52748c4 : ld1sh z4.s, p2/Z, [x6, x7, LSL #1] : ld1sh (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.s +a5294906 : ld1sh z6.s, p2/Z, [x8, x9, LSL #1] : ld1sh (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.s +a52b4d48 : ld1sh z8.s, p3/Z, [x10, x11, LSL #1] : ld1sh (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.s +a52c4d6a : ld1sh z10.s, p3/Z, [x11, x12, LSL #1] : ld1sh (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.s +a52e51ac : ld1sh z12.s, p4/Z, [x13, x14, LSL #1] : ld1sh (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.s +a53051ee : ld1sh z14.s, p4/Z, [x15, x16, LSL #1] : ld1sh (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.s +a5325630 : ld1sh z16.s, p5/Z, [x17, x18, LSL #1] : ld1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s +a5345671 : ld1sh z17.s, p5/Z, [x19, x20, LSL #1] : ld1sh (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.s +a53656b3 : ld1sh z19.s, p5/Z, [x21, x22, LSL #1] : ld1sh (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.s +a5385af5 : ld1sh z21.s, p6/Z, [x23, x24, LSL #1] : ld1sh (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.s +a5395b17 : ld1sh z23.s, p6/Z, [x24, x25, LSL #1] : ld1sh (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.s +a53b5f59 : ld1sh z25.s, p7/Z, [x26, x27, LSL #1] : ld1sh (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.s +a53d5f9b : ld1sh z27.s, p7/Z, [x28, x29, LSL #1] : ld1sh (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.s +a53e5fff : ld1sh z31.s, p7/Z, [sp, x30, LSL #1] : ld1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s # LD1SH { .S }, /Z, [{, #, MUL VL}] (LD1SH-Z.P.BI-S32) -a528a000 : ld1sh z0.s, p0/Z, [x0, #-8, MUL VL] : ld1sh -0x80(%x0)[16byte] %p0/z -> %z0.s -a529a482 : ld1sh z2.s, p1/Z, [x4, #-7, MUL VL] : 
ld1sh -0x70(%x4)[16byte] %p1/z -> %z2.s -a52aa8c4 : ld1sh z4.s, p2/Z, [x6, #-6, MUL VL] : ld1sh -0x60(%x6)[16byte] %p2/z -> %z4.s -a52ba906 : ld1sh z6.s, p2/Z, [x8, #-5, MUL VL] : ld1sh -0x50(%x8)[16byte] %p2/z -> %z6.s -a52cad48 : ld1sh z8.s, p3/Z, [x10, #-4, MUL VL] : ld1sh -0x40(%x10)[16byte] %p3/z -> %z8.s -a52dad6a : ld1sh z10.s, p3/Z, [x11, #-3, MUL VL] : ld1sh -0x30(%x11)[16byte] %p3/z -> %z10.s -a52eb1ac : ld1sh z12.s, p4/Z, [x13, #-2, MUL VL] : ld1sh -0x20(%x13)[16byte] %p4/z -> %z12.s -a52fb1ee : ld1sh z14.s, p4/Z, [x15, #-1, MUL VL] : ld1sh -0x10(%x15)[16byte] %p4/z -> %z14.s -a520b630 : ld1sh z16.s, p5/Z, [x17, #0, MUL VL] : ld1sh (%x17)[16byte] %p5/z -> %z16.s -a520b671 : ld1sh z17.s, p5/Z, [x19, #0, MUL VL] : ld1sh (%x19)[16byte] %p5/z -> %z17.s -a521b6b3 : ld1sh z19.s, p5/Z, [x21, #1, MUL VL] : ld1sh +0x10(%x21)[16byte] %p5/z -> %z19.s -a522baf5 : ld1sh z21.s, p6/Z, [x23, #2, MUL VL] : ld1sh +0x20(%x23)[16byte] %p6/z -> %z21.s -a523bb17 : ld1sh z23.s, p6/Z, [x24, #3, MUL VL] : ld1sh +0x30(%x24)[16byte] %p6/z -> %z23.s -a524bf59 : ld1sh z25.s, p7/Z, [x26, #4, MUL VL] : ld1sh +0x40(%x26)[16byte] %p7/z -> %z25.s -a525bf9b : ld1sh z27.s, p7/Z, [x28, #5, MUL VL] : ld1sh +0x50(%x28)[16byte] %p7/z -> %z27.s -a527bfff : ld1sh z31.s, p7/Z, [sp, #7, MUL VL] : ld1sh +0x70(%sp)[16byte] %p7/z -> %z31.s +a528a000 : ld1sh z0.s, p0/Z, [x0, #-8, MUL VL] : ld1sh -0x80(%x0)[2byte] %p0/z -> %z0.s +a529a482 : ld1sh z2.s, p1/Z, [x4, #-7, MUL VL] : ld1sh -0x70(%x4)[2byte] %p1/z -> %z2.s +a52aa8c4 : ld1sh z4.s, p2/Z, [x6, #-6, MUL VL] : ld1sh -0x60(%x6)[2byte] %p2/z -> %z4.s +a52ba906 : ld1sh z6.s, p2/Z, [x8, #-5, MUL VL] : ld1sh -0x50(%x8)[2byte] %p2/z -> %z6.s +a52cad48 : ld1sh z8.s, p3/Z, [x10, #-4, MUL VL] : ld1sh -0x40(%x10)[2byte] %p3/z -> %z8.s +a52dad6a : ld1sh z10.s, p3/Z, [x11, #-3, MUL VL] : ld1sh -0x30(%x11)[2byte] %p3/z -> %z10.s +a52eb1ac : ld1sh z12.s, p4/Z, [x13, #-2, MUL VL] : ld1sh -0x20(%x13)[2byte] %p4/z -> %z12.s +a52fb1ee : ld1sh z14.s, p4/Z, [x15, 
#-1, MUL VL] : ld1sh -0x10(%x15)[2byte] %p4/z -> %z14.s +a520b630 : ld1sh z16.s, p5/Z, [x17, #0, MUL VL] : ld1sh (%x17)[2byte] %p5/z -> %z16.s +a520b671 : ld1sh z17.s, p5/Z, [x19, #0, MUL VL] : ld1sh (%x19)[2byte] %p5/z -> %z17.s +a521b6b3 : ld1sh z19.s, p5/Z, [x21, #1, MUL VL] : ld1sh +0x10(%x21)[2byte] %p5/z -> %z19.s +a522baf5 : ld1sh z21.s, p6/Z, [x23, #2, MUL VL] : ld1sh +0x20(%x23)[2byte] %p6/z -> %z21.s +a523bb17 : ld1sh z23.s, p6/Z, [x24, #3, MUL VL] : ld1sh +0x30(%x24)[2byte] %p6/z -> %z23.s +a524bf59 : ld1sh z25.s, p7/Z, [x26, #4, MUL VL] : ld1sh +0x40(%x26)[2byte] %p7/z -> %z25.s +a525bf9b : ld1sh z27.s, p7/Z, [x28, #5, MUL VL] : ld1sh +0x50(%x28)[2byte] %p7/z -> %z27.s +a527bfff : ld1sh z31.s, p7/Z, [sp, #7, MUL VL] : ld1sh +0x70(%sp)[2byte] %p7/z -> %z31.s # LD1SH { .D }, /Z, [, .D, ] (LD1SH-Z.P.BZ-D.x32.unscaled) -c4800000 : ld1sh z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sh (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d -c4850482 : ld1sh z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sh (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d -c48708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sh (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d -c4890906 : ld1sh z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sh (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d -c48b0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sh (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d -c48d0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sh (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d -c48f11ac : ld1sh z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sh (%x13,%z15.d,uxtw)[8byte] %p4/z -> %z12.d -c49111ee : ld1sh z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sh (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d -c4931630 : ld1sh z16.d, p5/Z, [x17, z19.d, UXTW] : ld1sh (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d -c4941671 : ld1sh z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sh (%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d -c49616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sh (%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d -c4981af5 : ld1sh z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sh (%x23,%z24.d,uxtw)[8byte] 
%p6/z -> %z21.d -c49a1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sh (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d -c49c1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sh (%x26,%z28.d,uxtw)[8byte] %p7/z -> %z25.d -c49e1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sh (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d -c49f1fff : ld1sh z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sh (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d -c4c00000 : ld1sh z0.d, p0/Z, [x0, z0.d, SXTW] : ld1sh (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d -c4c50482 : ld1sh z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sh (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d -c4c708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sh (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d -c4c90906 : ld1sh z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sh (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d -c4cb0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sh (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d -c4cd0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sh (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d -c4cf11ac : ld1sh z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sh (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d -c4d111ee : ld1sh z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sh (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d -c4d31630 : ld1sh z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sh (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d -c4d41671 : ld1sh z17.d, p5/Z, [x19, z20.d, SXTW] : ld1sh (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d -c4d616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sh (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d -c4d81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sh (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d -c4da1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, SXTW] : ld1sh (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d -c4dc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sh (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d -c4de1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sh (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d -c4df1fff : ld1sh z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sh (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d +c4800000 : 
ld1sh z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sh (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d +c4850482 : ld1sh z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sh (%x4,%z5.d,uxtw)[2byte] %p1/z -> %z2.d +c48708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sh (%x6,%z7.d,uxtw)[2byte] %p2/z -> %z4.d +c4890906 : ld1sh z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sh (%x8,%z9.d,uxtw)[2byte] %p2/z -> %z6.d +c48b0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sh (%x10,%z11.d,uxtw)[2byte] %p3/z -> %z8.d +c48d0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sh (%x11,%z13.d,uxtw)[2byte] %p3/z -> %z10.d +c48f11ac : ld1sh z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sh (%x13,%z15.d,uxtw)[2byte] %p4/z -> %z12.d +c49111ee : ld1sh z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sh (%x15,%z17.d,uxtw)[2byte] %p4/z -> %z14.d +c4931630 : ld1sh z16.d, p5/Z, [x17, z19.d, UXTW] : ld1sh (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d +c4941671 : ld1sh z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sh (%x19,%z20.d,uxtw)[2byte] %p5/z -> %z17.d +c49616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sh (%x21,%z22.d,uxtw)[2byte] %p5/z -> %z19.d +c4981af5 : ld1sh z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sh (%x23,%z24.d,uxtw)[2byte] %p6/z -> %z21.d +c49a1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sh (%x24,%z26.d,uxtw)[2byte] %p6/z -> %z23.d +c49c1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sh (%x26,%z28.d,uxtw)[2byte] %p7/z -> %z25.d +c49e1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sh (%x28,%z30.d,uxtw)[2byte] %p7/z -> %z27.d +c49f1fff : ld1sh z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sh (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d +c4c00000 : ld1sh z0.d, p0/Z, [x0, z0.d, SXTW] : ld1sh (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d +c4c50482 : ld1sh z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sh (%x4,%z5.d,sxtw)[2byte] %p1/z -> %z2.d +c4c708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sh (%x6,%z7.d,sxtw)[2byte] %p2/z -> %z4.d +c4c90906 : ld1sh z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sh (%x8,%z9.d,sxtw)[2byte] %p2/z -> %z6.d +c4cb0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sh 
(%x10,%z11.d,sxtw)[2byte] %p3/z -> %z8.d +c4cd0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sh (%x11,%z13.d,sxtw)[2byte] %p3/z -> %z10.d +c4cf11ac : ld1sh z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sh (%x13,%z15.d,sxtw)[2byte] %p4/z -> %z12.d +c4d111ee : ld1sh z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sh (%x15,%z17.d,sxtw)[2byte] %p4/z -> %z14.d +c4d31630 : ld1sh z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sh (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d +c4d41671 : ld1sh z17.d, p5/Z, [x19, z20.d, SXTW] : ld1sh (%x19,%z20.d,sxtw)[2byte] %p5/z -> %z17.d +c4d616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sh (%x21,%z22.d,sxtw)[2byte] %p5/z -> %z19.d +c4d81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sh (%x23,%z24.d,sxtw)[2byte] %p6/z -> %z21.d +c4da1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, SXTW] : ld1sh (%x24,%z26.d,sxtw)[2byte] %p6/z -> %z23.d +c4dc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sh (%x26,%z28.d,sxtw)[2byte] %p7/z -> %z25.d +c4de1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sh (%x28,%z30.d,sxtw)[2byte] %p7/z -> %z27.d +c4df1fff : ld1sh z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sh (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d # LD1SH { .D }, /Z, [, .D, #1] (LD1SH-Z.P.BZ-D.x32.scaled) -c4a00000 : ld1sh z0.d, p0/Z, [x0, z0.d, UXTW #1] : ld1sh (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d -c4a50482 : ld1sh z2.d, p1/Z, [x4, z5.d, UXTW #1] : ld1sh (%x4,%z5.d,uxtw #1)[8byte] %p1/z -> %z2.d -c4a708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, UXTW #1] : ld1sh (%x6,%z7.d,uxtw #1)[8byte] %p2/z -> %z4.d -c4a90906 : ld1sh z6.d, p2/Z, [x8, z9.d, UXTW #1] : ld1sh (%x8,%z9.d,uxtw #1)[8byte] %p2/z -> %z6.d -c4ab0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, UXTW #1] : ld1sh (%x10,%z11.d,uxtw #1)[8byte] %p3/z -> %z8.d -c4ad0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, UXTW #1] : ld1sh (%x11,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d -c4af11ac : ld1sh z12.d, p4/Z, [x13, z15.d, UXTW #1] : ld1sh (%x13,%z15.d,uxtw #1)[8byte] %p4/z -> %z12.d -c4b111ee : ld1sh z14.d, p4/Z, [x15, z17.d, UXTW #1] : ld1sh (%x15,%z17.d,uxtw 
#1)[8byte] %p4/z -> %z14.d -c4b31630 : ld1sh z16.d, p5/Z, [x17, z19.d, UXTW #1] : ld1sh (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d -c4b41671 : ld1sh z17.d, p5/Z, [x19, z20.d, UXTW #1] : ld1sh (%x19,%z20.d,uxtw #1)[8byte] %p5/z -> %z17.d -c4b616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, UXTW #1] : ld1sh (%x21,%z22.d,uxtw #1)[8byte] %p5/z -> %z19.d -c4b81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, UXTW #1] : ld1sh (%x23,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d -c4ba1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, UXTW #1] : ld1sh (%x24,%z26.d,uxtw #1)[8byte] %p6/z -> %z23.d -c4bc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, UXTW #1] : ld1sh (%x26,%z28.d,uxtw #1)[8byte] %p7/z -> %z25.d -c4be1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, UXTW #1] : ld1sh (%x28,%z30.d,uxtw #1)[8byte] %p7/z -> %z27.d -c4bf1fff : ld1sh z31.d, p7/Z, [sp, z31.d, UXTW #1] : ld1sh (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d -c4e00000 : ld1sh z0.d, p0/Z, [x0, z0.d, SXTW #1] : ld1sh (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d -c4e50482 : ld1sh z2.d, p1/Z, [x4, z5.d, SXTW #1] : ld1sh (%x4,%z5.d,sxtw #1)[8byte] %p1/z -> %z2.d -c4e708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, SXTW #1] : ld1sh (%x6,%z7.d,sxtw #1)[8byte] %p2/z -> %z4.d -c4e90906 : ld1sh z6.d, p2/Z, [x8, z9.d, SXTW #1] : ld1sh (%x8,%z9.d,sxtw #1)[8byte] %p2/z -> %z6.d -c4eb0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, SXTW #1] : ld1sh (%x10,%z11.d,sxtw #1)[8byte] %p3/z -> %z8.d -c4ed0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, SXTW #1] : ld1sh (%x11,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d -c4ef11ac : ld1sh z12.d, p4/Z, [x13, z15.d, SXTW #1] : ld1sh (%x13,%z15.d,sxtw #1)[8byte] %p4/z -> %z12.d -c4f111ee : ld1sh z14.d, p4/Z, [x15, z17.d, SXTW #1] : ld1sh (%x15,%z17.d,sxtw #1)[8byte] %p4/z -> %z14.d -c4f31630 : ld1sh z16.d, p5/Z, [x17, z19.d, SXTW #1] : ld1sh (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d -c4f41671 : ld1sh z17.d, p5/Z, [x19, z20.d, SXTW #1] : ld1sh (%x19,%z20.d,sxtw #1)[8byte] %p5/z -> %z17.d -c4f616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, SXTW #1] : ld1sh (%x21,%z22.d,sxtw #1)[8byte] 
%p5/z -> %z19.d -c4f81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, SXTW #1] : ld1sh (%x23,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d -c4fa1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, SXTW #1] : ld1sh (%x24,%z26.d,sxtw #1)[8byte] %p6/z -> %z23.d -c4fc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, SXTW #1] : ld1sh (%x26,%z28.d,sxtw #1)[8byte] %p7/z -> %z25.d -c4fe1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, SXTW #1] : ld1sh (%x28,%z30.d,sxtw #1)[8byte] %p7/z -> %z27.d -c4ff1fff : ld1sh z31.d, p7/Z, [sp, z31.d, SXTW #1] : ld1sh (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d +c4a00000 : ld1sh z0.d, p0/Z, [x0, z0.d, UXTW #1] : ld1sh (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d +c4a50482 : ld1sh z2.d, p1/Z, [x4, z5.d, UXTW #1] : ld1sh (%x4,%z5.d,uxtw #1)[2byte] %p1/z -> %z2.d +c4a708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, UXTW #1] : ld1sh (%x6,%z7.d,uxtw #1)[2byte] %p2/z -> %z4.d +c4a90906 : ld1sh z6.d, p2/Z, [x8, z9.d, UXTW #1] : ld1sh (%x8,%z9.d,uxtw #1)[2byte] %p2/z -> %z6.d +c4ab0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, UXTW #1] : ld1sh (%x10,%z11.d,uxtw #1)[2byte] %p3/z -> %z8.d +c4ad0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, UXTW #1] : ld1sh (%x11,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d +c4af11ac : ld1sh z12.d, p4/Z, [x13, z15.d, UXTW #1] : ld1sh (%x13,%z15.d,uxtw #1)[2byte] %p4/z -> %z12.d +c4b111ee : ld1sh z14.d, p4/Z, [x15, z17.d, UXTW #1] : ld1sh (%x15,%z17.d,uxtw #1)[2byte] %p4/z -> %z14.d +c4b31630 : ld1sh z16.d, p5/Z, [x17, z19.d, UXTW #1] : ld1sh (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d +c4b41671 : ld1sh z17.d, p5/Z, [x19, z20.d, UXTW #1] : ld1sh (%x19,%z20.d,uxtw #1)[2byte] %p5/z -> %z17.d +c4b616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, UXTW #1] : ld1sh (%x21,%z22.d,uxtw #1)[2byte] %p5/z -> %z19.d +c4b81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, UXTW #1] : ld1sh (%x23,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d +c4ba1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, UXTW #1] : ld1sh (%x24,%z26.d,uxtw #1)[2byte] %p6/z -> %z23.d +c4bc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, UXTW #1] : ld1sh (%x26,%z28.d,uxtw #1)[2byte] %p7/z -> 
%z25.d +c4be1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, UXTW #1] : ld1sh (%x28,%z30.d,uxtw #1)[2byte] %p7/z -> %z27.d +c4bf1fff : ld1sh z31.d, p7/Z, [sp, z31.d, UXTW #1] : ld1sh (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d +c4e00000 : ld1sh z0.d, p0/Z, [x0, z0.d, SXTW #1] : ld1sh (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d +c4e50482 : ld1sh z2.d, p1/Z, [x4, z5.d, SXTW #1] : ld1sh (%x4,%z5.d,sxtw #1)[2byte] %p1/z -> %z2.d +c4e708c4 : ld1sh z4.d, p2/Z, [x6, z7.d, SXTW #1] : ld1sh (%x6,%z7.d,sxtw #1)[2byte] %p2/z -> %z4.d +c4e90906 : ld1sh z6.d, p2/Z, [x8, z9.d, SXTW #1] : ld1sh (%x8,%z9.d,sxtw #1)[2byte] %p2/z -> %z6.d +c4eb0d48 : ld1sh z8.d, p3/Z, [x10, z11.d, SXTW #1] : ld1sh (%x10,%z11.d,sxtw #1)[2byte] %p3/z -> %z8.d +c4ed0d6a : ld1sh z10.d, p3/Z, [x11, z13.d, SXTW #1] : ld1sh (%x11,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d +c4ef11ac : ld1sh z12.d, p4/Z, [x13, z15.d, SXTW #1] : ld1sh (%x13,%z15.d,sxtw #1)[2byte] %p4/z -> %z12.d +c4f111ee : ld1sh z14.d, p4/Z, [x15, z17.d, SXTW #1] : ld1sh (%x15,%z17.d,sxtw #1)[2byte] %p4/z -> %z14.d +c4f31630 : ld1sh z16.d, p5/Z, [x17, z19.d, SXTW #1] : ld1sh (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d +c4f41671 : ld1sh z17.d, p5/Z, [x19, z20.d, SXTW #1] : ld1sh (%x19,%z20.d,sxtw #1)[2byte] %p5/z -> %z17.d +c4f616b3 : ld1sh z19.d, p5/Z, [x21, z22.d, SXTW #1] : ld1sh (%x21,%z22.d,sxtw #1)[2byte] %p5/z -> %z19.d +c4f81af5 : ld1sh z21.d, p6/Z, [x23, z24.d, SXTW #1] : ld1sh (%x23,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d +c4fa1b17 : ld1sh z23.d, p6/Z, [x24, z26.d, SXTW #1] : ld1sh (%x24,%z26.d,sxtw #1)[2byte] %p6/z -> %z23.d +c4fc1f59 : ld1sh z25.d, p7/Z, [x26, z28.d, SXTW #1] : ld1sh (%x26,%z28.d,sxtw #1)[2byte] %p7/z -> %z25.d +c4fe1f9b : ld1sh z27.d, p7/Z, [x28, z30.d, SXTW #1] : ld1sh (%x28,%z30.d,sxtw #1)[2byte] %p7/z -> %z27.d +c4ff1fff : ld1sh z31.d, p7/Z, [sp, z31.d, SXTW #1] : ld1sh (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d # LD1SH { .D }, /Z, [.D{, #}] (LD1SH-Z.P.AI-D) -c4a08000 : ld1sh z0.d, p0/Z, [z0.d, #0] : ld1sh (%z0.d)[8byte] 
%p0/z -> %z0.d -c4a28482 : ld1sh z2.d, p1/Z, [z4.d, #4] : ld1sh +0x04(%z4.d)[8byte] %p1/z -> %z2.d -c4a488c4 : ld1sh z4.d, p2/Z, [z6.d, #8] : ld1sh +0x08(%z6.d)[8byte] %p2/z -> %z4.d -c4a68906 : ld1sh z6.d, p2/Z, [z8.d, #12] : ld1sh +0x0c(%z8.d)[8byte] %p2/z -> %z6.d -c4a88d48 : ld1sh z8.d, p3/Z, [z10.d, #16] : ld1sh +0x10(%z10.d)[8byte] %p3/z -> %z8.d -c4aa8d8a : ld1sh z10.d, p3/Z, [z12.d, #20] : ld1sh +0x14(%z12.d)[8byte] %p3/z -> %z10.d -c4ac91cc : ld1sh z12.d, p4/Z, [z14.d, #24] : ld1sh +0x18(%z14.d)[8byte] %p4/z -> %z12.d -c4ae920e : ld1sh z14.d, p4/Z, [z16.d, #28] : ld1sh +0x1c(%z16.d)[8byte] %p4/z -> %z14.d -c4b09650 : ld1sh z16.d, p5/Z, [z18.d, #32] : ld1sh +0x20(%z18.d)[8byte] %p5/z -> %z16.d -c4b19671 : ld1sh z17.d, p5/Z, [z19.d, #34] : ld1sh +0x22(%z19.d)[8byte] %p5/z -> %z17.d -c4b396b3 : ld1sh z19.d, p5/Z, [z21.d, #38] : ld1sh +0x26(%z21.d)[8byte] %p5/z -> %z19.d -c4b59af5 : ld1sh z21.d, p6/Z, [z23.d, #42] : ld1sh +0x2a(%z23.d)[8byte] %p6/z -> %z21.d -c4b79b37 : ld1sh z23.d, p6/Z, [z25.d, #46] : ld1sh +0x2e(%z25.d)[8byte] %p6/z -> %z23.d -c4b99f79 : ld1sh z25.d, p7/Z, [z27.d, #50] : ld1sh +0x32(%z27.d)[8byte] %p7/z -> %z25.d -c4bb9fbb : ld1sh z27.d, p7/Z, [z29.d, #54] : ld1sh +0x36(%z29.d)[8byte] %p7/z -> %z27.d -c4bf9fff : ld1sh z31.d, p7/Z, [z31.d, #62] : ld1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +c4a08000 : ld1sh z0.d, p0/Z, [z0.d, #0] : ld1sh (%z0.d)[2byte] %p0/z -> %z0.d +c4a28482 : ld1sh z2.d, p1/Z, [z4.d, #4] : ld1sh +0x04(%z4.d)[2byte] %p1/z -> %z2.d +c4a488c4 : ld1sh z4.d, p2/Z, [z6.d, #8] : ld1sh +0x08(%z6.d)[2byte] %p2/z -> %z4.d +c4a68906 : ld1sh z6.d, p2/Z, [z8.d, #12] : ld1sh +0x0c(%z8.d)[2byte] %p2/z -> %z6.d +c4a88d48 : ld1sh z8.d, p3/Z, [z10.d, #16] : ld1sh +0x10(%z10.d)[2byte] %p3/z -> %z8.d +c4aa8d8a : ld1sh z10.d, p3/Z, [z12.d, #20] : ld1sh +0x14(%z12.d)[2byte] %p3/z -> %z10.d +c4ac91cc : ld1sh z12.d, p4/Z, [z14.d, #24] : ld1sh +0x18(%z14.d)[2byte] %p4/z -> %z12.d +c4ae920e : ld1sh z14.d, p4/Z, [z16.d, #28] : ld1sh 
+0x1c(%z16.d)[2byte] %p4/z -> %z14.d +c4b09650 : ld1sh z16.d, p5/Z, [z18.d, #32] : ld1sh +0x20(%z18.d)[2byte] %p5/z -> %z16.d +c4b19671 : ld1sh z17.d, p5/Z, [z19.d, #34] : ld1sh +0x22(%z19.d)[2byte] %p5/z -> %z17.d +c4b396b3 : ld1sh z19.d, p5/Z, [z21.d, #38] : ld1sh +0x26(%z21.d)[2byte] %p5/z -> %z19.d +c4b59af5 : ld1sh z21.d, p6/Z, [z23.d, #42] : ld1sh +0x2a(%z23.d)[2byte] %p6/z -> %z21.d +c4b79b37 : ld1sh z23.d, p6/Z, [z25.d, #46] : ld1sh +0x2e(%z25.d)[2byte] %p6/z -> %z23.d +c4b99f79 : ld1sh z25.d, p7/Z, [z27.d, #50] : ld1sh +0x32(%z27.d)[2byte] %p7/z -> %z25.d +c4bb9fbb : ld1sh z27.d, p7/Z, [z29.d, #54] : ld1sh +0x36(%z29.d)[2byte] %p7/z -> %z27.d +c4bf9fff : ld1sh z31.d, p7/Z, [z31.d, #62] : ld1sh +0x3e(%z31.d)[2byte] %p7/z -> %z31.d # LD1SH { .D }, /Z, [, .D] (LD1SH-Z.P.BZ-D.64.unscaled) -c4c08000 : ld1sh z0.d, p0/Z, [x0, z0.d] : ld1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d -c4c58482 : ld1sh z2.d, p1/Z, [x4, z5.d] : ld1sh (%x4,%z5.d)[8byte] %p1/z -> %z2.d -c4c788c4 : ld1sh z4.d, p2/Z, [x6, z7.d] : ld1sh (%x6,%z7.d)[8byte] %p2/z -> %z4.d -c4c98906 : ld1sh z6.d, p2/Z, [x8, z9.d] : ld1sh (%x8,%z9.d)[8byte] %p2/z -> %z6.d -c4cb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d] : ld1sh (%x10,%z11.d)[8byte] %p3/z -> %z8.d -c4cd8d6a : ld1sh z10.d, p3/Z, [x11, z13.d] : ld1sh (%x11,%z13.d)[8byte] %p3/z -> %z10.d -c4cf91ac : ld1sh z12.d, p4/Z, [x13, z15.d] : ld1sh (%x13,%z15.d)[8byte] %p4/z -> %z12.d -c4d191ee : ld1sh z14.d, p4/Z, [x15, z17.d] : ld1sh (%x15,%z17.d)[8byte] %p4/z -> %z14.d -c4d39630 : ld1sh z16.d, p5/Z, [x17, z19.d] : ld1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d -c4d49671 : ld1sh z17.d, p5/Z, [x19, z20.d] : ld1sh (%x19,%z20.d)[8byte] %p5/z -> %z17.d -c4d696b3 : ld1sh z19.d, p5/Z, [x21, z22.d] : ld1sh (%x21,%z22.d)[8byte] %p5/z -> %z19.d -c4d89af5 : ld1sh z21.d, p6/Z, [x23, z24.d] : ld1sh (%x23,%z24.d)[8byte] %p6/z -> %z21.d -c4da9b17 : ld1sh z23.d, p6/Z, [x24, z26.d] : ld1sh (%x24,%z26.d)[8byte] %p6/z -> %z23.d -c4dc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d] : ld1sh 
(%x26,%z28.d)[8byte] %p7/z -> %z25.d -c4de9f9b : ld1sh z27.d, p7/Z, [x28, z30.d] : ld1sh (%x28,%z30.d)[8byte] %p7/z -> %z27.d -c4df9fff : ld1sh z31.d, p7/Z, [sp, z31.d] : ld1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d +c4c08000 : ld1sh z0.d, p0/Z, [x0, z0.d] : ld1sh (%x0,%z0.d)[2byte] %p0/z -> %z0.d +c4c58482 : ld1sh z2.d, p1/Z, [x4, z5.d] : ld1sh (%x4,%z5.d)[2byte] %p1/z -> %z2.d +c4c788c4 : ld1sh z4.d, p2/Z, [x6, z7.d] : ld1sh (%x6,%z7.d)[2byte] %p2/z -> %z4.d +c4c98906 : ld1sh z6.d, p2/Z, [x8, z9.d] : ld1sh (%x8,%z9.d)[2byte] %p2/z -> %z6.d +c4cb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d] : ld1sh (%x10,%z11.d)[2byte] %p3/z -> %z8.d +c4cd8d6a : ld1sh z10.d, p3/Z, [x11, z13.d] : ld1sh (%x11,%z13.d)[2byte] %p3/z -> %z10.d +c4cf91ac : ld1sh z12.d, p4/Z, [x13, z15.d] : ld1sh (%x13,%z15.d)[2byte] %p4/z -> %z12.d +c4d191ee : ld1sh z14.d, p4/Z, [x15, z17.d] : ld1sh (%x15,%z17.d)[2byte] %p4/z -> %z14.d +c4d39630 : ld1sh z16.d, p5/Z, [x17, z19.d] : ld1sh (%x17,%z19.d)[2byte] %p5/z -> %z16.d +c4d49671 : ld1sh z17.d, p5/Z, [x19, z20.d] : ld1sh (%x19,%z20.d)[2byte] %p5/z -> %z17.d +c4d696b3 : ld1sh z19.d, p5/Z, [x21, z22.d] : ld1sh (%x21,%z22.d)[2byte] %p5/z -> %z19.d +c4d89af5 : ld1sh z21.d, p6/Z, [x23, z24.d] : ld1sh (%x23,%z24.d)[2byte] %p6/z -> %z21.d +c4da9b17 : ld1sh z23.d, p6/Z, [x24, z26.d] : ld1sh (%x24,%z26.d)[2byte] %p6/z -> %z23.d +c4dc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d] : ld1sh (%x26,%z28.d)[2byte] %p7/z -> %z25.d +c4de9f9b : ld1sh z27.d, p7/Z, [x28, z30.d] : ld1sh (%x28,%z30.d)[2byte] %p7/z -> %z27.d +c4df9fff : ld1sh z31.d, p7/Z, [sp, z31.d] : ld1sh (%sp,%z31.d)[2byte] %p7/z -> %z31.d # LD1SH { .D }, /Z, [, .D, LSL #1] (LD1SH-Z.P.BZ-D.64.scaled) -c4e08000 : ld1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d -c4e58482 : ld1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1sh (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d -c4e788c4 : ld1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1sh (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d -c4e98906 : ld1sh z6.d, p2/Z, [x8, 
z9.d, LSL #1] : ld1sh (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d -c4eb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1sh (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d -c4ed8d6a : ld1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ld1sh (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d -c4ef91ac : ld1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1sh (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d -c4f191ee : ld1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1sh (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d -c4f39630 : ld1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d -c4f49671 : ld1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1sh (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d -c4f696b3 : ld1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1sh (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d -c4f89af5 : ld1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1sh (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d -c4fa9b17 : ld1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1sh (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d -c4fc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1sh (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d -c4fe9f9b : ld1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1sh (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d -c4ff9fff : ld1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d +c4e08000 : ld1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1sh (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d +c4e58482 : ld1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1sh (%x4,%z5.d,lsl #1)[2byte] %p1/z -> %z2.d +c4e788c4 : ld1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1sh (%x6,%z7.d,lsl #1)[2byte] %p2/z -> %z4.d +c4e98906 : ld1sh z6.d, p2/Z, [x8, z9.d, LSL #1] : ld1sh (%x8,%z9.d,lsl #1)[2byte] %p2/z -> %z6.d +c4eb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1sh (%x10,%z11.d,lsl #1)[2byte] %p3/z -> %z8.d +c4ed8d6a : ld1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ld1sh (%x11,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d +c4ef91ac : ld1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1sh (%x13,%z15.d,lsl #1)[2byte] %p4/z 
-> %z12.d +c4f191ee : ld1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1sh (%x15,%z17.d,lsl #1)[2byte] %p4/z -> %z14.d +c4f39630 : ld1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1sh (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d +c4f49671 : ld1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1sh (%x19,%z20.d,lsl #1)[2byte] %p5/z -> %z17.d +c4f696b3 : ld1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1sh (%x21,%z22.d,lsl #1)[2byte] %p5/z -> %z19.d +c4f89af5 : ld1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1sh (%x23,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d +c4fa9b17 : ld1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1sh (%x24,%z26.d,lsl #1)[2byte] %p6/z -> %z23.d +c4fc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1sh (%x26,%z28.d,lsl #1)[2byte] %p7/z -> %z25.d +c4fe9f9b : ld1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1sh (%x28,%z30.d,lsl #1)[2byte] %p7/z -> %z27.d +c4ff9fff : ld1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1sh (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [, , LSL #2] (LD1SW-Z.P.BR-S64) -a4804000 : ld1sw z0.d, p0/Z, [x0, x0, LSL #2] : ld1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d -a4854482 : ld1sw z2.d, p1/Z, [x4, x5, LSL #2] : ld1sw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d -a48748c4 : ld1sw z4.d, p2/Z, [x6, x7, LSL #2] : ld1sw (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d -a4894906 : ld1sw z6.d, p2/Z, [x8, x9, LSL #2] : ld1sw (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d -a48b4d48 : ld1sw z8.d, p3/Z, [x10, x11, LSL #2] : ld1sw (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d -a48c4d6a : ld1sw z10.d, p3/Z, [x11, x12, LSL #2] : ld1sw (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d -a48e51ac : ld1sw z12.d, p4/Z, [x13, x14, LSL #2] : ld1sw (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d -a49051ee : ld1sw z14.d, p4/Z, [x15, x16, LSL #2] : ld1sw (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d -a4925630 : ld1sw z16.d, p5/Z, [x17, x18, LSL #2] : ld1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d -a4945671 : ld1sw z17.d, p5/Z, [x19, x20, LSL #2] : ld1sw (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d -a49656b3 : ld1sw 
z19.d, p5/Z, [x21, x22, LSL #2] : ld1sw (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d -a4985af5 : ld1sw z21.d, p6/Z, [x23, x24, LSL #2] : ld1sw (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d -a4995b17 : ld1sw z23.d, p6/Z, [x24, x25, LSL #2] : ld1sw (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d -a49b5f59 : ld1sw z25.d, p7/Z, [x26, x27, LSL #2] : ld1sw (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d -a49d5f9b : ld1sw z27.d, p7/Z, [x28, x29, LSL #2] : ld1sw (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d -a49e5fff : ld1sw z31.d, p7/Z, [sp, x30, LSL #2] : ld1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d +a4804000 : ld1sw z0.d, p0/Z, [x0, x0, LSL #2] : ld1sw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d +a4854482 : ld1sw z2.d, p1/Z, [x4, x5, LSL #2] : ld1sw (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.d +a48748c4 : ld1sw z4.d, p2/Z, [x6, x7, LSL #2] : ld1sw (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.d +a4894906 : ld1sw z6.d, p2/Z, [x8, x9, LSL #2] : ld1sw (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.d +a48b4d48 : ld1sw z8.d, p3/Z, [x10, x11, LSL #2] : ld1sw (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.d +a48c4d6a : ld1sw z10.d, p3/Z, [x11, x12, LSL #2] : ld1sw (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.d +a48e51ac : ld1sw z12.d, p4/Z, [x13, x14, LSL #2] : ld1sw (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.d +a49051ee : ld1sw z14.d, p4/Z, [x15, x16, LSL #2] : ld1sw (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.d +a4925630 : ld1sw z16.d, p5/Z, [x17, x18, LSL #2] : ld1sw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d +a4945671 : ld1sw z17.d, p5/Z, [x19, x20, LSL #2] : ld1sw (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.d +a49656b3 : ld1sw z19.d, p5/Z, [x21, x22, LSL #2] : ld1sw (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.d +a4985af5 : ld1sw z21.d, p6/Z, [x23, x24, LSL #2] : ld1sw (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.d +a4995b17 : ld1sw z23.d, p6/Z, [x24, x25, LSL #2] : ld1sw (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.d +a49b5f59 : ld1sw z25.d, p7/Z, [x26, x27, LSL #2] : ld1sw (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.d +a49d5f9b : ld1sw z27.d, p7/Z, [x28, 
x29, LSL #2] : ld1sw (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.d +a49e5fff : ld1sw z31.d, p7/Z, [sp, x30, LSL #2] : ld1sw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [{, #, MUL VL}] (LD1SW-Z.P.BI-S64) -a488a000 : ld1sw z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sw -0x80(%x0)[16byte] %p0/z -> %z0.d -a489a482 : ld1sw z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sw -0x70(%x4)[16byte] %p1/z -> %z2.d -a48aa8c4 : ld1sw z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sw -0x60(%x6)[16byte] %p2/z -> %z4.d -a48ba906 : ld1sw z6.d, p2/Z, [x8, #-5, MUL VL] : ld1sw -0x50(%x8)[16byte] %p2/z -> %z6.d -a48cad48 : ld1sw z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sw -0x40(%x10)[16byte] %p3/z -> %z8.d -a48dad6a : ld1sw z10.d, p3/Z, [x11, #-3, MUL VL] : ld1sw -0x30(%x11)[16byte] %p3/z -> %z10.d -a48eb1ac : ld1sw z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sw -0x20(%x13)[16byte] %p4/z -> %z12.d -a48fb1ee : ld1sw z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sw -0x10(%x15)[16byte] %p4/z -> %z14.d -a480b630 : ld1sw z16.d, p5/Z, [x17, #0, MUL VL] : ld1sw (%x17)[16byte] %p5/z -> %z16.d -a480b671 : ld1sw z17.d, p5/Z, [x19, #0, MUL VL] : ld1sw (%x19)[16byte] %p5/z -> %z17.d -a481b6b3 : ld1sw z19.d, p5/Z, [x21, #1, MUL VL] : ld1sw +0x10(%x21)[16byte] %p5/z -> %z19.d -a482baf5 : ld1sw z21.d, p6/Z, [x23, #2, MUL VL] : ld1sw +0x20(%x23)[16byte] %p6/z -> %z21.d -a483bb17 : ld1sw z23.d, p6/Z, [x24, #3, MUL VL] : ld1sw +0x30(%x24)[16byte] %p6/z -> %z23.d -a484bf59 : ld1sw z25.d, p7/Z, [x26, #4, MUL VL] : ld1sw +0x40(%x26)[16byte] %p7/z -> %z25.d -a485bf9b : ld1sw z27.d, p7/Z, [x28, #5, MUL VL] : ld1sw +0x50(%x28)[16byte] %p7/z -> %z27.d -a487bfff : ld1sw z31.d, p7/Z, [sp, #7, MUL VL] : ld1sw +0x70(%sp)[16byte] %p7/z -> %z31.d +a488a000 : ld1sw z0.d, p0/Z, [x0, #-8, MUL VL] : ld1sw -0x80(%x0)[4byte] %p0/z -> %z0.d +a489a482 : ld1sw z2.d, p1/Z, [x4, #-7, MUL VL] : ld1sw -0x70(%x4)[4byte] %p1/z -> %z2.d +a48aa8c4 : ld1sw z4.d, p2/Z, [x6, #-6, MUL VL] : ld1sw -0x60(%x6)[4byte] %p2/z -> %z4.d +a48ba906 : ld1sw z6.d, p2/Z, [x8, #-5, MUL VL] : 
ld1sw -0x50(%x8)[4byte] %p2/z -> %z6.d +a48cad48 : ld1sw z8.d, p3/Z, [x10, #-4, MUL VL] : ld1sw -0x40(%x10)[4byte] %p3/z -> %z8.d +a48dad6a : ld1sw z10.d, p3/Z, [x11, #-3, MUL VL] : ld1sw -0x30(%x11)[4byte] %p3/z -> %z10.d +a48eb1ac : ld1sw z12.d, p4/Z, [x13, #-2, MUL VL] : ld1sw -0x20(%x13)[4byte] %p4/z -> %z12.d +a48fb1ee : ld1sw z14.d, p4/Z, [x15, #-1, MUL VL] : ld1sw -0x10(%x15)[4byte] %p4/z -> %z14.d +a480b630 : ld1sw z16.d, p5/Z, [x17, #0, MUL VL] : ld1sw (%x17)[4byte] %p5/z -> %z16.d +a480b671 : ld1sw z17.d, p5/Z, [x19, #0, MUL VL] : ld1sw (%x19)[4byte] %p5/z -> %z17.d +a481b6b3 : ld1sw z19.d, p5/Z, [x21, #1, MUL VL] : ld1sw +0x10(%x21)[4byte] %p5/z -> %z19.d +a482baf5 : ld1sw z21.d, p6/Z, [x23, #2, MUL VL] : ld1sw +0x20(%x23)[4byte] %p6/z -> %z21.d +a483bb17 : ld1sw z23.d, p6/Z, [x24, #3, MUL VL] : ld1sw +0x30(%x24)[4byte] %p6/z -> %z23.d +a484bf59 : ld1sw z25.d, p7/Z, [x26, #4, MUL VL] : ld1sw +0x40(%x26)[4byte] %p7/z -> %z25.d +a485bf9b : ld1sw z27.d, p7/Z, [x28, #5, MUL VL] : ld1sw +0x50(%x28)[4byte] %p7/z -> %z27.d +a487bfff : ld1sw z31.d, p7/Z, [sp, #7, MUL VL] : ld1sw +0x70(%sp)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [, .D, ] (LD1SW-Z.P.BZ-D.x32.unscaled) -c5000000 : ld1sw z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sw (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d -c5050482 : ld1sw z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sw (%x4,%z5.d,uxtw)[16byte] %p1/z -> %z2.d -c50708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sw (%x6,%z7.d,uxtw)[16byte] %p2/z -> %z4.d -c5090906 : ld1sw z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sw (%x8,%z9.d,uxtw)[16byte] %p2/z -> %z6.d -c50b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sw (%x10,%z11.d,uxtw)[16byte] %p3/z -> %z8.d -c50d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sw (%x11,%z13.d,uxtw)[16byte] %p3/z -> %z10.d -c50f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sw (%x13,%z15.d,uxtw)[16byte] %p4/z -> %z12.d -c51111ee : ld1sw z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sw (%x15,%z17.d,uxtw)[16byte] %p4/z -> %z14.d -c5131630 : ld1sw z16.d, 
p5/Z, [x17, z19.d, UXTW] : ld1sw (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d -c5141671 : ld1sw z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sw (%x19,%z20.d,uxtw)[16byte] %p5/z -> %z17.d -c51616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sw (%x21,%z22.d,uxtw)[16byte] %p5/z -> %z19.d -c5181af5 : ld1sw z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sw (%x23,%z24.d,uxtw)[16byte] %p6/z -> %z21.d -c51a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sw (%x24,%z26.d,uxtw)[16byte] %p6/z -> %z23.d -c51c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sw (%x26,%z28.d,uxtw)[16byte] %p7/z -> %z25.d -c51e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sw (%x28,%z30.d,uxtw)[16byte] %p7/z -> %z27.d -c51f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sw (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d -c5400000 : ld1sw z0.d, p0/Z, [x0, z0.d, SXTW] : ld1sw (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d -c5450482 : ld1sw z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sw (%x4,%z5.d,sxtw)[16byte] %p1/z -> %z2.d -c54708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sw (%x6,%z7.d,sxtw)[16byte] %p2/z -> %z4.d -c5490906 : ld1sw z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sw (%x8,%z9.d,sxtw)[16byte] %p2/z -> %z6.d -c54b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sw (%x10,%z11.d,sxtw)[16byte] %p3/z -> %z8.d -c54d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sw (%x11,%z13.d,sxtw)[16byte] %p3/z -> %z10.d -c54f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sw (%x13,%z15.d,sxtw)[16byte] %p4/z -> %z12.d -c55111ee : ld1sw z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sw (%x15,%z17.d,sxtw)[16byte] %p4/z -> %z14.d -c5531630 : ld1sw z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sw (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d -c5541671 : ld1sw z17.d, p5/Z, [x19, z20.d, SXTW] : ld1sw (%x19,%z20.d,sxtw)[16byte] %p5/z -> %z17.d -c55616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sw (%x21,%z22.d,sxtw)[16byte] %p5/z -> %z19.d -c5581af5 : ld1sw z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sw (%x23,%z24.d,sxtw)[16byte] %p6/z -> %z21.d -c55a1b17 : ld1sw z23.d, p6/Z, 
[x24, z26.d, SXTW] : ld1sw (%x24,%z26.d,sxtw)[16byte] %p6/z -> %z23.d -c55c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sw (%x26,%z28.d,sxtw)[16byte] %p7/z -> %z25.d -c55e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sw (%x28,%z30.d,sxtw)[16byte] %p7/z -> %z27.d -c55f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sw (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d +c5000000 : ld1sw z0.d, p0/Z, [x0, z0.d, UXTW] : ld1sw (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d +c5050482 : ld1sw z2.d, p1/Z, [x4, z5.d, UXTW] : ld1sw (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d +c50708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, UXTW] : ld1sw (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d +c5090906 : ld1sw z6.d, p2/Z, [x8, z9.d, UXTW] : ld1sw (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d +c50b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, UXTW] : ld1sw (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d +c50d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, UXTW] : ld1sw (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d +c50f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, UXTW] : ld1sw (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d +c51111ee : ld1sw z14.d, p4/Z, [x15, z17.d, UXTW] : ld1sw (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d +c5131630 : ld1sw z16.d, p5/Z, [x17, z19.d, UXTW] : ld1sw (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d +c5141671 : ld1sw z17.d, p5/Z, [x19, z20.d, UXTW] : ld1sw (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d +c51616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, UXTW] : ld1sw (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d +c5181af5 : ld1sw z21.d, p6/Z, [x23, z24.d, UXTW] : ld1sw (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d +c51a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, UXTW] : ld1sw (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d +c51c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, UXTW] : ld1sw (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d +c51e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, UXTW] : ld1sw (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d +c51f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, UXTW] : ld1sw (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d +c5400000 : ld1sw z0.d, p0/Z, [x0, z0.d, SXTW] : ld1sw 
(%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d +c5450482 : ld1sw z2.d, p1/Z, [x4, z5.d, SXTW] : ld1sw (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d +c54708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, SXTW] : ld1sw (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d +c5490906 : ld1sw z6.d, p2/Z, [x8, z9.d, SXTW] : ld1sw (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d +c54b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, SXTW] : ld1sw (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d +c54d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, SXTW] : ld1sw (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d +c54f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, SXTW] : ld1sw (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d +c55111ee : ld1sw z14.d, p4/Z, [x15, z17.d, SXTW] : ld1sw (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d +c5531630 : ld1sw z16.d, p5/Z, [x17, z19.d, SXTW] : ld1sw (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d +c5541671 : ld1sw z17.d, p5/Z, [x19, z20.d, SXTW] : ld1sw (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d +c55616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, SXTW] : ld1sw (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d +c5581af5 : ld1sw z21.d, p6/Z, [x23, z24.d, SXTW] : ld1sw (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d +c55a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, SXTW] : ld1sw (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d +c55c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, SXTW] : ld1sw (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d +c55e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, SXTW] : ld1sw (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d +c55f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, SXTW] : ld1sw (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [, .D, #2] (LD1SW-Z.P.BZ-D.x32.scaled) -c5200000 : ld1sw z0.d, p0/Z, [x0, z0.d, UXTW #2] : ld1sw (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d -c5250482 : ld1sw z2.d, p1/Z, [x4, z5.d, UXTW #2] : ld1sw (%x4,%z5.d,uxtw #2)[16byte] %p1/z -> %z2.d -c52708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, UXTW #2] : ld1sw (%x6,%z7.d,uxtw #2)[16byte] %p2/z -> %z4.d -c5290906 : ld1sw z6.d, p2/Z, [x8, z9.d, UXTW #2] : ld1sw (%x8,%z9.d,uxtw #2)[16byte] %p2/z -> %z6.d -c52b0d48 : ld1sw 
z8.d, p3/Z, [x10, z11.d, UXTW #2] : ld1sw (%x10,%z11.d,uxtw #2)[16byte] %p3/z -> %z8.d -c52d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, UXTW #2] : ld1sw (%x11,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d -c52f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, UXTW #2] : ld1sw (%x13,%z15.d,uxtw #2)[16byte] %p4/z -> %z12.d -c53111ee : ld1sw z14.d, p4/Z, [x15, z17.d, UXTW #2] : ld1sw (%x15,%z17.d,uxtw #2)[16byte] %p4/z -> %z14.d -c5331630 : ld1sw z16.d, p5/Z, [x17, z19.d, UXTW #2] : ld1sw (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d -c5341671 : ld1sw z17.d, p5/Z, [x19, z20.d, UXTW #2] : ld1sw (%x19,%z20.d,uxtw #2)[16byte] %p5/z -> %z17.d -c53616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, UXTW #2] : ld1sw (%x21,%z22.d,uxtw #2)[16byte] %p5/z -> %z19.d -c5381af5 : ld1sw z21.d, p6/Z, [x23, z24.d, UXTW #2] : ld1sw (%x23,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d -c53a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, UXTW #2] : ld1sw (%x24,%z26.d,uxtw #2)[16byte] %p6/z -> %z23.d -c53c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, UXTW #2] : ld1sw (%x26,%z28.d,uxtw #2)[16byte] %p7/z -> %z25.d -c53e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, UXTW #2] : ld1sw (%x28,%z30.d,uxtw #2)[16byte] %p7/z -> %z27.d -c53f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, UXTW #2] : ld1sw (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d -c5600000 : ld1sw z0.d, p0/Z, [x0, z0.d, SXTW #2] : ld1sw (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d -c5650482 : ld1sw z2.d, p1/Z, [x4, z5.d, SXTW #2] : ld1sw (%x4,%z5.d,sxtw #2)[16byte] %p1/z -> %z2.d -c56708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, SXTW #2] : ld1sw (%x6,%z7.d,sxtw #2)[16byte] %p2/z -> %z4.d -c5690906 : ld1sw z6.d, p2/Z, [x8, z9.d, SXTW #2] : ld1sw (%x8,%z9.d,sxtw #2)[16byte] %p2/z -> %z6.d -c56b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, SXTW #2] : ld1sw (%x10,%z11.d,sxtw #2)[16byte] %p3/z -> %z8.d -c56d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, SXTW #2] : ld1sw (%x11,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d -c56f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, SXTW #2] : ld1sw (%x13,%z15.d,sxtw #2)[16byte] %p4/z -> %z12.d -c57111ee : 
ld1sw z14.d, p4/Z, [x15, z17.d, SXTW #2] : ld1sw (%x15,%z17.d,sxtw #2)[16byte] %p4/z -> %z14.d -c5731630 : ld1sw z16.d, p5/Z, [x17, z19.d, SXTW #2] : ld1sw (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d -c5741671 : ld1sw z17.d, p5/Z, [x19, z20.d, SXTW #2] : ld1sw (%x19,%z20.d,sxtw #2)[16byte] %p5/z -> %z17.d -c57616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, SXTW #2] : ld1sw (%x21,%z22.d,sxtw #2)[16byte] %p5/z -> %z19.d -c5781af5 : ld1sw z21.d, p6/Z, [x23, z24.d, SXTW #2] : ld1sw (%x23,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d -c57a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, SXTW #2] : ld1sw (%x24,%z26.d,sxtw #2)[16byte] %p6/z -> %z23.d -c57c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, SXTW #2] : ld1sw (%x26,%z28.d,sxtw #2)[16byte] %p7/z -> %z25.d -c57e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, SXTW #2] : ld1sw (%x28,%z30.d,sxtw #2)[16byte] %p7/z -> %z27.d -c57f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, SXTW #2] : ld1sw (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d +c5200000 : ld1sw z0.d, p0/Z, [x0, z0.d, UXTW #2] : ld1sw (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d +c5250482 : ld1sw z2.d, p1/Z, [x4, z5.d, UXTW #2] : ld1sw (%x4,%z5.d,uxtw #2)[4byte] %p1/z -> %z2.d +c52708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, UXTW #2] : ld1sw (%x6,%z7.d,uxtw #2)[4byte] %p2/z -> %z4.d +c5290906 : ld1sw z6.d, p2/Z, [x8, z9.d, UXTW #2] : ld1sw (%x8,%z9.d,uxtw #2)[4byte] %p2/z -> %z6.d +c52b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, UXTW #2] : ld1sw (%x10,%z11.d,uxtw #2)[4byte] %p3/z -> %z8.d +c52d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, UXTW #2] : ld1sw (%x11,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d +c52f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, UXTW #2] : ld1sw (%x13,%z15.d,uxtw #2)[4byte] %p4/z -> %z12.d +c53111ee : ld1sw z14.d, p4/Z, [x15, z17.d, UXTW #2] : ld1sw (%x15,%z17.d,uxtw #2)[4byte] %p4/z -> %z14.d +c5331630 : ld1sw z16.d, p5/Z, [x17, z19.d, UXTW #2] : ld1sw (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d +c5341671 : ld1sw z17.d, p5/Z, [x19, z20.d, UXTW #2] : ld1sw (%x19,%z20.d,uxtw #2)[4byte] %p5/z -> %z17.d +c53616b3 : 
ld1sw z19.d, p5/Z, [x21, z22.d, UXTW #2] : ld1sw (%x21,%z22.d,uxtw #2)[4byte] %p5/z -> %z19.d +c5381af5 : ld1sw z21.d, p6/Z, [x23, z24.d, UXTW #2] : ld1sw (%x23,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d +c53a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, UXTW #2] : ld1sw (%x24,%z26.d,uxtw #2)[4byte] %p6/z -> %z23.d +c53c1f59 : ld1sw z25.d, p7/Z, [x26, z28.d, UXTW #2] : ld1sw (%x26,%z28.d,uxtw #2)[4byte] %p7/z -> %z25.d +c53e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, UXTW #2] : ld1sw (%x28,%z30.d,uxtw #2)[4byte] %p7/z -> %z27.d +c53f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, UXTW #2] : ld1sw (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d +c5600000 : ld1sw z0.d, p0/Z, [x0, z0.d, SXTW #2] : ld1sw (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d +c5650482 : ld1sw z2.d, p1/Z, [x4, z5.d, SXTW #2] : ld1sw (%x4,%z5.d,sxtw #2)[4byte] %p1/z -> %z2.d +c56708c4 : ld1sw z4.d, p2/Z, [x6, z7.d, SXTW #2] : ld1sw (%x6,%z7.d,sxtw #2)[4byte] %p2/z -> %z4.d +c5690906 : ld1sw z6.d, p2/Z, [x8, z9.d, SXTW #2] : ld1sw (%x8,%z9.d,sxtw #2)[4byte] %p2/z -> %z6.d +c56b0d48 : ld1sw z8.d, p3/Z, [x10, z11.d, SXTW #2] : ld1sw (%x10,%z11.d,sxtw #2)[4byte] %p3/z -> %z8.d +c56d0d6a : ld1sw z10.d, p3/Z, [x11, z13.d, SXTW #2] : ld1sw (%x11,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d +c56f11ac : ld1sw z12.d, p4/Z, [x13, z15.d, SXTW #2] : ld1sw (%x13,%z15.d,sxtw #2)[4byte] %p4/z -> %z12.d +c57111ee : ld1sw z14.d, p4/Z, [x15, z17.d, SXTW #2] : ld1sw (%x15,%z17.d,sxtw #2)[4byte] %p4/z -> %z14.d +c5731630 : ld1sw z16.d, p5/Z, [x17, z19.d, SXTW #2] : ld1sw (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d +c5741671 : ld1sw z17.d, p5/Z, [x19, z20.d, SXTW #2] : ld1sw (%x19,%z20.d,sxtw #2)[4byte] %p5/z -> %z17.d +c57616b3 : ld1sw z19.d, p5/Z, [x21, z22.d, SXTW #2] : ld1sw (%x21,%z22.d,sxtw #2)[4byte] %p5/z -> %z19.d +c5781af5 : ld1sw z21.d, p6/Z, [x23, z24.d, SXTW #2] : ld1sw (%x23,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d +c57a1b17 : ld1sw z23.d, p6/Z, [x24, z26.d, SXTW #2] : ld1sw (%x24,%z26.d,sxtw #2)[4byte] %p6/z -> %z23.d +c57c1f59 : ld1sw z25.d, 
p7/Z, [x26, z28.d, SXTW #2] : ld1sw (%x26,%z28.d,sxtw #2)[4byte] %p7/z -> %z25.d +c57e1f9b : ld1sw z27.d, p7/Z, [x28, z30.d, SXTW #2] : ld1sw (%x28,%z30.d,sxtw #2)[4byte] %p7/z -> %z27.d +c57f1fff : ld1sw z31.d, p7/Z, [sp, z31.d, SXTW #2] : ld1sw (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [.D{, #}] (LD1SW-Z.P.AI-D) -c5208000 : ld1sw z0.d, p0/Z, [z0.d, #0] : ld1sw (%z0.d)[16byte] %p0/z -> %z0.d -c5228482 : ld1sw z2.d, p1/Z, [z4.d, #8] : ld1sw +0x08(%z4.d)[16byte] %p1/z -> %z2.d -c52488c4 : ld1sw z4.d, p2/Z, [z6.d, #16] : ld1sw +0x10(%z6.d)[16byte] %p2/z -> %z4.d -c5268906 : ld1sw z6.d, p2/Z, [z8.d, #24] : ld1sw +0x18(%z8.d)[16byte] %p2/z -> %z6.d -c5288d48 : ld1sw z8.d, p3/Z, [z10.d, #32] : ld1sw +0x20(%z10.d)[16byte] %p3/z -> %z8.d -c52a8d8a : ld1sw z10.d, p3/Z, [z12.d, #40] : ld1sw +0x28(%z12.d)[16byte] %p3/z -> %z10.d -c52c91cc : ld1sw z12.d, p4/Z, [z14.d, #48] : ld1sw +0x30(%z14.d)[16byte] %p4/z -> %z12.d -c52e920e : ld1sw z14.d, p4/Z, [z16.d, #56] : ld1sw +0x38(%z16.d)[16byte] %p4/z -> %z14.d -c5309650 : ld1sw z16.d, p5/Z, [z18.d, #64] : ld1sw +0x40(%z18.d)[16byte] %p5/z -> %z16.d -c5319671 : ld1sw z17.d, p5/Z, [z19.d, #68] : ld1sw +0x44(%z19.d)[16byte] %p5/z -> %z17.d -c53396b3 : ld1sw z19.d, p5/Z, [z21.d, #76] : ld1sw +0x4c(%z21.d)[16byte] %p5/z -> %z19.d -c5359af5 : ld1sw z21.d, p6/Z, [z23.d, #84] : ld1sw +0x54(%z23.d)[16byte] %p6/z -> %z21.d -c5379b37 : ld1sw z23.d, p6/Z, [z25.d, #92] : ld1sw +0x5c(%z25.d)[16byte] %p6/z -> %z23.d -c5399f79 : ld1sw z25.d, p7/Z, [z27.d, #100] : ld1sw +0x64(%z27.d)[16byte] %p7/z -> %z25.d -c53b9fbb : ld1sw z27.d, p7/Z, [z29.d, #108] : ld1sw +0x6c(%z29.d)[16byte] %p7/z -> %z27.d -c53f9fff : ld1sw z31.d, p7/Z, [z31.d, #124] : ld1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +c5208000 : ld1sw z0.d, p0/Z, [z0.d, #0] : ld1sw (%z0.d)[4byte] %p0/z -> %z0.d +c5228482 : ld1sw z2.d, p1/Z, [z4.d, #8] : ld1sw +0x08(%z4.d)[4byte] %p1/z -> %z2.d +c52488c4 : ld1sw z4.d, p2/Z, [z6.d, #16] : ld1sw +0x10(%z6.d)[4byte] %p2/z -> 
%z4.d +c5268906 : ld1sw z6.d, p2/Z, [z8.d, #24] : ld1sw +0x18(%z8.d)[4byte] %p2/z -> %z6.d +c5288d48 : ld1sw z8.d, p3/Z, [z10.d, #32] : ld1sw +0x20(%z10.d)[4byte] %p3/z -> %z8.d +c52a8d8a : ld1sw z10.d, p3/Z, [z12.d, #40] : ld1sw +0x28(%z12.d)[4byte] %p3/z -> %z10.d +c52c91cc : ld1sw z12.d, p4/Z, [z14.d, #48] : ld1sw +0x30(%z14.d)[4byte] %p4/z -> %z12.d +c52e920e : ld1sw z14.d, p4/Z, [z16.d, #56] : ld1sw +0x38(%z16.d)[4byte] %p4/z -> %z14.d +c5309650 : ld1sw z16.d, p5/Z, [z18.d, #64] : ld1sw +0x40(%z18.d)[4byte] %p5/z -> %z16.d +c5319671 : ld1sw z17.d, p5/Z, [z19.d, #68] : ld1sw +0x44(%z19.d)[4byte] %p5/z -> %z17.d +c53396b3 : ld1sw z19.d, p5/Z, [z21.d, #76] : ld1sw +0x4c(%z21.d)[4byte] %p5/z -> %z19.d +c5359af5 : ld1sw z21.d, p6/Z, [z23.d, #84] : ld1sw +0x54(%z23.d)[4byte] %p6/z -> %z21.d +c5379b37 : ld1sw z23.d, p6/Z, [z25.d, #92] : ld1sw +0x5c(%z25.d)[4byte] %p6/z -> %z23.d +c5399f79 : ld1sw z25.d, p7/Z, [z27.d, #100] : ld1sw +0x64(%z27.d)[4byte] %p7/z -> %z25.d +c53b9fbb : ld1sw z27.d, p7/Z, [z29.d, #108] : ld1sw +0x6c(%z29.d)[4byte] %p7/z -> %z27.d +c53f9fff : ld1sw z31.d, p7/Z, [z31.d, #124] : ld1sw +0x7c(%z31.d)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [, .D] (LD1SW-Z.P.BZ-D.64.unscaled) -c5408000 : ld1sw z0.d, p0/Z, [x0, z0.d] : ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d -c5458482 : ld1sw z2.d, p1/Z, [x4, z5.d] : ld1sw (%x4,%z5.d)[16byte] %p1/z -> %z2.d -c54788c4 : ld1sw z4.d, p2/Z, [x6, z7.d] : ld1sw (%x6,%z7.d)[16byte] %p2/z -> %z4.d -c5498906 : ld1sw z6.d, p2/Z, [x8, z9.d] : ld1sw (%x8,%z9.d)[16byte] %p2/z -> %z6.d -c54b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d] : ld1sw (%x10,%z11.d)[16byte] %p3/z -> %z8.d -c54d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d] : ld1sw (%x11,%z13.d)[16byte] %p3/z -> %z10.d -c54f91ac : ld1sw z12.d, p4/Z, [x13, z15.d] : ld1sw (%x13,%z15.d)[16byte] %p4/z -> %z12.d -c55191ee : ld1sw z14.d, p4/Z, [x15, z17.d] : ld1sw (%x15,%z17.d)[16byte] %p4/z -> %z14.d -c5539630 : ld1sw z16.d, p5/Z, [x17, z19.d] : ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d 
-c5549671 : ld1sw z17.d, p5/Z, [x19, z20.d] : ld1sw (%x19,%z20.d)[16byte] %p5/z -> %z17.d -c55696b3 : ld1sw z19.d, p5/Z, [x21, z22.d] : ld1sw (%x21,%z22.d)[16byte] %p5/z -> %z19.d -c5589af5 : ld1sw z21.d, p6/Z, [x23, z24.d] : ld1sw (%x23,%z24.d)[16byte] %p6/z -> %z21.d -c55a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d] : ld1sw (%x24,%z26.d)[16byte] %p6/z -> %z23.d -c55c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d] : ld1sw (%x26,%z28.d)[16byte] %p7/z -> %z25.d -c55e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d] : ld1sw (%x28,%z30.d)[16byte] %p7/z -> %z27.d -c55f9fff : ld1sw z31.d, p7/Z, [sp, z31.d] : ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d +c5408000 : ld1sw z0.d, p0/Z, [x0, z0.d] : ld1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c5458482 : ld1sw z2.d, p1/Z, [x4, z5.d] : ld1sw (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c54788c4 : ld1sw z4.d, p2/Z, [x6, z7.d] : ld1sw (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c5498906 : ld1sw z6.d, p2/Z, [x8, z9.d] : ld1sw (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c54b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d] : ld1sw (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c54d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d] : ld1sw (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c54f91ac : ld1sw z12.d, p4/Z, [x13, z15.d] : ld1sw (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c55191ee : ld1sw z14.d, p4/Z, [x15, z17.d] : ld1sw (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c5539630 : ld1sw z16.d, p5/Z, [x17, z19.d] : ld1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c5549671 : ld1sw z17.d, p5/Z, [x19, z20.d] : ld1sw (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c55696b3 : ld1sw z19.d, p5/Z, [x21, z22.d] : ld1sw (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c5589af5 : ld1sw z21.d, p6/Z, [x23, z24.d] : ld1sw (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c55a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d] : ld1sw (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c55c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d] : ld1sw (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c55e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d] : ld1sw (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c55f9fff : ld1sw z31.d, p7/Z, [sp, z31.d] : ld1sw 
(%sp,%z31.d)[4byte] %p7/z -> %z31.d # LD1SW { .D }, /Z, [, .D, LSL #2] (LD1SW-Z.P.BZ-D.64.scaled) -c5608000 : ld1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d -c5658482 : ld1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ld1sw (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d -c56788c4 : ld1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : ld1sw (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d -c5698906 : ld1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1sw (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d -c56b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1sw (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d -c56d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1sw (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d -c56f91ac : ld1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1sw (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d -c57191ee : ld1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1sw (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d -c5739630 : ld1sw z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d -c5749671 : ld1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1sw (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d -c57696b3 : ld1sw z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1sw (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d -c5789af5 : ld1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1sw (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d -c57a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1sw (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d -c57c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1sw (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d -c57e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1sw (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d -c57f9fff : ld1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d +c5608000 : ld1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1sw (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d +c5658482 : ld1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ld1sw (%x4,%z5.d,lsl #2)[4byte] %p1/z -> %z2.d +c56788c4 : ld1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : 
ld1sw (%x6,%z7.d,lsl #2)[4byte] %p2/z -> %z4.d +c5698906 : ld1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1sw (%x8,%z9.d,lsl #2)[4byte] %p2/z -> %z6.d +c56b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1sw (%x10,%z11.d,lsl #2)[4byte] %p3/z -> %z8.d +c56d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1sw (%x11,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d +c56f91ac : ld1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1sw (%x13,%z15.d,lsl #2)[4byte] %p4/z -> %z12.d +c57191ee : ld1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1sw (%x15,%z17.d,lsl #2)[4byte] %p4/z -> %z14.d +c5739630 : ld1sw z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1sw (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d +c5749671 : ld1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1sw (%x19,%z20.d,lsl #2)[4byte] %p5/z -> %z17.d +c57696b3 : ld1sw z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1sw (%x21,%z22.d,lsl #2)[4byte] %p5/z -> %z19.d +c5789af5 : ld1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1sw (%x23,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d +c57a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1sw (%x24,%z26.d,lsl #2)[4byte] %p6/z -> %z23.d +c57c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1sw (%x26,%z28.d,lsl #2)[4byte] %p7/z -> %z25.d +c57e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1sw (%x28,%z30.d,lsl #2)[4byte] %p7/z -> %z27.d +c57f9fff : ld1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1sw (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d # LD1W { .S }, /Z, [, .S, ] (LD1W-Z.P.BZ-S.x32.unscaled) -85004000 : ld1w z0.s, p0/Z, [x0, z0.s, UXTW] : ld1w (%x0,%z0.s,uxtw)[32byte] %p0/z -> %z0.s -85054482 : ld1w z2.s, p1/Z, [x4, z5.s, UXTW] : ld1w (%x4,%z5.s,uxtw)[32byte] %p1/z -> %z2.s -850748c4 : ld1w z4.s, p2/Z, [x6, z7.s, UXTW] : ld1w (%x6,%z7.s,uxtw)[32byte] %p2/z -> %z4.s -85094906 : ld1w z6.s, p2/Z, [x8, z9.s, UXTW] : ld1w (%x8,%z9.s,uxtw)[32byte] %p2/z -> %z6.s -850b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, UXTW] : ld1w (%x10,%z11.s,uxtw)[32byte] %p3/z -> %z8.s -850d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, UXTW] : ld1w (%x11,%z13.s,uxtw)[32byte] 
%p3/z -> %z10.s -850f51ac : ld1w z12.s, p4/Z, [x13, z15.s, UXTW] : ld1w (%x13,%z15.s,uxtw)[32byte] %p4/z -> %z12.s -851151ee : ld1w z14.s, p4/Z, [x15, z17.s, UXTW] : ld1w (%x15,%z17.s,uxtw)[32byte] %p4/z -> %z14.s -85135630 : ld1w z16.s, p5/Z, [x17, z19.s, UXTW] : ld1w (%x17,%z19.s,uxtw)[32byte] %p5/z -> %z16.s -85145671 : ld1w z17.s, p5/Z, [x19, z20.s, UXTW] : ld1w (%x19,%z20.s,uxtw)[32byte] %p5/z -> %z17.s -851656b3 : ld1w z19.s, p5/Z, [x21, z22.s, UXTW] : ld1w (%x21,%z22.s,uxtw)[32byte] %p5/z -> %z19.s -85185af5 : ld1w z21.s, p6/Z, [x23, z24.s, UXTW] : ld1w (%x23,%z24.s,uxtw)[32byte] %p6/z -> %z21.s -851a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, UXTW] : ld1w (%x24,%z26.s,uxtw)[32byte] %p6/z -> %z23.s -851c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, UXTW] : ld1w (%x26,%z28.s,uxtw)[32byte] %p7/z -> %z25.s -851e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, UXTW] : ld1w (%x28,%z30.s,uxtw)[32byte] %p7/z -> %z27.s -851f5fff : ld1w z31.s, p7/Z, [sp, z31.s, UXTW] : ld1w (%sp,%z31.s,uxtw)[32byte] %p7/z -> %z31.s -85404000 : ld1w z0.s, p0/Z, [x0, z0.s, SXTW] : ld1w (%x0,%z0.s,sxtw)[32byte] %p0/z -> %z0.s -85454482 : ld1w z2.s, p1/Z, [x4, z5.s, SXTW] : ld1w (%x4,%z5.s,sxtw)[32byte] %p1/z -> %z2.s -854748c4 : ld1w z4.s, p2/Z, [x6, z7.s, SXTW] : ld1w (%x6,%z7.s,sxtw)[32byte] %p2/z -> %z4.s -85494906 : ld1w z6.s, p2/Z, [x8, z9.s, SXTW] : ld1w (%x8,%z9.s,sxtw)[32byte] %p2/z -> %z6.s -854b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, SXTW] : ld1w (%x10,%z11.s,sxtw)[32byte] %p3/z -> %z8.s -854d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, SXTW] : ld1w (%x11,%z13.s,sxtw)[32byte] %p3/z -> %z10.s -854f51ac : ld1w z12.s, p4/Z, [x13, z15.s, SXTW] : ld1w (%x13,%z15.s,sxtw)[32byte] %p4/z -> %z12.s -855151ee : ld1w z14.s, p4/Z, [x15, z17.s, SXTW] : ld1w (%x15,%z17.s,sxtw)[32byte] %p4/z -> %z14.s -85535630 : ld1w z16.s, p5/Z, [x17, z19.s, SXTW] : ld1w (%x17,%z19.s,sxtw)[32byte] %p5/z -> %z16.s -85545671 : ld1w z17.s, p5/Z, [x19, z20.s, SXTW] : ld1w (%x19,%z20.s,sxtw)[32byte] %p5/z -> %z17.s -855656b3 : ld1w z19.s, p5/Z, 
[x21, z22.s, SXTW] : ld1w (%x21,%z22.s,sxtw)[32byte] %p5/z -> %z19.s -85585af5 : ld1w z21.s, p6/Z, [x23, z24.s, SXTW] : ld1w (%x23,%z24.s,sxtw)[32byte] %p6/z -> %z21.s -855a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, SXTW] : ld1w (%x24,%z26.s,sxtw)[32byte] %p6/z -> %z23.s -855c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, SXTW] : ld1w (%x26,%z28.s,sxtw)[32byte] %p7/z -> %z25.s -855e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, SXTW] : ld1w (%x28,%z30.s,sxtw)[32byte] %p7/z -> %z27.s -855f5fff : ld1w z31.s, p7/Z, [sp, z31.s, SXTW] : ld1w (%sp,%z31.s,sxtw)[32byte] %p7/z -> %z31.s +85004000 : ld1w z0.s, p0/Z, [x0, z0.s, UXTW] : ld1w (%x0,%z0.s,uxtw)[4byte] %p0/z -> %z0.s +85054482 : ld1w z2.s, p1/Z, [x4, z5.s, UXTW] : ld1w (%x4,%z5.s,uxtw)[4byte] %p1/z -> %z2.s +850748c4 : ld1w z4.s, p2/Z, [x6, z7.s, UXTW] : ld1w (%x6,%z7.s,uxtw)[4byte] %p2/z -> %z4.s +85094906 : ld1w z6.s, p2/Z, [x8, z9.s, UXTW] : ld1w (%x8,%z9.s,uxtw)[4byte] %p2/z -> %z6.s +850b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, UXTW] : ld1w (%x10,%z11.s,uxtw)[4byte] %p3/z -> %z8.s +850d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, UXTW] : ld1w (%x11,%z13.s,uxtw)[4byte] %p3/z -> %z10.s +850f51ac : ld1w z12.s, p4/Z, [x13, z15.s, UXTW] : ld1w (%x13,%z15.s,uxtw)[4byte] %p4/z -> %z12.s +851151ee : ld1w z14.s, p4/Z, [x15, z17.s, UXTW] : ld1w (%x15,%z17.s,uxtw)[4byte] %p4/z -> %z14.s +85135630 : ld1w z16.s, p5/Z, [x17, z19.s, UXTW] : ld1w (%x17,%z19.s,uxtw)[4byte] %p5/z -> %z16.s +85145671 : ld1w z17.s, p5/Z, [x19, z20.s, UXTW] : ld1w (%x19,%z20.s,uxtw)[4byte] %p5/z -> %z17.s +851656b3 : ld1w z19.s, p5/Z, [x21, z22.s, UXTW] : ld1w (%x21,%z22.s,uxtw)[4byte] %p5/z -> %z19.s +85185af5 : ld1w z21.s, p6/Z, [x23, z24.s, UXTW] : ld1w (%x23,%z24.s,uxtw)[4byte] %p6/z -> %z21.s +851a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, UXTW] : ld1w (%x24,%z26.s,uxtw)[4byte] %p6/z -> %z23.s +851c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, UXTW] : ld1w (%x26,%z28.s,uxtw)[4byte] %p7/z -> %z25.s +851e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, UXTW] : ld1w (%x28,%z30.s,uxtw)[4byte] %p7/z -> 
%z27.s +851f5fff : ld1w z31.s, p7/Z, [sp, z31.s, UXTW] : ld1w (%sp,%z31.s,uxtw)[4byte] %p7/z -> %z31.s +85404000 : ld1w z0.s, p0/Z, [x0, z0.s, SXTW] : ld1w (%x0,%z0.s,sxtw)[4byte] %p0/z -> %z0.s +85454482 : ld1w z2.s, p1/Z, [x4, z5.s, SXTW] : ld1w (%x4,%z5.s,sxtw)[4byte] %p1/z -> %z2.s +854748c4 : ld1w z4.s, p2/Z, [x6, z7.s, SXTW] : ld1w (%x6,%z7.s,sxtw)[4byte] %p2/z -> %z4.s +85494906 : ld1w z6.s, p2/Z, [x8, z9.s, SXTW] : ld1w (%x8,%z9.s,sxtw)[4byte] %p2/z -> %z6.s +854b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, SXTW] : ld1w (%x10,%z11.s,sxtw)[4byte] %p3/z -> %z8.s +854d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, SXTW] : ld1w (%x11,%z13.s,sxtw)[4byte] %p3/z -> %z10.s +854f51ac : ld1w z12.s, p4/Z, [x13, z15.s, SXTW] : ld1w (%x13,%z15.s,sxtw)[4byte] %p4/z -> %z12.s +855151ee : ld1w z14.s, p4/Z, [x15, z17.s, SXTW] : ld1w (%x15,%z17.s,sxtw)[4byte] %p4/z -> %z14.s +85535630 : ld1w z16.s, p5/Z, [x17, z19.s, SXTW] : ld1w (%x17,%z19.s,sxtw)[4byte] %p5/z -> %z16.s +85545671 : ld1w z17.s, p5/Z, [x19, z20.s, SXTW] : ld1w (%x19,%z20.s,sxtw)[4byte] %p5/z -> %z17.s +855656b3 : ld1w z19.s, p5/Z, [x21, z22.s, SXTW] : ld1w (%x21,%z22.s,sxtw)[4byte] %p5/z -> %z19.s +85585af5 : ld1w z21.s, p6/Z, [x23, z24.s, SXTW] : ld1w (%x23,%z24.s,sxtw)[4byte] %p6/z -> %z21.s +855a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, SXTW] : ld1w (%x24,%z26.s,sxtw)[4byte] %p6/z -> %z23.s +855c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, SXTW] : ld1w (%x26,%z28.s,sxtw)[4byte] %p7/z -> %z25.s +855e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, SXTW] : ld1w (%x28,%z30.s,sxtw)[4byte] %p7/z -> %z27.s +855f5fff : ld1w z31.s, p7/Z, [sp, z31.s, SXTW] : ld1w (%sp,%z31.s,sxtw)[4byte] %p7/z -> %z31.s # LD1W { .S }, /Z, [, .S, #2] (LD1W-Z.P.BZ-S.x32.scaled) -85204000 : ld1w z0.s, p0/Z, [x0, z0.s, UXTW #2] : ld1w (%x0,%z0.s,uxtw #2)[32byte] %p0/z -> %z0.s -85254482 : ld1w z2.s, p1/Z, [x4, z5.s, UXTW #2] : ld1w (%x4,%z5.s,uxtw #2)[32byte] %p1/z -> %z2.s -852748c4 : ld1w z4.s, p2/Z, [x6, z7.s, UXTW #2] : ld1w (%x6,%z7.s,uxtw #2)[32byte] %p2/z -> %z4.s 
-85294906 : ld1w z6.s, p2/Z, [x8, z9.s, UXTW #2] : ld1w (%x8,%z9.s,uxtw #2)[32byte] %p2/z -> %z6.s -852b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, UXTW #2] : ld1w (%x10,%z11.s,uxtw #2)[32byte] %p3/z -> %z8.s -852d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, UXTW #2] : ld1w (%x11,%z13.s,uxtw #2)[32byte] %p3/z -> %z10.s -852f51ac : ld1w z12.s, p4/Z, [x13, z15.s, UXTW #2] : ld1w (%x13,%z15.s,uxtw #2)[32byte] %p4/z -> %z12.s -853151ee : ld1w z14.s, p4/Z, [x15, z17.s, UXTW #2] : ld1w (%x15,%z17.s,uxtw #2)[32byte] %p4/z -> %z14.s -85335630 : ld1w z16.s, p5/Z, [x17, z19.s, UXTW #2] : ld1w (%x17,%z19.s,uxtw #2)[32byte] %p5/z -> %z16.s -85345671 : ld1w z17.s, p5/Z, [x19, z20.s, UXTW #2] : ld1w (%x19,%z20.s,uxtw #2)[32byte] %p5/z -> %z17.s -853656b3 : ld1w z19.s, p5/Z, [x21, z22.s, UXTW #2] : ld1w (%x21,%z22.s,uxtw #2)[32byte] %p5/z -> %z19.s -85385af5 : ld1w z21.s, p6/Z, [x23, z24.s, UXTW #2] : ld1w (%x23,%z24.s,uxtw #2)[32byte] %p6/z -> %z21.s -853a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, UXTW #2] : ld1w (%x24,%z26.s,uxtw #2)[32byte] %p6/z -> %z23.s -853c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, UXTW #2] : ld1w (%x26,%z28.s,uxtw #2)[32byte] %p7/z -> %z25.s -853e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, UXTW #2] : ld1w (%x28,%z30.s,uxtw #2)[32byte] %p7/z -> %z27.s -853f5fff : ld1w z31.s, p7/Z, [sp, z31.s, UXTW #2] : ld1w (%sp,%z31.s,uxtw #2)[32byte] %p7/z -> %z31.s -85604000 : ld1w z0.s, p0/Z, [x0, z0.s, SXTW #2] : ld1w (%x0,%z0.s,sxtw #2)[32byte] %p0/z -> %z0.s -85654482 : ld1w z2.s, p1/Z, [x4, z5.s, SXTW #2] : ld1w (%x4,%z5.s,sxtw #2)[32byte] %p1/z -> %z2.s -856748c4 : ld1w z4.s, p2/Z, [x6, z7.s, SXTW #2] : ld1w (%x6,%z7.s,sxtw #2)[32byte] %p2/z -> %z4.s -85694906 : ld1w z6.s, p2/Z, [x8, z9.s, SXTW #2] : ld1w (%x8,%z9.s,sxtw #2)[32byte] %p2/z -> %z6.s -856b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, SXTW #2] : ld1w (%x10,%z11.s,sxtw #2)[32byte] %p3/z -> %z8.s -856d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, SXTW #2] : ld1w (%x11,%z13.s,sxtw #2)[32byte] %p3/z -> %z10.s -856f51ac : ld1w z12.s, p4/Z, [x13, 
z15.s, SXTW #2] : ld1w (%x13,%z15.s,sxtw #2)[32byte] %p4/z -> %z12.s -857151ee : ld1w z14.s, p4/Z, [x15, z17.s, SXTW #2] : ld1w (%x15,%z17.s,sxtw #2)[32byte] %p4/z -> %z14.s -85735630 : ld1w z16.s, p5/Z, [x17, z19.s, SXTW #2] : ld1w (%x17,%z19.s,sxtw #2)[32byte] %p5/z -> %z16.s -85745671 : ld1w z17.s, p5/Z, [x19, z20.s, SXTW #2] : ld1w (%x19,%z20.s,sxtw #2)[32byte] %p5/z -> %z17.s -857656b3 : ld1w z19.s, p5/Z, [x21, z22.s, SXTW #2] : ld1w (%x21,%z22.s,sxtw #2)[32byte] %p5/z -> %z19.s -85785af5 : ld1w z21.s, p6/Z, [x23, z24.s, SXTW #2] : ld1w (%x23,%z24.s,sxtw #2)[32byte] %p6/z -> %z21.s -857a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, SXTW #2] : ld1w (%x24,%z26.s,sxtw #2)[32byte] %p6/z -> %z23.s -857c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, SXTW #2] : ld1w (%x26,%z28.s,sxtw #2)[32byte] %p7/z -> %z25.s -857e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, SXTW #2] : ld1w (%x28,%z30.s,sxtw #2)[32byte] %p7/z -> %z27.s -857f5fff : ld1w z31.s, p7/Z, [sp, z31.s, SXTW #2] : ld1w (%sp,%z31.s,sxtw #2)[32byte] %p7/z -> %z31.s +85204000 : ld1w z0.s, p0/Z, [x0, z0.s, UXTW #2] : ld1w (%x0,%z0.s,uxtw #2)[4byte] %p0/z -> %z0.s +85254482 : ld1w z2.s, p1/Z, [x4, z5.s, UXTW #2] : ld1w (%x4,%z5.s,uxtw #2)[4byte] %p1/z -> %z2.s +852748c4 : ld1w z4.s, p2/Z, [x6, z7.s, UXTW #2] : ld1w (%x6,%z7.s,uxtw #2)[4byte] %p2/z -> %z4.s +85294906 : ld1w z6.s, p2/Z, [x8, z9.s, UXTW #2] : ld1w (%x8,%z9.s,uxtw #2)[4byte] %p2/z -> %z6.s +852b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, UXTW #2] : ld1w (%x10,%z11.s,uxtw #2)[4byte] %p3/z -> %z8.s +852d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, UXTW #2] : ld1w (%x11,%z13.s,uxtw #2)[4byte] %p3/z -> %z10.s +852f51ac : ld1w z12.s, p4/Z, [x13, z15.s, UXTW #2] : ld1w (%x13,%z15.s,uxtw #2)[4byte] %p4/z -> %z12.s +853151ee : ld1w z14.s, p4/Z, [x15, z17.s, UXTW #2] : ld1w (%x15,%z17.s,uxtw #2)[4byte] %p4/z -> %z14.s +85335630 : ld1w z16.s, p5/Z, [x17, z19.s, UXTW #2] : ld1w (%x17,%z19.s,uxtw #2)[4byte] %p5/z -> %z16.s +85345671 : ld1w z17.s, p5/Z, [x19, z20.s, UXTW #2] : ld1w (%x19,%z20.s,uxtw 
#2)[4byte] %p5/z -> %z17.s +853656b3 : ld1w z19.s, p5/Z, [x21, z22.s, UXTW #2] : ld1w (%x21,%z22.s,uxtw #2)[4byte] %p5/z -> %z19.s +85385af5 : ld1w z21.s, p6/Z, [x23, z24.s, UXTW #2] : ld1w (%x23,%z24.s,uxtw #2)[4byte] %p6/z -> %z21.s +853a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, UXTW #2] : ld1w (%x24,%z26.s,uxtw #2)[4byte] %p6/z -> %z23.s +853c5f59 : ld1w z25.s, p7/Z, [x26, z28.s, UXTW #2] : ld1w (%x26,%z28.s,uxtw #2)[4byte] %p7/z -> %z25.s +853e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, UXTW #2] : ld1w (%x28,%z30.s,uxtw #2)[4byte] %p7/z -> %z27.s +853f5fff : ld1w z31.s, p7/Z, [sp, z31.s, UXTW #2] : ld1w (%sp,%z31.s,uxtw #2)[4byte] %p7/z -> %z31.s +85604000 : ld1w z0.s, p0/Z, [x0, z0.s, SXTW #2] : ld1w (%x0,%z0.s,sxtw #2)[4byte] %p0/z -> %z0.s +85654482 : ld1w z2.s, p1/Z, [x4, z5.s, SXTW #2] : ld1w (%x4,%z5.s,sxtw #2)[4byte] %p1/z -> %z2.s +856748c4 : ld1w z4.s, p2/Z, [x6, z7.s, SXTW #2] : ld1w (%x6,%z7.s,sxtw #2)[4byte] %p2/z -> %z4.s +85694906 : ld1w z6.s, p2/Z, [x8, z9.s, SXTW #2] : ld1w (%x8,%z9.s,sxtw #2)[4byte] %p2/z -> %z6.s +856b4d48 : ld1w z8.s, p3/Z, [x10, z11.s, SXTW #2] : ld1w (%x10,%z11.s,sxtw #2)[4byte] %p3/z -> %z8.s +856d4d6a : ld1w z10.s, p3/Z, [x11, z13.s, SXTW #2] : ld1w (%x11,%z13.s,sxtw #2)[4byte] %p3/z -> %z10.s +856f51ac : ld1w z12.s, p4/Z, [x13, z15.s, SXTW #2] : ld1w (%x13,%z15.s,sxtw #2)[4byte] %p4/z -> %z12.s +857151ee : ld1w z14.s, p4/Z, [x15, z17.s, SXTW #2] : ld1w (%x15,%z17.s,sxtw #2)[4byte] %p4/z -> %z14.s +85735630 : ld1w z16.s, p5/Z, [x17, z19.s, SXTW #2] : ld1w (%x17,%z19.s,sxtw #2)[4byte] %p5/z -> %z16.s +85745671 : ld1w z17.s, p5/Z, [x19, z20.s, SXTW #2] : ld1w (%x19,%z20.s,sxtw #2)[4byte] %p5/z -> %z17.s +857656b3 : ld1w z19.s, p5/Z, [x21, z22.s, SXTW #2] : ld1w (%x21,%z22.s,sxtw #2)[4byte] %p5/z -> %z19.s +85785af5 : ld1w z21.s, p6/Z, [x23, z24.s, SXTW #2] : ld1w (%x23,%z24.s,sxtw #2)[4byte] %p6/z -> %z21.s +857a5b17 : ld1w z23.s, p6/Z, [x24, z26.s, SXTW #2] : ld1w (%x24,%z26.s,sxtw #2)[4byte] %p6/z -> %z23.s +857c5f59 : ld1w z25.s, 
p7/Z, [x26, z28.s, SXTW #2] : ld1w (%x26,%z28.s,sxtw #2)[4byte] %p7/z -> %z25.s +857e5f9b : ld1w z27.s, p7/Z, [x28, z30.s, SXTW #2] : ld1w (%x28,%z30.s,sxtw #2)[4byte] %p7/z -> %z27.s +857f5fff : ld1w z31.s, p7/Z, [sp, z31.s, SXTW #2] : ld1w (%sp,%z31.s,sxtw #2)[4byte] %p7/z -> %z31.s # LD1W { .S }, /Z, [.S{, #}] (LD1W-Z.P.AI-S) -8520c000 : ld1w z0.s, p0/Z, [z0.s, #0] : ld1w (%z0.s)[32byte] %p0/z -> %z0.s -8522c482 : ld1w z2.s, p1/Z, [z4.s, #8] : ld1w +0x08(%z4.s)[32byte] %p1/z -> %z2.s -8524c8c4 : ld1w z4.s, p2/Z, [z6.s, #16] : ld1w +0x10(%z6.s)[32byte] %p2/z -> %z4.s -8526c906 : ld1w z6.s, p2/Z, [z8.s, #24] : ld1w +0x18(%z8.s)[32byte] %p2/z -> %z6.s -8528cd48 : ld1w z8.s, p3/Z, [z10.s, #32] : ld1w +0x20(%z10.s)[32byte] %p3/z -> %z8.s -852acd8a : ld1w z10.s, p3/Z, [z12.s, #40] : ld1w +0x28(%z12.s)[32byte] %p3/z -> %z10.s -852cd1cc : ld1w z12.s, p4/Z, [z14.s, #48] : ld1w +0x30(%z14.s)[32byte] %p4/z -> %z12.s -852ed20e : ld1w z14.s, p4/Z, [z16.s, #56] : ld1w +0x38(%z16.s)[32byte] %p4/z -> %z14.s -8530d650 : ld1w z16.s, p5/Z, [z18.s, #64] : ld1w +0x40(%z18.s)[32byte] %p5/z -> %z16.s -8531d671 : ld1w z17.s, p5/Z, [z19.s, #68] : ld1w +0x44(%z19.s)[32byte] %p5/z -> %z17.s -8533d6b3 : ld1w z19.s, p5/Z, [z21.s, #76] : ld1w +0x4c(%z21.s)[32byte] %p5/z -> %z19.s -8535daf5 : ld1w z21.s, p6/Z, [z23.s, #84] : ld1w +0x54(%z23.s)[32byte] %p6/z -> %z21.s -8537db37 : ld1w z23.s, p6/Z, [z25.s, #92] : ld1w +0x5c(%z25.s)[32byte] %p6/z -> %z23.s -8539df79 : ld1w z25.s, p7/Z, [z27.s, #100] : ld1w +0x64(%z27.s)[32byte] %p7/z -> %z25.s -853bdfbb : ld1w z27.s, p7/Z, [z29.s, #108] : ld1w +0x6c(%z29.s)[32byte] %p7/z -> %z27.s -853fdfff : ld1w z31.s, p7/Z, [z31.s, #124] : ld1w +0x7c(%z31.s)[32byte] %p7/z -> %z31.s +8520c000 : ld1w z0.s, p0/Z, [z0.s, #0] : ld1w (%z0.s)[4byte] %p0/z -> %z0.s +8522c482 : ld1w z2.s, p1/Z, [z4.s, #8] : ld1w +0x08(%z4.s)[4byte] %p1/z -> %z2.s +8524c8c4 : ld1w z4.s, p2/Z, [z6.s, #16] : ld1w +0x10(%z6.s)[4byte] %p2/z -> %z4.s +8526c906 : ld1w z6.s, p2/Z, [z8.s, #24] 
: ld1w +0x18(%z8.s)[4byte] %p2/z -> %z6.s +8528cd48 : ld1w z8.s, p3/Z, [z10.s, #32] : ld1w +0x20(%z10.s)[4byte] %p3/z -> %z8.s +852acd8a : ld1w z10.s, p3/Z, [z12.s, #40] : ld1w +0x28(%z12.s)[4byte] %p3/z -> %z10.s +852cd1cc : ld1w z12.s, p4/Z, [z14.s, #48] : ld1w +0x30(%z14.s)[4byte] %p4/z -> %z12.s +852ed20e : ld1w z14.s, p4/Z, [z16.s, #56] : ld1w +0x38(%z16.s)[4byte] %p4/z -> %z14.s +8530d650 : ld1w z16.s, p5/Z, [z18.s, #64] : ld1w +0x40(%z18.s)[4byte] %p5/z -> %z16.s +8531d671 : ld1w z17.s, p5/Z, [z19.s, #68] : ld1w +0x44(%z19.s)[4byte] %p5/z -> %z17.s +8533d6b3 : ld1w z19.s, p5/Z, [z21.s, #76] : ld1w +0x4c(%z21.s)[4byte] %p5/z -> %z19.s +8535daf5 : ld1w z21.s, p6/Z, [z23.s, #84] : ld1w +0x54(%z23.s)[4byte] %p6/z -> %z21.s +8537db37 : ld1w z23.s, p6/Z, [z25.s, #92] : ld1w +0x5c(%z25.s)[4byte] %p6/z -> %z23.s +8539df79 : ld1w z25.s, p7/Z, [z27.s, #100] : ld1w +0x64(%z27.s)[4byte] %p7/z -> %z25.s +853bdfbb : ld1w z27.s, p7/Z, [z29.s, #108] : ld1w +0x6c(%z29.s)[4byte] %p7/z -> %z27.s +853fdfff : ld1w z31.s, p7/Z, [z31.s, #124] : ld1w +0x7c(%z31.s)[4byte] %p7/z -> %z31.s # LD1W { .S }, /Z, [, , LSL #2] (LD1W-Z.P.BR-U32) -a5404000 : ld1w z0.s, p0/Z, [x0, x0, LSL #2] : ld1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s -a5454482 : ld1w z2.s, p1/Z, [x4, x5, LSL #2] : ld1w (%x4,%x5,lsl #2)[32byte] %p1/z -> %z2.s -a54748c4 : ld1w z4.s, p2/Z, [x6, x7, LSL #2] : ld1w (%x6,%x7,lsl #2)[32byte] %p2/z -> %z4.s -a5494906 : ld1w z6.s, p2/Z, [x8, x9, LSL #2] : ld1w (%x8,%x9,lsl #2)[32byte] %p2/z -> %z6.s -a54b4d48 : ld1w z8.s, p3/Z, [x10, x11, LSL #2] : ld1w (%x10,%x11,lsl #2)[32byte] %p3/z -> %z8.s -a54c4d6a : ld1w z10.s, p3/Z, [x11, x12, LSL #2] : ld1w (%x11,%x12,lsl #2)[32byte] %p3/z -> %z10.s -a54e51ac : ld1w z12.s, p4/Z, [x13, x14, LSL #2] : ld1w (%x13,%x14,lsl #2)[32byte] %p4/z -> %z12.s -a55051ee : ld1w z14.s, p4/Z, [x15, x16, LSL #2] : ld1w (%x15,%x16,lsl #2)[32byte] %p4/z -> %z14.s -a5525630 : ld1w z16.s, p5/Z, [x17, x18, LSL #2] : ld1w (%x17,%x18,lsl #2)[32byte] %p5/z -> 
%z16.s -a5545671 : ld1w z17.s, p5/Z, [x19, x20, LSL #2] : ld1w (%x19,%x20,lsl #2)[32byte] %p5/z -> %z17.s -a55656b3 : ld1w z19.s, p5/Z, [x21, x22, LSL #2] : ld1w (%x21,%x22,lsl #2)[32byte] %p5/z -> %z19.s -a5585af5 : ld1w z21.s, p6/Z, [x23, x24, LSL #2] : ld1w (%x23,%x24,lsl #2)[32byte] %p6/z -> %z21.s -a5595b17 : ld1w z23.s, p6/Z, [x24, x25, LSL #2] : ld1w (%x24,%x25,lsl #2)[32byte] %p6/z -> %z23.s -a55b5f59 : ld1w z25.s, p7/Z, [x26, x27, LSL #2] : ld1w (%x26,%x27,lsl #2)[32byte] %p7/z -> %z25.s -a55d5f9b : ld1w z27.s, p7/Z, [x28, x29, LSL #2] : ld1w (%x28,%x29,lsl #2)[32byte] %p7/z -> %z27.s -a55e5fff : ld1w z31.s, p7/Z, [sp, x30, LSL #2] : ld1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s +a5404000 : ld1w z0.s, p0/Z, [x0, x0, LSL #2] : ld1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s +a5454482 : ld1w z2.s, p1/Z, [x4, x5, LSL #2] : ld1w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s +a54748c4 : ld1w z4.s, p2/Z, [x6, x7, LSL #2] : ld1w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s +a5494906 : ld1w z6.s, p2/Z, [x8, x9, LSL #2] : ld1w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s +a54b4d48 : ld1w z8.s, p3/Z, [x10, x11, LSL #2] : ld1w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s +a54c4d6a : ld1w z10.s, p3/Z, [x11, x12, LSL #2] : ld1w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s +a54e51ac : ld1w z12.s, p4/Z, [x13, x14, LSL #2] : ld1w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s +a55051ee : ld1w z14.s, p4/Z, [x15, x16, LSL #2] : ld1w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s +a5525630 : ld1w z16.s, p5/Z, [x17, x18, LSL #2] : ld1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s +a5545671 : ld1w z17.s, p5/Z, [x19, x20, LSL #2] : ld1w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s +a55656b3 : ld1w z19.s, p5/Z, [x21, x22, LSL #2] : ld1w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s +a5585af5 : ld1w z21.s, p6/Z, [x23, x24, LSL #2] : ld1w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s +a5595b17 : ld1w z23.s, p6/Z, [x24, x25, LSL #2] : ld1w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s +a55b5f59 : ld1w z25.s, p7/Z, [x26, x27, LSL #2] : 
ld1w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s +a55d5f9b : ld1w z27.s, p7/Z, [x28, x29, LSL #2] : ld1w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s +a55e5fff : ld1w z31.s, p7/Z, [sp, x30, LSL #2] : ld1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s # LD1W { .S }, /Z, [{, #, MUL VL}] (LD1W-Z.P.BI-U32) -a548a000 : ld1w z0.s, p0/Z, [x0, #-8, MUL VL] : ld1w -0x0100(%x0)[32byte] %p0/z -> %z0.s -a549a482 : ld1w z2.s, p1/Z, [x4, #-7, MUL VL] : ld1w -0xe0(%x4)[32byte] %p1/z -> %z2.s -a54aa8c4 : ld1w z4.s, p2/Z, [x6, #-6, MUL VL] : ld1w -0xc0(%x6)[32byte] %p2/z -> %z4.s -a54ba906 : ld1w z6.s, p2/Z, [x8, #-5, MUL VL] : ld1w -0xa0(%x8)[32byte] %p2/z -> %z6.s -a54cad48 : ld1w z8.s, p3/Z, [x10, #-4, MUL VL] : ld1w -0x80(%x10)[32byte] %p3/z -> %z8.s -a54dad6a : ld1w z10.s, p3/Z, [x11, #-3, MUL VL] : ld1w -0x60(%x11)[32byte] %p3/z -> %z10.s -a54eb1ac : ld1w z12.s, p4/Z, [x13, #-2, MUL VL] : ld1w -0x40(%x13)[32byte] %p4/z -> %z12.s -a54fb1ee : ld1w z14.s, p4/Z, [x15, #-1, MUL VL] : ld1w -0x20(%x15)[32byte] %p4/z -> %z14.s -a540b630 : ld1w z16.s, p5/Z, [x17, #0, MUL VL] : ld1w (%x17)[32byte] %p5/z -> %z16.s -a540b671 : ld1w z17.s, p5/Z, [x19, #0, MUL VL] : ld1w (%x19)[32byte] %p5/z -> %z17.s -a541b6b3 : ld1w z19.s, p5/Z, [x21, #1, MUL VL] : ld1w +0x20(%x21)[32byte] %p5/z -> %z19.s -a542baf5 : ld1w z21.s, p6/Z, [x23, #2, MUL VL] : ld1w +0x40(%x23)[32byte] %p6/z -> %z21.s -a543bb17 : ld1w z23.s, p6/Z, [x24, #3, MUL VL] : ld1w +0x60(%x24)[32byte] %p6/z -> %z23.s -a544bf59 : ld1w z25.s, p7/Z, [x26, #4, MUL VL] : ld1w +0x80(%x26)[32byte] %p7/z -> %z25.s -a545bf9b : ld1w z27.s, p7/Z, [x28, #5, MUL VL] : ld1w +0xa0(%x28)[32byte] %p7/z -> %z27.s -a547bfff : ld1w z31.s, p7/Z, [sp, #7, MUL VL] : ld1w +0xe0(%sp)[32byte] %p7/z -> %z31.s +a548a000 : ld1w z0.s, p0/Z, [x0, #-8, MUL VL] : ld1w -0x0100(%x0)[4byte] %p0/z -> %z0.s +a549a482 : ld1w z2.s, p1/Z, [x4, #-7, MUL VL] : ld1w -0xe0(%x4)[4byte] %p1/z -> %z2.s +a54aa8c4 : ld1w z4.s, p2/Z, [x6, #-6, MUL VL] : ld1w -0xc0(%x6)[4byte] %p2/z -> %z4.s 
+a54ba906 : ld1w z6.s, p2/Z, [x8, #-5, MUL VL] : ld1w -0xa0(%x8)[4byte] %p2/z -> %z6.s +a54cad48 : ld1w z8.s, p3/Z, [x10, #-4, MUL VL] : ld1w -0x80(%x10)[4byte] %p3/z -> %z8.s +a54dad6a : ld1w z10.s, p3/Z, [x11, #-3, MUL VL] : ld1w -0x60(%x11)[4byte] %p3/z -> %z10.s +a54eb1ac : ld1w z12.s, p4/Z, [x13, #-2, MUL VL] : ld1w -0x40(%x13)[4byte] %p4/z -> %z12.s +a54fb1ee : ld1w z14.s, p4/Z, [x15, #-1, MUL VL] : ld1w -0x20(%x15)[4byte] %p4/z -> %z14.s +a540b630 : ld1w z16.s, p5/Z, [x17, #0, MUL VL] : ld1w (%x17)[4byte] %p5/z -> %z16.s +a540b671 : ld1w z17.s, p5/Z, [x19, #0, MUL VL] : ld1w (%x19)[4byte] %p5/z -> %z17.s +a541b6b3 : ld1w z19.s, p5/Z, [x21, #1, MUL VL] : ld1w +0x20(%x21)[4byte] %p5/z -> %z19.s +a542baf5 : ld1w z21.s, p6/Z, [x23, #2, MUL VL] : ld1w +0x40(%x23)[4byte] %p6/z -> %z21.s +a543bb17 : ld1w z23.s, p6/Z, [x24, #3, MUL VL] : ld1w +0x60(%x24)[4byte] %p6/z -> %z23.s +a544bf59 : ld1w z25.s, p7/Z, [x26, #4, MUL VL] : ld1w +0x80(%x26)[4byte] %p7/z -> %z25.s +a545bf9b : ld1w z27.s, p7/Z, [x28, #5, MUL VL] : ld1w +0xa0(%x28)[4byte] %p7/z -> %z27.s +a547bfff : ld1w z31.s, p7/Z, [sp, #7, MUL VL] : ld1w +0xe0(%sp)[4byte] %p7/z -> %z31.s # LD1W { .D }, /Z, [, , LSL #2] (LD1W-Z.P.BR-U64) -a5604000 : ld1w z0.d, p0/Z, [x0, x0, LSL #2] : ld1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d -a5654482 : ld1w z2.d, p1/Z, [x4, x5, LSL #2] : ld1w (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d -a56748c4 : ld1w z4.d, p2/Z, [x6, x7, LSL #2] : ld1w (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d -a5694906 : ld1w z6.d, p2/Z, [x8, x9, LSL #2] : ld1w (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d -a56b4d48 : ld1w z8.d, p3/Z, [x10, x11, LSL #2] : ld1w (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d -a56c4d6a : ld1w z10.d, p3/Z, [x11, x12, LSL #2] : ld1w (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d -a56e51ac : ld1w z12.d, p4/Z, [x13, x14, LSL #2] : ld1w (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d -a57051ee : ld1w z14.d, p4/Z, [x15, x16, LSL #2] : ld1w (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d -a5725630 : ld1w z16.d, 
p5/Z, [x17, x18, LSL #2] : ld1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d -a5745671 : ld1w z17.d, p5/Z, [x19, x20, LSL #2] : ld1w (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d -a57656b3 : ld1w z19.d, p5/Z, [x21, x22, LSL #2] : ld1w (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d -a5785af5 : ld1w z21.d, p6/Z, [x23, x24, LSL #2] : ld1w (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d -a5795b17 : ld1w z23.d, p6/Z, [x24, x25, LSL #2] : ld1w (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d -a57b5f59 : ld1w z25.d, p7/Z, [x26, x27, LSL #2] : ld1w (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d -a57d5f9b : ld1w z27.d, p7/Z, [x28, x29, LSL #2] : ld1w (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d -a57e5fff : ld1w z31.d, p7/Z, [sp, x30, LSL #2] : ld1w (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d +a5604000 : ld1w z0.d, p0/Z, [x0, x0, LSL #2] : ld1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d +a5654482 : ld1w z2.d, p1/Z, [x4, x5, LSL #2] : ld1w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.d +a56748c4 : ld1w z4.d, p2/Z, [x6, x7, LSL #2] : ld1w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.d +a5694906 : ld1w z6.d, p2/Z, [x8, x9, LSL #2] : ld1w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.d +a56b4d48 : ld1w z8.d, p3/Z, [x10, x11, LSL #2] : ld1w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.d +a56c4d6a : ld1w z10.d, p3/Z, [x11, x12, LSL #2] : ld1w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.d +a56e51ac : ld1w z12.d, p4/Z, [x13, x14, LSL #2] : ld1w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.d +a57051ee : ld1w z14.d, p4/Z, [x15, x16, LSL #2] : ld1w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.d +a5725630 : ld1w z16.d, p5/Z, [x17, x18, LSL #2] : ld1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d +a5745671 : ld1w z17.d, p5/Z, [x19, x20, LSL #2] : ld1w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.d +a57656b3 : ld1w z19.d, p5/Z, [x21, x22, LSL #2] : ld1w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.d +a5785af5 : ld1w z21.d, p6/Z, [x23, x24, LSL #2] : ld1w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.d +a5795b17 : ld1w z23.d, p6/Z, [x24, x25, LSL #2] : ld1w (%x24,%x25,lsl #2)[4byte] 
%p6/z -> %z23.d +a57b5f59 : ld1w z25.d, p7/Z, [x26, x27, LSL #2] : ld1w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.d +a57d5f9b : ld1w z27.d, p7/Z, [x28, x29, LSL #2] : ld1w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.d +a57e5fff : ld1w z31.d, p7/Z, [sp, x30, LSL #2] : ld1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [{, #, MUL VL}] (LD1W-Z.P.BI-U64) -a568a000 : ld1w z0.d, p0/Z, [x0, #-8, MUL VL] : ld1w -0x80(%x0)[16byte] %p0/z -> %z0.d -a569a482 : ld1w z2.d, p1/Z, [x4, #-7, MUL VL] : ld1w -0x70(%x4)[16byte] %p1/z -> %z2.d -a56aa8c4 : ld1w z4.d, p2/Z, [x6, #-6, MUL VL] : ld1w -0x60(%x6)[16byte] %p2/z -> %z4.d -a56ba906 : ld1w z6.d, p2/Z, [x8, #-5, MUL VL] : ld1w -0x50(%x8)[16byte] %p2/z -> %z6.d -a56cad48 : ld1w z8.d, p3/Z, [x10, #-4, MUL VL] : ld1w -0x40(%x10)[16byte] %p3/z -> %z8.d -a56dad6a : ld1w z10.d, p3/Z, [x11, #-3, MUL VL] : ld1w -0x30(%x11)[16byte] %p3/z -> %z10.d -a56eb1ac : ld1w z12.d, p4/Z, [x13, #-2, MUL VL] : ld1w -0x20(%x13)[16byte] %p4/z -> %z12.d -a56fb1ee : ld1w z14.d, p4/Z, [x15, #-1, MUL VL] : ld1w -0x10(%x15)[16byte] %p4/z -> %z14.d -a560b630 : ld1w z16.d, p5/Z, [x17, #0, MUL VL] : ld1w (%x17)[16byte] %p5/z -> %z16.d -a560b671 : ld1w z17.d, p5/Z, [x19, #0, MUL VL] : ld1w (%x19)[16byte] %p5/z -> %z17.d -a561b6b3 : ld1w z19.d, p5/Z, [x21, #1, MUL VL] : ld1w +0x10(%x21)[16byte] %p5/z -> %z19.d -a562baf5 : ld1w z21.d, p6/Z, [x23, #2, MUL VL] : ld1w +0x20(%x23)[16byte] %p6/z -> %z21.d -a563bb17 : ld1w z23.d, p6/Z, [x24, #3, MUL VL] : ld1w +0x30(%x24)[16byte] %p6/z -> %z23.d -a564bf59 : ld1w z25.d, p7/Z, [x26, #4, MUL VL] : ld1w +0x40(%x26)[16byte] %p7/z -> %z25.d -a565bf9b : ld1w z27.d, p7/Z, [x28, #5, MUL VL] : ld1w +0x50(%x28)[16byte] %p7/z -> %z27.d -a567bfff : ld1w z31.d, p7/Z, [sp, #7, MUL VL] : ld1w +0x70(%sp)[16byte] %p7/z -> %z31.d +a568a000 : ld1w z0.d, p0/Z, [x0, #-8, MUL VL] : ld1w -0x80(%x0)[4byte] %p0/z -> %z0.d +a569a482 : ld1w z2.d, p1/Z, [x4, #-7, MUL VL] : ld1w -0x70(%x4)[4byte] %p1/z -> %z2.d +a56aa8c4 : ld1w z4.d, p2/Z, 
[x6, #-6, MUL VL] : ld1w -0x60(%x6)[4byte] %p2/z -> %z4.d +a56ba906 : ld1w z6.d, p2/Z, [x8, #-5, MUL VL] : ld1w -0x50(%x8)[4byte] %p2/z -> %z6.d +a56cad48 : ld1w z8.d, p3/Z, [x10, #-4, MUL VL] : ld1w -0x40(%x10)[4byte] %p3/z -> %z8.d +a56dad6a : ld1w z10.d, p3/Z, [x11, #-3, MUL VL] : ld1w -0x30(%x11)[4byte] %p3/z -> %z10.d +a56eb1ac : ld1w z12.d, p4/Z, [x13, #-2, MUL VL] : ld1w -0x20(%x13)[4byte] %p4/z -> %z12.d +a56fb1ee : ld1w z14.d, p4/Z, [x15, #-1, MUL VL] : ld1w -0x10(%x15)[4byte] %p4/z -> %z14.d +a560b630 : ld1w z16.d, p5/Z, [x17, #0, MUL VL] : ld1w (%x17)[4byte] %p5/z -> %z16.d +a560b671 : ld1w z17.d, p5/Z, [x19, #0, MUL VL] : ld1w (%x19)[4byte] %p5/z -> %z17.d +a561b6b3 : ld1w z19.d, p5/Z, [x21, #1, MUL VL] : ld1w +0x10(%x21)[4byte] %p5/z -> %z19.d +a562baf5 : ld1w z21.d, p6/Z, [x23, #2, MUL VL] : ld1w +0x20(%x23)[4byte] %p6/z -> %z21.d +a563bb17 : ld1w z23.d, p6/Z, [x24, #3, MUL VL] : ld1w +0x30(%x24)[4byte] %p6/z -> %z23.d +a564bf59 : ld1w z25.d, p7/Z, [x26, #4, MUL VL] : ld1w +0x40(%x26)[4byte] %p7/z -> %z25.d +a565bf9b : ld1w z27.d, p7/Z, [x28, #5, MUL VL] : ld1w +0x50(%x28)[4byte] %p7/z -> %z27.d +a567bfff : ld1w z31.d, p7/Z, [sp, #7, MUL VL] : ld1w +0x70(%sp)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [, .D, ] (LD1W-Z.P.BZ-D.x32.unscaled) -c5004000 : ld1w z0.d, p0/Z, [x0, z0.d, UXTW] : ld1w (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d -c5054482 : ld1w z2.d, p1/Z, [x4, z5.d, UXTW] : ld1w (%x4,%z5.d,uxtw)[16byte] %p1/z -> %z2.d -c50748c4 : ld1w z4.d, p2/Z, [x6, z7.d, UXTW] : ld1w (%x6,%z7.d,uxtw)[16byte] %p2/z -> %z4.d -c5094906 : ld1w z6.d, p2/Z, [x8, z9.d, UXTW] : ld1w (%x8,%z9.d,uxtw)[16byte] %p2/z -> %z6.d -c50b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, UXTW] : ld1w (%x10,%z11.d,uxtw)[16byte] %p3/z -> %z8.d -c50d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, UXTW] : ld1w (%x11,%z13.d,uxtw)[16byte] %p3/z -> %z10.d -c50f51ac : ld1w z12.d, p4/Z, [x13, z15.d, UXTW] : ld1w (%x13,%z15.d,uxtw)[16byte] %p4/z -> %z12.d -c51151ee : ld1w z14.d, p4/Z, [x15, z17.d, UXTW] : ld1w 
(%x15,%z17.d,uxtw)[16byte] %p4/z -> %z14.d -c5135630 : ld1w z16.d, p5/Z, [x17, z19.d, UXTW] : ld1w (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d -c5145671 : ld1w z17.d, p5/Z, [x19, z20.d, UXTW] : ld1w (%x19,%z20.d,uxtw)[16byte] %p5/z -> %z17.d -c51656b3 : ld1w z19.d, p5/Z, [x21, z22.d, UXTW] : ld1w (%x21,%z22.d,uxtw)[16byte] %p5/z -> %z19.d -c5185af5 : ld1w z21.d, p6/Z, [x23, z24.d, UXTW] : ld1w (%x23,%z24.d,uxtw)[16byte] %p6/z -> %z21.d -c51a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, UXTW] : ld1w (%x24,%z26.d,uxtw)[16byte] %p6/z -> %z23.d -c51c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, UXTW] : ld1w (%x26,%z28.d,uxtw)[16byte] %p7/z -> %z25.d -c51e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, UXTW] : ld1w (%x28,%z30.d,uxtw)[16byte] %p7/z -> %z27.d -c51f5fff : ld1w z31.d, p7/Z, [sp, z31.d, UXTW] : ld1w (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d -c5404000 : ld1w z0.d, p0/Z, [x0, z0.d, SXTW] : ld1w (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d -c5454482 : ld1w z2.d, p1/Z, [x4, z5.d, SXTW] : ld1w (%x4,%z5.d,sxtw)[16byte] %p1/z -> %z2.d -c54748c4 : ld1w z4.d, p2/Z, [x6, z7.d, SXTW] : ld1w (%x6,%z7.d,sxtw)[16byte] %p2/z -> %z4.d -c5494906 : ld1w z6.d, p2/Z, [x8, z9.d, SXTW] : ld1w (%x8,%z9.d,sxtw)[16byte] %p2/z -> %z6.d -c54b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, SXTW] : ld1w (%x10,%z11.d,sxtw)[16byte] %p3/z -> %z8.d -c54d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, SXTW] : ld1w (%x11,%z13.d,sxtw)[16byte] %p3/z -> %z10.d -c54f51ac : ld1w z12.d, p4/Z, [x13, z15.d, SXTW] : ld1w (%x13,%z15.d,sxtw)[16byte] %p4/z -> %z12.d -c55151ee : ld1w z14.d, p4/Z, [x15, z17.d, SXTW] : ld1w (%x15,%z17.d,sxtw)[16byte] %p4/z -> %z14.d -c5535630 : ld1w z16.d, p5/Z, [x17, z19.d, SXTW] : ld1w (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d -c5545671 : ld1w z17.d, p5/Z, [x19, z20.d, SXTW] : ld1w (%x19,%z20.d,sxtw)[16byte] %p5/z -> %z17.d -c55656b3 : ld1w z19.d, p5/Z, [x21, z22.d, SXTW] : ld1w (%x21,%z22.d,sxtw)[16byte] %p5/z -> %z19.d -c5585af5 : ld1w z21.d, p6/Z, [x23, z24.d, SXTW] : ld1w (%x23,%z24.d,sxtw)[16byte] %p6/z -> %z21.d 
-c55a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, SXTW] : ld1w (%x24,%z26.d,sxtw)[16byte] %p6/z -> %z23.d -c55c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, SXTW] : ld1w (%x26,%z28.d,sxtw)[16byte] %p7/z -> %z25.d -c55e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, SXTW] : ld1w (%x28,%z30.d,sxtw)[16byte] %p7/z -> %z27.d -c55f5fff : ld1w z31.d, p7/Z, [sp, z31.d, SXTW] : ld1w (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d +c5004000 : ld1w z0.d, p0/Z, [x0, z0.d, UXTW] : ld1w (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d +c5054482 : ld1w z2.d, p1/Z, [x4, z5.d, UXTW] : ld1w (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d +c50748c4 : ld1w z4.d, p2/Z, [x6, z7.d, UXTW] : ld1w (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d +c5094906 : ld1w z6.d, p2/Z, [x8, z9.d, UXTW] : ld1w (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d +c50b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, UXTW] : ld1w (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d +c50d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, UXTW] : ld1w (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d +c50f51ac : ld1w z12.d, p4/Z, [x13, z15.d, UXTW] : ld1w (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d +c51151ee : ld1w z14.d, p4/Z, [x15, z17.d, UXTW] : ld1w (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d +c5135630 : ld1w z16.d, p5/Z, [x17, z19.d, UXTW] : ld1w (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d +c5145671 : ld1w z17.d, p5/Z, [x19, z20.d, UXTW] : ld1w (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d +c51656b3 : ld1w z19.d, p5/Z, [x21, z22.d, UXTW] : ld1w (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d +c5185af5 : ld1w z21.d, p6/Z, [x23, z24.d, UXTW] : ld1w (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d +c51a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, UXTW] : ld1w (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d +c51c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, UXTW] : ld1w (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d +c51e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, UXTW] : ld1w (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d +c51f5fff : ld1w z31.d, p7/Z, [sp, z31.d, UXTW] : ld1w (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d +c5404000 : ld1w z0.d, p0/Z, [x0, z0.d, SXTW] : ld1w 
(%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d +c5454482 : ld1w z2.d, p1/Z, [x4, z5.d, SXTW] : ld1w (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d +c54748c4 : ld1w z4.d, p2/Z, [x6, z7.d, SXTW] : ld1w (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d +c5494906 : ld1w z6.d, p2/Z, [x8, z9.d, SXTW] : ld1w (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d +c54b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, SXTW] : ld1w (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d +c54d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, SXTW] : ld1w (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d +c54f51ac : ld1w z12.d, p4/Z, [x13, z15.d, SXTW] : ld1w (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d +c55151ee : ld1w z14.d, p4/Z, [x15, z17.d, SXTW] : ld1w (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d +c5535630 : ld1w z16.d, p5/Z, [x17, z19.d, SXTW] : ld1w (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d +c5545671 : ld1w z17.d, p5/Z, [x19, z20.d, SXTW] : ld1w (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d +c55656b3 : ld1w z19.d, p5/Z, [x21, z22.d, SXTW] : ld1w (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d +c5585af5 : ld1w z21.d, p6/Z, [x23, z24.d, SXTW] : ld1w (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d +c55a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, SXTW] : ld1w (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d +c55c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, SXTW] : ld1w (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d +c55e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, SXTW] : ld1w (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d +c55f5fff : ld1w z31.d, p7/Z, [sp, z31.d, SXTW] : ld1w (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [, .D, #2] (LD1W-Z.P.BZ-D.x32.scaled) -c5204000 : ld1w z0.d, p0/Z, [x0, z0.d, UXTW #2] : ld1w (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d -c5254482 : ld1w z2.d, p1/Z, [x4, z5.d, UXTW #2] : ld1w (%x4,%z5.d,uxtw #2)[16byte] %p1/z -> %z2.d -c52748c4 : ld1w z4.d, p2/Z, [x6, z7.d, UXTW #2] : ld1w (%x6,%z7.d,uxtw #2)[16byte] %p2/z -> %z4.d -c5294906 : ld1w z6.d, p2/Z, [x8, z9.d, UXTW #2] : ld1w (%x8,%z9.d,uxtw #2)[16byte] %p2/z -> %z6.d -c52b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, UXTW #2] : ld1w 
(%x10,%z11.d,uxtw #2)[16byte] %p3/z -> %z8.d -c52d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, UXTW #2] : ld1w (%x11,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d -c52f51ac : ld1w z12.d, p4/Z, [x13, z15.d, UXTW #2] : ld1w (%x13,%z15.d,uxtw #2)[16byte] %p4/z -> %z12.d -c53151ee : ld1w z14.d, p4/Z, [x15, z17.d, UXTW #2] : ld1w (%x15,%z17.d,uxtw #2)[16byte] %p4/z -> %z14.d -c5335630 : ld1w z16.d, p5/Z, [x17, z19.d, UXTW #2] : ld1w (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d -c5345671 : ld1w z17.d, p5/Z, [x19, z20.d, UXTW #2] : ld1w (%x19,%z20.d,uxtw #2)[16byte] %p5/z -> %z17.d -c53656b3 : ld1w z19.d, p5/Z, [x21, z22.d, UXTW #2] : ld1w (%x21,%z22.d,uxtw #2)[16byte] %p5/z -> %z19.d -c5385af5 : ld1w z21.d, p6/Z, [x23, z24.d, UXTW #2] : ld1w (%x23,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d -c53a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, UXTW #2] : ld1w (%x24,%z26.d,uxtw #2)[16byte] %p6/z -> %z23.d -c53c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, UXTW #2] : ld1w (%x26,%z28.d,uxtw #2)[16byte] %p7/z -> %z25.d -c53e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, UXTW #2] : ld1w (%x28,%z30.d,uxtw #2)[16byte] %p7/z -> %z27.d -c53f5fff : ld1w z31.d, p7/Z, [sp, z31.d, UXTW #2] : ld1w (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d -c5604000 : ld1w z0.d, p0/Z, [x0, z0.d, SXTW #2] : ld1w (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d -c5654482 : ld1w z2.d, p1/Z, [x4, z5.d, SXTW #2] : ld1w (%x4,%z5.d,sxtw #2)[16byte] %p1/z -> %z2.d -c56748c4 : ld1w z4.d, p2/Z, [x6, z7.d, SXTW #2] : ld1w (%x6,%z7.d,sxtw #2)[16byte] %p2/z -> %z4.d -c5694906 : ld1w z6.d, p2/Z, [x8, z9.d, SXTW #2] : ld1w (%x8,%z9.d,sxtw #2)[16byte] %p2/z -> %z6.d -c56b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, SXTW #2] : ld1w (%x10,%z11.d,sxtw #2)[16byte] %p3/z -> %z8.d -c56d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, SXTW #2] : ld1w (%x11,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d -c56f51ac : ld1w z12.d, p4/Z, [x13, z15.d, SXTW #2] : ld1w (%x13,%z15.d,sxtw #2)[16byte] %p4/z -> %z12.d -c57151ee : ld1w z14.d, p4/Z, [x15, z17.d, SXTW #2] : ld1w (%x15,%z17.d,sxtw #2)[16byte] 
%p4/z -> %z14.d -c5735630 : ld1w z16.d, p5/Z, [x17, z19.d, SXTW #2] : ld1w (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d -c5745671 : ld1w z17.d, p5/Z, [x19, z20.d, SXTW #2] : ld1w (%x19,%z20.d,sxtw #2)[16byte] %p5/z -> %z17.d -c57656b3 : ld1w z19.d, p5/Z, [x21, z22.d, SXTW #2] : ld1w (%x21,%z22.d,sxtw #2)[16byte] %p5/z -> %z19.d -c5785af5 : ld1w z21.d, p6/Z, [x23, z24.d, SXTW #2] : ld1w (%x23,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d -c57a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, SXTW #2] : ld1w (%x24,%z26.d,sxtw #2)[16byte] %p6/z -> %z23.d -c57c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, SXTW #2] : ld1w (%x26,%z28.d,sxtw #2)[16byte] %p7/z -> %z25.d -c57e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, SXTW #2] : ld1w (%x28,%z30.d,sxtw #2)[16byte] %p7/z -> %z27.d -c57f5fff : ld1w z31.d, p7/Z, [sp, z31.d, SXTW #2] : ld1w (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d +c5204000 : ld1w z0.d, p0/Z, [x0, z0.d, UXTW #2] : ld1w (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d +c5254482 : ld1w z2.d, p1/Z, [x4, z5.d, UXTW #2] : ld1w (%x4,%z5.d,uxtw #2)[4byte] %p1/z -> %z2.d +c52748c4 : ld1w z4.d, p2/Z, [x6, z7.d, UXTW #2] : ld1w (%x6,%z7.d,uxtw #2)[4byte] %p2/z -> %z4.d +c5294906 : ld1w z6.d, p2/Z, [x8, z9.d, UXTW #2] : ld1w (%x8,%z9.d,uxtw #2)[4byte] %p2/z -> %z6.d +c52b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, UXTW #2] : ld1w (%x10,%z11.d,uxtw #2)[4byte] %p3/z -> %z8.d +c52d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, UXTW #2] : ld1w (%x11,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d +c52f51ac : ld1w z12.d, p4/Z, [x13, z15.d, UXTW #2] : ld1w (%x13,%z15.d,uxtw #2)[4byte] %p4/z -> %z12.d +c53151ee : ld1w z14.d, p4/Z, [x15, z17.d, UXTW #2] : ld1w (%x15,%z17.d,uxtw #2)[4byte] %p4/z -> %z14.d +c5335630 : ld1w z16.d, p5/Z, [x17, z19.d, UXTW #2] : ld1w (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d +c5345671 : ld1w z17.d, p5/Z, [x19, z20.d, UXTW #2] : ld1w (%x19,%z20.d,uxtw #2)[4byte] %p5/z -> %z17.d +c53656b3 : ld1w z19.d, p5/Z, [x21, z22.d, UXTW #2] : ld1w (%x21,%z22.d,uxtw #2)[4byte] %p5/z -> %z19.d +c5385af5 : ld1w z21.d, 
p6/Z, [x23, z24.d, UXTW #2] : ld1w (%x23,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d +c53a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, UXTW #2] : ld1w (%x24,%z26.d,uxtw #2)[4byte] %p6/z -> %z23.d +c53c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, UXTW #2] : ld1w (%x26,%z28.d,uxtw #2)[4byte] %p7/z -> %z25.d +c53e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, UXTW #2] : ld1w (%x28,%z30.d,uxtw #2)[4byte] %p7/z -> %z27.d +c53f5fff : ld1w z31.d, p7/Z, [sp, z31.d, UXTW #2] : ld1w (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d +c5604000 : ld1w z0.d, p0/Z, [x0, z0.d, SXTW #2] : ld1w (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d +c5654482 : ld1w z2.d, p1/Z, [x4, z5.d, SXTW #2] : ld1w (%x4,%z5.d,sxtw #2)[4byte] %p1/z -> %z2.d +c56748c4 : ld1w z4.d, p2/Z, [x6, z7.d, SXTW #2] : ld1w (%x6,%z7.d,sxtw #2)[4byte] %p2/z -> %z4.d +c5694906 : ld1w z6.d, p2/Z, [x8, z9.d, SXTW #2] : ld1w (%x8,%z9.d,sxtw #2)[4byte] %p2/z -> %z6.d +c56b4d48 : ld1w z8.d, p3/Z, [x10, z11.d, SXTW #2] : ld1w (%x10,%z11.d,sxtw #2)[4byte] %p3/z -> %z8.d +c56d4d6a : ld1w z10.d, p3/Z, [x11, z13.d, SXTW #2] : ld1w (%x11,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d +c56f51ac : ld1w z12.d, p4/Z, [x13, z15.d, SXTW #2] : ld1w (%x13,%z15.d,sxtw #2)[4byte] %p4/z -> %z12.d +c57151ee : ld1w z14.d, p4/Z, [x15, z17.d, SXTW #2] : ld1w (%x15,%z17.d,sxtw #2)[4byte] %p4/z -> %z14.d +c5735630 : ld1w z16.d, p5/Z, [x17, z19.d, SXTW #2] : ld1w (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d +c5745671 : ld1w z17.d, p5/Z, [x19, z20.d, SXTW #2] : ld1w (%x19,%z20.d,sxtw #2)[4byte] %p5/z -> %z17.d +c57656b3 : ld1w z19.d, p5/Z, [x21, z22.d, SXTW #2] : ld1w (%x21,%z22.d,sxtw #2)[4byte] %p5/z -> %z19.d +c5785af5 : ld1w z21.d, p6/Z, [x23, z24.d, SXTW #2] : ld1w (%x23,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d +c57a5b17 : ld1w z23.d, p6/Z, [x24, z26.d, SXTW #2] : ld1w (%x24,%z26.d,sxtw #2)[4byte] %p6/z -> %z23.d +c57c5f59 : ld1w z25.d, p7/Z, [x26, z28.d, SXTW #2] : ld1w (%x26,%z28.d,sxtw #2)[4byte] %p7/z -> %z25.d +c57e5f9b : ld1w z27.d, p7/Z, [x28, z30.d, SXTW #2] : ld1w 
(%x28,%z30.d,sxtw #2)[4byte] %p7/z -> %z27.d +c57f5fff : ld1w z31.d, p7/Z, [sp, z31.d, SXTW #2] : ld1w (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [.D{, #}] (LD1W-Z.P.AI-D) -c520c000 : ld1w z0.d, p0/Z, [z0.d, #0] : ld1w (%z0.d)[16byte] %p0/z -> %z0.d -c522c482 : ld1w z2.d, p1/Z, [z4.d, #8] : ld1w +0x08(%z4.d)[16byte] %p1/z -> %z2.d -c524c8c4 : ld1w z4.d, p2/Z, [z6.d, #16] : ld1w +0x10(%z6.d)[16byte] %p2/z -> %z4.d -c526c906 : ld1w z6.d, p2/Z, [z8.d, #24] : ld1w +0x18(%z8.d)[16byte] %p2/z -> %z6.d -c528cd48 : ld1w z8.d, p3/Z, [z10.d, #32] : ld1w +0x20(%z10.d)[16byte] %p3/z -> %z8.d -c52acd8a : ld1w z10.d, p3/Z, [z12.d, #40] : ld1w +0x28(%z12.d)[16byte] %p3/z -> %z10.d -c52cd1cc : ld1w z12.d, p4/Z, [z14.d, #48] : ld1w +0x30(%z14.d)[16byte] %p4/z -> %z12.d -c52ed20e : ld1w z14.d, p4/Z, [z16.d, #56] : ld1w +0x38(%z16.d)[16byte] %p4/z -> %z14.d -c530d650 : ld1w z16.d, p5/Z, [z18.d, #64] : ld1w +0x40(%z18.d)[16byte] %p5/z -> %z16.d -c531d671 : ld1w z17.d, p5/Z, [z19.d, #68] : ld1w +0x44(%z19.d)[16byte] %p5/z -> %z17.d -c533d6b3 : ld1w z19.d, p5/Z, [z21.d, #76] : ld1w +0x4c(%z21.d)[16byte] %p5/z -> %z19.d -c535daf5 : ld1w z21.d, p6/Z, [z23.d, #84] : ld1w +0x54(%z23.d)[16byte] %p6/z -> %z21.d -c537db37 : ld1w z23.d, p6/Z, [z25.d, #92] : ld1w +0x5c(%z25.d)[16byte] %p6/z -> %z23.d -c539df79 : ld1w z25.d, p7/Z, [z27.d, #100] : ld1w +0x64(%z27.d)[16byte] %p7/z -> %z25.d -c53bdfbb : ld1w z27.d, p7/Z, [z29.d, #108] : ld1w +0x6c(%z29.d)[16byte] %p7/z -> %z27.d -c53fdfff : ld1w z31.d, p7/Z, [z31.d, #124] : ld1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +c520c000 : ld1w z0.d, p0/Z, [z0.d, #0] : ld1w (%z0.d)[4byte] %p0/z -> %z0.d +c522c482 : ld1w z2.d, p1/Z, [z4.d, #8] : ld1w +0x08(%z4.d)[4byte] %p1/z -> %z2.d +c524c8c4 : ld1w z4.d, p2/Z, [z6.d, #16] : ld1w +0x10(%z6.d)[4byte] %p2/z -> %z4.d +c526c906 : ld1w z6.d, p2/Z, [z8.d, #24] : ld1w +0x18(%z8.d)[4byte] %p2/z -> %z6.d +c528cd48 : ld1w z8.d, p3/Z, [z10.d, #32] : ld1w +0x20(%z10.d)[4byte] %p3/z -> %z8.d +c52acd8a : 
ld1w z10.d, p3/Z, [z12.d, #40] : ld1w +0x28(%z12.d)[4byte] %p3/z -> %z10.d +c52cd1cc : ld1w z12.d, p4/Z, [z14.d, #48] : ld1w +0x30(%z14.d)[4byte] %p4/z -> %z12.d +c52ed20e : ld1w z14.d, p4/Z, [z16.d, #56] : ld1w +0x38(%z16.d)[4byte] %p4/z -> %z14.d +c530d650 : ld1w z16.d, p5/Z, [z18.d, #64] : ld1w +0x40(%z18.d)[4byte] %p5/z -> %z16.d +c531d671 : ld1w z17.d, p5/Z, [z19.d, #68] : ld1w +0x44(%z19.d)[4byte] %p5/z -> %z17.d +c533d6b3 : ld1w z19.d, p5/Z, [z21.d, #76] : ld1w +0x4c(%z21.d)[4byte] %p5/z -> %z19.d +c535daf5 : ld1w z21.d, p6/Z, [z23.d, #84] : ld1w +0x54(%z23.d)[4byte] %p6/z -> %z21.d +c537db37 : ld1w z23.d, p6/Z, [z25.d, #92] : ld1w +0x5c(%z25.d)[4byte] %p6/z -> %z23.d +c539df79 : ld1w z25.d, p7/Z, [z27.d, #100] : ld1w +0x64(%z27.d)[4byte] %p7/z -> %z25.d +c53bdfbb : ld1w z27.d, p7/Z, [z29.d, #108] : ld1w +0x6c(%z29.d)[4byte] %p7/z -> %z27.d +c53fdfff : ld1w z31.d, p7/Z, [z31.d, #124] : ld1w +0x7c(%z31.d)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [, .D] (LD1W-Z.P.BZ-D.64.unscaled) -c540c000 : ld1w z0.d, p0/Z, [x0, z0.d] : ld1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d -c545c482 : ld1w z2.d, p1/Z, [x4, z5.d] : ld1w (%x4,%z5.d)[16byte] %p1/z -> %z2.d -c547c8c4 : ld1w z4.d, p2/Z, [x6, z7.d] : ld1w (%x6,%z7.d)[16byte] %p2/z -> %z4.d -c549c906 : ld1w z6.d, p2/Z, [x8, z9.d] : ld1w (%x8,%z9.d)[16byte] %p2/z -> %z6.d -c54bcd48 : ld1w z8.d, p3/Z, [x10, z11.d] : ld1w (%x10,%z11.d)[16byte] %p3/z -> %z8.d -c54dcd6a : ld1w z10.d, p3/Z, [x11, z13.d] : ld1w (%x11,%z13.d)[16byte] %p3/z -> %z10.d -c54fd1ac : ld1w z12.d, p4/Z, [x13, z15.d] : ld1w (%x13,%z15.d)[16byte] %p4/z -> %z12.d -c551d1ee : ld1w z14.d, p4/Z, [x15, z17.d] : ld1w (%x15,%z17.d)[16byte] %p4/z -> %z14.d -c553d630 : ld1w z16.d, p5/Z, [x17, z19.d] : ld1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d -c554d671 : ld1w z17.d, p5/Z, [x19, z20.d] : ld1w (%x19,%z20.d)[16byte] %p5/z -> %z17.d -c556d6b3 : ld1w z19.d, p5/Z, [x21, z22.d] : ld1w (%x21,%z22.d)[16byte] %p5/z -> %z19.d -c558daf5 : ld1w z21.d, p6/Z, [x23, z24.d] : ld1w 
(%x23,%z24.d)[16byte] %p6/z -> %z21.d -c55adb17 : ld1w z23.d, p6/Z, [x24, z26.d] : ld1w (%x24,%z26.d)[16byte] %p6/z -> %z23.d -c55cdf59 : ld1w z25.d, p7/Z, [x26, z28.d] : ld1w (%x26,%z28.d)[16byte] %p7/z -> %z25.d -c55edf9b : ld1w z27.d, p7/Z, [x28, z30.d] : ld1w (%x28,%z30.d)[16byte] %p7/z -> %z27.d -c55fdfff : ld1w z31.d, p7/Z, [sp, z31.d] : ld1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d +c540c000 : ld1w z0.d, p0/Z, [x0, z0.d] : ld1w (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c545c482 : ld1w z2.d, p1/Z, [x4, z5.d] : ld1w (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c547c8c4 : ld1w z4.d, p2/Z, [x6, z7.d] : ld1w (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c549c906 : ld1w z6.d, p2/Z, [x8, z9.d] : ld1w (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c54bcd48 : ld1w z8.d, p3/Z, [x10, z11.d] : ld1w (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c54dcd6a : ld1w z10.d, p3/Z, [x11, z13.d] : ld1w (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c54fd1ac : ld1w z12.d, p4/Z, [x13, z15.d] : ld1w (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c551d1ee : ld1w z14.d, p4/Z, [x15, z17.d] : ld1w (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c553d630 : ld1w z16.d, p5/Z, [x17, z19.d] : ld1w (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c554d671 : ld1w z17.d, p5/Z, [x19, z20.d] : ld1w (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c556d6b3 : ld1w z19.d, p5/Z, [x21, z22.d] : ld1w (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c558daf5 : ld1w z21.d, p6/Z, [x23, z24.d] : ld1w (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c55adb17 : ld1w z23.d, p6/Z, [x24, z26.d] : ld1w (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c55cdf59 : ld1w z25.d, p7/Z, [x26, z28.d] : ld1w (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c55edf9b : ld1w z27.d, p7/Z, [x28, z30.d] : ld1w (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c55fdfff : ld1w z31.d, p7/Z, [sp, z31.d] : ld1w (%sp,%z31.d)[4byte] %p7/z -> %z31.d # LD1W { .D }, /Z, [, .D, LSL #2] (LD1W-Z.P.BZ-D.64.scaled) -c560c000 : ld1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d -c565c482 : ld1w z2.d, p1/Z, [x4, z5.d, LSL #2] : ld1w (%x4,%z5.d,lsl #2)[16byte] %p1/z -> 
%z2.d -c567c8c4 : ld1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ld1w (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d -c569c906 : ld1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1w (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d -c56bcd48 : ld1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1w (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d -c56dcd6a : ld1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1w (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d -c56fd1ac : ld1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1w (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d -c571d1ee : ld1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1w (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d -c573d630 : ld1w z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d -c574d671 : ld1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1w (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d -c576d6b3 : ld1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1w (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d -c578daf5 : ld1w z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1w (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d -c57adb17 : ld1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1w (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d -c57cdf59 : ld1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1w (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d -c57edf9b : ld1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1w (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d -c57fdfff : ld1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d +c560c000 : ld1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1w (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d +c565c482 : ld1w z2.d, p1/Z, [x4, z5.d, LSL #2] : ld1w (%x4,%z5.d,lsl #2)[4byte] %p1/z -> %z2.d +c567c8c4 : ld1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ld1w (%x6,%z7.d,lsl #2)[4byte] %p2/z -> %z4.d +c569c906 : ld1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1w (%x8,%z9.d,lsl #2)[4byte] %p2/z -> %z6.d +c56bcd48 : ld1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1w (%x10,%z11.d,lsl #2)[4byte] %p3/z -> %z8.d +c56dcd6a : ld1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1w (%x11,%z13.d,lsl 
#2)[4byte] %p3/z -> %z10.d +c56fd1ac : ld1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1w (%x13,%z15.d,lsl #2)[4byte] %p4/z -> %z12.d +c571d1ee : ld1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1w (%x15,%z17.d,lsl #2)[4byte] %p4/z -> %z14.d +c573d630 : ld1w z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1w (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d +c574d671 : ld1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1w (%x19,%z20.d,lsl #2)[4byte] %p5/z -> %z17.d +c576d6b3 : ld1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1w (%x21,%z22.d,lsl #2)[4byte] %p5/z -> %z19.d +c578daf5 : ld1w z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1w (%x23,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d +c57adb17 : ld1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1w (%x24,%z26.d,lsl #2)[4byte] %p6/z -> %z23.d +c57cdf59 : ld1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1w (%x26,%z28.d,lsl #2)[4byte] %p7/z -> %z25.d +c57edf9b : ld1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1w (%x28,%z30.d,lsl #2)[4byte] %p7/z -> %z27.d +c57fdfff : ld1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1w (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d # LD2B { .B, .B }, /Z, [, ] (LD2B-Z.P.BR-Contiguous) -a420c000 : ld2b {z0.b, z1.b}, p0/Z, [x0, x0] : ld2b (%x0,%x0)[64byte] %p0/z -> %z0.b %z1.b -a425c482 : ld2b {z2.b, z3.b}, p1/Z, [x4, x5] : ld2b (%x4,%x5)[64byte] %p1/z -> %z2.b %z3.b -a427c8c4 : ld2b {z4.b, z5.b}, p2/Z, [x6, x7] : ld2b (%x6,%x7)[64byte] %p2/z -> %z4.b %z5.b -a429c906 : ld2b {z6.b, z7.b}, p2/Z, [x8, x9] : ld2b (%x8,%x9)[64byte] %p2/z -> %z6.b %z7.b -a42bcd48 : ld2b {z8.b, z9.b}, p3/Z, [x10, x11] : ld2b (%x10,%x11)[64byte] %p3/z -> %z8.b %z9.b -a42ccd6a : ld2b {z10.b, z11.b}, p3/Z, [x11, x12] : ld2b (%x11,%x12)[64byte] %p3/z -> %z10.b %z11.b -a42ed1ac : ld2b {z12.b, z13.b}, p4/Z, [x13, x14] : ld2b (%x13,%x14)[64byte] %p4/z -> %z12.b %z13.b -a430d1ee : ld2b {z14.b, z15.b}, p4/Z, [x15, x16] : ld2b (%x15,%x16)[64byte] %p4/z -> %z14.b %z15.b -a432d630 : ld2b {z16.b, z17.b}, p5/Z, [x17, x18] : ld2b (%x17,%x18)[64byte] %p5/z -> %z16.b %z17.b -a434d671 : ld2b {z17.b, z18.b}, 
p5/Z, [x19, x20] : ld2b (%x19,%x20)[64byte] %p5/z -> %z17.b %z18.b -a436d6b3 : ld2b {z19.b, z20.b}, p5/Z, [x21, x22] : ld2b (%x21,%x22)[64byte] %p5/z -> %z19.b %z20.b -a438daf5 : ld2b {z21.b, z22.b}, p6/Z, [x23, x24] : ld2b (%x23,%x24)[64byte] %p6/z -> %z21.b %z22.b -a439db17 : ld2b {z23.b, z24.b}, p6/Z, [x24, x25] : ld2b (%x24,%x25)[64byte] %p6/z -> %z23.b %z24.b -a43bdf59 : ld2b {z25.b, z26.b}, p7/Z, [x26, x27] : ld2b (%x26,%x27)[64byte] %p7/z -> %z25.b %z26.b -a43ddf9b : ld2b {z27.b, z28.b}, p7/Z, [x28, x29] : ld2b (%x28,%x29)[64byte] %p7/z -> %z27.b %z28.b -a43edfff : ld2b {z31.b, z0.b}, p7/Z, [sp, x30] : ld2b (%sp,%x30)[64byte] %p7/z -> %z31.b %z0.b +a420c000 : ld2b {z0.b, z1.b}, p0/Z, [x0, x0] : ld2b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b +a425c482 : ld2b {z2.b, z3.b}, p1/Z, [x4, x5] : ld2b (%x4,%x5)[1byte] %p1/z -> %z2.b %z3.b +a427c8c4 : ld2b {z4.b, z5.b}, p2/Z, [x6, x7] : ld2b (%x6,%x7)[1byte] %p2/z -> %z4.b %z5.b +a429c906 : ld2b {z6.b, z7.b}, p2/Z, [x8, x9] : ld2b (%x8,%x9)[1byte] %p2/z -> %z6.b %z7.b +a42bcd48 : ld2b {z8.b, z9.b}, p3/Z, [x10, x11] : ld2b (%x10,%x11)[1byte] %p3/z -> %z8.b %z9.b +a42ccd6a : ld2b {z10.b, z11.b}, p3/Z, [x11, x12] : ld2b (%x11,%x12)[1byte] %p3/z -> %z10.b %z11.b +a42ed1ac : ld2b {z12.b, z13.b}, p4/Z, [x13, x14] : ld2b (%x13,%x14)[1byte] %p4/z -> %z12.b %z13.b +a430d1ee : ld2b {z14.b, z15.b}, p4/Z, [x15, x16] : ld2b (%x15,%x16)[1byte] %p4/z -> %z14.b %z15.b +a432d630 : ld2b {z16.b, z17.b}, p5/Z, [x17, x18] : ld2b (%x17,%x18)[1byte] %p5/z -> %z16.b %z17.b +a434d671 : ld2b {z17.b, z18.b}, p5/Z, [x19, x20] : ld2b (%x19,%x20)[1byte] %p5/z -> %z17.b %z18.b +a436d6b3 : ld2b {z19.b, z20.b}, p5/Z, [x21, x22] : ld2b (%x21,%x22)[1byte] %p5/z -> %z19.b %z20.b +a438daf5 : ld2b {z21.b, z22.b}, p6/Z, [x23, x24] : ld2b (%x23,%x24)[1byte] %p6/z -> %z21.b %z22.b +a439db17 : ld2b {z23.b, z24.b}, p6/Z, [x24, x25] : ld2b (%x24,%x25)[1byte] %p6/z -> %z23.b %z24.b +a43bdf59 : ld2b {z25.b, z26.b}, p7/Z, [x26, x27] : ld2b (%x26,%x27)[1byte] %p7/z -> 
%z25.b %z26.b +a43ddf9b : ld2b {z27.b, z28.b}, p7/Z, [x28, x29] : ld2b (%x28,%x29)[1byte] %p7/z -> %z27.b %z28.b +a43edfff : ld2b {z31.b, z0.b}, p7/Z, [sp, x30] : ld2b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b # LD2B { .B, .B }, /Z, [{, #, MUL VL}] (LD2B-Z.P.BI-Contiguous) -a428e000 : ld2b {z0.b, z1.b}, p0/Z, [x0, #-16, MUL VL] : ld2b -0x0200(%x0)[64byte] %p0/z -> %z0.b %z1.b -a429e482 : ld2b {z2.b, z3.b}, p1/Z, [x4, #-14, MUL VL] : ld2b -0x01c0(%x4)[64byte] %p1/z -> %z2.b %z3.b -a42ae8c4 : ld2b {z4.b, z5.b}, p2/Z, [x6, #-12, MUL VL] : ld2b -0x0180(%x6)[64byte] %p2/z -> %z4.b %z5.b -a42be906 : ld2b {z6.b, z7.b}, p2/Z, [x8, #-10, MUL VL] : ld2b -0x0140(%x8)[64byte] %p2/z -> %z6.b %z7.b -a42ced48 : ld2b {z8.b, z9.b}, p3/Z, [x10, #-8, MUL VL] : ld2b -0x0100(%x10)[64byte] %p3/z -> %z8.b %z9.b -a42ded6a : ld2b {z10.b, z11.b}, p3/Z, [x11, #-6, MUL VL] : ld2b -0xc0(%x11)[64byte] %p3/z -> %z10.b %z11.b -a42ef1ac : ld2b {z12.b, z13.b}, p4/Z, [x13, #-4, MUL VL] : ld2b -0x80(%x13)[64byte] %p4/z -> %z12.b %z13.b -a42ff1ee : ld2b {z14.b, z15.b}, p4/Z, [x15, #-2, MUL VL] : ld2b -0x40(%x15)[64byte] %p4/z -> %z14.b %z15.b -a420f630 : ld2b {z16.b, z17.b}, p5/Z, [x17, #0, MUL VL] : ld2b (%x17)[64byte] %p5/z -> %z16.b %z17.b -a420f671 : ld2b {z17.b, z18.b}, p5/Z, [x19, #0, MUL VL] : ld2b (%x19)[64byte] %p5/z -> %z17.b %z18.b -a421f6b3 : ld2b {z19.b, z20.b}, p5/Z, [x21, #2, MUL VL] : ld2b +0x40(%x21)[64byte] %p5/z -> %z19.b %z20.b -a422faf5 : ld2b {z21.b, z22.b}, p6/Z, [x23, #4, MUL VL] : ld2b +0x80(%x23)[64byte] %p6/z -> %z21.b %z22.b -a423fb17 : ld2b {z23.b, z24.b}, p6/Z, [x24, #6, MUL VL] : ld2b +0xc0(%x24)[64byte] %p6/z -> %z23.b %z24.b -a424ff59 : ld2b {z25.b, z26.b}, p7/Z, [x26, #8, MUL VL] : ld2b +0x0100(%x26)[64byte] %p7/z -> %z25.b %z26.b -a425ff9b : ld2b {z27.b, z28.b}, p7/Z, [x28, #10, MUL VL] : ld2b +0x0140(%x28)[64byte] %p7/z -> %z27.b %z28.b -a427ffff : ld2b {z31.b, z0.b}, p7/Z, [sp, #14, MUL VL] : ld2b +0x01c0(%sp)[64byte] %p7/z -> %z31.b %z0.b +a428e000 : ld2b {z0.b, 
z1.b}, p0/Z, [x0, #-16, MUL VL] : ld2b -0x0200(%x0)[1byte] %p0/z -> %z0.b %z1.b +a429e482 : ld2b {z2.b, z3.b}, p1/Z, [x4, #-14, MUL VL] : ld2b -0x01c0(%x4)[1byte] %p1/z -> %z2.b %z3.b +a42ae8c4 : ld2b {z4.b, z5.b}, p2/Z, [x6, #-12, MUL VL] : ld2b -0x0180(%x6)[1byte] %p2/z -> %z4.b %z5.b +a42be906 : ld2b {z6.b, z7.b}, p2/Z, [x8, #-10, MUL VL] : ld2b -0x0140(%x8)[1byte] %p2/z -> %z6.b %z7.b +a42ced48 : ld2b {z8.b, z9.b}, p3/Z, [x10, #-8, MUL VL] : ld2b -0x0100(%x10)[1byte] %p3/z -> %z8.b %z9.b +a42ded6a : ld2b {z10.b, z11.b}, p3/Z, [x11, #-6, MUL VL] : ld2b -0xc0(%x11)[1byte] %p3/z -> %z10.b %z11.b +a42ef1ac : ld2b {z12.b, z13.b}, p4/Z, [x13, #-4, MUL VL] : ld2b -0x80(%x13)[1byte] %p4/z -> %z12.b %z13.b +a42ff1ee : ld2b {z14.b, z15.b}, p4/Z, [x15, #-2, MUL VL] : ld2b -0x40(%x15)[1byte] %p4/z -> %z14.b %z15.b +a420f630 : ld2b {z16.b, z17.b}, p5/Z, [x17, #0, MUL VL] : ld2b (%x17)[1byte] %p5/z -> %z16.b %z17.b +a420f671 : ld2b {z17.b, z18.b}, p5/Z, [x19, #0, MUL VL] : ld2b (%x19)[1byte] %p5/z -> %z17.b %z18.b +a421f6b3 : ld2b {z19.b, z20.b}, p5/Z, [x21, #2, MUL VL] : ld2b +0x40(%x21)[1byte] %p5/z -> %z19.b %z20.b +a422faf5 : ld2b {z21.b, z22.b}, p6/Z, [x23, #4, MUL VL] : ld2b +0x80(%x23)[1byte] %p6/z -> %z21.b %z22.b +a423fb17 : ld2b {z23.b, z24.b}, p6/Z, [x24, #6, MUL VL] : ld2b +0xc0(%x24)[1byte] %p6/z -> %z23.b %z24.b +a424ff59 : ld2b {z25.b, z26.b}, p7/Z, [x26, #8, MUL VL] : ld2b +0x0100(%x26)[1byte] %p7/z -> %z25.b %z26.b +a425ff9b : ld2b {z27.b, z28.b}, p7/Z, [x28, #10, MUL VL] : ld2b +0x0140(%x28)[1byte] %p7/z -> %z27.b %z28.b +a427ffff : ld2b {z31.b, z0.b}, p7/Z, [sp, #14, MUL VL] : ld2b +0x01c0(%sp)[1byte] %p7/z -> %z31.b %z0.b # LD2D { .D, .D }, /Z, [, , LSL #3] (LD2D-Z.P.BR-Contiguous) -a5a0c000 : ld2d {z0.d, z1.d}, p0/Z, [x0, x0, LSL #3] : ld2d (%x0,%x0,lsl #3)[64byte] %p0/z -> %z0.d %z1.d -a5a5c482 : ld2d {z2.d, z3.d}, p1/Z, [x4, x5, LSL #3] : ld2d (%x4,%x5,lsl #3)[64byte] %p1/z -> %z2.d %z3.d -a5a7c8c4 : ld2d {z4.d, z5.d}, p2/Z, [x6, x7, LSL #3] : ld2d 
(%x6,%x7,lsl #3)[64byte] %p2/z -> %z4.d %z5.d -a5a9c906 : ld2d {z6.d, z7.d}, p2/Z, [x8, x9, LSL #3] : ld2d (%x8,%x9,lsl #3)[64byte] %p2/z -> %z6.d %z7.d -a5abcd48 : ld2d {z8.d, z9.d}, p3/Z, [x10, x11, LSL #3] : ld2d (%x10,%x11,lsl #3)[64byte] %p3/z -> %z8.d %z9.d -a5accd6a : ld2d {z10.d, z11.d}, p3/Z, [x11, x12, LSL #3] : ld2d (%x11,%x12,lsl #3)[64byte] %p3/z -> %z10.d %z11.d -a5aed1ac : ld2d {z12.d, z13.d}, p4/Z, [x13, x14, LSL #3] : ld2d (%x13,%x14,lsl #3)[64byte] %p4/z -> %z12.d %z13.d -a5b0d1ee : ld2d {z14.d, z15.d}, p4/Z, [x15, x16, LSL #3] : ld2d (%x15,%x16,lsl #3)[64byte] %p4/z -> %z14.d %z15.d -a5b2d630 : ld2d {z16.d, z17.d}, p5/Z, [x17, x18, LSL #3] : ld2d (%x17,%x18,lsl #3)[64byte] %p5/z -> %z16.d %z17.d -a5b4d671 : ld2d {z17.d, z18.d}, p5/Z, [x19, x20, LSL #3] : ld2d (%x19,%x20,lsl #3)[64byte] %p5/z -> %z17.d %z18.d -a5b6d6b3 : ld2d {z19.d, z20.d}, p5/Z, [x21, x22, LSL #3] : ld2d (%x21,%x22,lsl #3)[64byte] %p5/z -> %z19.d %z20.d -a5b8daf5 : ld2d {z21.d, z22.d}, p6/Z, [x23, x24, LSL #3] : ld2d (%x23,%x24,lsl #3)[64byte] %p6/z -> %z21.d %z22.d -a5b9db17 : ld2d {z23.d, z24.d}, p6/Z, [x24, x25, LSL #3] : ld2d (%x24,%x25,lsl #3)[64byte] %p6/z -> %z23.d %z24.d -a5bbdf59 : ld2d {z25.d, z26.d}, p7/Z, [x26, x27, LSL #3] : ld2d (%x26,%x27,lsl #3)[64byte] %p7/z -> %z25.d %z26.d -a5bddf9b : ld2d {z27.d, z28.d}, p7/Z, [x28, x29, LSL #3] : ld2d (%x28,%x29,lsl #3)[64byte] %p7/z -> %z27.d %z28.d -a5bedfff : ld2d {z31.d, z0.d}, p7/Z, [sp, x30, LSL #3] : ld2d (%sp,%x30,lsl #3)[64byte] %p7/z -> %z31.d %z0.d +a5a0c000 : ld2d {z0.d, z1.d}, p0/Z, [x0, x0, LSL #3] : ld2d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d +a5a5c482 : ld2d {z2.d, z3.d}, p1/Z, [x4, x5, LSL #3] : ld2d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d %z3.d +a5a7c8c4 : ld2d {z4.d, z5.d}, p2/Z, [x6, x7, LSL #3] : ld2d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d %z5.d +a5a9c906 : ld2d {z6.d, z7.d}, p2/Z, [x8, x9, LSL #3] : ld2d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d %z7.d +a5abcd48 : ld2d {z8.d, z9.d}, p3/Z, [x10, x11, 
LSL #3] : ld2d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d %z9.d +a5accd6a : ld2d {z10.d, z11.d}, p3/Z, [x11, x12, LSL #3] : ld2d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d %z11.d +a5aed1ac : ld2d {z12.d, z13.d}, p4/Z, [x13, x14, LSL #3] : ld2d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d %z13.d +a5b0d1ee : ld2d {z14.d, z15.d}, p4/Z, [x15, x16, LSL #3] : ld2d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d %z15.d +a5b2d630 : ld2d {z16.d, z17.d}, p5/Z, [x17, x18, LSL #3] : ld2d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d +a5b4d671 : ld2d {z17.d, z18.d}, p5/Z, [x19, x20, LSL #3] : ld2d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d %z18.d +a5b6d6b3 : ld2d {z19.d, z20.d}, p5/Z, [x21, x22, LSL #3] : ld2d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d %z20.d +a5b8daf5 : ld2d {z21.d, z22.d}, p6/Z, [x23, x24, LSL #3] : ld2d (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d %z22.d +a5b9db17 : ld2d {z23.d, z24.d}, p6/Z, [x24, x25, LSL #3] : ld2d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d %z24.d +a5bbdf59 : ld2d {z25.d, z26.d}, p7/Z, [x26, x27, LSL #3] : ld2d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d %z26.d +a5bddf9b : ld2d {z27.d, z28.d}, p7/Z, [x28, x29, LSL #3] : ld2d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d %z28.d +a5bedfff : ld2d {z31.d, z0.d}, p7/Z, [sp, x30, LSL #3] : ld2d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d # LD2D { .D, .D }, /Z, [{, #, MUL VL}] (LD2D-Z.P.BI-Contiguous) -a5a8e000 : ld2d {z0.d, z1.d}, p0/Z, [x0, #-16, MUL VL] : ld2d -0x0200(%x0)[64byte] %p0/z -> %z0.d %z1.d -a5a9e482 : ld2d {z2.d, z3.d}, p1/Z, [x4, #-14, MUL VL] : ld2d -0x01c0(%x4)[64byte] %p1/z -> %z2.d %z3.d -a5aae8c4 : ld2d {z4.d, z5.d}, p2/Z, [x6, #-12, MUL VL] : ld2d -0x0180(%x6)[64byte] %p2/z -> %z4.d %z5.d -a5abe906 : ld2d {z6.d, z7.d}, p2/Z, [x8, #-10, MUL VL] : ld2d -0x0140(%x8)[64byte] %p2/z -> %z6.d %z7.d -a5aced48 : ld2d {z8.d, z9.d}, p3/Z, [x10, #-8, MUL VL] : ld2d -0x0100(%x10)[64byte] %p3/z -> %z8.d %z9.d -a5aded6a : ld2d {z10.d, z11.d}, p3/Z, [x11, #-6, MUL VL] : ld2d -0xc0(%x11)[64byte] %p3/z -> 
%z10.d %z11.d -a5aef1ac : ld2d {z12.d, z13.d}, p4/Z, [x13, #-4, MUL VL] : ld2d -0x80(%x13)[64byte] %p4/z -> %z12.d %z13.d -a5aff1ee : ld2d {z14.d, z15.d}, p4/Z, [x15, #-2, MUL VL] : ld2d -0x40(%x15)[64byte] %p4/z -> %z14.d %z15.d -a5a0f630 : ld2d {z16.d, z17.d}, p5/Z, [x17, #0, MUL VL] : ld2d (%x17)[64byte] %p5/z -> %z16.d %z17.d -a5a0f671 : ld2d {z17.d, z18.d}, p5/Z, [x19, #0, MUL VL] : ld2d (%x19)[64byte] %p5/z -> %z17.d %z18.d -a5a1f6b3 : ld2d {z19.d, z20.d}, p5/Z, [x21, #2, MUL VL] : ld2d +0x40(%x21)[64byte] %p5/z -> %z19.d %z20.d -a5a2faf5 : ld2d {z21.d, z22.d}, p6/Z, [x23, #4, MUL VL] : ld2d +0x80(%x23)[64byte] %p6/z -> %z21.d %z22.d -a5a3fb17 : ld2d {z23.d, z24.d}, p6/Z, [x24, #6, MUL VL] : ld2d +0xc0(%x24)[64byte] %p6/z -> %z23.d %z24.d -a5a4ff59 : ld2d {z25.d, z26.d}, p7/Z, [x26, #8, MUL VL] : ld2d +0x0100(%x26)[64byte] %p7/z -> %z25.d %z26.d -a5a5ff9b : ld2d {z27.d, z28.d}, p7/Z, [x28, #10, MUL VL] : ld2d +0x0140(%x28)[64byte] %p7/z -> %z27.d %z28.d -a5a7ffff : ld2d {z31.d, z0.d}, p7/Z, [sp, #14, MUL VL] : ld2d +0x01c0(%sp)[64byte] %p7/z -> %z31.d %z0.d +a5a8e000 : ld2d {z0.d, z1.d}, p0/Z, [x0, #-16, MUL VL] : ld2d -0x0200(%x0)[8byte] %p0/z -> %z0.d %z1.d +a5a9e482 : ld2d {z2.d, z3.d}, p1/Z, [x4, #-14, MUL VL] : ld2d -0x01c0(%x4)[8byte] %p1/z -> %z2.d %z3.d +a5aae8c4 : ld2d {z4.d, z5.d}, p2/Z, [x6, #-12, MUL VL] : ld2d -0x0180(%x6)[8byte] %p2/z -> %z4.d %z5.d +a5abe906 : ld2d {z6.d, z7.d}, p2/Z, [x8, #-10, MUL VL] : ld2d -0x0140(%x8)[8byte] %p2/z -> %z6.d %z7.d +a5aced48 : ld2d {z8.d, z9.d}, p3/Z, [x10, #-8, MUL VL] : ld2d -0x0100(%x10)[8byte] %p3/z -> %z8.d %z9.d +a5aded6a : ld2d {z10.d, z11.d}, p3/Z, [x11, #-6, MUL VL] : ld2d -0xc0(%x11)[8byte] %p3/z -> %z10.d %z11.d +a5aef1ac : ld2d {z12.d, z13.d}, p4/Z, [x13, #-4, MUL VL] : ld2d -0x80(%x13)[8byte] %p4/z -> %z12.d %z13.d +a5aff1ee : ld2d {z14.d, z15.d}, p4/Z, [x15, #-2, MUL VL] : ld2d -0x40(%x15)[8byte] %p4/z -> %z14.d %z15.d +a5a0f630 : ld2d {z16.d, z17.d}, p5/Z, [x17, #0, MUL VL] : ld2d (%x17)[8byte] 
%p5/z -> %z16.d %z17.d +a5a0f671 : ld2d {z17.d, z18.d}, p5/Z, [x19, #0, MUL VL] : ld2d (%x19)[8byte] %p5/z -> %z17.d %z18.d +a5a1f6b3 : ld2d {z19.d, z20.d}, p5/Z, [x21, #2, MUL VL] : ld2d +0x40(%x21)[8byte] %p5/z -> %z19.d %z20.d +a5a2faf5 : ld2d {z21.d, z22.d}, p6/Z, [x23, #4, MUL VL] : ld2d +0x80(%x23)[8byte] %p6/z -> %z21.d %z22.d +a5a3fb17 : ld2d {z23.d, z24.d}, p6/Z, [x24, #6, MUL VL] : ld2d +0xc0(%x24)[8byte] %p6/z -> %z23.d %z24.d +a5a4ff59 : ld2d {z25.d, z26.d}, p7/Z, [x26, #8, MUL VL] : ld2d +0x0100(%x26)[8byte] %p7/z -> %z25.d %z26.d +a5a5ff9b : ld2d {z27.d, z28.d}, p7/Z, [x28, #10, MUL VL] : ld2d +0x0140(%x28)[8byte] %p7/z -> %z27.d %z28.d +a5a7ffff : ld2d {z31.d, z0.d}, p7/Z, [sp, #14, MUL VL] : ld2d +0x01c0(%sp)[8byte] %p7/z -> %z31.d %z0.d # LD2H { .H, .H }, /Z, [, , LSL #1] (LD2H-Z.P.BR-Contiguous) -a4a0c000 : ld2h {z0.h, z1.h}, p0/Z, [x0, x0, LSL #1] : ld2h (%x0,%x0,lsl #1)[64byte] %p0/z -> %z0.h %z1.h -a4a5c482 : ld2h {z2.h, z3.h}, p1/Z, [x4, x5, LSL #1] : ld2h (%x4,%x5,lsl #1)[64byte] %p1/z -> %z2.h %z3.h -a4a7c8c4 : ld2h {z4.h, z5.h}, p2/Z, [x6, x7, LSL #1] : ld2h (%x6,%x7,lsl #1)[64byte] %p2/z -> %z4.h %z5.h -a4a9c906 : ld2h {z6.h, z7.h}, p2/Z, [x8, x9, LSL #1] : ld2h (%x8,%x9,lsl #1)[64byte] %p2/z -> %z6.h %z7.h -a4abcd48 : ld2h {z8.h, z9.h}, p3/Z, [x10, x11, LSL #1] : ld2h (%x10,%x11,lsl #1)[64byte] %p3/z -> %z8.h %z9.h -a4accd6a : ld2h {z10.h, z11.h}, p3/Z, [x11, x12, LSL #1] : ld2h (%x11,%x12,lsl #1)[64byte] %p3/z -> %z10.h %z11.h -a4aed1ac : ld2h {z12.h, z13.h}, p4/Z, [x13, x14, LSL #1] : ld2h (%x13,%x14,lsl #1)[64byte] %p4/z -> %z12.h %z13.h -a4b0d1ee : ld2h {z14.h, z15.h}, p4/Z, [x15, x16, LSL #1] : ld2h (%x15,%x16,lsl #1)[64byte] %p4/z -> %z14.h %z15.h -a4b2d630 : ld2h {z16.h, z17.h}, p5/Z, [x17, x18, LSL #1] : ld2h (%x17,%x18,lsl #1)[64byte] %p5/z -> %z16.h %z17.h -a4b4d671 : ld2h {z17.h, z18.h}, p5/Z, [x19, x20, LSL #1] : ld2h (%x19,%x20,lsl #1)[64byte] %p5/z -> %z17.h %z18.h -a4b6d6b3 : ld2h {z19.h, z20.h}, p5/Z, [x21, x22, LSL #1] : 
ld2h (%x21,%x22,lsl #1)[64byte] %p5/z -> %z19.h %z20.h -a4b8daf5 : ld2h {z21.h, z22.h}, p6/Z, [x23, x24, LSL #1] : ld2h (%x23,%x24,lsl #1)[64byte] %p6/z -> %z21.h %z22.h -a4b9db17 : ld2h {z23.h, z24.h}, p6/Z, [x24, x25, LSL #1] : ld2h (%x24,%x25,lsl #1)[64byte] %p6/z -> %z23.h %z24.h -a4bbdf59 : ld2h {z25.h, z26.h}, p7/Z, [x26, x27, LSL #1] : ld2h (%x26,%x27,lsl #1)[64byte] %p7/z -> %z25.h %z26.h -a4bddf9b : ld2h {z27.h, z28.h}, p7/Z, [x28, x29, LSL #1] : ld2h (%x28,%x29,lsl #1)[64byte] %p7/z -> %z27.h %z28.h -a4bedfff : ld2h {z31.h, z0.h}, p7/Z, [sp, x30, LSL #1] : ld2h (%sp,%x30,lsl #1)[64byte] %p7/z -> %z31.h %z0.h +a4a0c000 : ld2h {z0.h, z1.h}, p0/Z, [x0, x0, LSL #1] : ld2h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h %z1.h +a4a5c482 : ld2h {z2.h, z3.h}, p1/Z, [x4, x5, LSL #1] : ld2h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h %z3.h +a4a7c8c4 : ld2h {z4.h, z5.h}, p2/Z, [x6, x7, LSL #1] : ld2h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h %z5.h +a4a9c906 : ld2h {z6.h, z7.h}, p2/Z, [x8, x9, LSL #1] : ld2h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h %z7.h +a4abcd48 : ld2h {z8.h, z9.h}, p3/Z, [x10, x11, LSL #1] : ld2h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h %z9.h +a4accd6a : ld2h {z10.h, z11.h}, p3/Z, [x11, x12, LSL #1] : ld2h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h %z11.h +a4aed1ac : ld2h {z12.h, z13.h}, p4/Z, [x13, x14, LSL #1] : ld2h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h %z13.h +a4b0d1ee : ld2h {z14.h, z15.h}, p4/Z, [x15, x16, LSL #1] : ld2h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h %z15.h +a4b2d630 : ld2h {z16.h, z17.h}, p5/Z, [x17, x18, LSL #1] : ld2h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h +a4b4d671 : ld2h {z17.h, z18.h}, p5/Z, [x19, x20, LSL #1] : ld2h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h %z18.h +a4b6d6b3 : ld2h {z19.h, z20.h}, p5/Z, [x21, x22, LSL #1] : ld2h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h %z20.h +a4b8daf5 : ld2h {z21.h, z22.h}, p6/Z, [x23, x24, LSL #1] : ld2h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h %z22.h +a4b9db17 : ld2h {z23.h, z24.h}, p6/Z, 
[x24, x25, LSL #1] : ld2h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h %z24.h +a4bbdf59 : ld2h {z25.h, z26.h}, p7/Z, [x26, x27, LSL #1] : ld2h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h %z26.h +a4bddf9b : ld2h {z27.h, z28.h}, p7/Z, [x28, x29, LSL #1] : ld2h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h %z28.h +a4bedfff : ld2h {z31.h, z0.h}, p7/Z, [sp, x30, LSL #1] : ld2h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h # LD2H { .H, .H }, /Z, [{, #, MUL VL}] (LD2H-Z.P.BI-Contiguous) -a4a8e000 : ld2h {z0.h, z1.h}, p0/Z, [x0, #-16, MUL VL] : ld2h -0x0200(%x0)[64byte] %p0/z -> %z0.h %z1.h -a4a9e482 : ld2h {z2.h, z3.h}, p1/Z, [x4, #-14, MUL VL] : ld2h -0x01c0(%x4)[64byte] %p1/z -> %z2.h %z3.h -a4aae8c4 : ld2h {z4.h, z5.h}, p2/Z, [x6, #-12, MUL VL] : ld2h -0x0180(%x6)[64byte] %p2/z -> %z4.h %z5.h -a4abe906 : ld2h {z6.h, z7.h}, p2/Z, [x8, #-10, MUL VL] : ld2h -0x0140(%x8)[64byte] %p2/z -> %z6.h %z7.h -a4aced48 : ld2h {z8.h, z9.h}, p3/Z, [x10, #-8, MUL VL] : ld2h -0x0100(%x10)[64byte] %p3/z -> %z8.h %z9.h -a4aded6a : ld2h {z10.h, z11.h}, p3/Z, [x11, #-6, MUL VL] : ld2h -0xc0(%x11)[64byte] %p3/z -> %z10.h %z11.h -a4aef1ac : ld2h {z12.h, z13.h}, p4/Z, [x13, #-4, MUL VL] : ld2h -0x80(%x13)[64byte] %p4/z -> %z12.h %z13.h -a4aff1ee : ld2h {z14.h, z15.h}, p4/Z, [x15, #-2, MUL VL] : ld2h -0x40(%x15)[64byte] %p4/z -> %z14.h %z15.h -a4a0f630 : ld2h {z16.h, z17.h}, p5/Z, [x17, #0, MUL VL] : ld2h (%x17)[64byte] %p5/z -> %z16.h %z17.h -a4a0f671 : ld2h {z17.h, z18.h}, p5/Z, [x19, #0, MUL VL] : ld2h (%x19)[64byte] %p5/z -> %z17.h %z18.h -a4a1f6b3 : ld2h {z19.h, z20.h}, p5/Z, [x21, #2, MUL VL] : ld2h +0x40(%x21)[64byte] %p5/z -> %z19.h %z20.h -a4a2faf5 : ld2h {z21.h, z22.h}, p6/Z, [x23, #4, MUL VL] : ld2h +0x80(%x23)[64byte] %p6/z -> %z21.h %z22.h -a4a3fb17 : ld2h {z23.h, z24.h}, p6/Z, [x24, #6, MUL VL] : ld2h +0xc0(%x24)[64byte] %p6/z -> %z23.h %z24.h -a4a4ff59 : ld2h {z25.h, z26.h}, p7/Z, [x26, #8, MUL VL] : ld2h +0x0100(%x26)[64byte] %p7/z -> %z25.h %z26.h -a4a5ff9b : ld2h {z27.h, z28.h}, p7/Z, 
[x28, #10, MUL VL] : ld2h +0x0140(%x28)[64byte] %p7/z -> %z27.h %z28.h -a4a7ffff : ld2h {z31.h, z0.h}, p7/Z, [sp, #14, MUL VL] : ld2h +0x01c0(%sp)[64byte] %p7/z -> %z31.h %z0.h +a4a8e000 : ld2h {z0.h, z1.h}, p0/Z, [x0, #-16, MUL VL] : ld2h -0x0200(%x0)[2byte] %p0/z -> %z0.h %z1.h +a4a9e482 : ld2h {z2.h, z3.h}, p1/Z, [x4, #-14, MUL VL] : ld2h -0x01c0(%x4)[2byte] %p1/z -> %z2.h %z3.h +a4aae8c4 : ld2h {z4.h, z5.h}, p2/Z, [x6, #-12, MUL VL] : ld2h -0x0180(%x6)[2byte] %p2/z -> %z4.h %z5.h +a4abe906 : ld2h {z6.h, z7.h}, p2/Z, [x8, #-10, MUL VL] : ld2h -0x0140(%x8)[2byte] %p2/z -> %z6.h %z7.h +a4aced48 : ld2h {z8.h, z9.h}, p3/Z, [x10, #-8, MUL VL] : ld2h -0x0100(%x10)[2byte] %p3/z -> %z8.h %z9.h +a4aded6a : ld2h {z10.h, z11.h}, p3/Z, [x11, #-6, MUL VL] : ld2h -0xc0(%x11)[2byte] %p3/z -> %z10.h %z11.h +a4aef1ac : ld2h {z12.h, z13.h}, p4/Z, [x13, #-4, MUL VL] : ld2h -0x80(%x13)[2byte] %p4/z -> %z12.h %z13.h +a4aff1ee : ld2h {z14.h, z15.h}, p4/Z, [x15, #-2, MUL VL] : ld2h -0x40(%x15)[2byte] %p4/z -> %z14.h %z15.h +a4a0f630 : ld2h {z16.h, z17.h}, p5/Z, [x17, #0, MUL VL] : ld2h (%x17)[2byte] %p5/z -> %z16.h %z17.h +a4a0f671 : ld2h {z17.h, z18.h}, p5/Z, [x19, #0, MUL VL] : ld2h (%x19)[2byte] %p5/z -> %z17.h %z18.h +a4a1f6b3 : ld2h {z19.h, z20.h}, p5/Z, [x21, #2, MUL VL] : ld2h +0x40(%x21)[2byte] %p5/z -> %z19.h %z20.h +a4a2faf5 : ld2h {z21.h, z22.h}, p6/Z, [x23, #4, MUL VL] : ld2h +0x80(%x23)[2byte] %p6/z -> %z21.h %z22.h +a4a3fb17 : ld2h {z23.h, z24.h}, p6/Z, [x24, #6, MUL VL] : ld2h +0xc0(%x24)[2byte] %p6/z -> %z23.h %z24.h +a4a4ff59 : ld2h {z25.h, z26.h}, p7/Z, [x26, #8, MUL VL] : ld2h +0x0100(%x26)[2byte] %p7/z -> %z25.h %z26.h +a4a5ff9b : ld2h {z27.h, z28.h}, p7/Z, [x28, #10, MUL VL] : ld2h +0x0140(%x28)[2byte] %p7/z -> %z27.h %z28.h +a4a7ffff : ld2h {z31.h, z0.h}, p7/Z, [sp, #14, MUL VL] : ld2h +0x01c0(%sp)[2byte] %p7/z -> %z31.h %z0.h # LD2W { .S, .S }, /Z, [, , LSL #2] (LD2W-Z.P.BR-Contiguous) -a520c000 : ld2w {z0.s, z1.s}, p0/Z, [x0, x0, LSL #2] : ld2w (%x0,%x0,lsl 
#2)[64byte] %p0/z -> %z0.s %z1.s -a525c482 : ld2w {z2.s, z3.s}, p1/Z, [x4, x5, LSL #2] : ld2w (%x4,%x5,lsl #2)[64byte] %p1/z -> %z2.s %z3.s -a527c8c4 : ld2w {z4.s, z5.s}, p2/Z, [x6, x7, LSL #2] : ld2w (%x6,%x7,lsl #2)[64byte] %p2/z -> %z4.s %z5.s -a529c906 : ld2w {z6.s, z7.s}, p2/Z, [x8, x9, LSL #2] : ld2w (%x8,%x9,lsl #2)[64byte] %p2/z -> %z6.s %z7.s -a52bcd48 : ld2w {z8.s, z9.s}, p3/Z, [x10, x11, LSL #2] : ld2w (%x10,%x11,lsl #2)[64byte] %p3/z -> %z8.s %z9.s -a52ccd6a : ld2w {z10.s, z11.s}, p3/Z, [x11, x12, LSL #2] : ld2w (%x11,%x12,lsl #2)[64byte] %p3/z -> %z10.s %z11.s -a52ed1ac : ld2w {z12.s, z13.s}, p4/Z, [x13, x14, LSL #2] : ld2w (%x13,%x14,lsl #2)[64byte] %p4/z -> %z12.s %z13.s -a530d1ee : ld2w {z14.s, z15.s}, p4/Z, [x15, x16, LSL #2] : ld2w (%x15,%x16,lsl #2)[64byte] %p4/z -> %z14.s %z15.s -a532d630 : ld2w {z16.s, z17.s}, p5/Z, [x17, x18, LSL #2] : ld2w (%x17,%x18,lsl #2)[64byte] %p5/z -> %z16.s %z17.s -a534d671 : ld2w {z17.s, z18.s}, p5/Z, [x19, x20, LSL #2] : ld2w (%x19,%x20,lsl #2)[64byte] %p5/z -> %z17.s %z18.s -a536d6b3 : ld2w {z19.s, z20.s}, p5/Z, [x21, x22, LSL #2] : ld2w (%x21,%x22,lsl #2)[64byte] %p5/z -> %z19.s %z20.s -a538daf5 : ld2w {z21.s, z22.s}, p6/Z, [x23, x24, LSL #2] : ld2w (%x23,%x24,lsl #2)[64byte] %p6/z -> %z21.s %z22.s -a539db17 : ld2w {z23.s, z24.s}, p6/Z, [x24, x25, LSL #2] : ld2w (%x24,%x25,lsl #2)[64byte] %p6/z -> %z23.s %z24.s -a53bdf59 : ld2w {z25.s, z26.s}, p7/Z, [x26, x27, LSL #2] : ld2w (%x26,%x27,lsl #2)[64byte] %p7/z -> %z25.s %z26.s -a53ddf9b : ld2w {z27.s, z28.s}, p7/Z, [x28, x29, LSL #2] : ld2w (%x28,%x29,lsl #2)[64byte] %p7/z -> %z27.s %z28.s -a53edfff : ld2w {z31.s, z0.s}, p7/Z, [sp, x30, LSL #2] : ld2w (%sp,%x30,lsl #2)[64byte] %p7/z -> %z31.s %z0.s +a520c000 : ld2w {z0.s, z1.s}, p0/Z, [x0, x0, LSL #2] : ld2w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s +a525c482 : ld2w {z2.s, z3.s}, p1/Z, [x4, x5, LSL #2] : ld2w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s %z3.s +a527c8c4 : ld2w {z4.s, z5.s}, p2/Z, [x6, x7, LSL #2] : ld2w 
(%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s %z5.s +a529c906 : ld2w {z6.s, z7.s}, p2/Z, [x8, x9, LSL #2] : ld2w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s %z7.s +a52bcd48 : ld2w {z8.s, z9.s}, p3/Z, [x10, x11, LSL #2] : ld2w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s %z9.s +a52ccd6a : ld2w {z10.s, z11.s}, p3/Z, [x11, x12, LSL #2] : ld2w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s %z11.s +a52ed1ac : ld2w {z12.s, z13.s}, p4/Z, [x13, x14, LSL #2] : ld2w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s %z13.s +a530d1ee : ld2w {z14.s, z15.s}, p4/Z, [x15, x16, LSL #2] : ld2w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s %z15.s +a532d630 : ld2w {z16.s, z17.s}, p5/Z, [x17, x18, LSL #2] : ld2w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s +a534d671 : ld2w {z17.s, z18.s}, p5/Z, [x19, x20, LSL #2] : ld2w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s %z18.s +a536d6b3 : ld2w {z19.s, z20.s}, p5/Z, [x21, x22, LSL #2] : ld2w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s %z20.s +a538daf5 : ld2w {z21.s, z22.s}, p6/Z, [x23, x24, LSL #2] : ld2w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s %z22.s +a539db17 : ld2w {z23.s, z24.s}, p6/Z, [x24, x25, LSL #2] : ld2w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s %z24.s +a53bdf59 : ld2w {z25.s, z26.s}, p7/Z, [x26, x27, LSL #2] : ld2w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s %z26.s +a53ddf9b : ld2w {z27.s, z28.s}, p7/Z, [x28, x29, LSL #2] : ld2w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s %z28.s +a53edfff : ld2w {z31.s, z0.s}, p7/Z, [sp, x30, LSL #2] : ld2w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s # LD2W { .S, .S }, /Z, [{, #, MUL VL}] (LD2W-Z.P.BI-Contiguous) -a528e000 : ld2w {z0.s, z1.s}, p0/Z, [x0, #-16, MUL VL] : ld2w -0x0200(%x0)[64byte] %p0/z -> %z0.s %z1.s -a529e482 : ld2w {z2.s, z3.s}, p1/Z, [x4, #-14, MUL VL] : ld2w -0x01c0(%x4)[64byte] %p1/z -> %z2.s %z3.s -a52ae8c4 : ld2w {z4.s, z5.s}, p2/Z, [x6, #-12, MUL VL] : ld2w -0x0180(%x6)[64byte] %p2/z -> %z4.s %z5.s -a52be906 : ld2w {z6.s, z7.s}, p2/Z, [x8, #-10, MUL VL] : ld2w -0x0140(%x8)[64byte] %p2/z -> %z6.s %z7.s 
-a52ced48 : ld2w {z8.s, z9.s}, p3/Z, [x10, #-8, MUL VL] : ld2w -0x0100(%x10)[64byte] %p3/z -> %z8.s %z9.s -a52ded6a : ld2w {z10.s, z11.s}, p3/Z, [x11, #-6, MUL VL] : ld2w -0xc0(%x11)[64byte] %p3/z -> %z10.s %z11.s -a52ef1ac : ld2w {z12.s, z13.s}, p4/Z, [x13, #-4, MUL VL] : ld2w -0x80(%x13)[64byte] %p4/z -> %z12.s %z13.s -a52ff1ee : ld2w {z14.s, z15.s}, p4/Z, [x15, #-2, MUL VL] : ld2w -0x40(%x15)[64byte] %p4/z -> %z14.s %z15.s -a520f630 : ld2w {z16.s, z17.s}, p5/Z, [x17, #0, MUL VL] : ld2w (%x17)[64byte] %p5/z -> %z16.s %z17.s -a520f671 : ld2w {z17.s, z18.s}, p5/Z, [x19, #0, MUL VL] : ld2w (%x19)[64byte] %p5/z -> %z17.s %z18.s -a521f6b3 : ld2w {z19.s, z20.s}, p5/Z, [x21, #2, MUL VL] : ld2w +0x40(%x21)[64byte] %p5/z -> %z19.s %z20.s -a522faf5 : ld2w {z21.s, z22.s}, p6/Z, [x23, #4, MUL VL] : ld2w +0x80(%x23)[64byte] %p6/z -> %z21.s %z22.s -a523fb17 : ld2w {z23.s, z24.s}, p6/Z, [x24, #6, MUL VL] : ld2w +0xc0(%x24)[64byte] %p6/z -> %z23.s %z24.s -a524ff59 : ld2w {z25.s, z26.s}, p7/Z, [x26, #8, MUL VL] : ld2w +0x0100(%x26)[64byte] %p7/z -> %z25.s %z26.s -a525ff9b : ld2w {z27.s, z28.s}, p7/Z, [x28, #10, MUL VL] : ld2w +0x0140(%x28)[64byte] %p7/z -> %z27.s %z28.s -a527ffff : ld2w {z31.s, z0.s}, p7/Z, [sp, #14, MUL VL] : ld2w +0x01c0(%sp)[64byte] %p7/z -> %z31.s %z0.s +a528e000 : ld2w {z0.s, z1.s}, p0/Z, [x0, #-16, MUL VL] : ld2w -0x0200(%x0)[4byte] %p0/z -> %z0.s %z1.s +a529e482 : ld2w {z2.s, z3.s}, p1/Z, [x4, #-14, MUL VL] : ld2w -0x01c0(%x4)[4byte] %p1/z -> %z2.s %z3.s +a52ae8c4 : ld2w {z4.s, z5.s}, p2/Z, [x6, #-12, MUL VL] : ld2w -0x0180(%x6)[4byte] %p2/z -> %z4.s %z5.s +a52be906 : ld2w {z6.s, z7.s}, p2/Z, [x8, #-10, MUL VL] : ld2w -0x0140(%x8)[4byte] %p2/z -> %z6.s %z7.s +a52ced48 : ld2w {z8.s, z9.s}, p3/Z, [x10, #-8, MUL VL] : ld2w -0x0100(%x10)[4byte] %p3/z -> %z8.s %z9.s +a52ded6a : ld2w {z10.s, z11.s}, p3/Z, [x11, #-6, MUL VL] : ld2w -0xc0(%x11)[4byte] %p3/z -> %z10.s %z11.s +a52ef1ac : ld2w {z12.s, z13.s}, p4/Z, [x13, #-4, MUL VL] : ld2w -0x80(%x13)[4byte] %p4/z 
-> %z12.s %z13.s +a52ff1ee : ld2w {z14.s, z15.s}, p4/Z, [x15, #-2, MUL VL] : ld2w -0x40(%x15)[4byte] %p4/z -> %z14.s %z15.s +a520f630 : ld2w {z16.s, z17.s}, p5/Z, [x17, #0, MUL VL] : ld2w (%x17)[4byte] %p5/z -> %z16.s %z17.s +a520f671 : ld2w {z17.s, z18.s}, p5/Z, [x19, #0, MUL VL] : ld2w (%x19)[4byte] %p5/z -> %z17.s %z18.s +a521f6b3 : ld2w {z19.s, z20.s}, p5/Z, [x21, #2, MUL VL] : ld2w +0x40(%x21)[4byte] %p5/z -> %z19.s %z20.s +a522faf5 : ld2w {z21.s, z22.s}, p6/Z, [x23, #4, MUL VL] : ld2w +0x80(%x23)[4byte] %p6/z -> %z21.s %z22.s +a523fb17 : ld2w {z23.s, z24.s}, p6/Z, [x24, #6, MUL VL] : ld2w +0xc0(%x24)[4byte] %p6/z -> %z23.s %z24.s +a524ff59 : ld2w {z25.s, z26.s}, p7/Z, [x26, #8, MUL VL] : ld2w +0x0100(%x26)[4byte] %p7/z -> %z25.s %z26.s +a525ff9b : ld2w {z27.s, z28.s}, p7/Z, [x28, #10, MUL VL] : ld2w +0x0140(%x28)[4byte] %p7/z -> %z27.s %z28.s +a527ffff : ld2w {z31.s, z0.s}, p7/Z, [sp, #14, MUL VL] : ld2w +0x01c0(%sp)[4byte] %p7/z -> %z31.s %z0.s # LD3B { .B, .B, .B }, /Z, [, ] (LD3B-Z.P.BR-Contiguous) -a440c000 : ld3b {z0.b, z1.b, z2.b}, p0/Z, [x0, x0] : ld3b (%x0,%x0)[96byte] %p0/z -> %z0.b %z1.b %z2.b -a445c482 : ld3b {z2.b, z3.b, z4.b}, p1/Z, [x4, x5] : ld3b (%x4,%x5)[96byte] %p1/z -> %z2.b %z3.b %z4.b -a447c8c4 : ld3b {z4.b, z5.b, z6.b}, p2/Z, [x6, x7] : ld3b (%x6,%x7)[96byte] %p2/z -> %z4.b %z5.b %z6.b -a449c906 : ld3b {z6.b, z7.b, z8.b}, p2/Z, [x8, x9] : ld3b (%x8,%x9)[96byte] %p2/z -> %z6.b %z7.b %z8.b -a44bcd48 : ld3b {z8.b, z9.b, z10.b}, p3/Z, [x10, x11] : ld3b (%x10,%x11)[96byte] %p3/z -> %z8.b %z9.b %z10.b -a44ccd6a : ld3b {z10.b, z11.b, z12.b}, p3/Z, [x11, x12] : ld3b (%x11,%x12)[96byte] %p3/z -> %z10.b %z11.b %z12.b -a44ed1ac : ld3b {z12.b, z13.b, z14.b}, p4/Z, [x13, x14] : ld3b (%x13,%x14)[96byte] %p4/z -> %z12.b %z13.b %z14.b -a450d1ee : ld3b {z14.b, z15.b, z16.b}, p4/Z, [x15, x16] : ld3b (%x15,%x16)[96byte] %p4/z -> %z14.b %z15.b %z16.b -a452d630 : ld3b {z16.b, z17.b, z18.b}, p5/Z, [x17, x18] : ld3b (%x17,%x18)[96byte] %p5/z -> %z16.b %z17.b 
%z18.b -a454d671 : ld3b {z17.b, z18.b, z19.b}, p5/Z, [x19, x20] : ld3b (%x19,%x20)[96byte] %p5/z -> %z17.b %z18.b %z19.b -a456d6b3 : ld3b {z19.b, z20.b, z21.b}, p5/Z, [x21, x22] : ld3b (%x21,%x22)[96byte] %p5/z -> %z19.b %z20.b %z21.b -a458daf5 : ld3b {z21.b, z22.b, z23.b}, p6/Z, [x23, x24] : ld3b (%x23,%x24)[96byte] %p6/z -> %z21.b %z22.b %z23.b -a459db17 : ld3b {z23.b, z24.b, z25.b}, p6/Z, [x24, x25] : ld3b (%x24,%x25)[96byte] %p6/z -> %z23.b %z24.b %z25.b -a45bdf59 : ld3b {z25.b, z26.b, z27.b}, p7/Z, [x26, x27] : ld3b (%x26,%x27)[96byte] %p7/z -> %z25.b %z26.b %z27.b -a45ddf9b : ld3b {z27.b, z28.b, z29.b}, p7/Z, [x28, x29] : ld3b (%x28,%x29)[96byte] %p7/z -> %z27.b %z28.b %z29.b -a45edfff : ld3b {z31.b, z0.b, z1.b}, p7/Z, [sp, x30] : ld3b (%sp,%x30)[96byte] %p7/z -> %z31.b %z0.b %z1.b +a440c000 : ld3b {z0.b, z1.b, z2.b}, p0/Z, [x0, x0] : ld3b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b +a445c482 : ld3b {z2.b, z3.b, z4.b}, p1/Z, [x4, x5] : ld3b (%x4,%x5)[1byte] %p1/z -> %z2.b %z3.b %z4.b +a447c8c4 : ld3b {z4.b, z5.b, z6.b}, p2/Z, [x6, x7] : ld3b (%x6,%x7)[1byte] %p2/z -> %z4.b %z5.b %z6.b +a449c906 : ld3b {z6.b, z7.b, z8.b}, p2/Z, [x8, x9] : ld3b (%x8,%x9)[1byte] %p2/z -> %z6.b %z7.b %z8.b +a44bcd48 : ld3b {z8.b, z9.b, z10.b}, p3/Z, [x10, x11] : ld3b (%x10,%x11)[1byte] %p3/z -> %z8.b %z9.b %z10.b +a44ccd6a : ld3b {z10.b, z11.b, z12.b}, p3/Z, [x11, x12] : ld3b (%x11,%x12)[1byte] %p3/z -> %z10.b %z11.b %z12.b +a44ed1ac : ld3b {z12.b, z13.b, z14.b}, p4/Z, [x13, x14] : ld3b (%x13,%x14)[1byte] %p4/z -> %z12.b %z13.b %z14.b +a450d1ee : ld3b {z14.b, z15.b, z16.b}, p4/Z, [x15, x16] : ld3b (%x15,%x16)[1byte] %p4/z -> %z14.b %z15.b %z16.b +a452d630 : ld3b {z16.b, z17.b, z18.b}, p5/Z, [x17, x18] : ld3b (%x17,%x18)[1byte] %p5/z -> %z16.b %z17.b %z18.b +a454d671 : ld3b {z17.b, z18.b, z19.b}, p5/Z, [x19, x20] : ld3b (%x19,%x20)[1byte] %p5/z -> %z17.b %z18.b %z19.b +a456d6b3 : ld3b {z19.b, z20.b, z21.b}, p5/Z, [x21, x22] : ld3b (%x21,%x22)[1byte] %p5/z -> %z19.b %z20.b %z21.b 
+a458daf5 : ld3b {z21.b, z22.b, z23.b}, p6/Z, [x23, x24] : ld3b (%x23,%x24)[1byte] %p6/z -> %z21.b %z22.b %z23.b +a459db17 : ld3b {z23.b, z24.b, z25.b}, p6/Z, [x24, x25] : ld3b (%x24,%x25)[1byte] %p6/z -> %z23.b %z24.b %z25.b +a45bdf59 : ld3b {z25.b, z26.b, z27.b}, p7/Z, [x26, x27] : ld3b (%x26,%x27)[1byte] %p7/z -> %z25.b %z26.b %z27.b +a45ddf9b : ld3b {z27.b, z28.b, z29.b}, p7/Z, [x28, x29] : ld3b (%x28,%x29)[1byte] %p7/z -> %z27.b %z28.b %z29.b +a45edfff : ld3b {z31.b, z0.b, z1.b}, p7/Z, [sp, x30] : ld3b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b %z1.b # LD3B { .B, .B, .B }, /Z, [{, #, MUL VL}] (LD3B-Z.P.BI-Contiguous) -a448e000 : ld3b {z0.b, z1.b, z2.b}, p0/Z, [x0, #-24, MUL VL] : ld3b -0x0300(%x0)[96byte] %p0/z -> %z0.b %z1.b %z2.b -a449e482 : ld3b {z2.b, z3.b, z4.b}, p1/Z, [x4, #-21, MUL VL] : ld3b -0x02a0(%x4)[96byte] %p1/z -> %z2.b %z3.b %z4.b -a44ae8c4 : ld3b {z4.b, z5.b, z6.b}, p2/Z, [x6, #-18, MUL VL] : ld3b -0x0240(%x6)[96byte] %p2/z -> %z4.b %z5.b %z6.b -a44be906 : ld3b {z6.b, z7.b, z8.b}, p2/Z, [x8, #-15, MUL VL] : ld3b -0x01e0(%x8)[96byte] %p2/z -> %z6.b %z7.b %z8.b -a44ced48 : ld3b {z8.b, z9.b, z10.b}, p3/Z, [x10, #-12, MUL VL] : ld3b -0x0180(%x10)[96byte] %p3/z -> %z8.b %z9.b %z10.b -a44ded6a : ld3b {z10.b, z11.b, z12.b}, p3/Z, [x11, #-9, MUL VL] : ld3b -0x0120(%x11)[96byte] %p3/z -> %z10.b %z11.b %z12.b -a44ef1ac : ld3b {z12.b, z13.b, z14.b}, p4/Z, [x13, #-6, MUL VL] : ld3b -0xc0(%x13)[96byte] %p4/z -> %z12.b %z13.b %z14.b -a44ff1ee : ld3b {z14.b, z15.b, z16.b}, p4/Z, [x15, #-3, MUL VL] : ld3b -0x60(%x15)[96byte] %p4/z -> %z14.b %z15.b %z16.b -a440f630 : ld3b {z16.b, z17.b, z18.b}, p5/Z, [x17, #0, MUL VL] : ld3b (%x17)[96byte] %p5/z -> %z16.b %z17.b %z18.b -a440f671 : ld3b {z17.b, z18.b, z19.b}, p5/Z, [x19, #0, MUL VL] : ld3b (%x19)[96byte] %p5/z -> %z17.b %z18.b %z19.b -a441f6b3 : ld3b {z19.b, z20.b, z21.b}, p5/Z, [x21, #3, MUL VL] : ld3b +0x60(%x21)[96byte] %p5/z -> %z19.b %z20.b %z21.b -a442faf5 : ld3b {z21.b, z22.b, z23.b}, p6/Z, [x23, #6, MUL 
VL] : ld3b +0xc0(%x23)[96byte] %p6/z -> %z21.b %z22.b %z23.b -a443fb17 : ld3b {z23.b, z24.b, z25.b}, p6/Z, [x24, #9, MUL VL] : ld3b +0x0120(%x24)[96byte] %p6/z -> %z23.b %z24.b %z25.b -a444ff59 : ld3b {z25.b, z26.b, z27.b}, p7/Z, [x26, #12, MUL VL] : ld3b +0x0180(%x26)[96byte] %p7/z -> %z25.b %z26.b %z27.b -a445ff9b : ld3b {z27.b, z28.b, z29.b}, p7/Z, [x28, #15, MUL VL] : ld3b +0x01e0(%x28)[96byte] %p7/z -> %z27.b %z28.b %z29.b -a447ffff : ld3b {z31.b, z0.b, z1.b}, p7/Z, [sp, #21, MUL VL] : ld3b +0x02a0(%sp)[96byte] %p7/z -> %z31.b %z0.b %z1.b +a448e000 : ld3b {z0.b, z1.b, z2.b}, p0/Z, [x0, #-24, MUL VL] : ld3b -0x0300(%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b +a449e482 : ld3b {z2.b, z3.b, z4.b}, p1/Z, [x4, #-21, MUL VL] : ld3b -0x02a0(%x4)[1byte] %p1/z -> %z2.b %z3.b %z4.b +a44ae8c4 : ld3b {z4.b, z5.b, z6.b}, p2/Z, [x6, #-18, MUL VL] : ld3b -0x0240(%x6)[1byte] %p2/z -> %z4.b %z5.b %z6.b +a44be906 : ld3b {z6.b, z7.b, z8.b}, p2/Z, [x8, #-15, MUL VL] : ld3b -0x01e0(%x8)[1byte] %p2/z -> %z6.b %z7.b %z8.b +a44ced48 : ld3b {z8.b, z9.b, z10.b}, p3/Z, [x10, #-12, MUL VL] : ld3b -0x0180(%x10)[1byte] %p3/z -> %z8.b %z9.b %z10.b +a44ded6a : ld3b {z10.b, z11.b, z12.b}, p3/Z, [x11, #-9, MUL VL] : ld3b -0x0120(%x11)[1byte] %p3/z -> %z10.b %z11.b %z12.b +a44ef1ac : ld3b {z12.b, z13.b, z14.b}, p4/Z, [x13, #-6, MUL VL] : ld3b -0xc0(%x13)[1byte] %p4/z -> %z12.b %z13.b %z14.b +a44ff1ee : ld3b {z14.b, z15.b, z16.b}, p4/Z, [x15, #-3, MUL VL] : ld3b -0x60(%x15)[1byte] %p4/z -> %z14.b %z15.b %z16.b +a440f630 : ld3b {z16.b, z17.b, z18.b}, p5/Z, [x17, #0, MUL VL] : ld3b (%x17)[1byte] %p5/z -> %z16.b %z17.b %z18.b +a440f671 : ld3b {z17.b, z18.b, z19.b}, p5/Z, [x19, #0, MUL VL] : ld3b (%x19)[1byte] %p5/z -> %z17.b %z18.b %z19.b +a441f6b3 : ld3b {z19.b, z20.b, z21.b}, p5/Z, [x21, #3, MUL VL] : ld3b +0x60(%x21)[1byte] %p5/z -> %z19.b %z20.b %z21.b +a442faf5 : ld3b {z21.b, z22.b, z23.b}, p6/Z, [x23, #6, MUL VL] : ld3b +0xc0(%x23)[1byte] %p6/z -> %z21.b %z22.b %z23.b +a443fb17 : ld3b {z23.b, 
z24.b, z25.b}, p6/Z, [x24, #9, MUL VL] : ld3b +0x0120(%x24)[1byte] %p6/z -> %z23.b %z24.b %z25.b +a444ff59 : ld3b {z25.b, z26.b, z27.b}, p7/Z, [x26, #12, MUL VL] : ld3b +0x0180(%x26)[1byte] %p7/z -> %z25.b %z26.b %z27.b +a445ff9b : ld3b {z27.b, z28.b, z29.b}, p7/Z, [x28, #15, MUL VL] : ld3b +0x01e0(%x28)[1byte] %p7/z -> %z27.b %z28.b %z29.b +a447ffff : ld3b {z31.b, z0.b, z1.b}, p7/Z, [sp, #21, MUL VL] : ld3b +0x02a0(%sp)[1byte] %p7/z -> %z31.b %z0.b %z1.b # LD3D { .D, .D, .D }, /Z, [, , LSL #3] (LD3D-Z.P.BR-Contiguous) -a5c0c000 : ld3d {z0.d, z1.d, z2.d}, p0/Z, [x0, x0, LSL #3] : ld3d (%x0,%x0,lsl #3)[96byte] %p0/z -> %z0.d %z1.d %z2.d -a5c5c482 : ld3d {z2.d, z3.d, z4.d}, p1/Z, [x4, x5, LSL #3] : ld3d (%x4,%x5,lsl #3)[96byte] %p1/z -> %z2.d %z3.d %z4.d -a5c7c8c4 : ld3d {z4.d, z5.d, z6.d}, p2/Z, [x6, x7, LSL #3] : ld3d (%x6,%x7,lsl #3)[96byte] %p2/z -> %z4.d %z5.d %z6.d -a5c9c906 : ld3d {z6.d, z7.d, z8.d}, p2/Z, [x8, x9, LSL #3] : ld3d (%x8,%x9,lsl #3)[96byte] %p2/z -> %z6.d %z7.d %z8.d -a5cbcd48 : ld3d {z8.d, z9.d, z10.d}, p3/Z, [x10, x11, LSL #3] : ld3d (%x10,%x11,lsl #3)[96byte] %p3/z -> %z8.d %z9.d %z10.d -a5cccd6a : ld3d {z10.d, z11.d, z12.d}, p3/Z, [x11, x12, LSL #3] : ld3d (%x11,%x12,lsl #3)[96byte] %p3/z -> %z10.d %z11.d %z12.d -a5ced1ac : ld3d {z12.d, z13.d, z14.d}, p4/Z, [x13, x14, LSL #3] : ld3d (%x13,%x14,lsl #3)[96byte] %p4/z -> %z12.d %z13.d %z14.d -a5d0d1ee : ld3d {z14.d, z15.d, z16.d}, p4/Z, [x15, x16, LSL #3] : ld3d (%x15,%x16,lsl #3)[96byte] %p4/z -> %z14.d %z15.d %z16.d -a5d2d630 : ld3d {z16.d, z17.d, z18.d}, p5/Z, [x17, x18, LSL #3] : ld3d (%x17,%x18,lsl #3)[96byte] %p5/z -> %z16.d %z17.d %z18.d -a5d4d671 : ld3d {z17.d, z18.d, z19.d}, p5/Z, [x19, x20, LSL #3] : ld3d (%x19,%x20,lsl #3)[96byte] %p5/z -> %z17.d %z18.d %z19.d -a5d6d6b3 : ld3d {z19.d, z20.d, z21.d}, p5/Z, [x21, x22, LSL #3] : ld3d (%x21,%x22,lsl #3)[96byte] %p5/z -> %z19.d %z20.d %z21.d -a5d8daf5 : ld3d {z21.d, z22.d, z23.d}, p6/Z, [x23, x24, LSL #3] : ld3d (%x23,%x24,lsl #3)[96byte] 
%p6/z -> %z21.d %z22.d %z23.d -a5d9db17 : ld3d {z23.d, z24.d, z25.d}, p6/Z, [x24, x25, LSL #3] : ld3d (%x24,%x25,lsl #3)[96byte] %p6/z -> %z23.d %z24.d %z25.d -a5dbdf59 : ld3d {z25.d, z26.d, z27.d}, p7/Z, [x26, x27, LSL #3] : ld3d (%x26,%x27,lsl #3)[96byte] %p7/z -> %z25.d %z26.d %z27.d -a5dddf9b : ld3d {z27.d, z28.d, z29.d}, p7/Z, [x28, x29, LSL #3] : ld3d (%x28,%x29,lsl #3)[96byte] %p7/z -> %z27.d %z28.d %z29.d -a5dedfff : ld3d {z31.d, z0.d, z1.d}, p7/Z, [sp, x30, LSL #3] : ld3d (%sp,%x30,lsl #3)[96byte] %p7/z -> %z31.d %z0.d %z1.d +a5c0c000 : ld3d {z0.d, z1.d, z2.d}, p0/Z, [x0, x0, LSL #3] : ld3d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d %z2.d +a5c5c482 : ld3d {z2.d, z3.d, z4.d}, p1/Z, [x4, x5, LSL #3] : ld3d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d %z3.d %z4.d +a5c7c8c4 : ld3d {z4.d, z5.d, z6.d}, p2/Z, [x6, x7, LSL #3] : ld3d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d %z5.d %z6.d +a5c9c906 : ld3d {z6.d, z7.d, z8.d}, p2/Z, [x8, x9, LSL #3] : ld3d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d %z7.d %z8.d +a5cbcd48 : ld3d {z8.d, z9.d, z10.d}, p3/Z, [x10, x11, LSL #3] : ld3d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d %z9.d %z10.d +a5cccd6a : ld3d {z10.d, z11.d, z12.d}, p3/Z, [x11, x12, LSL #3] : ld3d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d %z11.d %z12.d +a5ced1ac : ld3d {z12.d, z13.d, z14.d}, p4/Z, [x13, x14, LSL #3] : ld3d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d %z13.d %z14.d +a5d0d1ee : ld3d {z14.d, z15.d, z16.d}, p4/Z, [x15, x16, LSL #3] : ld3d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d %z15.d %z16.d +a5d2d630 : ld3d {z16.d, z17.d, z18.d}, p5/Z, [x17, x18, LSL #3] : ld3d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d %z18.d +a5d4d671 : ld3d {z17.d, z18.d, z19.d}, p5/Z, [x19, x20, LSL #3] : ld3d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d %z18.d %z19.d +a5d6d6b3 : ld3d {z19.d, z20.d, z21.d}, p5/Z, [x21, x22, LSL #3] : ld3d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d %z20.d %z21.d +a5d8daf5 : ld3d {z21.d, z22.d, z23.d}, p6/Z, [x23, x24, LSL #3] : ld3d (%x23,%x24,lsl 
#3)[8byte] %p6/z -> %z21.d %z22.d %z23.d +a5d9db17 : ld3d {z23.d, z24.d, z25.d}, p6/Z, [x24, x25, LSL #3] : ld3d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d %z24.d %z25.d +a5dbdf59 : ld3d {z25.d, z26.d, z27.d}, p7/Z, [x26, x27, LSL #3] : ld3d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d %z26.d %z27.d +a5dddf9b : ld3d {z27.d, z28.d, z29.d}, p7/Z, [x28, x29, LSL #3] : ld3d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d %z28.d %z29.d +a5dedfff : ld3d {z31.d, z0.d, z1.d}, p7/Z, [sp, x30, LSL #3] : ld3d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d %z1.d # LD3D { .D, .D, .D }, /Z, [{, #, MUL VL}] (LD3D-Z.P.BI-Contiguous) -a5c8e000 : ld3d {z0.d, z1.d, z2.d}, p0/Z, [x0, #-24, MUL VL] : ld3d -0x0300(%x0)[96byte] %p0/z -> %z0.d %z1.d %z2.d -a5c9e482 : ld3d {z2.d, z3.d, z4.d}, p1/Z, [x4, #-21, MUL VL] : ld3d -0x02a0(%x4)[96byte] %p1/z -> %z2.d %z3.d %z4.d -a5cae8c4 : ld3d {z4.d, z5.d, z6.d}, p2/Z, [x6, #-18, MUL VL] : ld3d -0x0240(%x6)[96byte] %p2/z -> %z4.d %z5.d %z6.d -a5cbe906 : ld3d {z6.d, z7.d, z8.d}, p2/Z, [x8, #-15, MUL VL] : ld3d -0x01e0(%x8)[96byte] %p2/z -> %z6.d %z7.d %z8.d -a5cced48 : ld3d {z8.d, z9.d, z10.d}, p3/Z, [x10, #-12, MUL VL] : ld3d -0x0180(%x10)[96byte] %p3/z -> %z8.d %z9.d %z10.d -a5cded6a : ld3d {z10.d, z11.d, z12.d}, p3/Z, [x11, #-9, MUL VL] : ld3d -0x0120(%x11)[96byte] %p3/z -> %z10.d %z11.d %z12.d -a5cef1ac : ld3d {z12.d, z13.d, z14.d}, p4/Z, [x13, #-6, MUL VL] : ld3d -0xc0(%x13)[96byte] %p4/z -> %z12.d %z13.d %z14.d -a5cff1ee : ld3d {z14.d, z15.d, z16.d}, p4/Z, [x15, #-3, MUL VL] : ld3d -0x60(%x15)[96byte] %p4/z -> %z14.d %z15.d %z16.d -a5c0f630 : ld3d {z16.d, z17.d, z18.d}, p5/Z, [x17, #0, MUL VL] : ld3d (%x17)[96byte] %p5/z -> %z16.d %z17.d %z18.d -a5c0f671 : ld3d {z17.d, z18.d, z19.d}, p5/Z, [x19, #0, MUL VL] : ld3d (%x19)[96byte] %p5/z -> %z17.d %z18.d %z19.d -a5c1f6b3 : ld3d {z19.d, z20.d, z21.d}, p5/Z, [x21, #3, MUL VL] : ld3d +0x60(%x21)[96byte] %p5/z -> %z19.d %z20.d %z21.d -a5c2faf5 : ld3d {z21.d, z22.d, z23.d}, p6/Z, [x23, #6, MUL VL] : ld3d 
+0xc0(%x23)[96byte] %p6/z -> %z21.d %z22.d %z23.d -a5c3fb17 : ld3d {z23.d, z24.d, z25.d}, p6/Z, [x24, #9, MUL VL] : ld3d +0x0120(%x24)[96byte] %p6/z -> %z23.d %z24.d %z25.d -a5c4ff59 : ld3d {z25.d, z26.d, z27.d}, p7/Z, [x26, #12, MUL VL] : ld3d +0x0180(%x26)[96byte] %p7/z -> %z25.d %z26.d %z27.d -a5c5ff9b : ld3d {z27.d, z28.d, z29.d}, p7/Z, [x28, #15, MUL VL] : ld3d +0x01e0(%x28)[96byte] %p7/z -> %z27.d %z28.d %z29.d -a5c7ffff : ld3d {z31.d, z0.d, z1.d}, p7/Z, [sp, #21, MUL VL] : ld3d +0x02a0(%sp)[96byte] %p7/z -> %z31.d %z0.d %z1.d +a5c8e000 : ld3d {z0.d, z1.d, z2.d}, p0/Z, [x0, #-24, MUL VL] : ld3d -0x0300(%x0)[8byte] %p0/z -> %z0.d %z1.d %z2.d +a5c9e482 : ld3d {z2.d, z3.d, z4.d}, p1/Z, [x4, #-21, MUL VL] : ld3d -0x02a0(%x4)[8byte] %p1/z -> %z2.d %z3.d %z4.d +a5cae8c4 : ld3d {z4.d, z5.d, z6.d}, p2/Z, [x6, #-18, MUL VL] : ld3d -0x0240(%x6)[8byte] %p2/z -> %z4.d %z5.d %z6.d +a5cbe906 : ld3d {z6.d, z7.d, z8.d}, p2/Z, [x8, #-15, MUL VL] : ld3d -0x01e0(%x8)[8byte] %p2/z -> %z6.d %z7.d %z8.d +a5cced48 : ld3d {z8.d, z9.d, z10.d}, p3/Z, [x10, #-12, MUL VL] : ld3d -0x0180(%x10)[8byte] %p3/z -> %z8.d %z9.d %z10.d +a5cded6a : ld3d {z10.d, z11.d, z12.d}, p3/Z, [x11, #-9, MUL VL] : ld3d -0x0120(%x11)[8byte] %p3/z -> %z10.d %z11.d %z12.d +a5cef1ac : ld3d {z12.d, z13.d, z14.d}, p4/Z, [x13, #-6, MUL VL] : ld3d -0xc0(%x13)[8byte] %p4/z -> %z12.d %z13.d %z14.d +a5cff1ee : ld3d {z14.d, z15.d, z16.d}, p4/Z, [x15, #-3, MUL VL] : ld3d -0x60(%x15)[8byte] %p4/z -> %z14.d %z15.d %z16.d +a5c0f630 : ld3d {z16.d, z17.d, z18.d}, p5/Z, [x17, #0, MUL VL] : ld3d (%x17)[8byte] %p5/z -> %z16.d %z17.d %z18.d +a5c0f671 : ld3d {z17.d, z18.d, z19.d}, p5/Z, [x19, #0, MUL VL] : ld3d (%x19)[8byte] %p5/z -> %z17.d %z18.d %z19.d +a5c1f6b3 : ld3d {z19.d, z20.d, z21.d}, p5/Z, [x21, #3, MUL VL] : ld3d +0x60(%x21)[8byte] %p5/z -> %z19.d %z20.d %z21.d +a5c2faf5 : ld3d {z21.d, z22.d, z23.d}, p6/Z, [x23, #6, MUL VL] : ld3d +0xc0(%x23)[8byte] %p6/z -> %z21.d %z22.d %z23.d +a5c3fb17 : ld3d {z23.d, z24.d, z25.d}, 
p6/Z, [x24, #9, MUL VL] : ld3d +0x0120(%x24)[8byte] %p6/z -> %z23.d %z24.d %z25.d +a5c4ff59 : ld3d {z25.d, z26.d, z27.d}, p7/Z, [x26, #12, MUL VL] : ld3d +0x0180(%x26)[8byte] %p7/z -> %z25.d %z26.d %z27.d +a5c5ff9b : ld3d {z27.d, z28.d, z29.d}, p7/Z, [x28, #15, MUL VL] : ld3d +0x01e0(%x28)[8byte] %p7/z -> %z27.d %z28.d %z29.d +a5c7ffff : ld3d {z31.d, z0.d, z1.d}, p7/Z, [sp, #21, MUL VL] : ld3d +0x02a0(%sp)[8byte] %p7/z -> %z31.d %z0.d %z1.d # LD3H { .H, .H, .H }, /Z, [, , LSL #1] (LD3H-Z.P.BR-Contiguous) -a4c0c000 : ld3h {z0.h, z1.h, z2.h}, p0/Z, [x0, x0, LSL #1] : ld3h (%x0,%x0,lsl #1)[96byte] %p0/z -> %z0.h %z1.h %z2.h -a4c5c482 : ld3h {z2.h, z3.h, z4.h}, p1/Z, [x4, x5, LSL #1] : ld3h (%x4,%x5,lsl #1)[96byte] %p1/z -> %z2.h %z3.h %z4.h -a4c7c8c4 : ld3h {z4.h, z5.h, z6.h}, p2/Z, [x6, x7, LSL #1] : ld3h (%x6,%x7,lsl #1)[96byte] %p2/z -> %z4.h %z5.h %z6.h -a4c9c906 : ld3h {z6.h, z7.h, z8.h}, p2/Z, [x8, x9, LSL #1] : ld3h (%x8,%x9,lsl #1)[96byte] %p2/z -> %z6.h %z7.h %z8.h -a4cbcd48 : ld3h {z8.h, z9.h, z10.h}, p3/Z, [x10, x11, LSL #1] : ld3h (%x10,%x11,lsl #1)[96byte] %p3/z -> %z8.h %z9.h %z10.h -a4cccd6a : ld3h {z10.h, z11.h, z12.h}, p3/Z, [x11, x12, LSL #1] : ld3h (%x11,%x12,lsl #1)[96byte] %p3/z -> %z10.h %z11.h %z12.h -a4ced1ac : ld3h {z12.h, z13.h, z14.h}, p4/Z, [x13, x14, LSL #1] : ld3h (%x13,%x14,lsl #1)[96byte] %p4/z -> %z12.h %z13.h %z14.h -a4d0d1ee : ld3h {z14.h, z15.h, z16.h}, p4/Z, [x15, x16, LSL #1] : ld3h (%x15,%x16,lsl #1)[96byte] %p4/z -> %z14.h %z15.h %z16.h -a4d2d630 : ld3h {z16.h, z17.h, z18.h}, p5/Z, [x17, x18, LSL #1] : ld3h (%x17,%x18,lsl #1)[96byte] %p5/z -> %z16.h %z17.h %z18.h -a4d4d671 : ld3h {z17.h, z18.h, z19.h}, p5/Z, [x19, x20, LSL #1] : ld3h (%x19,%x20,lsl #1)[96byte] %p5/z -> %z17.h %z18.h %z19.h -a4d6d6b3 : ld3h {z19.h, z20.h, z21.h}, p5/Z, [x21, x22, LSL #1] : ld3h (%x21,%x22,lsl #1)[96byte] %p5/z -> %z19.h %z20.h %z21.h -a4d8daf5 : ld3h {z21.h, z22.h, z23.h}, p6/Z, [x23, x24, LSL #1] : ld3h (%x23,%x24,lsl #1)[96byte] %p6/z -> %z21.h 
%z22.h %z23.h -a4d9db17 : ld3h {z23.h, z24.h, z25.h}, p6/Z, [x24, x25, LSL #1] : ld3h (%x24,%x25,lsl #1)[96byte] %p6/z -> %z23.h %z24.h %z25.h -a4dbdf59 : ld3h {z25.h, z26.h, z27.h}, p7/Z, [x26, x27, LSL #1] : ld3h (%x26,%x27,lsl #1)[96byte] %p7/z -> %z25.h %z26.h %z27.h -a4dddf9b : ld3h {z27.h, z28.h, z29.h}, p7/Z, [x28, x29, LSL #1] : ld3h (%x28,%x29,lsl #1)[96byte] %p7/z -> %z27.h %z28.h %z29.h -a4dedfff : ld3h {z31.h, z0.h, z1.h}, p7/Z, [sp, x30, LSL #1] : ld3h (%sp,%x30,lsl #1)[96byte] %p7/z -> %z31.h %z0.h %z1.h +a4c0c000 : ld3h {z0.h, z1.h, z2.h}, p0/Z, [x0, x0, LSL #1] : ld3h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h %z1.h %z2.h +a4c5c482 : ld3h {z2.h, z3.h, z4.h}, p1/Z, [x4, x5, LSL #1] : ld3h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h %z3.h %z4.h +a4c7c8c4 : ld3h {z4.h, z5.h, z6.h}, p2/Z, [x6, x7, LSL #1] : ld3h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h %z5.h %z6.h +a4c9c906 : ld3h {z6.h, z7.h, z8.h}, p2/Z, [x8, x9, LSL #1] : ld3h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h %z7.h %z8.h +a4cbcd48 : ld3h {z8.h, z9.h, z10.h}, p3/Z, [x10, x11, LSL #1] : ld3h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h %z9.h %z10.h +a4cccd6a : ld3h {z10.h, z11.h, z12.h}, p3/Z, [x11, x12, LSL #1] : ld3h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h %z11.h %z12.h +a4ced1ac : ld3h {z12.h, z13.h, z14.h}, p4/Z, [x13, x14, LSL #1] : ld3h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h %z13.h %z14.h +a4d0d1ee : ld3h {z14.h, z15.h, z16.h}, p4/Z, [x15, x16, LSL #1] : ld3h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h %z15.h %z16.h +a4d2d630 : ld3h {z16.h, z17.h, z18.h}, p5/Z, [x17, x18, LSL #1] : ld3h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h %z18.h +a4d4d671 : ld3h {z17.h, z18.h, z19.h}, p5/Z, [x19, x20, LSL #1] : ld3h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h %z18.h %z19.h +a4d6d6b3 : ld3h {z19.h, z20.h, z21.h}, p5/Z, [x21, x22, LSL #1] : ld3h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h %z20.h %z21.h +a4d8daf5 : ld3h {z21.h, z22.h, z23.h}, p6/Z, [x23, x24, LSL #1] : ld3h (%x23,%x24,lsl #1)[2byte] %p6/z -> 
%z21.h %z22.h %z23.h +a4d9db17 : ld3h {z23.h, z24.h, z25.h}, p6/Z, [x24, x25, LSL #1] : ld3h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h %z24.h %z25.h +a4dbdf59 : ld3h {z25.h, z26.h, z27.h}, p7/Z, [x26, x27, LSL #1] : ld3h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h %z26.h %z27.h +a4dddf9b : ld3h {z27.h, z28.h, z29.h}, p7/Z, [x28, x29, LSL #1] : ld3h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h %z28.h %z29.h +a4dedfff : ld3h {z31.h, z0.h, z1.h}, p7/Z, [sp, x30, LSL #1] : ld3h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h %z1.h # LD3H { .H, .H, .H }, /Z, [{, #, MUL VL}] (LD3H-Z.P.BI-Contiguous) -a4c8e000 : ld3h {z0.h, z1.h, z2.h}, p0/Z, [x0, #-24, MUL VL] : ld3h -0x0300(%x0)[96byte] %p0/z -> %z0.h %z1.h %z2.h -a4c9e482 : ld3h {z2.h, z3.h, z4.h}, p1/Z, [x4, #-21, MUL VL] : ld3h -0x02a0(%x4)[96byte] %p1/z -> %z2.h %z3.h %z4.h -a4cae8c4 : ld3h {z4.h, z5.h, z6.h}, p2/Z, [x6, #-18, MUL VL] : ld3h -0x0240(%x6)[96byte] %p2/z -> %z4.h %z5.h %z6.h -a4cbe906 : ld3h {z6.h, z7.h, z8.h}, p2/Z, [x8, #-15, MUL VL] : ld3h -0x01e0(%x8)[96byte] %p2/z -> %z6.h %z7.h %z8.h -a4cced48 : ld3h {z8.h, z9.h, z10.h}, p3/Z, [x10, #-12, MUL VL] : ld3h -0x0180(%x10)[96byte] %p3/z -> %z8.h %z9.h %z10.h -a4cded6a : ld3h {z10.h, z11.h, z12.h}, p3/Z, [x11, #-9, MUL VL] : ld3h -0x0120(%x11)[96byte] %p3/z -> %z10.h %z11.h %z12.h -a4cef1ac : ld3h {z12.h, z13.h, z14.h}, p4/Z, [x13, #-6, MUL VL] : ld3h -0xc0(%x13)[96byte] %p4/z -> %z12.h %z13.h %z14.h -a4cff1ee : ld3h {z14.h, z15.h, z16.h}, p4/Z, [x15, #-3, MUL VL] : ld3h -0x60(%x15)[96byte] %p4/z -> %z14.h %z15.h %z16.h -a4c0f630 : ld3h {z16.h, z17.h, z18.h}, p5/Z, [x17, #0, MUL VL] : ld3h (%x17)[96byte] %p5/z -> %z16.h %z17.h %z18.h -a4c0f671 : ld3h {z17.h, z18.h, z19.h}, p5/Z, [x19, #0, MUL VL] : ld3h (%x19)[96byte] %p5/z -> %z17.h %z18.h %z19.h -a4c1f6b3 : ld3h {z19.h, z20.h, z21.h}, p5/Z, [x21, #3, MUL VL] : ld3h +0x60(%x21)[96byte] %p5/z -> %z19.h %z20.h %z21.h -a4c2faf5 : ld3h {z21.h, z22.h, z23.h}, p6/Z, [x23, #6, MUL VL] : ld3h +0xc0(%x23)[96byte] 
%p6/z -> %z21.h %z22.h %z23.h -a4c3fb17 : ld3h {z23.h, z24.h, z25.h}, p6/Z, [x24, #9, MUL VL] : ld3h +0x0120(%x24)[96byte] %p6/z -> %z23.h %z24.h %z25.h -a4c4ff59 : ld3h {z25.h, z26.h, z27.h}, p7/Z, [x26, #12, MUL VL] : ld3h +0x0180(%x26)[96byte] %p7/z -> %z25.h %z26.h %z27.h -a4c5ff9b : ld3h {z27.h, z28.h, z29.h}, p7/Z, [x28, #15, MUL VL] : ld3h +0x01e0(%x28)[96byte] %p7/z -> %z27.h %z28.h %z29.h -a4c7ffff : ld3h {z31.h, z0.h, z1.h}, p7/Z, [sp, #21, MUL VL] : ld3h +0x02a0(%sp)[96byte] %p7/z -> %z31.h %z0.h %z1.h +a4c8e000 : ld3h {z0.h, z1.h, z2.h}, p0/Z, [x0, #-24, MUL VL] : ld3h -0x0300(%x0)[2byte] %p0/z -> %z0.h %z1.h %z2.h +a4c9e482 : ld3h {z2.h, z3.h, z4.h}, p1/Z, [x4, #-21, MUL VL] : ld3h -0x02a0(%x4)[2byte] %p1/z -> %z2.h %z3.h %z4.h +a4cae8c4 : ld3h {z4.h, z5.h, z6.h}, p2/Z, [x6, #-18, MUL VL] : ld3h -0x0240(%x6)[2byte] %p2/z -> %z4.h %z5.h %z6.h +a4cbe906 : ld3h {z6.h, z7.h, z8.h}, p2/Z, [x8, #-15, MUL VL] : ld3h -0x01e0(%x8)[2byte] %p2/z -> %z6.h %z7.h %z8.h +a4cced48 : ld3h {z8.h, z9.h, z10.h}, p3/Z, [x10, #-12, MUL VL] : ld3h -0x0180(%x10)[2byte] %p3/z -> %z8.h %z9.h %z10.h +a4cded6a : ld3h {z10.h, z11.h, z12.h}, p3/Z, [x11, #-9, MUL VL] : ld3h -0x0120(%x11)[2byte] %p3/z -> %z10.h %z11.h %z12.h +a4cef1ac : ld3h {z12.h, z13.h, z14.h}, p4/Z, [x13, #-6, MUL VL] : ld3h -0xc0(%x13)[2byte] %p4/z -> %z12.h %z13.h %z14.h +a4cff1ee : ld3h {z14.h, z15.h, z16.h}, p4/Z, [x15, #-3, MUL VL] : ld3h -0x60(%x15)[2byte] %p4/z -> %z14.h %z15.h %z16.h +a4c0f630 : ld3h {z16.h, z17.h, z18.h}, p5/Z, [x17, #0, MUL VL] : ld3h (%x17)[2byte] %p5/z -> %z16.h %z17.h %z18.h +a4c0f671 : ld3h {z17.h, z18.h, z19.h}, p5/Z, [x19, #0, MUL VL] : ld3h (%x19)[2byte] %p5/z -> %z17.h %z18.h %z19.h +a4c1f6b3 : ld3h {z19.h, z20.h, z21.h}, p5/Z, [x21, #3, MUL VL] : ld3h +0x60(%x21)[2byte] %p5/z -> %z19.h %z20.h %z21.h +a4c2faf5 : ld3h {z21.h, z22.h, z23.h}, p6/Z, [x23, #6, MUL VL] : ld3h +0xc0(%x23)[2byte] %p6/z -> %z21.h %z22.h %z23.h +a4c3fb17 : ld3h {z23.h, z24.h, z25.h}, p6/Z, [x24, #9, MUL 
VL] : ld3h +0x0120(%x24)[2byte] %p6/z -> %z23.h %z24.h %z25.h +a4c4ff59 : ld3h {z25.h, z26.h, z27.h}, p7/Z, [x26, #12, MUL VL] : ld3h +0x0180(%x26)[2byte] %p7/z -> %z25.h %z26.h %z27.h +a4c5ff9b : ld3h {z27.h, z28.h, z29.h}, p7/Z, [x28, #15, MUL VL] : ld3h +0x01e0(%x28)[2byte] %p7/z -> %z27.h %z28.h %z29.h +a4c7ffff : ld3h {z31.h, z0.h, z1.h}, p7/Z, [sp, #21, MUL VL] : ld3h +0x02a0(%sp)[2byte] %p7/z -> %z31.h %z0.h %z1.h # LD3W { .S, .S, .S }, /Z, [, , LSL #2] (LD3W-Z.P.BR-Contiguous) -a540c000 : ld3w {z0.s, z1.s, z2.s}, p0/Z, [x0, x0, LSL #2] : ld3w (%x0,%x0,lsl #2)[96byte] %p0/z -> %z0.s %z1.s %z2.s -a545c482 : ld3w {z2.s, z3.s, z4.s}, p1/Z, [x4, x5, LSL #2] : ld3w (%x4,%x5,lsl #2)[96byte] %p1/z -> %z2.s %z3.s %z4.s -a547c8c4 : ld3w {z4.s, z5.s, z6.s}, p2/Z, [x6, x7, LSL #2] : ld3w (%x6,%x7,lsl #2)[96byte] %p2/z -> %z4.s %z5.s %z6.s -a549c906 : ld3w {z6.s, z7.s, z8.s}, p2/Z, [x8, x9, LSL #2] : ld3w (%x8,%x9,lsl #2)[96byte] %p2/z -> %z6.s %z7.s %z8.s -a54bcd48 : ld3w {z8.s, z9.s, z10.s}, p3/Z, [x10, x11, LSL #2] : ld3w (%x10,%x11,lsl #2)[96byte] %p3/z -> %z8.s %z9.s %z10.s -a54ccd6a : ld3w {z10.s, z11.s, z12.s}, p3/Z, [x11, x12, LSL #2] : ld3w (%x11,%x12,lsl #2)[96byte] %p3/z -> %z10.s %z11.s %z12.s -a54ed1ac : ld3w {z12.s, z13.s, z14.s}, p4/Z, [x13, x14, LSL #2] : ld3w (%x13,%x14,lsl #2)[96byte] %p4/z -> %z12.s %z13.s %z14.s -a550d1ee : ld3w {z14.s, z15.s, z16.s}, p4/Z, [x15, x16, LSL #2] : ld3w (%x15,%x16,lsl #2)[96byte] %p4/z -> %z14.s %z15.s %z16.s -a552d630 : ld3w {z16.s, z17.s, z18.s}, p5/Z, [x17, x18, LSL #2] : ld3w (%x17,%x18,lsl #2)[96byte] %p5/z -> %z16.s %z17.s %z18.s -a554d671 : ld3w {z17.s, z18.s, z19.s}, p5/Z, [x19, x20, LSL #2] : ld3w (%x19,%x20,lsl #2)[96byte] %p5/z -> %z17.s %z18.s %z19.s -a556d6b3 : ld3w {z19.s, z20.s, z21.s}, p5/Z, [x21, x22, LSL #2] : ld3w (%x21,%x22,lsl #2)[96byte] %p5/z -> %z19.s %z20.s %z21.s -a558daf5 : ld3w {z21.s, z22.s, z23.s}, p6/Z, [x23, x24, LSL #2] : ld3w (%x23,%x24,lsl #2)[96byte] %p6/z -> %z21.s %z22.s %z23.s 
-a559db17 : ld3w {z23.s, z24.s, z25.s}, p6/Z, [x24, x25, LSL #2] : ld3w (%x24,%x25,lsl #2)[96byte] %p6/z -> %z23.s %z24.s %z25.s -a55bdf59 : ld3w {z25.s, z26.s, z27.s}, p7/Z, [x26, x27, LSL #2] : ld3w (%x26,%x27,lsl #2)[96byte] %p7/z -> %z25.s %z26.s %z27.s -a55ddf9b : ld3w {z27.s, z28.s, z29.s}, p7/Z, [x28, x29, LSL #2] : ld3w (%x28,%x29,lsl #2)[96byte] %p7/z -> %z27.s %z28.s %z29.s -a55edfff : ld3w {z31.s, z0.s, z1.s}, p7/Z, [sp, x30, LSL #2] : ld3w (%sp,%x30,lsl #2)[96byte] %p7/z -> %z31.s %z0.s %z1.s +a540c000 : ld3w {z0.s, z1.s, z2.s}, p0/Z, [x0, x0, LSL #2] : ld3w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s %z2.s +a545c482 : ld3w {z2.s, z3.s, z4.s}, p1/Z, [x4, x5, LSL #2] : ld3w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s %z3.s %z4.s +a547c8c4 : ld3w {z4.s, z5.s, z6.s}, p2/Z, [x6, x7, LSL #2] : ld3w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s %z5.s %z6.s +a549c906 : ld3w {z6.s, z7.s, z8.s}, p2/Z, [x8, x9, LSL #2] : ld3w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s %z7.s %z8.s +a54bcd48 : ld3w {z8.s, z9.s, z10.s}, p3/Z, [x10, x11, LSL #2] : ld3w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s %z9.s %z10.s +a54ccd6a : ld3w {z10.s, z11.s, z12.s}, p3/Z, [x11, x12, LSL #2] : ld3w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s %z11.s %z12.s +a54ed1ac : ld3w {z12.s, z13.s, z14.s}, p4/Z, [x13, x14, LSL #2] : ld3w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s %z13.s %z14.s +a550d1ee : ld3w {z14.s, z15.s, z16.s}, p4/Z, [x15, x16, LSL #2] : ld3w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s %z15.s %z16.s +a552d630 : ld3w {z16.s, z17.s, z18.s}, p5/Z, [x17, x18, LSL #2] : ld3w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s %z18.s +a554d671 : ld3w {z17.s, z18.s, z19.s}, p5/Z, [x19, x20, LSL #2] : ld3w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s %z18.s %z19.s +a556d6b3 : ld3w {z19.s, z20.s, z21.s}, p5/Z, [x21, x22, LSL #2] : ld3w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s %z20.s %z21.s +a558daf5 : ld3w {z21.s, z22.s, z23.s}, p6/Z, [x23, x24, LSL #2] : ld3w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s %z22.s 
%z23.s +a559db17 : ld3w {z23.s, z24.s, z25.s}, p6/Z, [x24, x25, LSL #2] : ld3w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s %z24.s %z25.s +a55bdf59 : ld3w {z25.s, z26.s, z27.s}, p7/Z, [x26, x27, LSL #2] : ld3w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s %z26.s %z27.s +a55ddf9b : ld3w {z27.s, z28.s, z29.s}, p7/Z, [x28, x29, LSL #2] : ld3w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s %z28.s %z29.s +a55edfff : ld3w {z31.s, z0.s, z1.s}, p7/Z, [sp, x30, LSL #2] : ld3w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s %z1.s # LD3W { .S, .S, .S }, /Z, [{, #, MUL VL}] (LD3W-Z.P.BI-Contiguous) -a548e000 : ld3w {z0.s, z1.s, z2.s}, p0/Z, [x0, #-24, MUL VL] : ld3w -0x0300(%x0)[96byte] %p0/z -> %z0.s %z1.s %z2.s -a549e482 : ld3w {z2.s, z3.s, z4.s}, p1/Z, [x4, #-21, MUL VL] : ld3w -0x02a0(%x4)[96byte] %p1/z -> %z2.s %z3.s %z4.s -a54ae8c4 : ld3w {z4.s, z5.s, z6.s}, p2/Z, [x6, #-18, MUL VL] : ld3w -0x0240(%x6)[96byte] %p2/z -> %z4.s %z5.s %z6.s -a54be906 : ld3w {z6.s, z7.s, z8.s}, p2/Z, [x8, #-15, MUL VL] : ld3w -0x01e0(%x8)[96byte] %p2/z -> %z6.s %z7.s %z8.s -a54ced48 : ld3w {z8.s, z9.s, z10.s}, p3/Z, [x10, #-12, MUL VL] : ld3w -0x0180(%x10)[96byte] %p3/z -> %z8.s %z9.s %z10.s -a54ded6a : ld3w {z10.s, z11.s, z12.s}, p3/Z, [x11, #-9, MUL VL] : ld3w -0x0120(%x11)[96byte] %p3/z -> %z10.s %z11.s %z12.s -a54ef1ac : ld3w {z12.s, z13.s, z14.s}, p4/Z, [x13, #-6, MUL VL] : ld3w -0xc0(%x13)[96byte] %p4/z -> %z12.s %z13.s %z14.s -a54ff1ee : ld3w {z14.s, z15.s, z16.s}, p4/Z, [x15, #-3, MUL VL] : ld3w -0x60(%x15)[96byte] %p4/z -> %z14.s %z15.s %z16.s -a540f630 : ld3w {z16.s, z17.s, z18.s}, p5/Z, [x17, #0, MUL VL] : ld3w (%x17)[96byte] %p5/z -> %z16.s %z17.s %z18.s -a540f671 : ld3w {z17.s, z18.s, z19.s}, p5/Z, [x19, #0, MUL VL] : ld3w (%x19)[96byte] %p5/z -> %z17.s %z18.s %z19.s -a541f6b3 : ld3w {z19.s, z20.s, z21.s}, p5/Z, [x21, #3, MUL VL] : ld3w +0x60(%x21)[96byte] %p5/z -> %z19.s %z20.s %z21.s -a542faf5 : ld3w {z21.s, z22.s, z23.s}, p6/Z, [x23, #6, MUL VL] : ld3w +0xc0(%x23)[96byte] %p6/z -> %z21.s 
%z22.s %z23.s -a543fb17 : ld3w {z23.s, z24.s, z25.s}, p6/Z, [x24, #9, MUL VL] : ld3w +0x0120(%x24)[96byte] %p6/z -> %z23.s %z24.s %z25.s -a544ff59 : ld3w {z25.s, z26.s, z27.s}, p7/Z, [x26, #12, MUL VL] : ld3w +0x0180(%x26)[96byte] %p7/z -> %z25.s %z26.s %z27.s -a545ff9b : ld3w {z27.s, z28.s, z29.s}, p7/Z, [x28, #15, MUL VL] : ld3w +0x01e0(%x28)[96byte] %p7/z -> %z27.s %z28.s %z29.s -a547ffff : ld3w {z31.s, z0.s, z1.s}, p7/Z, [sp, #21, MUL VL] : ld3w +0x02a0(%sp)[96byte] %p7/z -> %z31.s %z0.s %z1.s +a548e000 : ld3w {z0.s, z1.s, z2.s}, p0/Z, [x0, #-24, MUL VL] : ld3w -0x0300(%x0)[4byte] %p0/z -> %z0.s %z1.s %z2.s +a549e482 : ld3w {z2.s, z3.s, z4.s}, p1/Z, [x4, #-21, MUL VL] : ld3w -0x02a0(%x4)[4byte] %p1/z -> %z2.s %z3.s %z4.s +a54ae8c4 : ld3w {z4.s, z5.s, z6.s}, p2/Z, [x6, #-18, MUL VL] : ld3w -0x0240(%x6)[4byte] %p2/z -> %z4.s %z5.s %z6.s +a54be906 : ld3w {z6.s, z7.s, z8.s}, p2/Z, [x8, #-15, MUL VL] : ld3w -0x01e0(%x8)[4byte] %p2/z -> %z6.s %z7.s %z8.s +a54ced48 : ld3w {z8.s, z9.s, z10.s}, p3/Z, [x10, #-12, MUL VL] : ld3w -0x0180(%x10)[4byte] %p3/z -> %z8.s %z9.s %z10.s +a54ded6a : ld3w {z10.s, z11.s, z12.s}, p3/Z, [x11, #-9, MUL VL] : ld3w -0x0120(%x11)[4byte] %p3/z -> %z10.s %z11.s %z12.s +a54ef1ac : ld3w {z12.s, z13.s, z14.s}, p4/Z, [x13, #-6, MUL VL] : ld3w -0xc0(%x13)[4byte] %p4/z -> %z12.s %z13.s %z14.s +a54ff1ee : ld3w {z14.s, z15.s, z16.s}, p4/Z, [x15, #-3, MUL VL] : ld3w -0x60(%x15)[4byte] %p4/z -> %z14.s %z15.s %z16.s +a540f630 : ld3w {z16.s, z17.s, z18.s}, p5/Z, [x17, #0, MUL VL] : ld3w (%x17)[4byte] %p5/z -> %z16.s %z17.s %z18.s +a540f671 : ld3w {z17.s, z18.s, z19.s}, p5/Z, [x19, #0, MUL VL] : ld3w (%x19)[4byte] %p5/z -> %z17.s %z18.s %z19.s +a541f6b3 : ld3w {z19.s, z20.s, z21.s}, p5/Z, [x21, #3, MUL VL] : ld3w +0x60(%x21)[4byte] %p5/z -> %z19.s %z20.s %z21.s +a542faf5 : ld3w {z21.s, z22.s, z23.s}, p6/Z, [x23, #6, MUL VL] : ld3w +0xc0(%x23)[4byte] %p6/z -> %z21.s %z22.s %z23.s +a543fb17 : ld3w {z23.s, z24.s, z25.s}, p6/Z, [x24, #9, MUL VL] : ld3w 
+0x0120(%x24)[4byte] %p6/z -> %z23.s %z24.s %z25.s +a544ff59 : ld3w {z25.s, z26.s, z27.s}, p7/Z, [x26, #12, MUL VL] : ld3w +0x0180(%x26)[4byte] %p7/z -> %z25.s %z26.s %z27.s +a545ff9b : ld3w {z27.s, z28.s, z29.s}, p7/Z, [x28, #15, MUL VL] : ld3w +0x01e0(%x28)[4byte] %p7/z -> %z27.s %z28.s %z29.s +a547ffff : ld3w {z31.s, z0.s, z1.s}, p7/Z, [sp, #21, MUL VL] : ld3w +0x02a0(%sp)[4byte] %p7/z -> %z31.s %z0.s %z1.s # LD4B { .B, .B, .B, .B }, /Z, [, ] (LD4B-Z.P.BR-Contiguous) -a460c000 : ld4b {z0.b, z1.b, z2.b, z3.b}, p0/Z, [x0, x0] : ld4b (%x0,%x0)[128byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b -a465c482 : ld4b {z2.b, z3.b, z4.b, z5.b}, p1/Z, [x4, x5] : ld4b (%x4,%x5)[128byte] %p1/z -> %z2.b %z3.b %z4.b %z5.b -a467c8c4 : ld4b {z4.b, z5.b, z6.b, z7.b}, p2/Z, [x6, x7] : ld4b (%x6,%x7)[128byte] %p2/z -> %z4.b %z5.b %z6.b %z7.b -a469c906 : ld4b {z6.b, z7.b, z8.b, z9.b}, p2/Z, [x8, x9] : ld4b (%x8,%x9)[128byte] %p2/z -> %z6.b %z7.b %z8.b %z9.b -a46bcd48 : ld4b {z8.b, z9.b, z10.b, z11.b}, p3/Z, [x10, x11] : ld4b (%x10,%x11)[128byte] %p3/z -> %z8.b %z9.b %z10.b %z11.b -a46ccd6a : ld4b {z10.b, z11.b, z12.b, z13.b}, p3/Z, [x11, x12] : ld4b (%x11,%x12)[128byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b -a46ed1ac : ld4b {z12.b, z13.b, z14.b, z15.b}, p4/Z, [x13, x14] : ld4b (%x13,%x14)[128byte] %p4/z -> %z12.b %z13.b %z14.b %z15.b -a470d1ee : ld4b {z14.b, z15.b, z16.b, z17.b}, p4/Z, [x15, x16] : ld4b (%x15,%x16)[128byte] %p4/z -> %z14.b %z15.b %z16.b %z17.b -a472d630 : ld4b {z16.b, z17.b, z18.b, z19.b}, p5/Z, [x17, x18] : ld4b (%x17,%x18)[128byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b -a474d671 : ld4b {z17.b, z18.b, z19.b, z20.b}, p5/Z, [x19, x20] : ld4b (%x19,%x20)[128byte] %p5/z -> %z17.b %z18.b %z19.b %z20.b -a476d6b3 : ld4b {z19.b, z20.b, z21.b, z22.b}, p5/Z, [x21, x22] : ld4b (%x21,%x22)[128byte] %p5/z -> %z19.b %z20.b %z21.b %z22.b -a478daf5 : ld4b {z21.b, z22.b, z23.b, z24.b}, p6/Z, [x23, x24] : ld4b (%x23,%x24)[128byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b -a479db17 : ld4b {z23.b, 
z24.b, z25.b, z26.b}, p6/Z, [x24, x25] : ld4b (%x24,%x25)[128byte] %p6/z -> %z23.b %z24.b %z25.b %z26.b -a47bdf59 : ld4b {z25.b, z26.b, z27.b, z28.b}, p7/Z, [x26, x27] : ld4b (%x26,%x27)[128byte] %p7/z -> %z25.b %z26.b %z27.b %z28.b -a47ddf9b : ld4b {z27.b, z28.b, z29.b, z30.b}, p7/Z, [x28, x29] : ld4b (%x28,%x29)[128byte] %p7/z -> %z27.b %z28.b %z29.b %z30.b -a47edfff : ld4b {z31.b, z0.b, z1.b, z2.b}, p7/Z, [sp, x30] : ld4b (%sp,%x30)[128byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b +a460c000 : ld4b {z0.b, z1.b, z2.b, z3.b}, p0/Z, [x0, x0] : ld4b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b +a465c482 : ld4b {z2.b, z3.b, z4.b, z5.b}, p1/Z, [x4, x5] : ld4b (%x4,%x5)[1byte] %p1/z -> %z2.b %z3.b %z4.b %z5.b +a467c8c4 : ld4b {z4.b, z5.b, z6.b, z7.b}, p2/Z, [x6, x7] : ld4b (%x6,%x7)[1byte] %p2/z -> %z4.b %z5.b %z6.b %z7.b +a469c906 : ld4b {z6.b, z7.b, z8.b, z9.b}, p2/Z, [x8, x9] : ld4b (%x8,%x9)[1byte] %p2/z -> %z6.b %z7.b %z8.b %z9.b +a46bcd48 : ld4b {z8.b, z9.b, z10.b, z11.b}, p3/Z, [x10, x11] : ld4b (%x10,%x11)[1byte] %p3/z -> %z8.b %z9.b %z10.b %z11.b +a46ccd6a : ld4b {z10.b, z11.b, z12.b, z13.b}, p3/Z, [x11, x12] : ld4b (%x11,%x12)[1byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b +a46ed1ac : ld4b {z12.b, z13.b, z14.b, z15.b}, p4/Z, [x13, x14] : ld4b (%x13,%x14)[1byte] %p4/z -> %z12.b %z13.b %z14.b %z15.b +a470d1ee : ld4b {z14.b, z15.b, z16.b, z17.b}, p4/Z, [x15, x16] : ld4b (%x15,%x16)[1byte] %p4/z -> %z14.b %z15.b %z16.b %z17.b +a472d630 : ld4b {z16.b, z17.b, z18.b, z19.b}, p5/Z, [x17, x18] : ld4b (%x17,%x18)[1byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b +a474d671 : ld4b {z17.b, z18.b, z19.b, z20.b}, p5/Z, [x19, x20] : ld4b (%x19,%x20)[1byte] %p5/z -> %z17.b %z18.b %z19.b %z20.b +a476d6b3 : ld4b {z19.b, z20.b, z21.b, z22.b}, p5/Z, [x21, x22] : ld4b (%x21,%x22)[1byte] %p5/z -> %z19.b %z20.b %z21.b %z22.b +a478daf5 : ld4b {z21.b, z22.b, z23.b, z24.b}, p6/Z, [x23, x24] : ld4b (%x23,%x24)[1byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b +a479db17 : ld4b {z23.b, z24.b, z25.b, 
z26.b}, p6/Z, [x24, x25] : ld4b (%x24,%x25)[1byte] %p6/z -> %z23.b %z24.b %z25.b %z26.b +a47bdf59 : ld4b {z25.b, z26.b, z27.b, z28.b}, p7/Z, [x26, x27] : ld4b (%x26,%x27)[1byte] %p7/z -> %z25.b %z26.b %z27.b %z28.b +a47ddf9b : ld4b {z27.b, z28.b, z29.b, z30.b}, p7/Z, [x28, x29] : ld4b (%x28,%x29)[1byte] %p7/z -> %z27.b %z28.b %z29.b %z30.b +a47edfff : ld4b {z31.b, z0.b, z1.b, z2.b}, p7/Z, [sp, x30] : ld4b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b # LD4B { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] (LD4B-Z.P.BI-Contiguous) -a468e000 : ld4b {z0.b, z1.b, z2.b, z3.b}, p0/Z, [x0, #-32, MUL VL] : ld4b -0x0400(%x0)[128byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b -a469e482 : ld4b {z2.b, z3.b, z4.b, z5.b}, p1/Z, [x4, #-28, MUL VL] : ld4b -0x0380(%x4)[128byte] %p1/z -> %z2.b %z3.b %z4.b %z5.b -a46ae8c4 : ld4b {z4.b, z5.b, z6.b, z7.b}, p2/Z, [x6, #-24, MUL VL] : ld4b -0x0300(%x6)[128byte] %p2/z -> %z4.b %z5.b %z6.b %z7.b -a46be906 : ld4b {z6.b, z7.b, z8.b, z9.b}, p2/Z, [x8, #-20, MUL VL] : ld4b -0x0280(%x8)[128byte] %p2/z -> %z6.b %z7.b %z8.b %z9.b -a46ced48 : ld4b {z8.b, z9.b, z10.b, z11.b}, p3/Z, [x10, #-16, MUL VL] : ld4b -0x0200(%x10)[128byte] %p3/z -> %z8.b %z9.b %z10.b %z11.b -a46ded6a : ld4b {z10.b, z11.b, z12.b, z13.b}, p3/Z, [x11, #-12, MUL VL] : ld4b -0x0180(%x11)[128byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b -a46ef1ac : ld4b {z12.b, z13.b, z14.b, z15.b}, p4/Z, [x13, #-8, MUL VL] : ld4b -0x0100(%x13)[128byte] %p4/z -> %z12.b %z13.b %z14.b %z15.b -a46ff1ee : ld4b {z14.b, z15.b, z16.b, z17.b}, p4/Z, [x15, #-4, MUL VL] : ld4b -0x80(%x15)[128byte] %p4/z -> %z14.b %z15.b %z16.b %z17.b -a460f630 : ld4b {z16.b, z17.b, z18.b, z19.b}, p5/Z, [x17, #0, MUL VL] : ld4b (%x17)[128byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b -a460f671 : ld4b {z17.b, z18.b, z19.b, z20.b}, p5/Z, [x19, #0, MUL VL] : ld4b (%x19)[128byte] %p5/z -> %z17.b %z18.b %z19.b %z20.b -a461f6b3 : ld4b {z19.b, z20.b, z21.b, z22.b}, p5/Z, [x21, #4, MUL VL] : ld4b +0x80(%x21)[128byte] %p5/z -> %z19.b %z20.b %z21.b 
%z22.b -a462faf5 : ld4b {z21.b, z22.b, z23.b, z24.b}, p6/Z, [x23, #8, MUL VL] : ld4b +0x0100(%x23)[128byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b -a463fb17 : ld4b {z23.b, z24.b, z25.b, z26.b}, p6/Z, [x24, #12, MUL VL] : ld4b +0x0180(%x24)[128byte] %p6/z -> %z23.b %z24.b %z25.b %z26.b -a464ff59 : ld4b {z25.b, z26.b, z27.b, z28.b}, p7/Z, [x26, #16, MUL VL] : ld4b +0x0200(%x26)[128byte] %p7/z -> %z25.b %z26.b %z27.b %z28.b -a465ff9b : ld4b {z27.b, z28.b, z29.b, z30.b}, p7/Z, [x28, #20, MUL VL] : ld4b +0x0280(%x28)[128byte] %p7/z -> %z27.b %z28.b %z29.b %z30.b -a467ffff : ld4b {z31.b, z0.b, z1.b, z2.b}, p7/Z, [sp, #28, MUL VL] : ld4b +0x0380(%sp)[128byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b +a468e000 : ld4b {z0.b, z1.b, z2.b, z3.b}, p0/Z, [x0, #-32, MUL VL] : ld4b -0x0400(%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b +a469e482 : ld4b {z2.b, z3.b, z4.b, z5.b}, p1/Z, [x4, #-28, MUL VL] : ld4b -0x0380(%x4)[1byte] %p1/z -> %z2.b %z3.b %z4.b %z5.b +a46ae8c4 : ld4b {z4.b, z5.b, z6.b, z7.b}, p2/Z, [x6, #-24, MUL VL] : ld4b -0x0300(%x6)[1byte] %p2/z -> %z4.b %z5.b %z6.b %z7.b +a46be906 : ld4b {z6.b, z7.b, z8.b, z9.b}, p2/Z, [x8, #-20, MUL VL] : ld4b -0x0280(%x8)[1byte] %p2/z -> %z6.b %z7.b %z8.b %z9.b +a46ced48 : ld4b {z8.b, z9.b, z10.b, z11.b}, p3/Z, [x10, #-16, MUL VL] : ld4b -0x0200(%x10)[1byte] %p3/z -> %z8.b %z9.b %z10.b %z11.b +a46ded6a : ld4b {z10.b, z11.b, z12.b, z13.b}, p3/Z, [x11, #-12, MUL VL] : ld4b -0x0180(%x11)[1byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b +a46ef1ac : ld4b {z12.b, z13.b, z14.b, z15.b}, p4/Z, [x13, #-8, MUL VL] : ld4b -0x0100(%x13)[1byte] %p4/z -> %z12.b %z13.b %z14.b %z15.b +a46ff1ee : ld4b {z14.b, z15.b, z16.b, z17.b}, p4/Z, [x15, #-4, MUL VL] : ld4b -0x80(%x15)[1byte] %p4/z -> %z14.b %z15.b %z16.b %z17.b +a460f630 : ld4b {z16.b, z17.b, z18.b, z19.b}, p5/Z, [x17, #0, MUL VL] : ld4b (%x17)[1byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b +a460f671 : ld4b {z17.b, z18.b, z19.b, z20.b}, p5/Z, [x19, #0, MUL VL] : ld4b (%x19)[1byte] %p5/z -> %z17.b %z18.b %z19.b 
%z20.b +a461f6b3 : ld4b {z19.b, z20.b, z21.b, z22.b}, p5/Z, [x21, #4, MUL VL] : ld4b +0x80(%x21)[1byte] %p5/z -> %z19.b %z20.b %z21.b %z22.b +a462faf5 : ld4b {z21.b, z22.b, z23.b, z24.b}, p6/Z, [x23, #8, MUL VL] : ld4b +0x0100(%x23)[1byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b +a463fb17 : ld4b {z23.b, z24.b, z25.b, z26.b}, p6/Z, [x24, #12, MUL VL] : ld4b +0x0180(%x24)[1byte] %p6/z -> %z23.b %z24.b %z25.b %z26.b +a464ff59 : ld4b {z25.b, z26.b, z27.b, z28.b}, p7/Z, [x26, #16, MUL VL] : ld4b +0x0200(%x26)[1byte] %p7/z -> %z25.b %z26.b %z27.b %z28.b +a465ff9b : ld4b {z27.b, z28.b, z29.b, z30.b}, p7/Z, [x28, #20, MUL VL] : ld4b +0x0280(%x28)[1byte] %p7/z -> %z27.b %z28.b %z29.b %z30.b +a467ffff : ld4b {z31.b, z0.b, z1.b, z2.b}, p7/Z, [sp, #28, MUL VL] : ld4b +0x0380(%sp)[1byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b # LD4D { .D, .D, .D, .D }, /Z, [, , LSL #3] (LD4D-Z.P.BR-Contiguous) -a5e0c000 : ld4d {z0.d, z1.d, z2.d, z3.d}, p0/Z, [x0, x0, LSL #3] : ld4d (%x0,%x0,lsl #3)[128byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d -a5e5c482 : ld4d {z2.d, z3.d, z4.d, z5.d}, p1/Z, [x4, x5, LSL #3] : ld4d (%x4,%x5,lsl #3)[128byte] %p1/z -> %z2.d %z3.d %z4.d %z5.d -a5e7c8c4 : ld4d {z4.d, z5.d, z6.d, z7.d}, p2/Z, [x6, x7, LSL #3] : ld4d (%x6,%x7,lsl #3)[128byte] %p2/z -> %z4.d %z5.d %z6.d %z7.d -a5e9c906 : ld4d {z6.d, z7.d, z8.d, z9.d}, p2/Z, [x8, x9, LSL #3] : ld4d (%x8,%x9,lsl #3)[128byte] %p2/z -> %z6.d %z7.d %z8.d %z9.d -a5ebcd48 : ld4d {z8.d, z9.d, z10.d, z11.d}, p3/Z, [x10, x11, LSL #3] : ld4d (%x10,%x11,lsl #3)[128byte] %p3/z -> %z8.d %z9.d %z10.d %z11.d -a5eccd6a : ld4d {z10.d, z11.d, z12.d, z13.d}, p3/Z, [x11, x12, LSL #3] : ld4d (%x11,%x12,lsl #3)[128byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d -a5eed1ac : ld4d {z12.d, z13.d, z14.d, z15.d}, p4/Z, [x13, x14, LSL #3] : ld4d (%x13,%x14,lsl #3)[128byte] %p4/z -> %z12.d %z13.d %z14.d %z15.d -a5f0d1ee : ld4d {z14.d, z15.d, z16.d, z17.d}, p4/Z, [x15, x16, LSL #3] : ld4d (%x15,%x16,lsl #3)[128byte] %p4/z -> %z14.d %z15.d %z16.d %z17.d -a5f2d630 : 
ld4d {z16.d, z17.d, z18.d, z19.d}, p5/Z, [x17, x18, LSL #3] : ld4d (%x17,%x18,lsl #3)[128byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d -a5f4d671 : ld4d {z17.d, z18.d, z19.d, z20.d}, p5/Z, [x19, x20, LSL #3] : ld4d (%x19,%x20,lsl #3)[128byte] %p5/z -> %z17.d %z18.d %z19.d %z20.d -a5f6d6b3 : ld4d {z19.d, z20.d, z21.d, z22.d}, p5/Z, [x21, x22, LSL #3] : ld4d (%x21,%x22,lsl #3)[128byte] %p5/z -> %z19.d %z20.d %z21.d %z22.d -a5f8daf5 : ld4d {z21.d, z22.d, z23.d, z24.d}, p6/Z, [x23, x24, LSL #3] : ld4d (%x23,%x24,lsl #3)[128byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d -a5f9db17 : ld4d {z23.d, z24.d, z25.d, z26.d}, p6/Z, [x24, x25, LSL #3] : ld4d (%x24,%x25,lsl #3)[128byte] %p6/z -> %z23.d %z24.d %z25.d %z26.d -a5fbdf59 : ld4d {z25.d, z26.d, z27.d, z28.d}, p7/Z, [x26, x27, LSL #3] : ld4d (%x26,%x27,lsl #3)[128byte] %p7/z -> %z25.d %z26.d %z27.d %z28.d -a5fddf9b : ld4d {z27.d, z28.d, z29.d, z30.d}, p7/Z, [x28, x29, LSL #3] : ld4d (%x28,%x29,lsl #3)[128byte] %p7/z -> %z27.d %z28.d %z29.d %z30.d -a5fedfff : ld4d {z31.d, z0.d, z1.d, z2.d}, p7/Z, [sp, x30, LSL #3] : ld4d (%sp,%x30,lsl #3)[128byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d +a5e0c000 : ld4d {z0.d, z1.d, z2.d, z3.d}, p0/Z, [x0, x0, LSL #3] : ld4d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d +a5e5c482 : ld4d {z2.d, z3.d, z4.d, z5.d}, p1/Z, [x4, x5, LSL #3] : ld4d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d %z3.d %z4.d %z5.d +a5e7c8c4 : ld4d {z4.d, z5.d, z6.d, z7.d}, p2/Z, [x6, x7, LSL #3] : ld4d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d %z5.d %z6.d %z7.d +a5e9c906 : ld4d {z6.d, z7.d, z8.d, z9.d}, p2/Z, [x8, x9, LSL #3] : ld4d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d %z7.d %z8.d %z9.d +a5ebcd48 : ld4d {z8.d, z9.d, z10.d, z11.d}, p3/Z, [x10, x11, LSL #3] : ld4d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d %z9.d %z10.d %z11.d +a5eccd6a : ld4d {z10.d, z11.d, z12.d, z13.d}, p3/Z, [x11, x12, LSL #3] : ld4d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d +a5eed1ac : ld4d {z12.d, z13.d, z14.d, z15.d}, p4/Z, [x13, x14, LSL 
#3] : ld4d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d %z13.d %z14.d %z15.d +a5f0d1ee : ld4d {z14.d, z15.d, z16.d, z17.d}, p4/Z, [x15, x16, LSL #3] : ld4d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d %z15.d %z16.d %z17.d +a5f2d630 : ld4d {z16.d, z17.d, z18.d, z19.d}, p5/Z, [x17, x18, LSL #3] : ld4d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d +a5f4d671 : ld4d {z17.d, z18.d, z19.d, z20.d}, p5/Z, [x19, x20, LSL #3] : ld4d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d %z18.d %z19.d %z20.d +a5f6d6b3 : ld4d {z19.d, z20.d, z21.d, z22.d}, p5/Z, [x21, x22, LSL #3] : ld4d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d %z20.d %z21.d %z22.d +a5f8daf5 : ld4d {z21.d, z22.d, z23.d, z24.d}, p6/Z, [x23, x24, LSL #3] : ld4d (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d +a5f9db17 : ld4d {z23.d, z24.d, z25.d, z26.d}, p6/Z, [x24, x25, LSL #3] : ld4d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d %z24.d %z25.d %z26.d +a5fbdf59 : ld4d {z25.d, z26.d, z27.d, z28.d}, p7/Z, [x26, x27, LSL #3] : ld4d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d %z26.d %z27.d %z28.d +a5fddf9b : ld4d {z27.d, z28.d, z29.d, z30.d}, p7/Z, [x28, x29, LSL #3] : ld4d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d %z28.d %z29.d %z30.d +a5fedfff : ld4d {z31.d, z0.d, z1.d, z2.d}, p7/Z, [sp, x30, LSL #3] : ld4d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d # LD4D { .D, .D, .D, .D }, /Z, [{, #, MUL VL}] (LD4D-Z.P.BI-Contiguous) -a5e8e000 : ld4d {z0.d, z1.d, z2.d, z3.d}, p0/Z, [x0, #-32, MUL VL] : ld4d -0x0400(%x0)[128byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d -a5e9e482 : ld4d {z2.d, z3.d, z4.d, z5.d}, p1/Z, [x4, #-28, MUL VL] : ld4d -0x0380(%x4)[128byte] %p1/z -> %z2.d %z3.d %z4.d %z5.d -a5eae8c4 : ld4d {z4.d, z5.d, z6.d, z7.d}, p2/Z, [x6, #-24, MUL VL] : ld4d -0x0300(%x6)[128byte] %p2/z -> %z4.d %z5.d %z6.d %z7.d -a5ebe906 : ld4d {z6.d, z7.d, z8.d, z9.d}, p2/Z, [x8, #-20, MUL VL] : ld4d -0x0280(%x8)[128byte] %p2/z -> %z6.d %z7.d %z8.d %z9.d -a5eced48 : ld4d {z8.d, z9.d, z10.d, z11.d}, p3/Z, [x10, #-16, 
MUL VL] : ld4d -0x0200(%x10)[128byte] %p3/z -> %z8.d %z9.d %z10.d %z11.d -a5eded6a : ld4d {z10.d, z11.d, z12.d, z13.d}, p3/Z, [x11, #-12, MUL VL] : ld4d -0x0180(%x11)[128byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d -a5eef1ac : ld4d {z12.d, z13.d, z14.d, z15.d}, p4/Z, [x13, #-8, MUL VL] : ld4d -0x0100(%x13)[128byte] %p4/z -> %z12.d %z13.d %z14.d %z15.d -a5eff1ee : ld4d {z14.d, z15.d, z16.d, z17.d}, p4/Z, [x15, #-4, MUL VL] : ld4d -0x80(%x15)[128byte] %p4/z -> %z14.d %z15.d %z16.d %z17.d -a5e0f630 : ld4d {z16.d, z17.d, z18.d, z19.d}, p5/Z, [x17, #0, MUL VL] : ld4d (%x17)[128byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d -a5e0f671 : ld4d {z17.d, z18.d, z19.d, z20.d}, p5/Z, [x19, #0, MUL VL] : ld4d (%x19)[128byte] %p5/z -> %z17.d %z18.d %z19.d %z20.d -a5e1f6b3 : ld4d {z19.d, z20.d, z21.d, z22.d}, p5/Z, [x21, #4, MUL VL] : ld4d +0x80(%x21)[128byte] %p5/z -> %z19.d %z20.d %z21.d %z22.d -a5e2faf5 : ld4d {z21.d, z22.d, z23.d, z24.d}, p6/Z, [x23, #8, MUL VL] : ld4d +0x0100(%x23)[128byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d -a5e3fb17 : ld4d {z23.d, z24.d, z25.d, z26.d}, p6/Z, [x24, #12, MUL VL] : ld4d +0x0180(%x24)[128byte] %p6/z -> %z23.d %z24.d %z25.d %z26.d -a5e4ff59 : ld4d {z25.d, z26.d, z27.d, z28.d}, p7/Z, [x26, #16, MUL VL] : ld4d +0x0200(%x26)[128byte] %p7/z -> %z25.d %z26.d %z27.d %z28.d -a5e5ff9b : ld4d {z27.d, z28.d, z29.d, z30.d}, p7/Z, [x28, #20, MUL VL] : ld4d +0x0280(%x28)[128byte] %p7/z -> %z27.d %z28.d %z29.d %z30.d -a5e7ffff : ld4d {z31.d, z0.d, z1.d, z2.d}, p7/Z, [sp, #28, MUL VL] : ld4d +0x0380(%sp)[128byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d +a5e8e000 : ld4d {z0.d, z1.d, z2.d, z3.d}, p0/Z, [x0, #-32, MUL VL] : ld4d -0x0400(%x0)[8byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d +a5e9e482 : ld4d {z2.d, z3.d, z4.d, z5.d}, p1/Z, [x4, #-28, MUL VL] : ld4d -0x0380(%x4)[8byte] %p1/z -> %z2.d %z3.d %z4.d %z5.d +a5eae8c4 : ld4d {z4.d, z5.d, z6.d, z7.d}, p2/Z, [x6, #-24, MUL VL] : ld4d -0x0300(%x6)[8byte] %p2/z -> %z4.d %z5.d %z6.d %z7.d +a5ebe906 : ld4d {z6.d, z7.d, z8.d, z9.d}, 
p2/Z, [x8, #-20, MUL VL] : ld4d -0x0280(%x8)[8byte] %p2/z -> %z6.d %z7.d %z8.d %z9.d +a5eced48 : ld4d {z8.d, z9.d, z10.d, z11.d}, p3/Z, [x10, #-16, MUL VL] : ld4d -0x0200(%x10)[8byte] %p3/z -> %z8.d %z9.d %z10.d %z11.d +a5eded6a : ld4d {z10.d, z11.d, z12.d, z13.d}, p3/Z, [x11, #-12, MUL VL] : ld4d -0x0180(%x11)[8byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d +a5eef1ac : ld4d {z12.d, z13.d, z14.d, z15.d}, p4/Z, [x13, #-8, MUL VL] : ld4d -0x0100(%x13)[8byte] %p4/z -> %z12.d %z13.d %z14.d %z15.d +a5eff1ee : ld4d {z14.d, z15.d, z16.d, z17.d}, p4/Z, [x15, #-4, MUL VL] : ld4d -0x80(%x15)[8byte] %p4/z -> %z14.d %z15.d %z16.d %z17.d +a5e0f630 : ld4d {z16.d, z17.d, z18.d, z19.d}, p5/Z, [x17, #0, MUL VL] : ld4d (%x17)[8byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d +a5e0f671 : ld4d {z17.d, z18.d, z19.d, z20.d}, p5/Z, [x19, #0, MUL VL] : ld4d (%x19)[8byte] %p5/z -> %z17.d %z18.d %z19.d %z20.d +a5e1f6b3 : ld4d {z19.d, z20.d, z21.d, z22.d}, p5/Z, [x21, #4, MUL VL] : ld4d +0x80(%x21)[8byte] %p5/z -> %z19.d %z20.d %z21.d %z22.d +a5e2faf5 : ld4d {z21.d, z22.d, z23.d, z24.d}, p6/Z, [x23, #8, MUL VL] : ld4d +0x0100(%x23)[8byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d +a5e3fb17 : ld4d {z23.d, z24.d, z25.d, z26.d}, p6/Z, [x24, #12, MUL VL] : ld4d +0x0180(%x24)[8byte] %p6/z -> %z23.d %z24.d %z25.d %z26.d +a5e4ff59 : ld4d {z25.d, z26.d, z27.d, z28.d}, p7/Z, [x26, #16, MUL VL] : ld4d +0x0200(%x26)[8byte] %p7/z -> %z25.d %z26.d %z27.d %z28.d +a5e5ff9b : ld4d {z27.d, z28.d, z29.d, z30.d}, p7/Z, [x28, #20, MUL VL] : ld4d +0x0280(%x28)[8byte] %p7/z -> %z27.d %z28.d %z29.d %z30.d +a5e7ffff : ld4d {z31.d, z0.d, z1.d, z2.d}, p7/Z, [sp, #28, MUL VL] : ld4d +0x0380(%sp)[8byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d # LD4H { .H, .H, .H, .H }, /Z, [, , LSL #1] (LD4H-Z.P.BR-Contiguous) -a4e0c000 : ld4h {z0.h, z1.h, z2.h, z3.h}, p0/Z, [x0, x0, LSL #1] : ld4h (%x0,%x0,lsl #1)[128byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h -a4e5c482 : ld4h {z2.h, z3.h, z4.h, z5.h}, p1/Z, [x4, x5, LSL #1] : ld4h (%x4,%x5,lsl #1)[128byte] 
%p1/z -> %z2.h %z3.h %z4.h %z5.h -a4e7c8c4 : ld4h {z4.h, z5.h, z6.h, z7.h}, p2/Z, [x6, x7, LSL #1] : ld4h (%x6,%x7,lsl #1)[128byte] %p2/z -> %z4.h %z5.h %z6.h %z7.h -a4e9c906 : ld4h {z6.h, z7.h, z8.h, z9.h}, p2/Z, [x8, x9, LSL #1] : ld4h (%x8,%x9,lsl #1)[128byte] %p2/z -> %z6.h %z7.h %z8.h %z9.h -a4ebcd48 : ld4h {z8.h, z9.h, z10.h, z11.h}, p3/Z, [x10, x11, LSL #1] : ld4h (%x10,%x11,lsl #1)[128byte] %p3/z -> %z8.h %z9.h %z10.h %z11.h -a4eccd6a : ld4h {z10.h, z11.h, z12.h, z13.h}, p3/Z, [x11, x12, LSL #1] : ld4h (%x11,%x12,lsl #1)[128byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h -a4eed1ac : ld4h {z12.h, z13.h, z14.h, z15.h}, p4/Z, [x13, x14, LSL #1] : ld4h (%x13,%x14,lsl #1)[128byte] %p4/z -> %z12.h %z13.h %z14.h %z15.h -a4f0d1ee : ld4h {z14.h, z15.h, z16.h, z17.h}, p4/Z, [x15, x16, LSL #1] : ld4h (%x15,%x16,lsl #1)[128byte] %p4/z -> %z14.h %z15.h %z16.h %z17.h -a4f2d630 : ld4h {z16.h, z17.h, z18.h, z19.h}, p5/Z, [x17, x18, LSL #1] : ld4h (%x17,%x18,lsl #1)[128byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h -a4f4d671 : ld4h {z17.h, z18.h, z19.h, z20.h}, p5/Z, [x19, x20, LSL #1] : ld4h (%x19,%x20,lsl #1)[128byte] %p5/z -> %z17.h %z18.h %z19.h %z20.h -a4f6d6b3 : ld4h {z19.h, z20.h, z21.h, z22.h}, p5/Z, [x21, x22, LSL #1] : ld4h (%x21,%x22,lsl #1)[128byte] %p5/z -> %z19.h %z20.h %z21.h %z22.h -a4f8daf5 : ld4h {z21.h, z22.h, z23.h, z24.h}, p6/Z, [x23, x24, LSL #1] : ld4h (%x23,%x24,lsl #1)[128byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h -a4f9db17 : ld4h {z23.h, z24.h, z25.h, z26.h}, p6/Z, [x24, x25, LSL #1] : ld4h (%x24,%x25,lsl #1)[128byte] %p6/z -> %z23.h %z24.h %z25.h %z26.h -a4fbdf59 : ld4h {z25.h, z26.h, z27.h, z28.h}, p7/Z, [x26, x27, LSL #1] : ld4h (%x26,%x27,lsl #1)[128byte] %p7/z -> %z25.h %z26.h %z27.h %z28.h -a4fddf9b : ld4h {z27.h, z28.h, z29.h, z30.h}, p7/Z, [x28, x29, LSL #1] : ld4h (%x28,%x29,lsl #1)[128byte] %p7/z -> %z27.h %z28.h %z29.h %z30.h -a4fedfff : ld4h {z31.h, z0.h, z1.h, z2.h}, p7/Z, [sp, x30, LSL #1] : ld4h (%sp,%x30,lsl #1)[128byte] %p7/z -> %z31.h 
%z0.h %z1.h %z2.h +a4e0c000 : ld4h {z0.h, z1.h, z2.h, z3.h}, p0/Z, [x0, x0, LSL #1] : ld4h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h +a4e5c482 : ld4h {z2.h, z3.h, z4.h, z5.h}, p1/Z, [x4, x5, LSL #1] : ld4h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h %z3.h %z4.h %z5.h +a4e7c8c4 : ld4h {z4.h, z5.h, z6.h, z7.h}, p2/Z, [x6, x7, LSL #1] : ld4h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h %z5.h %z6.h %z7.h +a4e9c906 : ld4h {z6.h, z7.h, z8.h, z9.h}, p2/Z, [x8, x9, LSL #1] : ld4h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h %z7.h %z8.h %z9.h +a4ebcd48 : ld4h {z8.h, z9.h, z10.h, z11.h}, p3/Z, [x10, x11, LSL #1] : ld4h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h %z9.h %z10.h %z11.h +a4eccd6a : ld4h {z10.h, z11.h, z12.h, z13.h}, p3/Z, [x11, x12, LSL #1] : ld4h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h +a4eed1ac : ld4h {z12.h, z13.h, z14.h, z15.h}, p4/Z, [x13, x14, LSL #1] : ld4h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h %z13.h %z14.h %z15.h +a4f0d1ee : ld4h {z14.h, z15.h, z16.h, z17.h}, p4/Z, [x15, x16, LSL #1] : ld4h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h %z15.h %z16.h %z17.h +a4f2d630 : ld4h {z16.h, z17.h, z18.h, z19.h}, p5/Z, [x17, x18, LSL #1] : ld4h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h +a4f4d671 : ld4h {z17.h, z18.h, z19.h, z20.h}, p5/Z, [x19, x20, LSL #1] : ld4h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h %z18.h %z19.h %z20.h +a4f6d6b3 : ld4h {z19.h, z20.h, z21.h, z22.h}, p5/Z, [x21, x22, LSL #1] : ld4h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h %z20.h %z21.h %z22.h +a4f8daf5 : ld4h {z21.h, z22.h, z23.h, z24.h}, p6/Z, [x23, x24, LSL #1] : ld4h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h +a4f9db17 : ld4h {z23.h, z24.h, z25.h, z26.h}, p6/Z, [x24, x25, LSL #1] : ld4h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h %z24.h %z25.h %z26.h +a4fbdf59 : ld4h {z25.h, z26.h, z27.h, z28.h}, p7/Z, [x26, x27, LSL #1] : ld4h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h %z26.h %z27.h %z28.h +a4fddf9b : ld4h {z27.h, z28.h, z29.h, 
z30.h}, p7/Z, [x28, x29, LSL #1] : ld4h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h %z28.h %z29.h %z30.h +a4fedfff : ld4h {z31.h, z0.h, z1.h, z2.h}, p7/Z, [sp, x30, LSL #1] : ld4h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h # LD4H { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] (LD4H-Z.P.BI-Contiguous) -a4e8e000 : ld4h {z0.h, z1.h, z2.h, z3.h}, p0/Z, [x0, #-32, MUL VL] : ld4h -0x0400(%x0)[128byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h -a4e9e482 : ld4h {z2.h, z3.h, z4.h, z5.h}, p1/Z, [x4, #-28, MUL VL] : ld4h -0x0380(%x4)[128byte] %p1/z -> %z2.h %z3.h %z4.h %z5.h -a4eae8c4 : ld4h {z4.h, z5.h, z6.h, z7.h}, p2/Z, [x6, #-24, MUL VL] : ld4h -0x0300(%x6)[128byte] %p2/z -> %z4.h %z5.h %z6.h %z7.h -a4ebe906 : ld4h {z6.h, z7.h, z8.h, z9.h}, p2/Z, [x8, #-20, MUL VL] : ld4h -0x0280(%x8)[128byte] %p2/z -> %z6.h %z7.h %z8.h %z9.h -a4eced48 : ld4h {z8.h, z9.h, z10.h, z11.h}, p3/Z, [x10, #-16, MUL VL] : ld4h -0x0200(%x10)[128byte] %p3/z -> %z8.h %z9.h %z10.h %z11.h -a4eded6a : ld4h {z10.h, z11.h, z12.h, z13.h}, p3/Z, [x11, #-12, MUL VL] : ld4h -0x0180(%x11)[128byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h -a4eef1ac : ld4h {z12.h, z13.h, z14.h, z15.h}, p4/Z, [x13, #-8, MUL VL] : ld4h -0x0100(%x13)[128byte] %p4/z -> %z12.h %z13.h %z14.h %z15.h -a4eff1ee : ld4h {z14.h, z15.h, z16.h, z17.h}, p4/Z, [x15, #-4, MUL VL] : ld4h -0x80(%x15)[128byte] %p4/z -> %z14.h %z15.h %z16.h %z17.h -a4e0f630 : ld4h {z16.h, z17.h, z18.h, z19.h}, p5/Z, [x17, #0, MUL VL] : ld4h (%x17)[128byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h -a4e0f671 : ld4h {z17.h, z18.h, z19.h, z20.h}, p5/Z, [x19, #0, MUL VL] : ld4h (%x19)[128byte] %p5/z -> %z17.h %z18.h %z19.h %z20.h -a4e1f6b3 : ld4h {z19.h, z20.h, z21.h, z22.h}, p5/Z, [x21, #4, MUL VL] : ld4h +0x80(%x21)[128byte] %p5/z -> %z19.h %z20.h %z21.h %z22.h -a4e2faf5 : ld4h {z21.h, z22.h, z23.h, z24.h}, p6/Z, [x23, #8, MUL VL] : ld4h +0x0100(%x23)[128byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h -a4e3fb17 : ld4h {z23.h, z24.h, z25.h, z26.h}, p6/Z, [x24, #12, MUL VL] : ld4h 
+0x0180(%x24)[128byte] %p6/z -> %z23.h %z24.h %z25.h %z26.h -a4e4ff59 : ld4h {z25.h, z26.h, z27.h, z28.h}, p7/Z, [x26, #16, MUL VL] : ld4h +0x0200(%x26)[128byte] %p7/z -> %z25.h %z26.h %z27.h %z28.h -a4e5ff9b : ld4h {z27.h, z28.h, z29.h, z30.h}, p7/Z, [x28, #20, MUL VL] : ld4h +0x0280(%x28)[128byte] %p7/z -> %z27.h %z28.h %z29.h %z30.h -a4e7ffff : ld4h {z31.h, z0.h, z1.h, z2.h}, p7/Z, [sp, #28, MUL VL] : ld4h +0x0380(%sp)[128byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h +a4e8e000 : ld4h {z0.h, z1.h, z2.h, z3.h}, p0/Z, [x0, #-32, MUL VL] : ld4h -0x0400(%x0)[2byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h +a4e9e482 : ld4h {z2.h, z3.h, z4.h, z5.h}, p1/Z, [x4, #-28, MUL VL] : ld4h -0x0380(%x4)[2byte] %p1/z -> %z2.h %z3.h %z4.h %z5.h +a4eae8c4 : ld4h {z4.h, z5.h, z6.h, z7.h}, p2/Z, [x6, #-24, MUL VL] : ld4h -0x0300(%x6)[2byte] %p2/z -> %z4.h %z5.h %z6.h %z7.h +a4ebe906 : ld4h {z6.h, z7.h, z8.h, z9.h}, p2/Z, [x8, #-20, MUL VL] : ld4h -0x0280(%x8)[2byte] %p2/z -> %z6.h %z7.h %z8.h %z9.h +a4eced48 : ld4h {z8.h, z9.h, z10.h, z11.h}, p3/Z, [x10, #-16, MUL VL] : ld4h -0x0200(%x10)[2byte] %p3/z -> %z8.h %z9.h %z10.h %z11.h +a4eded6a : ld4h {z10.h, z11.h, z12.h, z13.h}, p3/Z, [x11, #-12, MUL VL] : ld4h -0x0180(%x11)[2byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h +a4eef1ac : ld4h {z12.h, z13.h, z14.h, z15.h}, p4/Z, [x13, #-8, MUL VL] : ld4h -0x0100(%x13)[2byte] %p4/z -> %z12.h %z13.h %z14.h %z15.h +a4eff1ee : ld4h {z14.h, z15.h, z16.h, z17.h}, p4/Z, [x15, #-4, MUL VL] : ld4h -0x80(%x15)[2byte] %p4/z -> %z14.h %z15.h %z16.h %z17.h +a4e0f630 : ld4h {z16.h, z17.h, z18.h, z19.h}, p5/Z, [x17, #0, MUL VL] : ld4h (%x17)[2byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h +a4e0f671 : ld4h {z17.h, z18.h, z19.h, z20.h}, p5/Z, [x19, #0, MUL VL] : ld4h (%x19)[2byte] %p5/z -> %z17.h %z18.h %z19.h %z20.h +a4e1f6b3 : ld4h {z19.h, z20.h, z21.h, z22.h}, p5/Z, [x21, #4, MUL VL] : ld4h +0x80(%x21)[2byte] %p5/z -> %z19.h %z20.h %z21.h %z22.h +a4e2faf5 : ld4h {z21.h, z22.h, z23.h, z24.h}, p6/Z, [x23, #8, MUL VL] : ld4h 
+0x0100(%x23)[2byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h +a4e3fb17 : ld4h {z23.h, z24.h, z25.h, z26.h}, p6/Z, [x24, #12, MUL VL] : ld4h +0x0180(%x24)[2byte] %p6/z -> %z23.h %z24.h %z25.h %z26.h +a4e4ff59 : ld4h {z25.h, z26.h, z27.h, z28.h}, p7/Z, [x26, #16, MUL VL] : ld4h +0x0200(%x26)[2byte] %p7/z -> %z25.h %z26.h %z27.h %z28.h +a4e5ff9b : ld4h {z27.h, z28.h, z29.h, z30.h}, p7/Z, [x28, #20, MUL VL] : ld4h +0x0280(%x28)[2byte] %p7/z -> %z27.h %z28.h %z29.h %z30.h +a4e7ffff : ld4h {z31.h, z0.h, z1.h, z2.h}, p7/Z, [sp, #28, MUL VL] : ld4h +0x0380(%sp)[2byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h # LD4W { .S, .S, .S, .S }, /Z, [, , LSL #2] (LD4W-Z.P.BR-Contiguous) -a560c000 : ld4w {z0.s, z1.s, z2.s, z3.s}, p0/Z, [x0, x0, LSL #2] : ld4w (%x0,%x0,lsl #2)[128byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s -a565c482 : ld4w {z2.s, z3.s, z4.s, z5.s}, p1/Z, [x4, x5, LSL #2] : ld4w (%x4,%x5,lsl #2)[128byte] %p1/z -> %z2.s %z3.s %z4.s %z5.s -a567c8c4 : ld4w {z4.s, z5.s, z6.s, z7.s}, p2/Z, [x6, x7, LSL #2] : ld4w (%x6,%x7,lsl #2)[128byte] %p2/z -> %z4.s %z5.s %z6.s %z7.s -a569c906 : ld4w {z6.s, z7.s, z8.s, z9.s}, p2/Z, [x8, x9, LSL #2] : ld4w (%x8,%x9,lsl #2)[128byte] %p2/z -> %z6.s %z7.s %z8.s %z9.s -a56bcd48 : ld4w {z8.s, z9.s, z10.s, z11.s}, p3/Z, [x10, x11, LSL #2] : ld4w (%x10,%x11,lsl #2)[128byte] %p3/z -> %z8.s %z9.s %z10.s %z11.s -a56ccd6a : ld4w {z10.s, z11.s, z12.s, z13.s}, p3/Z, [x11, x12, LSL #2] : ld4w (%x11,%x12,lsl #2)[128byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s -a56ed1ac : ld4w {z12.s, z13.s, z14.s, z15.s}, p4/Z, [x13, x14, LSL #2] : ld4w (%x13,%x14,lsl #2)[128byte] %p4/z -> %z12.s %z13.s %z14.s %z15.s -a570d1ee : ld4w {z14.s, z15.s, z16.s, z17.s}, p4/Z, [x15, x16, LSL #2] : ld4w (%x15,%x16,lsl #2)[128byte] %p4/z -> %z14.s %z15.s %z16.s %z17.s -a572d630 : ld4w {z16.s, z17.s, z18.s, z19.s}, p5/Z, [x17, x18, LSL #2] : ld4w (%x17,%x18,lsl #2)[128byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s -a574d671 : ld4w {z17.s, z18.s, z19.s, z20.s}, p5/Z, [x19, x20, LSL #2] : ld4w 
(%x19,%x20,lsl #2)[128byte] %p5/z -> %z17.s %z18.s %z19.s %z20.s -a576d6b3 : ld4w {z19.s, z20.s, z21.s, z22.s}, p5/Z, [x21, x22, LSL #2] : ld4w (%x21,%x22,lsl #2)[128byte] %p5/z -> %z19.s %z20.s %z21.s %z22.s -a578daf5 : ld4w {z21.s, z22.s, z23.s, z24.s}, p6/Z, [x23, x24, LSL #2] : ld4w (%x23,%x24,lsl #2)[128byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s -a579db17 : ld4w {z23.s, z24.s, z25.s, z26.s}, p6/Z, [x24, x25, LSL #2] : ld4w (%x24,%x25,lsl #2)[128byte] %p6/z -> %z23.s %z24.s %z25.s %z26.s -a57bdf59 : ld4w {z25.s, z26.s, z27.s, z28.s}, p7/Z, [x26, x27, LSL #2] : ld4w (%x26,%x27,lsl #2)[128byte] %p7/z -> %z25.s %z26.s %z27.s %z28.s -a57ddf9b : ld4w {z27.s, z28.s, z29.s, z30.s}, p7/Z, [x28, x29, LSL #2] : ld4w (%x28,%x29,lsl #2)[128byte] %p7/z -> %z27.s %z28.s %z29.s %z30.s -a57edfff : ld4w {z31.s, z0.s, z1.s, z2.s}, p7/Z, [sp, x30, LSL #2] : ld4w (%sp,%x30,lsl #2)[128byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s +a560c000 : ld4w {z0.s, z1.s, z2.s, z3.s}, p0/Z, [x0, x0, LSL #2] : ld4w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s +a565c482 : ld4w {z2.s, z3.s, z4.s, z5.s}, p1/Z, [x4, x5, LSL #2] : ld4w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s %z3.s %z4.s %z5.s +a567c8c4 : ld4w {z4.s, z5.s, z6.s, z7.s}, p2/Z, [x6, x7, LSL #2] : ld4w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s %z5.s %z6.s %z7.s +a569c906 : ld4w {z6.s, z7.s, z8.s, z9.s}, p2/Z, [x8, x9, LSL #2] : ld4w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s %z7.s %z8.s %z9.s +a56bcd48 : ld4w {z8.s, z9.s, z10.s, z11.s}, p3/Z, [x10, x11, LSL #2] : ld4w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s %z9.s %z10.s %z11.s +a56ccd6a : ld4w {z10.s, z11.s, z12.s, z13.s}, p3/Z, [x11, x12, LSL #2] : ld4w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s +a56ed1ac : ld4w {z12.s, z13.s, z14.s, z15.s}, p4/Z, [x13, x14, LSL #2] : ld4w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s %z13.s %z14.s %z15.s +a570d1ee : ld4w {z14.s, z15.s, z16.s, z17.s}, p4/Z, [x15, x16, LSL #2] : ld4w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s %z15.s %z16.s 
%z17.s +a572d630 : ld4w {z16.s, z17.s, z18.s, z19.s}, p5/Z, [x17, x18, LSL #2] : ld4w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s +a574d671 : ld4w {z17.s, z18.s, z19.s, z20.s}, p5/Z, [x19, x20, LSL #2] : ld4w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s %z18.s %z19.s %z20.s +a576d6b3 : ld4w {z19.s, z20.s, z21.s, z22.s}, p5/Z, [x21, x22, LSL #2] : ld4w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s %z20.s %z21.s %z22.s +a578daf5 : ld4w {z21.s, z22.s, z23.s, z24.s}, p6/Z, [x23, x24, LSL #2] : ld4w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s +a579db17 : ld4w {z23.s, z24.s, z25.s, z26.s}, p6/Z, [x24, x25, LSL #2] : ld4w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s %z24.s %z25.s %z26.s +a57bdf59 : ld4w {z25.s, z26.s, z27.s, z28.s}, p7/Z, [x26, x27, LSL #2] : ld4w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s %z26.s %z27.s %z28.s +a57ddf9b : ld4w {z27.s, z28.s, z29.s, z30.s}, p7/Z, [x28, x29, LSL #2] : ld4w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s %z28.s %z29.s %z30.s +a57edfff : ld4w {z31.s, z0.s, z1.s, z2.s}, p7/Z, [sp, x30, LSL #2] : ld4w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s # LD4W { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] (LD4W-Z.P.BI-Contiguous) -a568e000 : ld4w {z0.s, z1.s, z2.s, z3.s}, p0/Z, [x0, #-32, MUL VL] : ld4w -0x0400(%x0)[128byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s -a569e482 : ld4w {z2.s, z3.s, z4.s, z5.s}, p1/Z, [x4, #-28, MUL VL] : ld4w -0x0380(%x4)[128byte] %p1/z -> %z2.s %z3.s %z4.s %z5.s -a56ae8c4 : ld4w {z4.s, z5.s, z6.s, z7.s}, p2/Z, [x6, #-24, MUL VL] : ld4w -0x0300(%x6)[128byte] %p2/z -> %z4.s %z5.s %z6.s %z7.s -a56be906 : ld4w {z6.s, z7.s, z8.s, z9.s}, p2/Z, [x8, #-20, MUL VL] : ld4w -0x0280(%x8)[128byte] %p2/z -> %z6.s %z7.s %z8.s %z9.s -a56ced48 : ld4w {z8.s, z9.s, z10.s, z11.s}, p3/Z, [x10, #-16, MUL VL] : ld4w -0x0200(%x10)[128byte] %p3/z -> %z8.s %z9.s %z10.s %z11.s -a56ded6a : ld4w {z10.s, z11.s, z12.s, z13.s}, p3/Z, [x11, #-12, MUL VL] : ld4w -0x0180(%x11)[128byte] %p3/z -> %z10.s %z11.s %z12.s 
%z13.s -a56ef1ac : ld4w {z12.s, z13.s, z14.s, z15.s}, p4/Z, [x13, #-8, MUL VL] : ld4w -0x0100(%x13)[128byte] %p4/z -> %z12.s %z13.s %z14.s %z15.s -a56ff1ee : ld4w {z14.s, z15.s, z16.s, z17.s}, p4/Z, [x15, #-4, MUL VL] : ld4w -0x80(%x15)[128byte] %p4/z -> %z14.s %z15.s %z16.s %z17.s -a560f630 : ld4w {z16.s, z17.s, z18.s, z19.s}, p5/Z, [x17, #0, MUL VL] : ld4w (%x17)[128byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s -a560f671 : ld4w {z17.s, z18.s, z19.s, z20.s}, p5/Z, [x19, #0, MUL VL] : ld4w (%x19)[128byte] %p5/z -> %z17.s %z18.s %z19.s %z20.s -a561f6b3 : ld4w {z19.s, z20.s, z21.s, z22.s}, p5/Z, [x21, #4, MUL VL] : ld4w +0x80(%x21)[128byte] %p5/z -> %z19.s %z20.s %z21.s %z22.s -a562faf5 : ld4w {z21.s, z22.s, z23.s, z24.s}, p6/Z, [x23, #8, MUL VL] : ld4w +0x0100(%x23)[128byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s -a563fb17 : ld4w {z23.s, z24.s, z25.s, z26.s}, p6/Z, [x24, #12, MUL VL] : ld4w +0x0180(%x24)[128byte] %p6/z -> %z23.s %z24.s %z25.s %z26.s -a564ff59 : ld4w {z25.s, z26.s, z27.s, z28.s}, p7/Z, [x26, #16, MUL VL] : ld4w +0x0200(%x26)[128byte] %p7/z -> %z25.s %z26.s %z27.s %z28.s -a565ff9b : ld4w {z27.s, z28.s, z29.s, z30.s}, p7/Z, [x28, #20, MUL VL] : ld4w +0x0280(%x28)[128byte] %p7/z -> %z27.s %z28.s %z29.s %z30.s -a567ffff : ld4w {z31.s, z0.s, z1.s, z2.s}, p7/Z, [sp, #28, MUL VL] : ld4w +0x0380(%sp)[128byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s +a568e000 : ld4w {z0.s, z1.s, z2.s, z3.s}, p0/Z, [x0, #-32, MUL VL] : ld4w -0x0400(%x0)[4byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s +a569e482 : ld4w {z2.s, z3.s, z4.s, z5.s}, p1/Z, [x4, #-28, MUL VL] : ld4w -0x0380(%x4)[4byte] %p1/z -> %z2.s %z3.s %z4.s %z5.s +a56ae8c4 : ld4w {z4.s, z5.s, z6.s, z7.s}, p2/Z, [x6, #-24, MUL VL] : ld4w -0x0300(%x6)[4byte] %p2/z -> %z4.s %z5.s %z6.s %z7.s +a56be906 : ld4w {z6.s, z7.s, z8.s, z9.s}, p2/Z, [x8, #-20, MUL VL] : ld4w -0x0280(%x8)[4byte] %p2/z -> %z6.s %z7.s %z8.s %z9.s +a56ced48 : ld4w {z8.s, z9.s, z10.s, z11.s}, p3/Z, [x10, #-16, MUL VL] : ld4w -0x0200(%x10)[4byte] %p3/z -> %z8.s %z9.s 
%z10.s %z11.s +a56ded6a : ld4w {z10.s, z11.s, z12.s, z13.s}, p3/Z, [x11, #-12, MUL VL] : ld4w -0x0180(%x11)[4byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s +a56ef1ac : ld4w {z12.s, z13.s, z14.s, z15.s}, p4/Z, [x13, #-8, MUL VL] : ld4w -0x0100(%x13)[4byte] %p4/z -> %z12.s %z13.s %z14.s %z15.s +a56ff1ee : ld4w {z14.s, z15.s, z16.s, z17.s}, p4/Z, [x15, #-4, MUL VL] : ld4w -0x80(%x15)[4byte] %p4/z -> %z14.s %z15.s %z16.s %z17.s +a560f630 : ld4w {z16.s, z17.s, z18.s, z19.s}, p5/Z, [x17, #0, MUL VL] : ld4w (%x17)[4byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s +a560f671 : ld4w {z17.s, z18.s, z19.s, z20.s}, p5/Z, [x19, #0, MUL VL] : ld4w (%x19)[4byte] %p5/z -> %z17.s %z18.s %z19.s %z20.s +a561f6b3 : ld4w {z19.s, z20.s, z21.s, z22.s}, p5/Z, [x21, #4, MUL VL] : ld4w +0x80(%x21)[4byte] %p5/z -> %z19.s %z20.s %z21.s %z22.s +a562faf5 : ld4w {z21.s, z22.s, z23.s, z24.s}, p6/Z, [x23, #8, MUL VL] : ld4w +0x0100(%x23)[4byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s +a563fb17 : ld4w {z23.s, z24.s, z25.s, z26.s}, p6/Z, [x24, #12, MUL VL] : ld4w +0x0180(%x24)[4byte] %p6/z -> %z23.s %z24.s %z25.s %z26.s +a564ff59 : ld4w {z25.s, z26.s, z27.s, z28.s}, p7/Z, [x26, #16, MUL VL] : ld4w +0x0200(%x26)[4byte] %p7/z -> %z25.s %z26.s %z27.s %z28.s +a565ff9b : ld4w {z27.s, z28.s, z29.s, z30.s}, p7/Z, [x28, #20, MUL VL] : ld4w +0x0280(%x28)[4byte] %p7/z -> %z27.s %z28.s %z29.s %z30.s +a567ffff : ld4w {z31.s, z0.s, z1.s, z2.s}, p7/Z, [sp, #28, MUL VL] : ld4w +0x0380(%sp)[4byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s # LDFF1B { .S }, /Z, [, .S, ] (LDFF1B-Z.P.BZ-S.x32.unscaled) -84006000 : ldff1b z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1b (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s -84056482 : ldff1b z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1b (%x4,%z5.s,uxtw)[8byte] %p1/z -> %z2.s -840768c4 : ldff1b z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1b (%x6,%z7.s,uxtw)[8byte] %p2/z -> %z4.s -84096906 : ldff1b z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1b (%x8,%z9.s,uxtw)[8byte] %p2/z -> %z6.s -840b6d48 : ldff1b z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1b 
(%x10,%z11.s,uxtw)[8byte] %p3/z -> %z8.s -840d6d6a : ldff1b z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1b (%x11,%z13.s,uxtw)[8byte] %p3/z -> %z10.s -840f71ac : ldff1b z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1b (%x13,%z15.s,uxtw)[8byte] %p4/z -> %z12.s -841171ee : ldff1b z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1b (%x15,%z17.s,uxtw)[8byte] %p4/z -> %z14.s -84137630 : ldff1b z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1b (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s -84147671 : ldff1b z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1b (%x19,%z20.s,uxtw)[8byte] %p5/z -> %z17.s -841676b3 : ldff1b z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1b (%x21,%z22.s,uxtw)[8byte] %p5/z -> %z19.s -84187af5 : ldff1b z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1b (%x23,%z24.s,uxtw)[8byte] %p6/z -> %z21.s -841a7b17 : ldff1b z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1b (%x24,%z26.s,uxtw)[8byte] %p6/z -> %z23.s -841c7f59 : ldff1b z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1b (%x26,%z28.s,uxtw)[8byte] %p7/z -> %z25.s -841e7f9b : ldff1b z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1b (%x28,%z30.s,uxtw)[8byte] %p7/z -> %z27.s -841f7fff : ldff1b z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1b (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s -84406000 : ldff1b z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1b (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s -84456482 : ldff1b z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1b (%x4,%z5.s,sxtw)[8byte] %p1/z -> %z2.s -844768c4 : ldff1b z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1b (%x6,%z7.s,sxtw)[8byte] %p2/z -> %z4.s -84496906 : ldff1b z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1b (%x8,%z9.s,sxtw)[8byte] %p2/z -> %z6.s -844b6d48 : ldff1b z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1b (%x10,%z11.s,sxtw)[8byte] %p3/z -> %z8.s -844d6d6a : ldff1b z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1b (%x11,%z13.s,sxtw)[8byte] %p3/z -> %z10.s -844f71ac : ldff1b z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1b (%x13,%z15.s,sxtw)[8byte] %p4/z -> %z12.s -845171ee : ldff1b z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1b (%x15,%z17.s,sxtw)[8byte] %p4/z -> %z14.s -84537630 : ldff1b z16.s, p5/Z, [x17, z19.s, 
SXTW] : ldff1b (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s -84547671 : ldff1b z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1b (%x19,%z20.s,sxtw)[8byte] %p5/z -> %z17.s -845676b3 : ldff1b z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1b (%x21,%z22.s,sxtw)[8byte] %p5/z -> %z19.s -84587af5 : ldff1b z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1b (%x23,%z24.s,sxtw)[8byte] %p6/z -> %z21.s -845a7b17 : ldff1b z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1b (%x24,%z26.s,sxtw)[8byte] %p6/z -> %z23.s -845c7f59 : ldff1b z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1b (%x26,%z28.s,sxtw)[8byte] %p7/z -> %z25.s -845e7f9b : ldff1b z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1b (%x28,%z30.s,sxtw)[8byte] %p7/z -> %z27.s -845f7fff : ldff1b z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1b (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s +84006000 : ldff1b z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1b (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s +84056482 : ldff1b z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1b (%x4,%z5.s,uxtw)[1byte] %p1/z -> %z2.s +840768c4 : ldff1b z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1b (%x6,%z7.s,uxtw)[1byte] %p2/z -> %z4.s +84096906 : ldff1b z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1b (%x8,%z9.s,uxtw)[1byte] %p2/z -> %z6.s +840b6d48 : ldff1b z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1b (%x10,%z11.s,uxtw)[1byte] %p3/z -> %z8.s +840d6d6a : ldff1b z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1b (%x11,%z13.s,uxtw)[1byte] %p3/z -> %z10.s +840f71ac : ldff1b z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1b (%x13,%z15.s,uxtw)[1byte] %p4/z -> %z12.s +841171ee : ldff1b z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1b (%x15,%z17.s,uxtw)[1byte] %p4/z -> %z14.s +84137630 : ldff1b z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1b (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s +84147671 : ldff1b z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1b (%x19,%z20.s,uxtw)[1byte] %p5/z -> %z17.s +841676b3 : ldff1b z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1b (%x21,%z22.s,uxtw)[1byte] %p5/z -> %z19.s +84187af5 : ldff1b z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1b (%x23,%z24.s,uxtw)[1byte] %p6/z -> %z21.s +841a7b17 : ldff1b z23.s, p6/Z, 
[x24, z26.s, UXTW] : ldff1b (%x24,%z26.s,uxtw)[1byte] %p6/z -> %z23.s +841c7f59 : ldff1b z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1b (%x26,%z28.s,uxtw)[1byte] %p7/z -> %z25.s +841e7f9b : ldff1b z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1b (%x28,%z30.s,uxtw)[1byte] %p7/z -> %z27.s +841f7fff : ldff1b z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1b (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s +84406000 : ldff1b z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1b (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s +84456482 : ldff1b z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1b (%x4,%z5.s,sxtw)[1byte] %p1/z -> %z2.s +844768c4 : ldff1b z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1b (%x6,%z7.s,sxtw)[1byte] %p2/z -> %z4.s +84496906 : ldff1b z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1b (%x8,%z9.s,sxtw)[1byte] %p2/z -> %z6.s +844b6d48 : ldff1b z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1b (%x10,%z11.s,sxtw)[1byte] %p3/z -> %z8.s +844d6d6a : ldff1b z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1b (%x11,%z13.s,sxtw)[1byte] %p3/z -> %z10.s +844f71ac : ldff1b z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1b (%x13,%z15.s,sxtw)[1byte] %p4/z -> %z12.s +845171ee : ldff1b z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1b (%x15,%z17.s,sxtw)[1byte] %p4/z -> %z14.s +84537630 : ldff1b z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1b (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s +84547671 : ldff1b z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1b (%x19,%z20.s,sxtw)[1byte] %p5/z -> %z17.s +845676b3 : ldff1b z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1b (%x21,%z22.s,sxtw)[1byte] %p5/z -> %z19.s +84587af5 : ldff1b z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1b (%x23,%z24.s,sxtw)[1byte] %p6/z -> %z21.s +845a7b17 : ldff1b z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1b (%x24,%z26.s,sxtw)[1byte] %p6/z -> %z23.s +845c7f59 : ldff1b z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1b (%x26,%z28.s,sxtw)[1byte] %p7/z -> %z25.s +845e7f9b : ldff1b z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1b (%x28,%z30.s,sxtw)[1byte] %p7/z -> %z27.s +845f7fff : ldff1b z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1b (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s # LDFF1B { .S }, /Z, 
[.S{, #}] (LDFF1B-Z.P.AI-S) -8420e000 : ldff1b z0.s, p0/Z, [z0.s, #0] : ldff1b (%z0.s)[8byte] %p0/z -> %z0.s -8422e482 : ldff1b z2.s, p1/Z, [z4.s, #2] : ldff1b +0x02(%z4.s)[8byte] %p1/z -> %z2.s -8424e8c4 : ldff1b z4.s, p2/Z, [z6.s, #4] : ldff1b +0x04(%z6.s)[8byte] %p2/z -> %z4.s -8426e906 : ldff1b z6.s, p2/Z, [z8.s, #6] : ldff1b +0x06(%z8.s)[8byte] %p2/z -> %z6.s -8428ed48 : ldff1b z8.s, p3/Z, [z10.s, #8] : ldff1b +0x08(%z10.s)[8byte] %p3/z -> %z8.s -842aed8a : ldff1b z10.s, p3/Z, [z12.s, #10] : ldff1b +0x0a(%z12.s)[8byte] %p3/z -> %z10.s -842cf1cc : ldff1b z12.s, p4/Z, [z14.s, #12] : ldff1b +0x0c(%z14.s)[8byte] %p4/z -> %z12.s -842ef20e : ldff1b z14.s, p4/Z, [z16.s, #14] : ldff1b +0x0e(%z16.s)[8byte] %p4/z -> %z14.s -8430f650 : ldff1b z16.s, p5/Z, [z18.s, #16] : ldff1b +0x10(%z18.s)[8byte] %p5/z -> %z16.s -8431f671 : ldff1b z17.s, p5/Z, [z19.s, #17] : ldff1b +0x11(%z19.s)[8byte] %p5/z -> %z17.s -8433f6b3 : ldff1b z19.s, p5/Z, [z21.s, #19] : ldff1b +0x13(%z21.s)[8byte] %p5/z -> %z19.s -8435faf5 : ldff1b z21.s, p6/Z, [z23.s, #21] : ldff1b +0x15(%z23.s)[8byte] %p6/z -> %z21.s -8437fb37 : ldff1b z23.s, p6/Z, [z25.s, #23] : ldff1b +0x17(%z25.s)[8byte] %p6/z -> %z23.s -8439ff79 : ldff1b z25.s, p7/Z, [z27.s, #25] : ldff1b +0x19(%z27.s)[8byte] %p7/z -> %z25.s -843bffbb : ldff1b z27.s, p7/Z, [z29.s, #27] : ldff1b +0x1b(%z29.s)[8byte] %p7/z -> %z27.s -843fffff : ldff1b z31.s, p7/Z, [z31.s, #31] : ldff1b +0x1f(%z31.s)[8byte] %p7/z -> %z31.s +8420e000 : ldff1b z0.s, p0/Z, [z0.s, #0] : ldff1b (%z0.s)[1byte] %p0/z -> %z0.s +8422e482 : ldff1b z2.s, p1/Z, [z4.s, #2] : ldff1b +0x02(%z4.s)[1byte] %p1/z -> %z2.s +8424e8c4 : ldff1b z4.s, p2/Z, [z6.s, #4] : ldff1b +0x04(%z6.s)[1byte] %p2/z -> %z4.s +8426e906 : ldff1b z6.s, p2/Z, [z8.s, #6] : ldff1b +0x06(%z8.s)[1byte] %p2/z -> %z6.s +8428ed48 : ldff1b z8.s, p3/Z, [z10.s, #8] : ldff1b +0x08(%z10.s)[1byte] %p3/z -> %z8.s +842aed8a : ldff1b z10.s, p3/Z, [z12.s, #10] : ldff1b +0x0a(%z12.s)[1byte] %p3/z -> %z10.s +842cf1cc : ldff1b z12.s, 
p4/Z, [z14.s, #12] : ldff1b +0x0c(%z14.s)[1byte] %p4/z -> %z12.s +842ef20e : ldff1b z14.s, p4/Z, [z16.s, #14] : ldff1b +0x0e(%z16.s)[1byte] %p4/z -> %z14.s +8430f650 : ldff1b z16.s, p5/Z, [z18.s, #16] : ldff1b +0x10(%z18.s)[1byte] %p5/z -> %z16.s +8431f671 : ldff1b z17.s, p5/Z, [z19.s, #17] : ldff1b +0x11(%z19.s)[1byte] %p5/z -> %z17.s +8433f6b3 : ldff1b z19.s, p5/Z, [z21.s, #19] : ldff1b +0x13(%z21.s)[1byte] %p5/z -> %z19.s +8435faf5 : ldff1b z21.s, p6/Z, [z23.s, #21] : ldff1b +0x15(%z23.s)[1byte] %p6/z -> %z21.s +8437fb37 : ldff1b z23.s, p6/Z, [z25.s, #23] : ldff1b +0x17(%z25.s)[1byte] %p6/z -> %z23.s +8439ff79 : ldff1b z25.s, p7/Z, [z27.s, #25] : ldff1b +0x19(%z27.s)[1byte] %p7/z -> %z25.s +843bffbb : ldff1b z27.s, p7/Z, [z29.s, #27] : ldff1b +0x1b(%z29.s)[1byte] %p7/z -> %z27.s +843fffff : ldff1b z31.s, p7/Z, [z31.s, #31] : ldff1b +0x1f(%z31.s)[1byte] %p7/z -> %z31.s # LDFF1B { .B }, /Z, [{, }] (LDFF1B-Z.P.BR-U8) -a4006000 : ldff1b z0.b, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[32byte] %p0/z -> %z0.b -a4056482 : ldff1b z2.b, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[32byte] %p1/z -> %z2.b -a40768c4 : ldff1b z4.b, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[32byte] %p2/z -> %z4.b -a4096906 : ldff1b z6.b, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[32byte] %p2/z -> %z6.b -a40b6d48 : ldff1b z8.b, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[32byte] %p3/z -> %z8.b -a40c6d6a : ldff1b z10.b, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[32byte] %p3/z -> %z10.b -a40e71ac : ldff1b z12.b, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[32byte] %p4/z -> %z12.b -a41071ee : ldff1b z14.b, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[32byte] %p4/z -> %z14.b -a4127630 : ldff1b z16.b, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[32byte] %p5/z -> %z16.b -a4147671 : ldff1b z17.b, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[32byte] %p5/z -> %z17.b -a41676b3 : ldff1b z19.b, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[32byte] %p5/z -> %z19.b -a4187af5 : ldff1b z21.b, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[32byte] %p6/z -> %z21.b -a4197b17 : ldff1b z23.b, p6/Z, [x24, 
x25] : ldff1b (%x24,%x25)[32byte] %p6/z -> %z23.b -a41b7f59 : ldff1b z25.b, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[32byte] %p7/z -> %z25.b -a41d7f9b : ldff1b z27.b, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[32byte] %p7/z -> %z27.b -a41e7fff : ldff1b z31.b, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[32byte] %p7/z -> %z31.b +a4006000 : ldff1b z0.b, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.b +a4056482 : ldff1b z2.b, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[1byte] %p1/z -> %z2.b +a40768c4 : ldff1b z4.b, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[1byte] %p2/z -> %z4.b +a4096906 : ldff1b z6.b, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[1byte] %p2/z -> %z6.b +a40b6d48 : ldff1b z8.b, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[1byte] %p3/z -> %z8.b +a40c6d6a : ldff1b z10.b, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[1byte] %p3/z -> %z10.b +a40e71ac : ldff1b z12.b, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[1byte] %p4/z -> %z12.b +a41071ee : ldff1b z14.b, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[1byte] %p4/z -> %z14.b +a4127630 : ldff1b z16.b, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.b +a4147671 : ldff1b z17.b, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[1byte] %p5/z -> %z17.b +a41676b3 : ldff1b z19.b, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[1byte] %p5/z -> %z19.b +a4187af5 : ldff1b z21.b, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[1byte] %p6/z -> %z21.b +a4197b17 : ldff1b z23.b, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[1byte] %p6/z -> %z23.b +a41b7f59 : ldff1b z25.b, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[1byte] %p7/z -> %z25.b +a41d7f9b : ldff1b z27.b, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[1byte] %p7/z -> %z27.b +a41e7fff : ldff1b z31.b, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.b # LDFF1B { .H }, /Z, [{, }] (LDFF1B-Z.P.BR-U16) -a4206000 : ldff1b z0.h, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[16byte] %p0/z -> %z0.h -a4256482 : ldff1b z2.h, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[16byte] %p1/z -> %z2.h -a42768c4 : ldff1b z4.h, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[16byte] %p2/z -> %z4.h -a4296906 : ldff1b z6.h, 
p2/Z, [x8, x9] : ldff1b (%x8,%x9)[16byte] %p2/z -> %z6.h -a42b6d48 : ldff1b z8.h, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[16byte] %p3/z -> %z8.h -a42c6d6a : ldff1b z10.h, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[16byte] %p3/z -> %z10.h -a42e71ac : ldff1b z12.h, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[16byte] %p4/z -> %z12.h -a43071ee : ldff1b z14.h, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[16byte] %p4/z -> %z14.h -a4327630 : ldff1b z16.h, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[16byte] %p5/z -> %z16.h -a4347671 : ldff1b z17.h, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[16byte] %p5/z -> %z17.h -a43676b3 : ldff1b z19.h, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[16byte] %p5/z -> %z19.h -a4387af5 : ldff1b z21.h, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[16byte] %p6/z -> %z21.h -a4397b17 : ldff1b z23.h, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[16byte] %p6/z -> %z23.h -a43b7f59 : ldff1b z25.h, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[16byte] %p7/z -> %z25.h -a43d7f9b : ldff1b z27.h, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[16byte] %p7/z -> %z27.h -a43e7fff : ldff1b z31.h, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[16byte] %p7/z -> %z31.h +a4206000 : ldff1b z0.h, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.h +a4256482 : ldff1b z2.h, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[1byte] %p1/z -> %z2.h +a42768c4 : ldff1b z4.h, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[1byte] %p2/z -> %z4.h +a4296906 : ldff1b z6.h, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[1byte] %p2/z -> %z6.h +a42b6d48 : ldff1b z8.h, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[1byte] %p3/z -> %z8.h +a42c6d6a : ldff1b z10.h, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[1byte] %p3/z -> %z10.h +a42e71ac : ldff1b z12.h, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[1byte] %p4/z -> %z12.h +a43071ee : ldff1b z14.h, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[1byte] %p4/z -> %z14.h +a4327630 : ldff1b z16.h, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.h +a4347671 : ldff1b z17.h, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[1byte] %p5/z -> %z17.h +a43676b3 : ldff1b z19.h, p5/Z, [x21, x22] : 
ldff1b (%x21,%x22)[1byte] %p5/z -> %z19.h +a4387af5 : ldff1b z21.h, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[1byte] %p6/z -> %z21.h +a4397b17 : ldff1b z23.h, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[1byte] %p6/z -> %z23.h +a43b7f59 : ldff1b z25.h, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[1byte] %p7/z -> %z25.h +a43d7f9b : ldff1b z27.h, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[1byte] %p7/z -> %z27.h +a43e7fff : ldff1b z31.h, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.h # LDFF1B { .S }, /Z, [{, }] (LDFF1B-Z.P.BR-U32) -a4406000 : ldff1b z0.s, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[8byte] %p0/z -> %z0.s -a4456482 : ldff1b z2.s, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[8byte] %p1/z -> %z2.s -a44768c4 : ldff1b z4.s, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[8byte] %p2/z -> %z4.s -a4496906 : ldff1b z6.s, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[8byte] %p2/z -> %z6.s -a44b6d48 : ldff1b z8.s, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[8byte] %p3/z -> %z8.s -a44c6d6a : ldff1b z10.s, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[8byte] %p3/z -> %z10.s -a44e71ac : ldff1b z12.s, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[8byte] %p4/z -> %z12.s -a45071ee : ldff1b z14.s, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[8byte] %p4/z -> %z14.s -a4527630 : ldff1b z16.s, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[8byte] %p5/z -> %z16.s -a4547671 : ldff1b z17.s, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[8byte] %p5/z -> %z17.s -a45676b3 : ldff1b z19.s, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[8byte] %p5/z -> %z19.s -a4587af5 : ldff1b z21.s, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[8byte] %p6/z -> %z21.s -a4597b17 : ldff1b z23.s, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[8byte] %p6/z -> %z23.s -a45b7f59 : ldff1b z25.s, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[8byte] %p7/z -> %z25.s -a45d7f9b : ldff1b z27.s, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[8byte] %p7/z -> %z27.s -a45e7fff : ldff1b z31.s, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[8byte] %p7/z -> %z31.s +a4406000 : ldff1b z0.s, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.s +a4456482 : ldff1b z2.s, p1/Z, 
[x4, x5] : ldff1b (%x4,%x5)[1byte] %p1/z -> %z2.s +a44768c4 : ldff1b z4.s, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[1byte] %p2/z -> %z4.s +a4496906 : ldff1b z6.s, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[1byte] %p2/z -> %z6.s +a44b6d48 : ldff1b z8.s, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[1byte] %p3/z -> %z8.s +a44c6d6a : ldff1b z10.s, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[1byte] %p3/z -> %z10.s +a44e71ac : ldff1b z12.s, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[1byte] %p4/z -> %z12.s +a45071ee : ldff1b z14.s, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[1byte] %p4/z -> %z14.s +a4527630 : ldff1b z16.s, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.s +a4547671 : ldff1b z17.s, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[1byte] %p5/z -> %z17.s +a45676b3 : ldff1b z19.s, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[1byte] %p5/z -> %z19.s +a4587af5 : ldff1b z21.s, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[1byte] %p6/z -> %z21.s +a4597b17 : ldff1b z23.s, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[1byte] %p6/z -> %z23.s +a45b7f59 : ldff1b z25.s, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[1byte] %p7/z -> %z25.s +a45d7f9b : ldff1b z27.s, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[1byte] %p7/z -> %z27.s +a45e7fff : ldff1b z31.s, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.s # LDFF1B { .D }, /Z, [{, }] (LDFF1B-Z.P.BR-U64) -a4606000 : ldff1b z0.d, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[4byte] %p0/z -> %z0.d -a4656482 : ldff1b z2.d, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[4byte] %p1/z -> %z2.d -a46768c4 : ldff1b z4.d, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[4byte] %p2/z -> %z4.d -a4696906 : ldff1b z6.d, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[4byte] %p2/z -> %z6.d -a46b6d48 : ldff1b z8.d, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[4byte] %p3/z -> %z8.d -a46c6d6a : ldff1b z10.d, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[4byte] %p3/z -> %z10.d -a46e71ac : ldff1b z12.d, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[4byte] %p4/z -> %z12.d -a47071ee : ldff1b z14.d, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[4byte] %p4/z -> %z14.d -a4727630 : ldff1b z16.d, 
p5/Z, [x17, x18] : ldff1b (%x17,%x18)[4byte] %p5/z -> %z16.d -a4747671 : ldff1b z17.d, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[4byte] %p5/z -> %z17.d -a47676b3 : ldff1b z19.d, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[4byte] %p5/z -> %z19.d -a4787af5 : ldff1b z21.d, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[4byte] %p6/z -> %z21.d -a4797b17 : ldff1b z23.d, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[4byte] %p6/z -> %z23.d -a47b7f59 : ldff1b z25.d, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[4byte] %p7/z -> %z25.d -a47d7f9b : ldff1b z27.d, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[4byte] %p7/z -> %z27.d -a47e7fff : ldff1b z31.d, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[4byte] %p7/z -> %z31.d +a4606000 : ldff1b z0.d, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.d +a4656482 : ldff1b z2.d, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[1byte] %p1/z -> %z2.d +a46768c4 : ldff1b z4.d, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[1byte] %p2/z -> %z4.d +a4696906 : ldff1b z6.d, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[1byte] %p2/z -> %z6.d +a46b6d48 : ldff1b z8.d, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[1byte] %p3/z -> %z8.d +a46c6d6a : ldff1b z10.d, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[1byte] %p3/z -> %z10.d +a46e71ac : ldff1b z12.d, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[1byte] %p4/z -> %z12.d +a47071ee : ldff1b z14.d, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[1byte] %p4/z -> %z14.d +a4727630 : ldff1b z16.d, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.d +a4747671 : ldff1b z17.d, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[1byte] %p5/z -> %z17.d +a47676b3 : ldff1b z19.d, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[1byte] %p5/z -> %z19.d +a4787af5 : ldff1b z21.d, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[1byte] %p6/z -> %z21.d +a4797b17 : ldff1b z23.d, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[1byte] %p6/z -> %z23.d +a47b7f59 : ldff1b z25.d, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[1byte] %p7/z -> %z25.d +a47d7f9b : ldff1b z27.d, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[1byte] %p7/z -> %z27.d +a47e7fff : ldff1b z31.d, p7/Z, [sp, x30] : ldff1b 
(%sp,%x30)[1byte] %p7/z -> %z31.d # LDFF1B { .D }, /Z, [, .D, ] (LDFF1B-Z.P.BZ-D.x32.unscaled) -c4006000 : ldff1b z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1b (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d -c4056482 : ldff1b z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1b (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d -c40768c4 : ldff1b z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1b (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d -c4096906 : ldff1b z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1b (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d -c40b6d48 : ldff1b z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1b (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d -c40d6d6a : ldff1b z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1b (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d -c40f71ac : ldff1b z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1b (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d -c41171ee : ldff1b z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1b (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d -c4137630 : ldff1b z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1b (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d -c4147671 : ldff1b z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1b (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d -c41676b3 : ldff1b z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1b (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d -c4187af5 : ldff1b z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1b (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d -c41a7b17 : ldff1b z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1b (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d -c41c7f59 : ldff1b z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1b (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d -c41e7f9b : ldff1b z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1b (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d -c41f7fff : ldff1b z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1b (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d -c4406000 : ldff1b z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1b (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d -c4456482 : ldff1b z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1b (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d -c44768c4 : ldff1b z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1b (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d -c4496906 : 
ldff1b z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1b (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d -c44b6d48 : ldff1b z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1b (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d -c44d6d6a : ldff1b z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1b (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d -c44f71ac : ldff1b z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1b (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d -c45171ee : ldff1b z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1b (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d -c4537630 : ldff1b z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1b (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d -c4547671 : ldff1b z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1b (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d -c45676b3 : ldff1b z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1b (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d -c4587af5 : ldff1b z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1b (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d -c45a7b17 : ldff1b z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1b (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d -c45c7f59 : ldff1b z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1b (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d -c45e7f9b : ldff1b z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1b (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d -c45f7fff : ldff1b z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1b (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d +c4006000 : ldff1b z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1b (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d +c4056482 : ldff1b z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1b (%x4,%z5.d,uxtw)[1byte] %p1/z -> %z2.d +c40768c4 : ldff1b z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1b (%x6,%z7.d,uxtw)[1byte] %p2/z -> %z4.d +c4096906 : ldff1b z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1b (%x8,%z9.d,uxtw)[1byte] %p2/z -> %z6.d +c40b6d48 : ldff1b z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1b (%x10,%z11.d,uxtw)[1byte] %p3/z -> %z8.d +c40d6d6a : ldff1b z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1b (%x11,%z13.d,uxtw)[1byte] %p3/z -> %z10.d +c40f71ac : ldff1b z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1b (%x13,%z15.d,uxtw)[1byte] %p4/z -> %z12.d 
+c41171ee : ldff1b z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1b (%x15,%z17.d,uxtw)[1byte] %p4/z -> %z14.d +c4137630 : ldff1b z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1b (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d +c4147671 : ldff1b z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1b (%x19,%z20.d,uxtw)[1byte] %p5/z -> %z17.d +c41676b3 : ldff1b z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1b (%x21,%z22.d,uxtw)[1byte] %p5/z -> %z19.d +c4187af5 : ldff1b z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1b (%x23,%z24.d,uxtw)[1byte] %p6/z -> %z21.d +c41a7b17 : ldff1b z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1b (%x24,%z26.d,uxtw)[1byte] %p6/z -> %z23.d +c41c7f59 : ldff1b z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1b (%x26,%z28.d,uxtw)[1byte] %p7/z -> %z25.d +c41e7f9b : ldff1b z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1b (%x28,%z30.d,uxtw)[1byte] %p7/z -> %z27.d +c41f7fff : ldff1b z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1b (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d +c4406000 : ldff1b z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1b (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d +c4456482 : ldff1b z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1b (%x4,%z5.d,sxtw)[1byte] %p1/z -> %z2.d +c44768c4 : ldff1b z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1b (%x6,%z7.d,sxtw)[1byte] %p2/z -> %z4.d +c4496906 : ldff1b z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1b (%x8,%z9.d,sxtw)[1byte] %p2/z -> %z6.d +c44b6d48 : ldff1b z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1b (%x10,%z11.d,sxtw)[1byte] %p3/z -> %z8.d +c44d6d6a : ldff1b z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1b (%x11,%z13.d,sxtw)[1byte] %p3/z -> %z10.d +c44f71ac : ldff1b z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1b (%x13,%z15.d,sxtw)[1byte] %p4/z -> %z12.d +c45171ee : ldff1b z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1b (%x15,%z17.d,sxtw)[1byte] %p4/z -> %z14.d +c4537630 : ldff1b z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1b (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d +c4547671 : ldff1b z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1b (%x19,%z20.d,sxtw)[1byte] %p5/z -> %z17.d +c45676b3 : ldff1b z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1b (%x21,%z22.d,sxtw)[1byte] 
%p5/z -> %z19.d +c4587af5 : ldff1b z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1b (%x23,%z24.d,sxtw)[1byte] %p6/z -> %z21.d +c45a7b17 : ldff1b z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1b (%x24,%z26.d,sxtw)[1byte] %p6/z -> %z23.d +c45c7f59 : ldff1b z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1b (%x26,%z28.d,sxtw)[1byte] %p7/z -> %z25.d +c45e7f9b : ldff1b z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1b (%x28,%z30.d,sxtw)[1byte] %p7/z -> %z27.d +c45f7fff : ldff1b z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1b (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d # LDFF1B { .D }, /Z, [.D{, #}] (LDFF1B-Z.P.AI-D) -c420e000 : ldff1b z0.d, p0/Z, [z0.d, #0] : ldff1b (%z0.d)[4byte] %p0/z -> %z0.d -c422e482 : ldff1b z2.d, p1/Z, [z4.d, #2] : ldff1b +0x02(%z4.d)[4byte] %p1/z -> %z2.d -c424e8c4 : ldff1b z4.d, p2/Z, [z6.d, #4] : ldff1b +0x04(%z6.d)[4byte] %p2/z -> %z4.d -c426e906 : ldff1b z6.d, p2/Z, [z8.d, #6] : ldff1b +0x06(%z8.d)[4byte] %p2/z -> %z6.d -c428ed48 : ldff1b z8.d, p3/Z, [z10.d, #8] : ldff1b +0x08(%z10.d)[4byte] %p3/z -> %z8.d -c42aed8a : ldff1b z10.d, p3/Z, [z12.d, #10] : ldff1b +0x0a(%z12.d)[4byte] %p3/z -> %z10.d -c42cf1cc : ldff1b z12.d, p4/Z, [z14.d, #12] : ldff1b +0x0c(%z14.d)[4byte] %p4/z -> %z12.d -c42ef20e : ldff1b z14.d, p4/Z, [z16.d, #14] : ldff1b +0x0e(%z16.d)[4byte] %p4/z -> %z14.d -c430f650 : ldff1b z16.d, p5/Z, [z18.d, #16] : ldff1b +0x10(%z18.d)[4byte] %p5/z -> %z16.d -c431f671 : ldff1b z17.d, p5/Z, [z19.d, #17] : ldff1b +0x11(%z19.d)[4byte] %p5/z -> %z17.d -c433f6b3 : ldff1b z19.d, p5/Z, [z21.d, #19] : ldff1b +0x13(%z21.d)[4byte] %p5/z -> %z19.d -c435faf5 : ldff1b z21.d, p6/Z, [z23.d, #21] : ldff1b +0x15(%z23.d)[4byte] %p6/z -> %z21.d -c437fb37 : ldff1b z23.d, p6/Z, [z25.d, #23] : ldff1b +0x17(%z25.d)[4byte] %p6/z -> %z23.d -c439ff79 : ldff1b z25.d, p7/Z, [z27.d, #25] : ldff1b +0x19(%z27.d)[4byte] %p7/z -> %z25.d -c43bffbb : ldff1b z27.d, p7/Z, [z29.d, #27] : ldff1b +0x1b(%z29.d)[4byte] %p7/z -> %z27.d -c43fffff : ldff1b z31.d, p7/Z, [z31.d, #31] : ldff1b +0x1f(%z31.d)[4byte] %p7/z -> 
%z31.d +c420e000 : ldff1b z0.d, p0/Z, [z0.d, #0] : ldff1b (%z0.d)[1byte] %p0/z -> %z0.d +c422e482 : ldff1b z2.d, p1/Z, [z4.d, #2] : ldff1b +0x02(%z4.d)[1byte] %p1/z -> %z2.d +c424e8c4 : ldff1b z4.d, p2/Z, [z6.d, #4] : ldff1b +0x04(%z6.d)[1byte] %p2/z -> %z4.d +c426e906 : ldff1b z6.d, p2/Z, [z8.d, #6] : ldff1b +0x06(%z8.d)[1byte] %p2/z -> %z6.d +c428ed48 : ldff1b z8.d, p3/Z, [z10.d, #8] : ldff1b +0x08(%z10.d)[1byte] %p3/z -> %z8.d +c42aed8a : ldff1b z10.d, p3/Z, [z12.d, #10] : ldff1b +0x0a(%z12.d)[1byte] %p3/z -> %z10.d +c42cf1cc : ldff1b z12.d, p4/Z, [z14.d, #12] : ldff1b +0x0c(%z14.d)[1byte] %p4/z -> %z12.d +c42ef20e : ldff1b z14.d, p4/Z, [z16.d, #14] : ldff1b +0x0e(%z16.d)[1byte] %p4/z -> %z14.d +c430f650 : ldff1b z16.d, p5/Z, [z18.d, #16] : ldff1b +0x10(%z18.d)[1byte] %p5/z -> %z16.d +c431f671 : ldff1b z17.d, p5/Z, [z19.d, #17] : ldff1b +0x11(%z19.d)[1byte] %p5/z -> %z17.d +c433f6b3 : ldff1b z19.d, p5/Z, [z21.d, #19] : ldff1b +0x13(%z21.d)[1byte] %p5/z -> %z19.d +c435faf5 : ldff1b z21.d, p6/Z, [z23.d, #21] : ldff1b +0x15(%z23.d)[1byte] %p6/z -> %z21.d +c437fb37 : ldff1b z23.d, p6/Z, [z25.d, #23] : ldff1b +0x17(%z25.d)[1byte] %p6/z -> %z23.d +c439ff79 : ldff1b z25.d, p7/Z, [z27.d, #25] : ldff1b +0x19(%z27.d)[1byte] %p7/z -> %z25.d +c43bffbb : ldff1b z27.d, p7/Z, [z29.d, #27] : ldff1b +0x1b(%z29.d)[1byte] %p7/z -> %z27.d +c43fffff : ldff1b z31.d, p7/Z, [z31.d, #31] : ldff1b +0x1f(%z31.d)[1byte] %p7/z -> %z31.d # LDFF1B { .D }, /Z, [, .D] (LDFF1B-Z.P.BZ-D.64.unscaled) -c440e000 : ldff1b z0.d, p0/Z, [x0, z0.d] : ldff1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d -c445e482 : ldff1b z2.d, p1/Z, [x4, z5.d] : ldff1b (%x4,%z5.d)[4byte] %p1/z -> %z2.d -c447e8c4 : ldff1b z4.d, p2/Z, [x6, z7.d] : ldff1b (%x6,%z7.d)[4byte] %p2/z -> %z4.d -c449e906 : ldff1b z6.d, p2/Z, [x8, z9.d] : ldff1b (%x8,%z9.d)[4byte] %p2/z -> %z6.d -c44bed48 : ldff1b z8.d, p3/Z, [x10, z11.d] : ldff1b (%x10,%z11.d)[4byte] %p3/z -> %z8.d -c44ded6a : ldff1b z10.d, p3/Z, [x11, z13.d] : ldff1b (%x11,%z13.d)[4byte] 
%p3/z -> %z10.d -c44ff1ac : ldff1b z12.d, p4/Z, [x13, z15.d] : ldff1b (%x13,%z15.d)[4byte] %p4/z -> %z12.d -c451f1ee : ldff1b z14.d, p4/Z, [x15, z17.d] : ldff1b (%x15,%z17.d)[4byte] %p4/z -> %z14.d -c453f630 : ldff1b z16.d, p5/Z, [x17, z19.d] : ldff1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d -c454f671 : ldff1b z17.d, p5/Z, [x19, z20.d] : ldff1b (%x19,%z20.d)[4byte] %p5/z -> %z17.d -c456f6b3 : ldff1b z19.d, p5/Z, [x21, z22.d] : ldff1b (%x21,%z22.d)[4byte] %p5/z -> %z19.d -c458faf5 : ldff1b z21.d, p6/Z, [x23, z24.d] : ldff1b (%x23,%z24.d)[4byte] %p6/z -> %z21.d -c45afb17 : ldff1b z23.d, p6/Z, [x24, z26.d] : ldff1b (%x24,%z26.d)[4byte] %p6/z -> %z23.d -c45cff59 : ldff1b z25.d, p7/Z, [x26, z28.d] : ldff1b (%x26,%z28.d)[4byte] %p7/z -> %z25.d -c45eff9b : ldff1b z27.d, p7/Z, [x28, z30.d] : ldff1b (%x28,%z30.d)[4byte] %p7/z -> %z27.d -c45fffff : ldff1b z31.d, p7/Z, [sp, z31.d] : ldff1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d +c440e000 : ldff1b z0.d, p0/Z, [x0, z0.d] : ldff1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d +c445e482 : ldff1b z2.d, p1/Z, [x4, z5.d] : ldff1b (%x4,%z5.d)[1byte] %p1/z -> %z2.d +c447e8c4 : ldff1b z4.d, p2/Z, [x6, z7.d] : ldff1b (%x6,%z7.d)[1byte] %p2/z -> %z4.d +c449e906 : ldff1b z6.d, p2/Z, [x8, z9.d] : ldff1b (%x8,%z9.d)[1byte] %p2/z -> %z6.d +c44bed48 : ldff1b z8.d, p3/Z, [x10, z11.d] : ldff1b (%x10,%z11.d)[1byte] %p3/z -> %z8.d +c44ded6a : ldff1b z10.d, p3/Z, [x11, z13.d] : ldff1b (%x11,%z13.d)[1byte] %p3/z -> %z10.d +c44ff1ac : ldff1b z12.d, p4/Z, [x13, z15.d] : ldff1b (%x13,%z15.d)[1byte] %p4/z -> %z12.d +c451f1ee : ldff1b z14.d, p4/Z, [x15, z17.d] : ldff1b (%x15,%z17.d)[1byte] %p4/z -> %z14.d +c453f630 : ldff1b z16.d, p5/Z, [x17, z19.d] : ldff1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d +c454f671 : ldff1b z17.d, p5/Z, [x19, z20.d] : ldff1b (%x19,%z20.d)[1byte] %p5/z -> %z17.d +c456f6b3 : ldff1b z19.d, p5/Z, [x21, z22.d] : ldff1b (%x21,%z22.d)[1byte] %p5/z -> %z19.d +c458faf5 : ldff1b z21.d, p6/Z, [x23, z24.d] : ldff1b (%x23,%z24.d)[1byte] %p6/z -> %z21.d +c45afb17 
: ldff1b z23.d, p6/Z, [x24, z26.d] : ldff1b (%x24,%z26.d)[1byte] %p6/z -> %z23.d +c45cff59 : ldff1b z25.d, p7/Z, [x26, z28.d] : ldff1b (%x26,%z28.d)[1byte] %p7/z -> %z25.d +c45eff9b : ldff1b z27.d, p7/Z, [x28, z30.d] : ldff1b (%x28,%z30.d)[1byte] %p7/z -> %z27.d +c45fffff : ldff1b z31.d, p7/Z, [sp, z31.d] : ldff1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d # LDFF1D { .D }, /Z, [{, , LSL #3}] (LDFF1D-Z.P.BR-U64) -a5e06000 : ldff1d z0.d, p0/Z, [x0, x0, LSL #3] : ldff1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d -a5e56482 : ldff1d z2.d, p1/Z, [x4, x5, LSL #3] : ldff1d (%x4,%x5,lsl #3)[32byte] %p1/z -> %z2.d -a5e768c4 : ldff1d z4.d, p2/Z, [x6, x7, LSL #3] : ldff1d (%x6,%x7,lsl #3)[32byte] %p2/z -> %z4.d -a5e96906 : ldff1d z6.d, p2/Z, [x8, x9, LSL #3] : ldff1d (%x8,%x9,lsl #3)[32byte] %p2/z -> %z6.d -a5eb6d48 : ldff1d z8.d, p3/Z, [x10, x11, LSL #3] : ldff1d (%x10,%x11,lsl #3)[32byte] %p3/z -> %z8.d -a5ec6d6a : ldff1d z10.d, p3/Z, [x11, x12, LSL #3] : ldff1d (%x11,%x12,lsl #3)[32byte] %p3/z -> %z10.d -a5ee71ac : ldff1d z12.d, p4/Z, [x13, x14, LSL #3] : ldff1d (%x13,%x14,lsl #3)[32byte] %p4/z -> %z12.d -a5f071ee : ldff1d z14.d, p4/Z, [x15, x16, LSL #3] : ldff1d (%x15,%x16,lsl #3)[32byte] %p4/z -> %z14.d -a5f27630 : ldff1d z16.d, p5/Z, [x17, x18, LSL #3] : ldff1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d -a5f47671 : ldff1d z17.d, p5/Z, [x19, x20, LSL #3] : ldff1d (%x19,%x20,lsl #3)[32byte] %p5/z -> %z17.d -a5f676b3 : ldff1d z19.d, p5/Z, [x21, x22, LSL #3] : ldff1d (%x21,%x22,lsl #3)[32byte] %p5/z -> %z19.d -a5f87af5 : ldff1d z21.d, p6/Z, [x23, x24, LSL #3] : ldff1d (%x23,%x24,lsl #3)[32byte] %p6/z -> %z21.d -a5f97b17 : ldff1d z23.d, p6/Z, [x24, x25, LSL #3] : ldff1d (%x24,%x25,lsl #3)[32byte] %p6/z -> %z23.d -a5fb7f59 : ldff1d z25.d, p7/Z, [x26, x27, LSL #3] : ldff1d (%x26,%x27,lsl #3)[32byte] %p7/z -> %z25.d -a5fd7f9b : ldff1d z27.d, p7/Z, [x28, x29, LSL #3] : ldff1d (%x28,%x29,lsl #3)[32byte] %p7/z -> %z27.d -a5fe7fff : ldff1d z31.d, p7/Z, [sp, x30, LSL #3] : ldff1d (%sp,%x30,lsl 
#3)[32byte] %p7/z -> %z31.d +a5e06000 : ldff1d z0.d, p0/Z, [x0, x0, LSL #3] : ldff1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d +a5e56482 : ldff1d z2.d, p1/Z, [x4, x5, LSL #3] : ldff1d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d +a5e768c4 : ldff1d z4.d, p2/Z, [x6, x7, LSL #3] : ldff1d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d +a5e96906 : ldff1d z6.d, p2/Z, [x8, x9, LSL #3] : ldff1d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d +a5eb6d48 : ldff1d z8.d, p3/Z, [x10, x11, LSL #3] : ldff1d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d +a5ec6d6a : ldff1d z10.d, p3/Z, [x11, x12, LSL #3] : ldff1d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d +a5ee71ac : ldff1d z12.d, p4/Z, [x13, x14, LSL #3] : ldff1d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d +a5f071ee : ldff1d z14.d, p4/Z, [x15, x16, LSL #3] : ldff1d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d +a5f27630 : ldff1d z16.d, p5/Z, [x17, x18, LSL #3] : ldff1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d +a5f47671 : ldff1d z17.d, p5/Z, [x19, x20, LSL #3] : ldff1d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d +a5f676b3 : ldff1d z19.d, p5/Z, [x21, x22, LSL #3] : ldff1d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d +a5f87af5 : ldff1d z21.d, p6/Z, [x23, x24, LSL #3] : ldff1d (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d +a5f97b17 : ldff1d z23.d, p6/Z, [x24, x25, LSL #3] : ldff1d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d +a5fb7f59 : ldff1d z25.d, p7/Z, [x26, x27, LSL #3] : ldff1d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d +a5fd7f9b : ldff1d z27.d, p7/Z, [x28, x29, LSL #3] : ldff1d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d +a5fe7fff : ldff1d z31.d, p7/Z, [sp, x30, LSL #3] : ldff1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d # LDFF1D { .D }, /Z, [, .D, ] (LDFF1D-Z.P.BZ-D.x32.unscaled) -c5806000 : ldff1d z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1d (%x0,%z0.d,uxtw)[32byte] %p0/z -> %z0.d -c5856482 : ldff1d z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1d (%x4,%z5.d,uxtw)[32byte] %p1/z -> %z2.d -c58768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1d (%x6,%z7.d,uxtw)[32byte] %p2/z -> %z4.d -c5896906 : 
ldff1d z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1d (%x8,%z9.d,uxtw)[32byte] %p2/z -> %z6.d -c58b6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1d (%x10,%z11.d,uxtw)[32byte] %p3/z -> %z8.d -c58d6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1d (%x11,%z13.d,uxtw)[32byte] %p3/z -> %z10.d -c58f71ac : ldff1d z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1d (%x13,%z15.d,uxtw)[32byte] %p4/z -> %z12.d -c59171ee : ldff1d z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1d (%x15,%z17.d,uxtw)[32byte] %p4/z -> %z14.d -c5937630 : ldff1d z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1d (%x17,%z19.d,uxtw)[32byte] %p5/z -> %z16.d -c5947671 : ldff1d z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1d (%x19,%z20.d,uxtw)[32byte] %p5/z -> %z17.d -c59676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1d (%x21,%z22.d,uxtw)[32byte] %p5/z -> %z19.d -c5987af5 : ldff1d z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1d (%x23,%z24.d,uxtw)[32byte] %p6/z -> %z21.d -c59a7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1d (%x24,%z26.d,uxtw)[32byte] %p6/z -> %z23.d -c59c7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1d (%x26,%z28.d,uxtw)[32byte] %p7/z -> %z25.d -c59e7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1d (%x28,%z30.d,uxtw)[32byte] %p7/z -> %z27.d -c59f7fff : ldff1d z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1d (%sp,%z31.d,uxtw)[32byte] %p7/z -> %z31.d -c5c06000 : ldff1d z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1d (%x0,%z0.d,sxtw)[32byte] %p0/z -> %z0.d -c5c56482 : ldff1d z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1d (%x4,%z5.d,sxtw)[32byte] %p1/z -> %z2.d -c5c768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1d (%x6,%z7.d,sxtw)[32byte] %p2/z -> %z4.d -c5c96906 : ldff1d z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1d (%x8,%z9.d,sxtw)[32byte] %p2/z -> %z6.d -c5cb6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1d (%x10,%z11.d,sxtw)[32byte] %p3/z -> %z8.d -c5cd6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1d (%x11,%z13.d,sxtw)[32byte] %p3/z -> %z10.d -c5cf71ac : ldff1d z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1d (%x13,%z15.d,sxtw)[32byte] 
%p4/z -> %z12.d -c5d171ee : ldff1d z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1d (%x15,%z17.d,sxtw)[32byte] %p4/z -> %z14.d -c5d37630 : ldff1d z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1d (%x17,%z19.d,sxtw)[32byte] %p5/z -> %z16.d -c5d47671 : ldff1d z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1d (%x19,%z20.d,sxtw)[32byte] %p5/z -> %z17.d -c5d676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1d (%x21,%z22.d,sxtw)[32byte] %p5/z -> %z19.d -c5d87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1d (%x23,%z24.d,sxtw)[32byte] %p6/z -> %z21.d -c5da7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1d (%x24,%z26.d,sxtw)[32byte] %p6/z -> %z23.d -c5dc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1d (%x26,%z28.d,sxtw)[32byte] %p7/z -> %z25.d -c5de7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1d (%x28,%z30.d,sxtw)[32byte] %p7/z -> %z27.d -c5df7fff : ldff1d z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1d (%sp,%z31.d,sxtw)[32byte] %p7/z -> %z31.d +c5806000 : ldff1d z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1d (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d +c5856482 : ldff1d z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1d (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d +c58768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1d (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d +c5896906 : ldff1d z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1d (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d +c58b6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1d (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d +c58d6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1d (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d +c58f71ac : ldff1d z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1d (%x13,%z15.d,uxtw)[8byte] %p4/z -> %z12.d +c59171ee : ldff1d z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1d (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d +c5937630 : ldff1d z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1d (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d +c5947671 : ldff1d z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1d (%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d +c59676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1d 
(%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d +c5987af5 : ldff1d z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1d (%x23,%z24.d,uxtw)[8byte] %p6/z -> %z21.d +c59a7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1d (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d +c59c7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1d (%x26,%z28.d,uxtw)[8byte] %p7/z -> %z25.d +c59e7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1d (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d +c59f7fff : ldff1d z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1d (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d +c5c06000 : ldff1d z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1d (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d +c5c56482 : ldff1d z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1d (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d +c5c768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1d (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d +c5c96906 : ldff1d z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1d (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d +c5cb6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1d (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d +c5cd6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1d (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d +c5cf71ac : ldff1d z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1d (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d +c5d171ee : ldff1d z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1d (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d +c5d37630 : ldff1d z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1d (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d +c5d47671 : ldff1d z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1d (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d +c5d676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1d (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d +c5d87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1d (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d +c5da7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1d (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d +c5dc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1d (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d +c5de7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, 
SXTW] : ldff1d (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d +c5df7fff : ldff1d z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1d (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d # LDFF1D { .D }, /Z, [, .D, #3] (LDFF1D-Z.P.BZ-D.x32.scaled) -c5a06000 : ldff1d z0.d, p0/Z, [x0, z0.d, UXTW #3] : ldff1d (%x0,%z0.d,uxtw #3)[32byte] %p0/z -> %z0.d -c5a56482 : ldff1d z2.d, p1/Z, [x4, z5.d, UXTW #3] : ldff1d (%x4,%z5.d,uxtw #3)[32byte] %p1/z -> %z2.d -c5a768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, UXTW #3] : ldff1d (%x6,%z7.d,uxtw #3)[32byte] %p2/z -> %z4.d -c5a96906 : ldff1d z6.d, p2/Z, [x8, z9.d, UXTW #3] : ldff1d (%x8,%z9.d,uxtw #3)[32byte] %p2/z -> %z6.d -c5ab6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, UXTW #3] : ldff1d (%x10,%z11.d,uxtw #3)[32byte] %p3/z -> %z8.d -c5ad6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, UXTW #3] : ldff1d (%x11,%z13.d,uxtw #3)[32byte] %p3/z -> %z10.d -c5af71ac : ldff1d z12.d, p4/Z, [x13, z15.d, UXTW #3] : ldff1d (%x13,%z15.d,uxtw #3)[32byte] %p4/z -> %z12.d -c5b171ee : ldff1d z14.d, p4/Z, [x15, z17.d, UXTW #3] : ldff1d (%x15,%z17.d,uxtw #3)[32byte] %p4/z -> %z14.d -c5b37630 : ldff1d z16.d, p5/Z, [x17, z19.d, UXTW #3] : ldff1d (%x17,%z19.d,uxtw #3)[32byte] %p5/z -> %z16.d -c5b47671 : ldff1d z17.d, p5/Z, [x19, z20.d, UXTW #3] : ldff1d (%x19,%z20.d,uxtw #3)[32byte] %p5/z -> %z17.d -c5b676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, UXTW #3] : ldff1d (%x21,%z22.d,uxtw #3)[32byte] %p5/z -> %z19.d -c5b87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, UXTW #3] : ldff1d (%x23,%z24.d,uxtw #3)[32byte] %p6/z -> %z21.d -c5ba7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, UXTW #3] : ldff1d (%x24,%z26.d,uxtw #3)[32byte] %p6/z -> %z23.d -c5bc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, UXTW #3] : ldff1d (%x26,%z28.d,uxtw #3)[32byte] %p7/z -> %z25.d -c5be7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, UXTW #3] : ldff1d (%x28,%z30.d,uxtw #3)[32byte] %p7/z -> %z27.d -c5bf7fff : ldff1d z31.d, p7/Z, [sp, z31.d, UXTW #3] : ldff1d (%sp,%z31.d,uxtw #3)[32byte] %p7/z -> %z31.d -c5e06000 : ldff1d z0.d, p0/Z, [x0, z0.d, SXTW #3] : ldff1d 
(%x0,%z0.d,sxtw #3)[32byte] %p0/z -> %z0.d -c5e56482 : ldff1d z2.d, p1/Z, [x4, z5.d, SXTW #3] : ldff1d (%x4,%z5.d,sxtw #3)[32byte] %p1/z -> %z2.d -c5e768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, SXTW #3] : ldff1d (%x6,%z7.d,sxtw #3)[32byte] %p2/z -> %z4.d -c5e96906 : ldff1d z6.d, p2/Z, [x8, z9.d, SXTW #3] : ldff1d (%x8,%z9.d,sxtw #3)[32byte] %p2/z -> %z6.d -c5eb6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, SXTW #3] : ldff1d (%x10,%z11.d,sxtw #3)[32byte] %p3/z -> %z8.d -c5ed6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, SXTW #3] : ldff1d (%x11,%z13.d,sxtw #3)[32byte] %p3/z -> %z10.d -c5ef71ac : ldff1d z12.d, p4/Z, [x13, z15.d, SXTW #3] : ldff1d (%x13,%z15.d,sxtw #3)[32byte] %p4/z -> %z12.d -c5f171ee : ldff1d z14.d, p4/Z, [x15, z17.d, SXTW #3] : ldff1d (%x15,%z17.d,sxtw #3)[32byte] %p4/z -> %z14.d -c5f37630 : ldff1d z16.d, p5/Z, [x17, z19.d, SXTW #3] : ldff1d (%x17,%z19.d,sxtw #3)[32byte] %p5/z -> %z16.d -c5f47671 : ldff1d z17.d, p5/Z, [x19, z20.d, SXTW #3] : ldff1d (%x19,%z20.d,sxtw #3)[32byte] %p5/z -> %z17.d -c5f676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, SXTW #3] : ldff1d (%x21,%z22.d,sxtw #3)[32byte] %p5/z -> %z19.d -c5f87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, SXTW #3] : ldff1d (%x23,%z24.d,sxtw #3)[32byte] %p6/z -> %z21.d -c5fa7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, SXTW #3] : ldff1d (%x24,%z26.d,sxtw #3)[32byte] %p6/z -> %z23.d -c5fc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, SXTW #3] : ldff1d (%x26,%z28.d,sxtw #3)[32byte] %p7/z -> %z25.d -c5fe7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, SXTW #3] : ldff1d (%x28,%z30.d,sxtw #3)[32byte] %p7/z -> %z27.d -c5ff7fff : ldff1d z31.d, p7/Z, [sp, z31.d, SXTW #3] : ldff1d (%sp,%z31.d,sxtw #3)[32byte] %p7/z -> %z31.d +c5a06000 : ldff1d z0.d, p0/Z, [x0, z0.d, UXTW #3] : ldff1d (%x0,%z0.d,uxtw #3)[8byte] %p0/z -> %z0.d +c5a56482 : ldff1d z2.d, p1/Z, [x4, z5.d, UXTW #3] : ldff1d (%x4,%z5.d,uxtw #3)[8byte] %p1/z -> %z2.d +c5a768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, UXTW #3] : ldff1d (%x6,%z7.d,uxtw #3)[8byte] %p2/z -> %z4.d +c5a96906 : ldff1d z6.d, p2/Z, [x8, 
z9.d, UXTW #3] : ldff1d (%x8,%z9.d,uxtw #3)[8byte] %p2/z -> %z6.d +c5ab6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, UXTW #3] : ldff1d (%x10,%z11.d,uxtw #3)[8byte] %p3/z -> %z8.d +c5ad6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, UXTW #3] : ldff1d (%x11,%z13.d,uxtw #3)[8byte] %p3/z -> %z10.d +c5af71ac : ldff1d z12.d, p4/Z, [x13, z15.d, UXTW #3] : ldff1d (%x13,%z15.d,uxtw #3)[8byte] %p4/z -> %z12.d +c5b171ee : ldff1d z14.d, p4/Z, [x15, z17.d, UXTW #3] : ldff1d (%x15,%z17.d,uxtw #3)[8byte] %p4/z -> %z14.d +c5b37630 : ldff1d z16.d, p5/Z, [x17, z19.d, UXTW #3] : ldff1d (%x17,%z19.d,uxtw #3)[8byte] %p5/z -> %z16.d +c5b47671 : ldff1d z17.d, p5/Z, [x19, z20.d, UXTW #3] : ldff1d (%x19,%z20.d,uxtw #3)[8byte] %p5/z -> %z17.d +c5b676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, UXTW #3] : ldff1d (%x21,%z22.d,uxtw #3)[8byte] %p5/z -> %z19.d +c5b87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, UXTW #3] : ldff1d (%x23,%z24.d,uxtw #3)[8byte] %p6/z -> %z21.d +c5ba7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, UXTW #3] : ldff1d (%x24,%z26.d,uxtw #3)[8byte] %p6/z -> %z23.d +c5bc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, UXTW #3] : ldff1d (%x26,%z28.d,uxtw #3)[8byte] %p7/z -> %z25.d +c5be7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, UXTW #3] : ldff1d (%x28,%z30.d,uxtw #3)[8byte] %p7/z -> %z27.d +c5bf7fff : ldff1d z31.d, p7/Z, [sp, z31.d, UXTW #3] : ldff1d (%sp,%z31.d,uxtw #3)[8byte] %p7/z -> %z31.d +c5e06000 : ldff1d z0.d, p0/Z, [x0, z0.d, SXTW #3] : ldff1d (%x0,%z0.d,sxtw #3)[8byte] %p0/z -> %z0.d +c5e56482 : ldff1d z2.d, p1/Z, [x4, z5.d, SXTW #3] : ldff1d (%x4,%z5.d,sxtw #3)[8byte] %p1/z -> %z2.d +c5e768c4 : ldff1d z4.d, p2/Z, [x6, z7.d, SXTW #3] : ldff1d (%x6,%z7.d,sxtw #3)[8byte] %p2/z -> %z4.d +c5e96906 : ldff1d z6.d, p2/Z, [x8, z9.d, SXTW #3] : ldff1d (%x8,%z9.d,sxtw #3)[8byte] %p2/z -> %z6.d +c5eb6d48 : ldff1d z8.d, p3/Z, [x10, z11.d, SXTW #3] : ldff1d (%x10,%z11.d,sxtw #3)[8byte] %p3/z -> %z8.d +c5ed6d6a : ldff1d z10.d, p3/Z, [x11, z13.d, SXTW #3] : ldff1d (%x11,%z13.d,sxtw #3)[8byte] %p3/z -> %z10.d +c5ef71ac : ldff1d 
z12.d, p4/Z, [x13, z15.d, SXTW #3] : ldff1d (%x13,%z15.d,sxtw #3)[8byte] %p4/z -> %z12.d +c5f171ee : ldff1d z14.d, p4/Z, [x15, z17.d, SXTW #3] : ldff1d (%x15,%z17.d,sxtw #3)[8byte] %p4/z -> %z14.d +c5f37630 : ldff1d z16.d, p5/Z, [x17, z19.d, SXTW #3] : ldff1d (%x17,%z19.d,sxtw #3)[8byte] %p5/z -> %z16.d +c5f47671 : ldff1d z17.d, p5/Z, [x19, z20.d, SXTW #3] : ldff1d (%x19,%z20.d,sxtw #3)[8byte] %p5/z -> %z17.d +c5f676b3 : ldff1d z19.d, p5/Z, [x21, z22.d, SXTW #3] : ldff1d (%x21,%z22.d,sxtw #3)[8byte] %p5/z -> %z19.d +c5f87af5 : ldff1d z21.d, p6/Z, [x23, z24.d, SXTW #3] : ldff1d (%x23,%z24.d,sxtw #3)[8byte] %p6/z -> %z21.d +c5fa7b17 : ldff1d z23.d, p6/Z, [x24, z26.d, SXTW #3] : ldff1d (%x24,%z26.d,sxtw #3)[8byte] %p6/z -> %z23.d +c5fc7f59 : ldff1d z25.d, p7/Z, [x26, z28.d, SXTW #3] : ldff1d (%x26,%z28.d,sxtw #3)[8byte] %p7/z -> %z25.d +c5fe7f9b : ldff1d z27.d, p7/Z, [x28, z30.d, SXTW #3] : ldff1d (%x28,%z30.d,sxtw #3)[8byte] %p7/z -> %z27.d +c5ff7fff : ldff1d z31.d, p7/Z, [sp, z31.d, SXTW #3] : ldff1d (%sp,%z31.d,sxtw #3)[8byte] %p7/z -> %z31.d # LDFF1D { .D }, /Z, [.D{, #}] (LDFF1D-Z.P.AI-D) -c5a0e000 : ldff1d z0.d, p0/Z, [z0.d, #0] : ldff1d (%z0.d)[32byte] %p0/z -> %z0.d -c5a2e482 : ldff1d z2.d, p1/Z, [z4.d, #16] : ldff1d +0x10(%z4.d)[32byte] %p1/z -> %z2.d -c5a4e8c4 : ldff1d z4.d, p2/Z, [z6.d, #32] : ldff1d +0x20(%z6.d)[32byte] %p2/z -> %z4.d -c5a6e906 : ldff1d z6.d, p2/Z, [z8.d, #48] : ldff1d +0x30(%z8.d)[32byte] %p2/z -> %z6.d -c5a8ed48 : ldff1d z8.d, p3/Z, [z10.d, #64] : ldff1d +0x40(%z10.d)[32byte] %p3/z -> %z8.d -c5aaed8a : ldff1d z10.d, p3/Z, [z12.d, #80] : ldff1d +0x50(%z12.d)[32byte] %p3/z -> %z10.d -c5acf1cc : ldff1d z12.d, p4/Z, [z14.d, #96] : ldff1d +0x60(%z14.d)[32byte] %p4/z -> %z12.d -c5aef20e : ldff1d z14.d, p4/Z, [z16.d, #112] : ldff1d +0x70(%z16.d)[32byte] %p4/z -> %z14.d -c5b0f650 : ldff1d z16.d, p5/Z, [z18.d, #128] : ldff1d +0x80(%z18.d)[32byte] %p5/z -> %z16.d -c5b1f671 : ldff1d z17.d, p5/Z, [z19.d, #136] : ldff1d +0x88(%z19.d)[32byte] %p5/z -> 
%z17.d -c5b3f6b3 : ldff1d z19.d, p5/Z, [z21.d, #152] : ldff1d +0x98(%z21.d)[32byte] %p5/z -> %z19.d -c5b5faf5 : ldff1d z21.d, p6/Z, [z23.d, #168] : ldff1d +0xa8(%z23.d)[32byte] %p6/z -> %z21.d -c5b7fb37 : ldff1d z23.d, p6/Z, [z25.d, #184] : ldff1d +0xb8(%z25.d)[32byte] %p6/z -> %z23.d -c5b9ff79 : ldff1d z25.d, p7/Z, [z27.d, #200] : ldff1d +0xc8(%z27.d)[32byte] %p7/z -> %z25.d -c5bbffbb : ldff1d z27.d, p7/Z, [z29.d, #216] : ldff1d +0xd8(%z29.d)[32byte] %p7/z -> %z27.d -c5bfffff : ldff1d z31.d, p7/Z, [z31.d, #248] : ldff1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d +c5a0e000 : ldff1d z0.d, p0/Z, [z0.d, #0] : ldff1d (%z0.d)[8byte] %p0/z -> %z0.d +c5a2e482 : ldff1d z2.d, p1/Z, [z4.d, #16] : ldff1d +0x10(%z4.d)[8byte] %p1/z -> %z2.d +c5a4e8c4 : ldff1d z4.d, p2/Z, [z6.d, #32] : ldff1d +0x20(%z6.d)[8byte] %p2/z -> %z4.d +c5a6e906 : ldff1d z6.d, p2/Z, [z8.d, #48] : ldff1d +0x30(%z8.d)[8byte] %p2/z -> %z6.d +c5a8ed48 : ldff1d z8.d, p3/Z, [z10.d, #64] : ldff1d +0x40(%z10.d)[8byte] %p3/z -> %z8.d +c5aaed8a : ldff1d z10.d, p3/Z, [z12.d, #80] : ldff1d +0x50(%z12.d)[8byte] %p3/z -> %z10.d +c5acf1cc : ldff1d z12.d, p4/Z, [z14.d, #96] : ldff1d +0x60(%z14.d)[8byte] %p4/z -> %z12.d +c5aef20e : ldff1d z14.d, p4/Z, [z16.d, #112] : ldff1d +0x70(%z16.d)[8byte] %p4/z -> %z14.d +c5b0f650 : ldff1d z16.d, p5/Z, [z18.d, #128] : ldff1d +0x80(%z18.d)[8byte] %p5/z -> %z16.d +c5b1f671 : ldff1d z17.d, p5/Z, [z19.d, #136] : ldff1d +0x88(%z19.d)[8byte] %p5/z -> %z17.d +c5b3f6b3 : ldff1d z19.d, p5/Z, [z21.d, #152] : ldff1d +0x98(%z21.d)[8byte] %p5/z -> %z19.d +c5b5faf5 : ldff1d z21.d, p6/Z, [z23.d, #168] : ldff1d +0xa8(%z23.d)[8byte] %p6/z -> %z21.d +c5b7fb37 : ldff1d z23.d, p6/Z, [z25.d, #184] : ldff1d +0xb8(%z25.d)[8byte] %p6/z -> %z23.d +c5b9ff79 : ldff1d z25.d, p7/Z, [z27.d, #200] : ldff1d +0xc8(%z27.d)[8byte] %p7/z -> %z25.d +c5bbffbb : ldff1d z27.d, p7/Z, [z29.d, #216] : ldff1d +0xd8(%z29.d)[8byte] %p7/z -> %z27.d +c5bfffff : ldff1d z31.d, p7/Z, [z31.d, #248] : ldff1d +0xf8(%z31.d)[8byte] %p7/z -> 
%z31.d # LDFF1D { .D }, /Z, [, .D] (LDFF1D-Z.P.BZ-D.64.unscaled) -c5c0e000 : ldff1d z0.d, p0/Z, [x0, z0.d] : ldff1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d -c5c5e482 : ldff1d z2.d, p1/Z, [x4, z5.d] : ldff1d (%x4,%z5.d)[32byte] %p1/z -> %z2.d -c5c7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d] : ldff1d (%x6,%z7.d)[32byte] %p2/z -> %z4.d -c5c9e906 : ldff1d z6.d, p2/Z, [x8, z9.d] : ldff1d (%x8,%z9.d)[32byte] %p2/z -> %z6.d -c5cbed48 : ldff1d z8.d, p3/Z, [x10, z11.d] : ldff1d (%x10,%z11.d)[32byte] %p3/z -> %z8.d -c5cded6a : ldff1d z10.d, p3/Z, [x11, z13.d] : ldff1d (%x11,%z13.d)[32byte] %p3/z -> %z10.d -c5cff1ac : ldff1d z12.d, p4/Z, [x13, z15.d] : ldff1d (%x13,%z15.d)[32byte] %p4/z -> %z12.d -c5d1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d] : ldff1d (%x15,%z17.d)[32byte] %p4/z -> %z14.d -c5d3f630 : ldff1d z16.d, p5/Z, [x17, z19.d] : ldff1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d -c5d4f671 : ldff1d z17.d, p5/Z, [x19, z20.d] : ldff1d (%x19,%z20.d)[32byte] %p5/z -> %z17.d -c5d6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d] : ldff1d (%x21,%z22.d)[32byte] %p5/z -> %z19.d -c5d8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d] : ldff1d (%x23,%z24.d)[32byte] %p6/z -> %z21.d -c5dafb17 : ldff1d z23.d, p6/Z, [x24, z26.d] : ldff1d (%x24,%z26.d)[32byte] %p6/z -> %z23.d -c5dcff59 : ldff1d z25.d, p7/Z, [x26, z28.d] : ldff1d (%x26,%z28.d)[32byte] %p7/z -> %z25.d -c5deff9b : ldff1d z27.d, p7/Z, [x28, z30.d] : ldff1d (%x28,%z30.d)[32byte] %p7/z -> %z27.d -c5dfffff : ldff1d z31.d, p7/Z, [sp, z31.d] : ldff1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d +c5c0e000 : ldff1d z0.d, p0/Z, [x0, z0.d] : ldff1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c5c5e482 : ldff1d z2.d, p1/Z, [x4, z5.d] : ldff1d (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c5c7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d] : ldff1d (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c5c9e906 : ldff1d z6.d, p2/Z, [x8, z9.d] : ldff1d (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c5cbed48 : ldff1d z8.d, p3/Z, [x10, z11.d] : ldff1d (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c5cded6a : ldff1d z10.d, p3/Z, [x11, z13.d] : ldff1d 
(%x11,%z13.d)[8byte] %p3/z -> %z10.d +c5cff1ac : ldff1d z12.d, p4/Z, [x13, z15.d] : ldff1d (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c5d1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d] : ldff1d (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c5d3f630 : ldff1d z16.d, p5/Z, [x17, z19.d] : ldff1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c5d4f671 : ldff1d z17.d, p5/Z, [x19, z20.d] : ldff1d (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c5d6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d] : ldff1d (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c5d8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d] : ldff1d (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c5dafb17 : ldff1d z23.d, p6/Z, [x24, z26.d] : ldff1d (%x24,%z26.d)[8byte] %p6/z -> %z23.d +c5dcff59 : ldff1d z25.d, p7/Z, [x26, z28.d] : ldff1d (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c5deff9b : ldff1d z27.d, p7/Z, [x28, z30.d] : ldff1d (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c5dfffff : ldff1d z31.d, p7/Z, [sp, z31.d] : ldff1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d # LDFF1D { .D }, /Z, [, .D, LSL #3] (LDFF1D-Z.P.BZ-D.64.scaled) -c5e0e000 : ldff1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ldff1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d -c5e5e482 : ldff1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ldff1d (%x4,%z5.d,lsl #3)[32byte] %p1/z -> %z2.d -c5e7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ldff1d (%x6,%z7.d,lsl #3)[32byte] %p2/z -> %z4.d -c5e9e906 : ldff1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ldff1d (%x8,%z9.d,lsl #3)[32byte] %p2/z -> %z6.d -c5ebed48 : ldff1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ldff1d (%x10,%z11.d,lsl #3)[32byte] %p3/z -> %z8.d -c5eded6a : ldff1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ldff1d (%x11,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d -c5eff1ac : ldff1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ldff1d (%x13,%z15.d,lsl #3)[32byte] %p4/z -> %z12.d -c5f1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ldff1d (%x15,%z17.d,lsl #3)[32byte] %p4/z -> %z14.d -c5f3f630 : ldff1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ldff1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d -c5f4f671 : ldff1d z17.d, p5/Z, [x19, z20.d, LSL #3] 
: ldff1d (%x19,%z20.d,lsl #3)[32byte] %p5/z -> %z17.d -c5f6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ldff1d (%x21,%z22.d,lsl #3)[32byte] %p5/z -> %z19.d -c5f8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ldff1d (%x23,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d -c5fafb17 : ldff1d z23.d, p6/Z, [x24, z26.d, LSL #3] : ldff1d (%x24,%z26.d,lsl #3)[32byte] %p6/z -> %z23.d -c5fcff59 : ldff1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ldff1d (%x26,%z28.d,lsl #3)[32byte] %p7/z -> %z25.d -c5feff9b : ldff1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ldff1d (%x28,%z30.d,lsl #3)[32byte] %p7/z -> %z27.d -c5ffffff : ldff1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ldff1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d +c5e0e000 : ldff1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ldff1d (%x0,%z0.d,lsl #3)[8byte] %p0/z -> %z0.d +c5e5e482 : ldff1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ldff1d (%x4,%z5.d,lsl #3)[8byte] %p1/z -> %z2.d +c5e7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ldff1d (%x6,%z7.d,lsl #3)[8byte] %p2/z -> %z4.d +c5e9e906 : ldff1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ldff1d (%x8,%z9.d,lsl #3)[8byte] %p2/z -> %z6.d +c5ebed48 : ldff1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ldff1d (%x10,%z11.d,lsl #3)[8byte] %p3/z -> %z8.d +c5eded6a : ldff1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ldff1d (%x11,%z13.d,lsl #3)[8byte] %p3/z -> %z10.d +c5eff1ac : ldff1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ldff1d (%x13,%z15.d,lsl #3)[8byte] %p4/z -> %z12.d +c5f1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ldff1d (%x15,%z17.d,lsl #3)[8byte] %p4/z -> %z14.d +c5f3f630 : ldff1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ldff1d (%x17,%z19.d,lsl #3)[8byte] %p5/z -> %z16.d +c5f4f671 : ldff1d z17.d, p5/Z, [x19, z20.d, LSL #3] : ldff1d (%x19,%z20.d,lsl #3)[8byte] %p5/z -> %z17.d +c5f6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ldff1d (%x21,%z22.d,lsl #3)[8byte] %p5/z -> %z19.d +c5f8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ldff1d (%x23,%z24.d,lsl #3)[8byte] %p6/z -> %z21.d +c5fafb17 : ldff1d z23.d, p6/Z, [x24, z26.d, LSL #3] : 
ldff1d (%x24,%z26.d,lsl #3)[8byte] %p6/z -> %z23.d +c5fcff59 : ldff1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ldff1d (%x26,%z28.d,lsl #3)[8byte] %p7/z -> %z25.d +c5feff9b : ldff1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ldff1d (%x28,%z30.d,lsl #3)[8byte] %p7/z -> %z27.d +c5ffffff : ldff1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ldff1d (%sp,%z31.d,lsl #3)[8byte] %p7/z -> %z31.d # LDFF1H { .S }, /Z, [, .S, ] (LDFF1H-Z.P.BZ-S.x32.unscaled) -84806000 : ldff1h z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1h (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s -84856482 : ldff1h z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1h (%x4,%z5.s,uxtw)[16byte] %p1/z -> %z2.s -848768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1h (%x6,%z7.s,uxtw)[16byte] %p2/z -> %z4.s -84896906 : ldff1h z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1h (%x8,%z9.s,uxtw)[16byte] %p2/z -> %z6.s -848b6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1h (%x10,%z11.s,uxtw)[16byte] %p3/z -> %z8.s -848d6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1h (%x11,%z13.s,uxtw)[16byte] %p3/z -> %z10.s -848f71ac : ldff1h z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1h (%x13,%z15.s,uxtw)[16byte] %p4/z -> %z12.s -849171ee : ldff1h z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1h (%x15,%z17.s,uxtw)[16byte] %p4/z -> %z14.s -84937630 : ldff1h z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1h (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s -84947671 : ldff1h z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1h (%x19,%z20.s,uxtw)[16byte] %p5/z -> %z17.s -849676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1h (%x21,%z22.s,uxtw)[16byte] %p5/z -> %z19.s -84987af5 : ldff1h z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1h (%x23,%z24.s,uxtw)[16byte] %p6/z -> %z21.s -849a7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1h (%x24,%z26.s,uxtw)[16byte] %p6/z -> %z23.s -849c7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1h (%x26,%z28.s,uxtw)[16byte] %p7/z -> %z25.s -849e7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1h (%x28,%z30.s,uxtw)[16byte] %p7/z -> %z27.s -849f7fff : ldff1h z31.s, p7/Z, [sp, z31.s, UXTW] : 
ldff1h (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s -84c06000 : ldff1h z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1h (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s -84c56482 : ldff1h z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1h (%x4,%z5.s,sxtw)[16byte] %p1/z -> %z2.s -84c768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1h (%x6,%z7.s,sxtw)[16byte] %p2/z -> %z4.s -84c96906 : ldff1h z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1h (%x8,%z9.s,sxtw)[16byte] %p2/z -> %z6.s -84cb6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1h (%x10,%z11.s,sxtw)[16byte] %p3/z -> %z8.s -84cd6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1h (%x11,%z13.s,sxtw)[16byte] %p3/z -> %z10.s -84cf71ac : ldff1h z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1h (%x13,%z15.s,sxtw)[16byte] %p4/z -> %z12.s -84d171ee : ldff1h z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1h (%x15,%z17.s,sxtw)[16byte] %p4/z -> %z14.s -84d37630 : ldff1h z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1h (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s -84d47671 : ldff1h z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1h (%x19,%z20.s,sxtw)[16byte] %p5/z -> %z17.s -84d676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1h (%x21,%z22.s,sxtw)[16byte] %p5/z -> %z19.s -84d87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1h (%x23,%z24.s,sxtw)[16byte] %p6/z -> %z21.s -84da7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1h (%x24,%z26.s,sxtw)[16byte] %p6/z -> %z23.s -84dc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1h (%x26,%z28.s,sxtw)[16byte] %p7/z -> %z25.s -84de7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1h (%x28,%z30.s,sxtw)[16byte] %p7/z -> %z27.s -84df7fff : ldff1h z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1h (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s +84806000 : ldff1h z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1h (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s +84856482 : ldff1h z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1h (%x4,%z5.s,uxtw)[2byte] %p1/z -> %z2.s +848768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1h (%x6,%z7.s,uxtw)[2byte] %p2/z -> %z4.s +84896906 : ldff1h z6.s, p2/Z, [x8, z9.s, 
UXTW] : ldff1h (%x8,%z9.s,uxtw)[2byte] %p2/z -> %z6.s +848b6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1h (%x10,%z11.s,uxtw)[2byte] %p3/z -> %z8.s +848d6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1h (%x11,%z13.s,uxtw)[2byte] %p3/z -> %z10.s +848f71ac : ldff1h z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1h (%x13,%z15.s,uxtw)[2byte] %p4/z -> %z12.s +849171ee : ldff1h z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1h (%x15,%z17.s,uxtw)[2byte] %p4/z -> %z14.s +84937630 : ldff1h z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1h (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s +84947671 : ldff1h z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1h (%x19,%z20.s,uxtw)[2byte] %p5/z -> %z17.s +849676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1h (%x21,%z22.s,uxtw)[2byte] %p5/z -> %z19.s +84987af5 : ldff1h z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1h (%x23,%z24.s,uxtw)[2byte] %p6/z -> %z21.s +849a7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1h (%x24,%z26.s,uxtw)[2byte] %p6/z -> %z23.s +849c7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1h (%x26,%z28.s,uxtw)[2byte] %p7/z -> %z25.s +849e7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1h (%x28,%z30.s,uxtw)[2byte] %p7/z -> %z27.s +849f7fff : ldff1h z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1h (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s +84c06000 : ldff1h z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1h (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s +84c56482 : ldff1h z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1h (%x4,%z5.s,sxtw)[2byte] %p1/z -> %z2.s +84c768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1h (%x6,%z7.s,sxtw)[2byte] %p2/z -> %z4.s +84c96906 : ldff1h z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1h (%x8,%z9.s,sxtw)[2byte] %p2/z -> %z6.s +84cb6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1h (%x10,%z11.s,sxtw)[2byte] %p3/z -> %z8.s +84cd6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1h (%x11,%z13.s,sxtw)[2byte] %p3/z -> %z10.s +84cf71ac : ldff1h z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1h (%x13,%z15.s,sxtw)[2byte] %p4/z -> %z12.s +84d171ee : ldff1h z14.s, p4/Z, [x15, 
z17.s, SXTW] : ldff1h (%x15,%z17.s,sxtw)[2byte] %p4/z -> %z14.s +84d37630 : ldff1h z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1h (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s +84d47671 : ldff1h z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1h (%x19,%z20.s,sxtw)[2byte] %p5/z -> %z17.s +84d676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1h (%x21,%z22.s,sxtw)[2byte] %p5/z -> %z19.s +84d87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1h (%x23,%z24.s,sxtw)[2byte] %p6/z -> %z21.s +84da7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1h (%x24,%z26.s,sxtw)[2byte] %p6/z -> %z23.s +84dc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1h (%x26,%z28.s,sxtw)[2byte] %p7/z -> %z25.s +84de7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1h (%x28,%z30.s,sxtw)[2byte] %p7/z -> %z27.s +84df7fff : ldff1h z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1h (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s # LDFF1H { .S }, /Z, [, .S, #1] (LDFF1H-Z.P.BZ-S.x32.scaled) -84a06000 : ldff1h z0.s, p0/Z, [x0, z0.s, UXTW #1] : ldff1h (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s -84a56482 : ldff1h z2.s, p1/Z, [x4, z5.s, UXTW #1] : ldff1h (%x4,%z5.s,uxtw #1)[16byte] %p1/z -> %z2.s -84a768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, UXTW #1] : ldff1h (%x6,%z7.s,uxtw #1)[16byte] %p2/z -> %z4.s -84a96906 : ldff1h z6.s, p2/Z, [x8, z9.s, UXTW #1] : ldff1h (%x8,%z9.s,uxtw #1)[16byte] %p2/z -> %z6.s -84ab6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, UXTW #1] : ldff1h (%x10,%z11.s,uxtw #1)[16byte] %p3/z -> %z8.s -84ad6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, UXTW #1] : ldff1h (%x11,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s -84af71ac : ldff1h z12.s, p4/Z, [x13, z15.s, UXTW #1] : ldff1h (%x13,%z15.s,uxtw #1)[16byte] %p4/z -> %z12.s -84b171ee : ldff1h z14.s, p4/Z, [x15, z17.s, UXTW #1] : ldff1h (%x15,%z17.s,uxtw #1)[16byte] %p4/z -> %z14.s -84b37630 : ldff1h z16.s, p5/Z, [x17, z19.s, UXTW #1] : ldff1h (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s -84b47671 : ldff1h z17.s, p5/Z, [x19, z20.s, UXTW #1] : ldff1h (%x19,%z20.s,uxtw #1)[16byte] %p5/z -> 
%z17.s -84b676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, UXTW #1] : ldff1h (%x21,%z22.s,uxtw #1)[16byte] %p5/z -> %z19.s -84b87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, UXTW #1] : ldff1h (%x23,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s -84ba7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, UXTW #1] : ldff1h (%x24,%z26.s,uxtw #1)[16byte] %p6/z -> %z23.s -84bc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, UXTW #1] : ldff1h (%x26,%z28.s,uxtw #1)[16byte] %p7/z -> %z25.s -84be7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, UXTW #1] : ldff1h (%x28,%z30.s,uxtw #1)[16byte] %p7/z -> %z27.s -84bf7fff : ldff1h z31.s, p7/Z, [sp, z31.s, UXTW #1] : ldff1h (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s -84e06000 : ldff1h z0.s, p0/Z, [x0, z0.s, SXTW #1] : ldff1h (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s -84e56482 : ldff1h z2.s, p1/Z, [x4, z5.s, SXTW #1] : ldff1h (%x4,%z5.s,sxtw #1)[16byte] %p1/z -> %z2.s -84e768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, SXTW #1] : ldff1h (%x6,%z7.s,sxtw #1)[16byte] %p2/z -> %z4.s -84e96906 : ldff1h z6.s, p2/Z, [x8, z9.s, SXTW #1] : ldff1h (%x8,%z9.s,sxtw #1)[16byte] %p2/z -> %z6.s -84eb6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, SXTW #1] : ldff1h (%x10,%z11.s,sxtw #1)[16byte] %p3/z -> %z8.s -84ed6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, SXTW #1] : ldff1h (%x11,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s -84ef71ac : ldff1h z12.s, p4/Z, [x13, z15.s, SXTW #1] : ldff1h (%x13,%z15.s,sxtw #1)[16byte] %p4/z -> %z12.s -84f171ee : ldff1h z14.s, p4/Z, [x15, z17.s, SXTW #1] : ldff1h (%x15,%z17.s,sxtw #1)[16byte] %p4/z -> %z14.s -84f37630 : ldff1h z16.s, p5/Z, [x17, z19.s, SXTW #1] : ldff1h (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s -84f47671 : ldff1h z17.s, p5/Z, [x19, z20.s, SXTW #1] : ldff1h (%x19,%z20.s,sxtw #1)[16byte] %p5/z -> %z17.s -84f676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, SXTW #1] : ldff1h (%x21,%z22.s,sxtw #1)[16byte] %p5/z -> %z19.s -84f87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, SXTW #1] : ldff1h (%x23,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s -84fa7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, SXTW #1] : 
ldff1h (%x24,%z26.s,sxtw #1)[16byte] %p6/z -> %z23.s -84fc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, SXTW #1] : ldff1h (%x26,%z28.s,sxtw #1)[16byte] %p7/z -> %z25.s -84fe7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, SXTW #1] : ldff1h (%x28,%z30.s,sxtw #1)[16byte] %p7/z -> %z27.s -84ff7fff : ldff1h z31.s, p7/Z, [sp, z31.s, SXTW #1] : ldff1h (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s +84a06000 : ldff1h z0.s, p0/Z, [x0, z0.s, UXTW #1] : ldff1h (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s +84a56482 : ldff1h z2.s, p1/Z, [x4, z5.s, UXTW #1] : ldff1h (%x4,%z5.s,uxtw #1)[2byte] %p1/z -> %z2.s +84a768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, UXTW #1] : ldff1h (%x6,%z7.s,uxtw #1)[2byte] %p2/z -> %z4.s +84a96906 : ldff1h z6.s, p2/Z, [x8, z9.s, UXTW #1] : ldff1h (%x8,%z9.s,uxtw #1)[2byte] %p2/z -> %z6.s +84ab6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, UXTW #1] : ldff1h (%x10,%z11.s,uxtw #1)[2byte] %p3/z -> %z8.s +84ad6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, UXTW #1] : ldff1h (%x11,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s +84af71ac : ldff1h z12.s, p4/Z, [x13, z15.s, UXTW #1] : ldff1h (%x13,%z15.s,uxtw #1)[2byte] %p4/z -> %z12.s +84b171ee : ldff1h z14.s, p4/Z, [x15, z17.s, UXTW #1] : ldff1h (%x15,%z17.s,uxtw #1)[2byte] %p4/z -> %z14.s +84b37630 : ldff1h z16.s, p5/Z, [x17, z19.s, UXTW #1] : ldff1h (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s +84b47671 : ldff1h z17.s, p5/Z, [x19, z20.s, UXTW #1] : ldff1h (%x19,%z20.s,uxtw #1)[2byte] %p5/z -> %z17.s +84b676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, UXTW #1] : ldff1h (%x21,%z22.s,uxtw #1)[2byte] %p5/z -> %z19.s +84b87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, UXTW #1] : ldff1h (%x23,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s +84ba7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, UXTW #1] : ldff1h (%x24,%z26.s,uxtw #1)[2byte] %p6/z -> %z23.s +84bc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, UXTW #1] : ldff1h (%x26,%z28.s,uxtw #1)[2byte] %p7/z -> %z25.s +84be7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, UXTW #1] : ldff1h (%x28,%z30.s,uxtw #1)[2byte] %p7/z -> %z27.s +84bf7fff : ldff1h z31.s, 
p7/Z, [sp, z31.s, UXTW #1] : ldff1h (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s +84e06000 : ldff1h z0.s, p0/Z, [x0, z0.s, SXTW #1] : ldff1h (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s +84e56482 : ldff1h z2.s, p1/Z, [x4, z5.s, SXTW #1] : ldff1h (%x4,%z5.s,sxtw #1)[2byte] %p1/z -> %z2.s +84e768c4 : ldff1h z4.s, p2/Z, [x6, z7.s, SXTW #1] : ldff1h (%x6,%z7.s,sxtw #1)[2byte] %p2/z -> %z4.s +84e96906 : ldff1h z6.s, p2/Z, [x8, z9.s, SXTW #1] : ldff1h (%x8,%z9.s,sxtw #1)[2byte] %p2/z -> %z6.s +84eb6d48 : ldff1h z8.s, p3/Z, [x10, z11.s, SXTW #1] : ldff1h (%x10,%z11.s,sxtw #1)[2byte] %p3/z -> %z8.s +84ed6d6a : ldff1h z10.s, p3/Z, [x11, z13.s, SXTW #1] : ldff1h (%x11,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s +84ef71ac : ldff1h z12.s, p4/Z, [x13, z15.s, SXTW #1] : ldff1h (%x13,%z15.s,sxtw #1)[2byte] %p4/z -> %z12.s +84f171ee : ldff1h z14.s, p4/Z, [x15, z17.s, SXTW #1] : ldff1h (%x15,%z17.s,sxtw #1)[2byte] %p4/z -> %z14.s +84f37630 : ldff1h z16.s, p5/Z, [x17, z19.s, SXTW #1] : ldff1h (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s +84f47671 : ldff1h z17.s, p5/Z, [x19, z20.s, SXTW #1] : ldff1h (%x19,%z20.s,sxtw #1)[2byte] %p5/z -> %z17.s +84f676b3 : ldff1h z19.s, p5/Z, [x21, z22.s, SXTW #1] : ldff1h (%x21,%z22.s,sxtw #1)[2byte] %p5/z -> %z19.s +84f87af5 : ldff1h z21.s, p6/Z, [x23, z24.s, SXTW #1] : ldff1h (%x23,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s +84fa7b17 : ldff1h z23.s, p6/Z, [x24, z26.s, SXTW #1] : ldff1h (%x24,%z26.s,sxtw #1)[2byte] %p6/z -> %z23.s +84fc7f59 : ldff1h z25.s, p7/Z, [x26, z28.s, SXTW #1] : ldff1h (%x26,%z28.s,sxtw #1)[2byte] %p7/z -> %z25.s +84fe7f9b : ldff1h z27.s, p7/Z, [x28, z30.s, SXTW #1] : ldff1h (%x28,%z30.s,sxtw #1)[2byte] %p7/z -> %z27.s +84ff7fff : ldff1h z31.s, p7/Z, [sp, z31.s, SXTW #1] : ldff1h (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s # LDFF1H { .S }, /Z, [.S{, #}] (LDFF1H-Z.P.AI-S) -84a0e000 : ldff1h z0.s, p0/Z, [z0.s, #0] : ldff1h (%z0.s)[16byte] %p0/z -> %z0.s -84a2e482 : ldff1h z2.s, p1/Z, [z4.s, #4] : ldff1h +0x04(%z4.s)[16byte] %p1/z -> %z2.s 
-84a4e8c4 : ldff1h z4.s, p2/Z, [z6.s, #8] : ldff1h +0x08(%z6.s)[16byte] %p2/z -> %z4.s -84a6e906 : ldff1h z6.s, p2/Z, [z8.s, #12] : ldff1h +0x0c(%z8.s)[16byte] %p2/z -> %z6.s -84a8ed48 : ldff1h z8.s, p3/Z, [z10.s, #16] : ldff1h +0x10(%z10.s)[16byte] %p3/z -> %z8.s -84aaed8a : ldff1h z10.s, p3/Z, [z12.s, #20] : ldff1h +0x14(%z12.s)[16byte] %p3/z -> %z10.s -84acf1cc : ldff1h z12.s, p4/Z, [z14.s, #24] : ldff1h +0x18(%z14.s)[16byte] %p4/z -> %z12.s -84aef20e : ldff1h z14.s, p4/Z, [z16.s, #28] : ldff1h +0x1c(%z16.s)[16byte] %p4/z -> %z14.s -84b0f650 : ldff1h z16.s, p5/Z, [z18.s, #32] : ldff1h +0x20(%z18.s)[16byte] %p5/z -> %z16.s -84b1f671 : ldff1h z17.s, p5/Z, [z19.s, #34] : ldff1h +0x22(%z19.s)[16byte] %p5/z -> %z17.s -84b3f6b3 : ldff1h z19.s, p5/Z, [z21.s, #38] : ldff1h +0x26(%z21.s)[16byte] %p5/z -> %z19.s -84b5faf5 : ldff1h z21.s, p6/Z, [z23.s, #42] : ldff1h +0x2a(%z23.s)[16byte] %p6/z -> %z21.s -84b7fb37 : ldff1h z23.s, p6/Z, [z25.s, #46] : ldff1h +0x2e(%z25.s)[16byte] %p6/z -> %z23.s -84b9ff79 : ldff1h z25.s, p7/Z, [z27.s, #50] : ldff1h +0x32(%z27.s)[16byte] %p7/z -> %z25.s -84bbffbb : ldff1h z27.s, p7/Z, [z29.s, #54] : ldff1h +0x36(%z29.s)[16byte] %p7/z -> %z27.s -84bfffff : ldff1h z31.s, p7/Z, [z31.s, #62] : ldff1h +0x3e(%z31.s)[16byte] %p7/z -> %z31.s +84a0e000 : ldff1h z0.s, p0/Z, [z0.s, #0] : ldff1h (%z0.s)[2byte] %p0/z -> %z0.s +84a2e482 : ldff1h z2.s, p1/Z, [z4.s, #4] : ldff1h +0x04(%z4.s)[2byte] %p1/z -> %z2.s +84a4e8c4 : ldff1h z4.s, p2/Z, [z6.s, #8] : ldff1h +0x08(%z6.s)[2byte] %p2/z -> %z4.s +84a6e906 : ldff1h z6.s, p2/Z, [z8.s, #12] : ldff1h +0x0c(%z8.s)[2byte] %p2/z -> %z6.s +84a8ed48 : ldff1h z8.s, p3/Z, [z10.s, #16] : ldff1h +0x10(%z10.s)[2byte] %p3/z -> %z8.s +84aaed8a : ldff1h z10.s, p3/Z, [z12.s, #20] : ldff1h +0x14(%z12.s)[2byte] %p3/z -> %z10.s +84acf1cc : ldff1h z12.s, p4/Z, [z14.s, #24] : ldff1h +0x18(%z14.s)[2byte] %p4/z -> %z12.s +84aef20e : ldff1h z14.s, p4/Z, [z16.s, #28] : ldff1h +0x1c(%z16.s)[2byte] %p4/z -> %z14.s +84b0f650 : ldff1h 
z16.s, p5/Z, [z18.s, #32] : ldff1h +0x20(%z18.s)[2byte] %p5/z -> %z16.s +84b1f671 : ldff1h z17.s, p5/Z, [z19.s, #34] : ldff1h +0x22(%z19.s)[2byte] %p5/z -> %z17.s +84b3f6b3 : ldff1h z19.s, p5/Z, [z21.s, #38] : ldff1h +0x26(%z21.s)[2byte] %p5/z -> %z19.s +84b5faf5 : ldff1h z21.s, p6/Z, [z23.s, #42] : ldff1h +0x2a(%z23.s)[2byte] %p6/z -> %z21.s +84b7fb37 : ldff1h z23.s, p6/Z, [z25.s, #46] : ldff1h +0x2e(%z25.s)[2byte] %p6/z -> %z23.s +84b9ff79 : ldff1h z25.s, p7/Z, [z27.s, #50] : ldff1h +0x32(%z27.s)[2byte] %p7/z -> %z25.s +84bbffbb : ldff1h z27.s, p7/Z, [z29.s, #54] : ldff1h +0x36(%z29.s)[2byte] %p7/z -> %z27.s +84bfffff : ldff1h z31.s, p7/Z, [z31.s, #62] : ldff1h +0x3e(%z31.s)[2byte] %p7/z -> %z31.s # LDFF1H { .H }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U16) -a4a06000 : ldff1h z0.h, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h -a4a56482 : ldff1h z2.h, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[32byte] %p1/z -> %z2.h -a4a768c4 : ldff1h z4.h, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[32byte] %p2/z -> %z4.h -a4a96906 : ldff1h z6.h, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[32byte] %p2/z -> %z6.h -a4ab6d48 : ldff1h z8.h, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[32byte] %p3/z -> %z8.h -a4ac6d6a : ldff1h z10.h, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[32byte] %p3/z -> %z10.h -a4ae71ac : ldff1h z12.h, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[32byte] %p4/z -> %z12.h -a4b071ee : ldff1h z14.h, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[32byte] %p4/z -> %z14.h -a4b27630 : ldff1h z16.h, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h -a4b47671 : ldff1h z17.h, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[32byte] %p5/z -> %z17.h -a4b676b3 : ldff1h z19.h, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[32byte] %p5/z -> %z19.h -a4b87af5 : ldff1h z21.h, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[32byte] %p6/z -> %z21.h -a4b97b17 : ldff1h 
z23.h, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[32byte] %p6/z -> %z23.h -a4bb7f59 : ldff1h z25.h, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[32byte] %p7/z -> %z25.h -a4bd7f9b : ldff1h z27.h, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[32byte] %p7/z -> %z27.h -a4be7fff : ldff1h z31.h, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h +a4a06000 : ldff1h z0.h, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h +a4a56482 : ldff1h z2.h, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h +a4a768c4 : ldff1h z4.h, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h +a4a96906 : ldff1h z6.h, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h +a4ab6d48 : ldff1h z8.h, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h +a4ac6d6a : ldff1h z10.h, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h +a4ae71ac : ldff1h z12.h, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h +a4b071ee : ldff1h z14.h, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h +a4b27630 : ldff1h z16.h, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h +a4b47671 : ldff1h z17.h, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h +a4b676b3 : ldff1h z19.h, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h +a4b87af5 : ldff1h z21.h, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h +a4b97b17 : ldff1h z23.h, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h +a4bb7f59 : ldff1h z25.h, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h +a4bd7f9b : ldff1h z27.h, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h +a4be7fff : ldff1h z31.h, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h # 
LDFF1H { .S }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U32) -a4c06000 : ldff1h z0.s, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s -a4c56482 : ldff1h z2.s, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s -a4c768c4 : ldff1h z4.s, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.s -a4c96906 : ldff1h z6.s, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s -a4cb6d48 : ldff1h z8.s, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s -a4cc6d6a : ldff1h z10.s, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s -a4ce71ac : ldff1h z12.s, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s -a4d071ee : ldff1h z14.s, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s -a4d27630 : ldff1h z16.s, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s -a4d47671 : ldff1h z17.s, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s -a4d676b3 : ldff1h z19.s, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s -a4d87af5 : ldff1h z21.s, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s -a4d97b17 : ldff1h z23.s, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s -a4db7f59 : ldff1h z25.s, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.s -a4dd7f9b : ldff1h z27.s, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s -a4de7fff : ldff1h z31.s, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s +a4c06000 : ldff1h z0.s, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s +a4c56482 : ldff1h z2.s, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.s +a4c768c4 : ldff1h z4.s, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.s +a4c96906 : ldff1h z6.s, p2/Z, [x8, 
x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.s +a4cb6d48 : ldff1h z8.s, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.s +a4cc6d6a : ldff1h z10.s, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.s +a4ce71ac : ldff1h z12.s, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.s +a4d071ee : ldff1h z14.s, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.s +a4d27630 : ldff1h z16.s, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s +a4d47671 : ldff1h z17.s, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.s +a4d676b3 : ldff1h z19.s, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.s +a4d87af5 : ldff1h z21.s, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.s +a4d97b17 : ldff1h z23.s, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.s +a4db7f59 : ldff1h z25.s, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.s +a4dd7f9b : ldff1h z27.s, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.s +a4de7fff : ldff1h z31.s, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s # LDFF1H { .D }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U64) -a4e06000 : ldff1h z0.d, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d -a4e56482 : ldff1h z2.d, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d -a4e768c4 : ldff1h z4.d, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d -a4e96906 : ldff1h z6.d, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d -a4eb6d48 : ldff1h z8.d, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d -a4ec6d6a : ldff1h z10.d, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d -a4ee71ac : ldff1h z12.d, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl 
#1)[8byte] %p4/z -> %z12.d -a4f071ee : ldff1h z14.d, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d -a4f27630 : ldff1h z16.d, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d -a4f47671 : ldff1h z17.d, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d -a4f676b3 : ldff1h z19.d, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d -a4f87af5 : ldff1h z21.d, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d -a4f97b17 : ldff1h z23.d, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d -a4fb7f59 : ldff1h z25.d, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d -a4fd7f9b : ldff1h z27.d, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d -a4fe7fff : ldff1h z31.d, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d +a4e06000 : ldff1h z0.d, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d +a4e56482 : ldff1h z2.d, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.d +a4e768c4 : ldff1h z4.d, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.d +a4e96906 : ldff1h z6.d, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.d +a4eb6d48 : ldff1h z8.d, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.d +a4ec6d6a : ldff1h z10.d, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.d +a4ee71ac : ldff1h z12.d, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.d +a4f071ee : ldff1h z14.d, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.d +a4f27630 : ldff1h z16.d, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d +a4f47671 : ldff1h z17.d, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.d +a4f676b3 : ldff1h z19.d, p5/Z, [x21, x22, LSL #1] : ldff1h 
(%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.d +a4f87af5 : ldff1h z21.d, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.d +a4f97b17 : ldff1h z23.d, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.d +a4fb7f59 : ldff1h z25.d, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.d +a4fd7f9b : ldff1h z27.d, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.d +a4fe7fff : ldff1h z31.d, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d # LDFF1H { .D }, /Z, [, .D, ] (LDFF1H-Z.P.BZ-D.x32.unscaled) -c4806000 : ldff1h z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1h (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d -c4856482 : ldff1h z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1h (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d -c48768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1h (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d -c4896906 : ldff1h z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1h (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d -c48b6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1h (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d -c48d6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1h (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d -c48f71ac : ldff1h z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1h (%x13,%z15.d,uxtw)[8byte] %p4/z -> %z12.d -c49171ee : ldff1h z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1h (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d -c4937630 : ldff1h z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1h (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d -c4947671 : ldff1h z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1h (%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d -c49676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1h (%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d -c4987af5 : ldff1h z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1h (%x23,%z24.d,uxtw)[8byte] %p6/z -> %z21.d -c49a7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1h (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d -c49c7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1h (%x26,%z28.d,uxtw)[8byte] 
%p7/z -> %z25.d -c49e7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1h (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d -c49f7fff : ldff1h z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1h (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d -c4c06000 : ldff1h z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1h (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d -c4c56482 : ldff1h z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1h (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d -c4c768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1h (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d -c4c96906 : ldff1h z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1h (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d -c4cb6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1h (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d -c4cd6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1h (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d -c4cf71ac : ldff1h z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1h (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d -c4d171ee : ldff1h z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1h (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d -c4d37630 : ldff1h z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1h (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d -c4d47671 : ldff1h z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1h (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d -c4d676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1h (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d -c4d87af5 : ldff1h z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1h (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d -c4da7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1h (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d -c4dc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1h (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d -c4de7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1h (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d -c4df7fff : ldff1h z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1h (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d +c4806000 : ldff1h z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1h (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d +c4856482 : ldff1h z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1h (%x4,%z5.d,uxtw)[2byte] 
%p1/z -> %z2.d +c48768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1h (%x6,%z7.d,uxtw)[2byte] %p2/z -> %z4.d +c4896906 : ldff1h z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1h (%x8,%z9.d,uxtw)[2byte] %p2/z -> %z6.d +c48b6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1h (%x10,%z11.d,uxtw)[2byte] %p3/z -> %z8.d +c48d6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1h (%x11,%z13.d,uxtw)[2byte] %p3/z -> %z10.d +c48f71ac : ldff1h z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1h (%x13,%z15.d,uxtw)[2byte] %p4/z -> %z12.d +c49171ee : ldff1h z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1h (%x15,%z17.d,uxtw)[2byte] %p4/z -> %z14.d +c4937630 : ldff1h z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1h (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d +c4947671 : ldff1h z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1h (%x19,%z20.d,uxtw)[2byte] %p5/z -> %z17.d +c49676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1h (%x21,%z22.d,uxtw)[2byte] %p5/z -> %z19.d +c4987af5 : ldff1h z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1h (%x23,%z24.d,uxtw)[2byte] %p6/z -> %z21.d +c49a7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1h (%x24,%z26.d,uxtw)[2byte] %p6/z -> %z23.d +c49c7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1h (%x26,%z28.d,uxtw)[2byte] %p7/z -> %z25.d +c49e7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1h (%x28,%z30.d,uxtw)[2byte] %p7/z -> %z27.d +c49f7fff : ldff1h z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1h (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d +c4c06000 : ldff1h z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1h (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d +c4c56482 : ldff1h z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1h (%x4,%z5.d,sxtw)[2byte] %p1/z -> %z2.d +c4c768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1h (%x6,%z7.d,sxtw)[2byte] %p2/z -> %z4.d +c4c96906 : ldff1h z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1h (%x8,%z9.d,sxtw)[2byte] %p2/z -> %z6.d +c4cb6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1h (%x10,%z11.d,sxtw)[2byte] %p3/z -> %z8.d +c4cd6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1h (%x11,%z13.d,sxtw)[2byte] 
%p3/z -> %z10.d +c4cf71ac : ldff1h z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1h (%x13,%z15.d,sxtw)[2byte] %p4/z -> %z12.d +c4d171ee : ldff1h z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1h (%x15,%z17.d,sxtw)[2byte] %p4/z -> %z14.d +c4d37630 : ldff1h z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1h (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d +c4d47671 : ldff1h z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1h (%x19,%z20.d,sxtw)[2byte] %p5/z -> %z17.d +c4d676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1h (%x21,%z22.d,sxtw)[2byte] %p5/z -> %z19.d +c4d87af5 : ldff1h z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1h (%x23,%z24.d,sxtw)[2byte] %p6/z -> %z21.d +c4da7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1h (%x24,%z26.d,sxtw)[2byte] %p6/z -> %z23.d +c4dc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1h (%x26,%z28.d,sxtw)[2byte] %p7/z -> %z25.d +c4de7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1h (%x28,%z30.d,sxtw)[2byte] %p7/z -> %z27.d +c4df7fff : ldff1h z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1h (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d # LDFF1H { .D }, /Z, [, .D, #1] (LDFF1H-Z.P.BZ-D.x32.scaled) -c4a06000 : ldff1h z0.d, p0/Z, [x0, z0.d, UXTW #1] : ldff1h (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d -c4a56482 : ldff1h z2.d, p1/Z, [x4, z5.d, UXTW #1] : ldff1h (%x4,%z5.d,uxtw #1)[8byte] %p1/z -> %z2.d -c4a768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, UXTW #1] : ldff1h (%x6,%z7.d,uxtw #1)[8byte] %p2/z -> %z4.d -c4a96906 : ldff1h z6.d, p2/Z, [x8, z9.d, UXTW #1] : ldff1h (%x8,%z9.d,uxtw #1)[8byte] %p2/z -> %z6.d -c4ab6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, UXTW #1] : ldff1h (%x10,%z11.d,uxtw #1)[8byte] %p3/z -> %z8.d -c4ad6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, UXTW #1] : ldff1h (%x11,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d -c4af71ac : ldff1h z12.d, p4/Z, [x13, z15.d, UXTW #1] : ldff1h (%x13,%z15.d,uxtw #1)[8byte] %p4/z -> %z12.d -c4b171ee : ldff1h z14.d, p4/Z, [x15, z17.d, UXTW #1] : ldff1h (%x15,%z17.d,uxtw #1)[8byte] %p4/z -> %z14.d -c4b37630 : ldff1h z16.d, p5/Z, [x17, z19.d, UXTW #1] : ldff1h 
(%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d -c4b47671 : ldff1h z17.d, p5/Z, [x19, z20.d, UXTW #1] : ldff1h (%x19,%z20.d,uxtw #1)[8byte] %p5/z -> %z17.d -c4b676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, UXTW #1] : ldff1h (%x21,%z22.d,uxtw #1)[8byte] %p5/z -> %z19.d -c4b87af5 : ldff1h z21.d, p6/Z, [x23, z24.d, UXTW #1] : ldff1h (%x23,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d -c4ba7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, UXTW #1] : ldff1h (%x24,%z26.d,uxtw #1)[8byte] %p6/z -> %z23.d -c4bc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, UXTW #1] : ldff1h (%x26,%z28.d,uxtw #1)[8byte] %p7/z -> %z25.d -c4be7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, UXTW #1] : ldff1h (%x28,%z30.d,uxtw #1)[8byte] %p7/z -> %z27.d -c4bf7fff : ldff1h z31.d, p7/Z, [sp, z31.d, UXTW #1] : ldff1h (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d -c4e06000 : ldff1h z0.d, p0/Z, [x0, z0.d, SXTW #1] : ldff1h (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d -c4e56482 : ldff1h z2.d, p1/Z, [x4, z5.d, SXTW #1] : ldff1h (%x4,%z5.d,sxtw #1)[8byte] %p1/z -> %z2.d -c4e768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, SXTW #1] : ldff1h (%x6,%z7.d,sxtw #1)[8byte] %p2/z -> %z4.d -c4e96906 : ldff1h z6.d, p2/Z, [x8, z9.d, SXTW #1] : ldff1h (%x8,%z9.d,sxtw #1)[8byte] %p2/z -> %z6.d -c4eb6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, SXTW #1] : ldff1h (%x10,%z11.d,sxtw #1)[8byte] %p3/z -> %z8.d -c4ed6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, SXTW #1] : ldff1h (%x11,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d -c4ef71ac : ldff1h z12.d, p4/Z, [x13, z15.d, SXTW #1] : ldff1h (%x13,%z15.d,sxtw #1)[8byte] %p4/z -> %z12.d -c4f171ee : ldff1h z14.d, p4/Z, [x15, z17.d, SXTW #1] : ldff1h (%x15,%z17.d,sxtw #1)[8byte] %p4/z -> %z14.d -c4f37630 : ldff1h z16.d, p5/Z, [x17, z19.d, SXTW #1] : ldff1h (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d -c4f47671 : ldff1h z17.d, p5/Z, [x19, z20.d, SXTW #1] : ldff1h (%x19,%z20.d,sxtw #1)[8byte] %p5/z -> %z17.d -c4f676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, SXTW #1] : ldff1h (%x21,%z22.d,sxtw #1)[8byte] %p5/z -> %z19.d -c4f87af5 : ldff1h z21.d, p6/Z, [x23, 
z24.d, SXTW #1] : ldff1h (%x23,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d -c4fa7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, SXTW #1] : ldff1h (%x24,%z26.d,sxtw #1)[8byte] %p6/z -> %z23.d -c4fc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, SXTW #1] : ldff1h (%x26,%z28.d,sxtw #1)[8byte] %p7/z -> %z25.d -c4fe7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, SXTW #1] : ldff1h (%x28,%z30.d,sxtw #1)[8byte] %p7/z -> %z27.d -c4ff7fff : ldff1h z31.d, p7/Z, [sp, z31.d, SXTW #1] : ldff1h (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d +c4a06000 : ldff1h z0.d, p0/Z, [x0, z0.d, UXTW #1] : ldff1h (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d +c4a56482 : ldff1h z2.d, p1/Z, [x4, z5.d, UXTW #1] : ldff1h (%x4,%z5.d,uxtw #1)[2byte] %p1/z -> %z2.d +c4a768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, UXTW #1] : ldff1h (%x6,%z7.d,uxtw #1)[2byte] %p2/z -> %z4.d +c4a96906 : ldff1h z6.d, p2/Z, [x8, z9.d, UXTW #1] : ldff1h (%x8,%z9.d,uxtw #1)[2byte] %p2/z -> %z6.d +c4ab6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, UXTW #1] : ldff1h (%x10,%z11.d,uxtw #1)[2byte] %p3/z -> %z8.d +c4ad6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, UXTW #1] : ldff1h (%x11,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d +c4af71ac : ldff1h z12.d, p4/Z, [x13, z15.d, UXTW #1] : ldff1h (%x13,%z15.d,uxtw #1)[2byte] %p4/z -> %z12.d +c4b171ee : ldff1h z14.d, p4/Z, [x15, z17.d, UXTW #1] : ldff1h (%x15,%z17.d,uxtw #1)[2byte] %p4/z -> %z14.d +c4b37630 : ldff1h z16.d, p5/Z, [x17, z19.d, UXTW #1] : ldff1h (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d +c4b47671 : ldff1h z17.d, p5/Z, [x19, z20.d, UXTW #1] : ldff1h (%x19,%z20.d,uxtw #1)[2byte] %p5/z -> %z17.d +c4b676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, UXTW #1] : ldff1h (%x21,%z22.d,uxtw #1)[2byte] %p5/z -> %z19.d +c4b87af5 : ldff1h z21.d, p6/Z, [x23, z24.d, UXTW #1] : ldff1h (%x23,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d +c4ba7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, UXTW #1] : ldff1h (%x24,%z26.d,uxtw #1)[2byte] %p6/z -> %z23.d +c4bc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, UXTW #1] : ldff1h (%x26,%z28.d,uxtw #1)[2byte] %p7/z -> %z25.d +c4be7f9b : 
ldff1h z27.d, p7/Z, [x28, z30.d, UXTW #1] : ldff1h (%x28,%z30.d,uxtw #1)[2byte] %p7/z -> %z27.d +c4bf7fff : ldff1h z31.d, p7/Z, [sp, z31.d, UXTW #1] : ldff1h (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d +c4e06000 : ldff1h z0.d, p0/Z, [x0, z0.d, SXTW #1] : ldff1h (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d +c4e56482 : ldff1h z2.d, p1/Z, [x4, z5.d, SXTW #1] : ldff1h (%x4,%z5.d,sxtw #1)[2byte] %p1/z -> %z2.d +c4e768c4 : ldff1h z4.d, p2/Z, [x6, z7.d, SXTW #1] : ldff1h (%x6,%z7.d,sxtw #1)[2byte] %p2/z -> %z4.d +c4e96906 : ldff1h z6.d, p2/Z, [x8, z9.d, SXTW #1] : ldff1h (%x8,%z9.d,sxtw #1)[2byte] %p2/z -> %z6.d +c4eb6d48 : ldff1h z8.d, p3/Z, [x10, z11.d, SXTW #1] : ldff1h (%x10,%z11.d,sxtw #1)[2byte] %p3/z -> %z8.d +c4ed6d6a : ldff1h z10.d, p3/Z, [x11, z13.d, SXTW #1] : ldff1h (%x11,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d +c4ef71ac : ldff1h z12.d, p4/Z, [x13, z15.d, SXTW #1] : ldff1h (%x13,%z15.d,sxtw #1)[2byte] %p4/z -> %z12.d +c4f171ee : ldff1h z14.d, p4/Z, [x15, z17.d, SXTW #1] : ldff1h (%x15,%z17.d,sxtw #1)[2byte] %p4/z -> %z14.d +c4f37630 : ldff1h z16.d, p5/Z, [x17, z19.d, SXTW #1] : ldff1h (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d +c4f47671 : ldff1h z17.d, p5/Z, [x19, z20.d, SXTW #1] : ldff1h (%x19,%z20.d,sxtw #1)[2byte] %p5/z -> %z17.d +c4f676b3 : ldff1h z19.d, p5/Z, [x21, z22.d, SXTW #1] : ldff1h (%x21,%z22.d,sxtw #1)[2byte] %p5/z -> %z19.d +c4f87af5 : ldff1h z21.d, p6/Z, [x23, z24.d, SXTW #1] : ldff1h (%x23,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d +c4fa7b17 : ldff1h z23.d, p6/Z, [x24, z26.d, SXTW #1] : ldff1h (%x24,%z26.d,sxtw #1)[2byte] %p6/z -> %z23.d +c4fc7f59 : ldff1h z25.d, p7/Z, [x26, z28.d, SXTW #1] : ldff1h (%x26,%z28.d,sxtw #1)[2byte] %p7/z -> %z25.d +c4fe7f9b : ldff1h z27.d, p7/Z, [x28, z30.d, SXTW #1] : ldff1h (%x28,%z30.d,sxtw #1)[2byte] %p7/z -> %z27.d +c4ff7fff : ldff1h z31.d, p7/Z, [sp, z31.d, SXTW #1] : ldff1h (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d # LDFF1H { .D }, /Z, [.D{, #}] (LDFF1H-Z.P.AI-D) -c4a0e000 : ldff1h z0.d, p0/Z, [z0.d, #0] : 
ldff1h (%z0.d)[8byte] %p0/z -> %z0.d -c4a2e482 : ldff1h z2.d, p1/Z, [z4.d, #4] : ldff1h +0x04(%z4.d)[8byte] %p1/z -> %z2.d -c4a4e8c4 : ldff1h z4.d, p2/Z, [z6.d, #8] : ldff1h +0x08(%z6.d)[8byte] %p2/z -> %z4.d -c4a6e906 : ldff1h z6.d, p2/Z, [z8.d, #12] : ldff1h +0x0c(%z8.d)[8byte] %p2/z -> %z6.d -c4a8ed48 : ldff1h z8.d, p3/Z, [z10.d, #16] : ldff1h +0x10(%z10.d)[8byte] %p3/z -> %z8.d -c4aaed8a : ldff1h z10.d, p3/Z, [z12.d, #20] : ldff1h +0x14(%z12.d)[8byte] %p3/z -> %z10.d -c4acf1cc : ldff1h z12.d, p4/Z, [z14.d, #24] : ldff1h +0x18(%z14.d)[8byte] %p4/z -> %z12.d -c4aef20e : ldff1h z14.d, p4/Z, [z16.d, #28] : ldff1h +0x1c(%z16.d)[8byte] %p4/z -> %z14.d -c4b0f650 : ldff1h z16.d, p5/Z, [z18.d, #32] : ldff1h +0x20(%z18.d)[8byte] %p5/z -> %z16.d -c4b1f671 : ldff1h z17.d, p5/Z, [z19.d, #34] : ldff1h +0x22(%z19.d)[8byte] %p5/z -> %z17.d -c4b3f6b3 : ldff1h z19.d, p5/Z, [z21.d, #38] : ldff1h +0x26(%z21.d)[8byte] %p5/z -> %z19.d -c4b5faf5 : ldff1h z21.d, p6/Z, [z23.d, #42] : ldff1h +0x2a(%z23.d)[8byte] %p6/z -> %z21.d -c4b7fb37 : ldff1h z23.d, p6/Z, [z25.d, #46] : ldff1h +0x2e(%z25.d)[8byte] %p6/z -> %z23.d -c4b9ff79 : ldff1h z25.d, p7/Z, [z27.d, #50] : ldff1h +0x32(%z27.d)[8byte] %p7/z -> %z25.d -c4bbffbb : ldff1h z27.d, p7/Z, [z29.d, #54] : ldff1h +0x36(%z29.d)[8byte] %p7/z -> %z27.d -c4bfffff : ldff1h z31.d, p7/Z, [z31.d, #62] : ldff1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +c4a0e000 : ldff1h z0.d, p0/Z, [z0.d, #0] : ldff1h (%z0.d)[2byte] %p0/z -> %z0.d +c4a2e482 : ldff1h z2.d, p1/Z, [z4.d, #4] : ldff1h +0x04(%z4.d)[2byte] %p1/z -> %z2.d +c4a4e8c4 : ldff1h z4.d, p2/Z, [z6.d, #8] : ldff1h +0x08(%z6.d)[2byte] %p2/z -> %z4.d +c4a6e906 : ldff1h z6.d, p2/Z, [z8.d, #12] : ldff1h +0x0c(%z8.d)[2byte] %p2/z -> %z6.d +c4a8ed48 : ldff1h z8.d, p3/Z, [z10.d, #16] : ldff1h +0x10(%z10.d)[2byte] %p3/z -> %z8.d +c4aaed8a : ldff1h z10.d, p3/Z, [z12.d, #20] : ldff1h +0x14(%z12.d)[2byte] %p3/z -> %z10.d +c4acf1cc : ldff1h z12.d, p4/Z, [z14.d, #24] : ldff1h +0x18(%z14.d)[2byte] %p4/z -> %z12.d 
+c4aef20e : ldff1h z14.d, p4/Z, [z16.d, #28] : ldff1h +0x1c(%z16.d)[2byte] %p4/z -> %z14.d +c4b0f650 : ldff1h z16.d, p5/Z, [z18.d, #32] : ldff1h +0x20(%z18.d)[2byte] %p5/z -> %z16.d +c4b1f671 : ldff1h z17.d, p5/Z, [z19.d, #34] : ldff1h +0x22(%z19.d)[2byte] %p5/z -> %z17.d +c4b3f6b3 : ldff1h z19.d, p5/Z, [z21.d, #38] : ldff1h +0x26(%z21.d)[2byte] %p5/z -> %z19.d +c4b5faf5 : ldff1h z21.d, p6/Z, [z23.d, #42] : ldff1h +0x2a(%z23.d)[2byte] %p6/z -> %z21.d +c4b7fb37 : ldff1h z23.d, p6/Z, [z25.d, #46] : ldff1h +0x2e(%z25.d)[2byte] %p6/z -> %z23.d +c4b9ff79 : ldff1h z25.d, p7/Z, [z27.d, #50] : ldff1h +0x32(%z27.d)[2byte] %p7/z -> %z25.d +c4bbffbb : ldff1h z27.d, p7/Z, [z29.d, #54] : ldff1h +0x36(%z29.d)[2byte] %p7/z -> %z27.d +c4bfffff : ldff1h z31.d, p7/Z, [z31.d, #62] : ldff1h +0x3e(%z31.d)[2byte] %p7/z -> %z31.d # LDFF1H { .D }, /Z, [, .D] (LDFF1H-Z.P.BZ-D.64.unscaled) -c4c0e000 : ldff1h z0.d, p0/Z, [x0, z0.d] : ldff1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d -c4c5e482 : ldff1h z2.d, p1/Z, [x4, z5.d] : ldff1h (%x4,%z5.d)[8byte] %p1/z -> %z2.d -c4c7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d] : ldff1h (%x6,%z7.d)[8byte] %p2/z -> %z4.d -c4c9e906 : ldff1h z6.d, p2/Z, [x8, z9.d] : ldff1h (%x8,%z9.d)[8byte] %p2/z -> %z6.d -c4cbed48 : ldff1h z8.d, p3/Z, [x10, z11.d] : ldff1h (%x10,%z11.d)[8byte] %p3/z -> %z8.d -c4cded6a : ldff1h z10.d, p3/Z, [x11, z13.d] : ldff1h (%x11,%z13.d)[8byte] %p3/z -> %z10.d -c4cff1ac : ldff1h z12.d, p4/Z, [x13, z15.d] : ldff1h (%x13,%z15.d)[8byte] %p4/z -> %z12.d -c4d1f1ee : ldff1h z14.d, p4/Z, [x15, z17.d] : ldff1h (%x15,%z17.d)[8byte] %p4/z -> %z14.d -c4d3f630 : ldff1h z16.d, p5/Z, [x17, z19.d] : ldff1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d -c4d4f671 : ldff1h z17.d, p5/Z, [x19, z20.d] : ldff1h (%x19,%z20.d)[8byte] %p5/z -> %z17.d -c4d6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d] : ldff1h (%x21,%z22.d)[8byte] %p5/z -> %z19.d -c4d8faf5 : ldff1h z21.d, p6/Z, [x23, z24.d] : ldff1h (%x23,%z24.d)[8byte] %p6/z -> %z21.d -c4dafb17 : ldff1h z23.d, p6/Z, [x24, z26.d] : ldff1h 
(%x24,%z26.d)[8byte] %p6/z -> %z23.d -c4dcff59 : ldff1h z25.d, p7/Z, [x26, z28.d] : ldff1h (%x26,%z28.d)[8byte] %p7/z -> %z25.d -c4deff9b : ldff1h z27.d, p7/Z, [x28, z30.d] : ldff1h (%x28,%z30.d)[8byte] %p7/z -> %z27.d -c4dfffff : ldff1h z31.d, p7/Z, [sp, z31.d] : ldff1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d +c4c0e000 : ldff1h z0.d, p0/Z, [x0, z0.d] : ldff1h (%x0,%z0.d)[2byte] %p0/z -> %z0.d +c4c5e482 : ldff1h z2.d, p1/Z, [x4, z5.d] : ldff1h (%x4,%z5.d)[2byte] %p1/z -> %z2.d +c4c7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d] : ldff1h (%x6,%z7.d)[2byte] %p2/z -> %z4.d +c4c9e906 : ldff1h z6.d, p2/Z, [x8, z9.d] : ldff1h (%x8,%z9.d)[2byte] %p2/z -> %z6.d +c4cbed48 : ldff1h z8.d, p3/Z, [x10, z11.d] : ldff1h (%x10,%z11.d)[2byte] %p3/z -> %z8.d +c4cded6a : ldff1h z10.d, p3/Z, [x11, z13.d] : ldff1h (%x11,%z13.d)[2byte] %p3/z -> %z10.d +c4cff1ac : ldff1h z12.d, p4/Z, [x13, z15.d] : ldff1h (%x13,%z15.d)[2byte] %p4/z -> %z12.d +c4d1f1ee : ldff1h z14.d, p4/Z, [x15, z17.d] : ldff1h (%x15,%z17.d)[2byte] %p4/z -> %z14.d +c4d3f630 : ldff1h z16.d, p5/Z, [x17, z19.d] : ldff1h (%x17,%z19.d)[2byte] %p5/z -> %z16.d +c4d4f671 : ldff1h z17.d, p5/Z, [x19, z20.d] : ldff1h (%x19,%z20.d)[2byte] %p5/z -> %z17.d +c4d6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d] : ldff1h (%x21,%z22.d)[2byte] %p5/z -> %z19.d +c4d8faf5 : ldff1h z21.d, p6/Z, [x23, z24.d] : ldff1h (%x23,%z24.d)[2byte] %p6/z -> %z21.d +c4dafb17 : ldff1h z23.d, p6/Z, [x24, z26.d] : ldff1h (%x24,%z26.d)[2byte] %p6/z -> %z23.d +c4dcff59 : ldff1h z25.d, p7/Z, [x26, z28.d] : ldff1h (%x26,%z28.d)[2byte] %p7/z -> %z25.d +c4deff9b : ldff1h z27.d, p7/Z, [x28, z30.d] : ldff1h (%x28,%z30.d)[2byte] %p7/z -> %z27.d +c4dfffff : ldff1h z31.d, p7/Z, [sp, z31.d] : ldff1h (%sp,%z31.d)[2byte] %p7/z -> %z31.d # LDFF1H { .D }, /Z, [, .D, LSL #1] (LDFF1H-Z.P.BZ-D.64.scaled) -c4e0e000 : ldff1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d -c4e5e482 : ldff1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1h (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d 
-c4e7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1h (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d -c4e9e906 : ldff1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1h (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d -c4ebed48 : ldff1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1h (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d -c4eded6a : ldff1h z10.d, p3/Z, [x11, z13.d, LSL #1] : ldff1h (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d -c4eff1ac : ldff1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1h (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d -c4f1f1ee : ldff1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1h (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d -c4f3f630 : ldff1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d -c4f4f671 : ldff1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1h (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d -c4f6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ldff1h (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d -c4f8faf5 : ldff1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1h (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d -c4fafb17 : ldff1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1h (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d -c4fcff59 : ldff1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ldff1h (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d -c4feff9b : ldff1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1h (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d -c4ffffff : ldff1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d +c4e0e000 : ldff1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1h (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d +c4e5e482 : ldff1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1h (%x4,%z5.d,lsl #1)[2byte] %p1/z -> %z2.d +c4e7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1h (%x6,%z7.d,lsl #1)[2byte] %p2/z -> %z4.d +c4e9e906 : ldff1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1h (%x8,%z9.d,lsl #1)[2byte] %p2/z -> %z6.d +c4ebed48 : ldff1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1h (%x10,%z11.d,lsl #1)[2byte] %p3/z -> %z8.d +c4eded6a : ldff1h z10.d, 
p3/Z, [x11, z13.d, LSL #1] : ldff1h (%x11,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d +c4eff1ac : ldff1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1h (%x13,%z15.d,lsl #1)[2byte] %p4/z -> %z12.d +c4f1f1ee : ldff1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1h (%x15,%z17.d,lsl #1)[2byte] %p4/z -> %z14.d +c4f3f630 : ldff1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1h (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d +c4f4f671 : ldff1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1h (%x19,%z20.d,lsl #1)[2byte] %p5/z -> %z17.d +c4f6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ldff1h (%x21,%z22.d,lsl #1)[2byte] %p5/z -> %z19.d +c4f8faf5 : ldff1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1h (%x23,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d +c4fafb17 : ldff1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1h (%x24,%z26.d,lsl #1)[2byte] %p6/z -> %z23.d +c4fcff59 : ldff1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ldff1h (%x26,%z28.d,lsl #1)[2byte] %p7/z -> %z25.d +c4feff9b : ldff1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1h (%x28,%z30.d,lsl #1)[2byte] %p7/z -> %z27.d +c4ffffff : ldff1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1h (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d # LDFF1SB { .S }, /Z, [, .S, ] (LDFF1SB-Z.P.BZ-S.x32.unscaled) -84002000 : ldff1sb z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1sb (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s -84052482 : ldff1sb z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1sb (%x4,%z5.s,uxtw)[8byte] %p1/z -> %z2.s -840728c4 : ldff1sb z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1sb (%x6,%z7.s,uxtw)[8byte] %p2/z -> %z4.s -84092906 : ldff1sb z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1sb (%x8,%z9.s,uxtw)[8byte] %p2/z -> %z6.s -840b2d48 : ldff1sb z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1sb (%x10,%z11.s,uxtw)[8byte] %p3/z -> %z8.s -840d2d6a : ldff1sb z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1sb (%x11,%z13.s,uxtw)[8byte] %p3/z -> %z10.s -840f31ac : ldff1sb z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1sb (%x13,%z15.s,uxtw)[8byte] %p4/z -> %z12.s -841131ee : ldff1sb z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1sb (%x15,%z17.s,uxtw)[8byte] %p4/z -> 
%z14.s -84133630 : ldff1sb z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1sb (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s -84143671 : ldff1sb z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1sb (%x19,%z20.s,uxtw)[8byte] %p5/z -> %z17.s -841636b3 : ldff1sb z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1sb (%x21,%z22.s,uxtw)[8byte] %p5/z -> %z19.s -84183af5 : ldff1sb z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1sb (%x23,%z24.s,uxtw)[8byte] %p6/z -> %z21.s -841a3b17 : ldff1sb z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1sb (%x24,%z26.s,uxtw)[8byte] %p6/z -> %z23.s -841c3f59 : ldff1sb z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1sb (%x26,%z28.s,uxtw)[8byte] %p7/z -> %z25.s -841e3f9b : ldff1sb z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1sb (%x28,%z30.s,uxtw)[8byte] %p7/z -> %z27.s -841f3fff : ldff1sb z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1sb (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s -84402000 : ldff1sb z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1sb (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s -84452482 : ldff1sb z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1sb (%x4,%z5.s,sxtw)[8byte] %p1/z -> %z2.s -844728c4 : ldff1sb z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1sb (%x6,%z7.s,sxtw)[8byte] %p2/z -> %z4.s -84492906 : ldff1sb z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1sb (%x8,%z9.s,sxtw)[8byte] %p2/z -> %z6.s -844b2d48 : ldff1sb z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1sb (%x10,%z11.s,sxtw)[8byte] %p3/z -> %z8.s -844d2d6a : ldff1sb z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1sb (%x11,%z13.s,sxtw)[8byte] %p3/z -> %z10.s -844f31ac : ldff1sb z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1sb (%x13,%z15.s,sxtw)[8byte] %p4/z -> %z12.s -845131ee : ldff1sb z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1sb (%x15,%z17.s,sxtw)[8byte] %p4/z -> %z14.s -84533630 : ldff1sb z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1sb (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s -84543671 : ldff1sb z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1sb (%x19,%z20.s,sxtw)[8byte] %p5/z -> %z17.s -845636b3 : ldff1sb z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1sb (%x21,%z22.s,sxtw)[8byte] %p5/z -> %z19.s -84583af5 : ldff1sb z21.s, p6/Z, [x23, 
z24.s, SXTW] : ldff1sb (%x23,%z24.s,sxtw)[8byte] %p6/z -> %z21.s -845a3b17 : ldff1sb z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1sb (%x24,%z26.s,sxtw)[8byte] %p6/z -> %z23.s -845c3f59 : ldff1sb z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1sb (%x26,%z28.s,sxtw)[8byte] %p7/z -> %z25.s -845e3f9b : ldff1sb z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1sb (%x28,%z30.s,sxtw)[8byte] %p7/z -> %z27.s -845f3fff : ldff1sb z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1sb (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s +84002000 : ldff1sb z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1sb (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s +84052482 : ldff1sb z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1sb (%x4,%z5.s,uxtw)[1byte] %p1/z -> %z2.s +840728c4 : ldff1sb z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1sb (%x6,%z7.s,uxtw)[1byte] %p2/z -> %z4.s +84092906 : ldff1sb z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1sb (%x8,%z9.s,uxtw)[1byte] %p2/z -> %z6.s +840b2d48 : ldff1sb z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1sb (%x10,%z11.s,uxtw)[1byte] %p3/z -> %z8.s +840d2d6a : ldff1sb z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1sb (%x11,%z13.s,uxtw)[1byte] %p3/z -> %z10.s +840f31ac : ldff1sb z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1sb (%x13,%z15.s,uxtw)[1byte] %p4/z -> %z12.s +841131ee : ldff1sb z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1sb (%x15,%z17.s,uxtw)[1byte] %p4/z -> %z14.s +84133630 : ldff1sb z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1sb (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s +84143671 : ldff1sb z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1sb (%x19,%z20.s,uxtw)[1byte] %p5/z -> %z17.s +841636b3 : ldff1sb z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1sb (%x21,%z22.s,uxtw)[1byte] %p5/z -> %z19.s +84183af5 : ldff1sb z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1sb (%x23,%z24.s,uxtw)[1byte] %p6/z -> %z21.s +841a3b17 : ldff1sb z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1sb (%x24,%z26.s,uxtw)[1byte] %p6/z -> %z23.s +841c3f59 : ldff1sb z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1sb (%x26,%z28.s,uxtw)[1byte] %p7/z -> %z25.s +841e3f9b : ldff1sb z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1sb (%x28,%z30.s,uxtw)[1byte] 
%p7/z -> %z27.s +841f3fff : ldff1sb z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1sb (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s +84402000 : ldff1sb z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1sb (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s +84452482 : ldff1sb z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1sb (%x4,%z5.s,sxtw)[1byte] %p1/z -> %z2.s +844728c4 : ldff1sb z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1sb (%x6,%z7.s,sxtw)[1byte] %p2/z -> %z4.s +84492906 : ldff1sb z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1sb (%x8,%z9.s,sxtw)[1byte] %p2/z -> %z6.s +844b2d48 : ldff1sb z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1sb (%x10,%z11.s,sxtw)[1byte] %p3/z -> %z8.s +844d2d6a : ldff1sb z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1sb (%x11,%z13.s,sxtw)[1byte] %p3/z -> %z10.s +844f31ac : ldff1sb z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1sb (%x13,%z15.s,sxtw)[1byte] %p4/z -> %z12.s +845131ee : ldff1sb z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1sb (%x15,%z17.s,sxtw)[1byte] %p4/z -> %z14.s +84533630 : ldff1sb z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1sb (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s +84543671 : ldff1sb z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1sb (%x19,%z20.s,sxtw)[1byte] %p5/z -> %z17.s +845636b3 : ldff1sb z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1sb (%x21,%z22.s,sxtw)[1byte] %p5/z -> %z19.s +84583af5 : ldff1sb z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1sb (%x23,%z24.s,sxtw)[1byte] %p6/z -> %z21.s +845a3b17 : ldff1sb z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1sb (%x24,%z26.s,sxtw)[1byte] %p6/z -> %z23.s +845c3f59 : ldff1sb z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1sb (%x26,%z28.s,sxtw)[1byte] %p7/z -> %z25.s +845e3f9b : ldff1sb z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1sb (%x28,%z30.s,sxtw)[1byte] %p7/z -> %z27.s +845f3fff : ldff1sb z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1sb (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s # LDFF1SB { .S }, /Z, [.S{, #}] (LDFF1SB-Z.P.AI-S) -8420a000 : ldff1sb z0.s, p0/Z, [z0.s, #0] : ldff1sb (%z0.s)[8byte] %p0/z -> %z0.s -8422a482 : ldff1sb z2.s, p1/Z, [z4.s, #2] : ldff1sb +0x02(%z4.s)[8byte] %p1/z -> %z2.s -8424a8c4 : ldff1sb 
z4.s, p2/Z, [z6.s, #4] : ldff1sb +0x04(%z6.s)[8byte] %p2/z -> %z4.s -8426a906 : ldff1sb z6.s, p2/Z, [z8.s, #6] : ldff1sb +0x06(%z8.s)[8byte] %p2/z -> %z6.s -8428ad48 : ldff1sb z8.s, p3/Z, [z10.s, #8] : ldff1sb +0x08(%z10.s)[8byte] %p3/z -> %z8.s -842aad8a : ldff1sb z10.s, p3/Z, [z12.s, #10] : ldff1sb +0x0a(%z12.s)[8byte] %p3/z -> %z10.s -842cb1cc : ldff1sb z12.s, p4/Z, [z14.s, #12] : ldff1sb +0x0c(%z14.s)[8byte] %p4/z -> %z12.s -842eb20e : ldff1sb z14.s, p4/Z, [z16.s, #14] : ldff1sb +0x0e(%z16.s)[8byte] %p4/z -> %z14.s -8430b650 : ldff1sb z16.s, p5/Z, [z18.s, #16] : ldff1sb +0x10(%z18.s)[8byte] %p5/z -> %z16.s -8431b671 : ldff1sb z17.s, p5/Z, [z19.s, #17] : ldff1sb +0x11(%z19.s)[8byte] %p5/z -> %z17.s -8433b6b3 : ldff1sb z19.s, p5/Z, [z21.s, #19] : ldff1sb +0x13(%z21.s)[8byte] %p5/z -> %z19.s -8435baf5 : ldff1sb z21.s, p6/Z, [z23.s, #21] : ldff1sb +0x15(%z23.s)[8byte] %p6/z -> %z21.s -8437bb37 : ldff1sb z23.s, p6/Z, [z25.s, #23] : ldff1sb +0x17(%z25.s)[8byte] %p6/z -> %z23.s -8439bf79 : ldff1sb z25.s, p7/Z, [z27.s, #25] : ldff1sb +0x19(%z27.s)[8byte] %p7/z -> %z25.s -843bbfbb : ldff1sb z27.s, p7/Z, [z29.s, #27] : ldff1sb +0x1b(%z29.s)[8byte] %p7/z -> %z27.s -843fbfff : ldff1sb z31.s, p7/Z, [z31.s, #31] : ldff1sb +0x1f(%z31.s)[8byte] %p7/z -> %z31.s +8420a000 : ldff1sb z0.s, p0/Z, [z0.s, #0] : ldff1sb (%z0.s)[1byte] %p0/z -> %z0.s +8422a482 : ldff1sb z2.s, p1/Z, [z4.s, #2] : ldff1sb +0x02(%z4.s)[1byte] %p1/z -> %z2.s +8424a8c4 : ldff1sb z4.s, p2/Z, [z6.s, #4] : ldff1sb +0x04(%z6.s)[1byte] %p2/z -> %z4.s +8426a906 : ldff1sb z6.s, p2/Z, [z8.s, #6] : ldff1sb +0x06(%z8.s)[1byte] %p2/z -> %z6.s +8428ad48 : ldff1sb z8.s, p3/Z, [z10.s, #8] : ldff1sb +0x08(%z10.s)[1byte] %p3/z -> %z8.s +842aad8a : ldff1sb z10.s, p3/Z, [z12.s, #10] : ldff1sb +0x0a(%z12.s)[1byte] %p3/z -> %z10.s +842cb1cc : ldff1sb z12.s, p4/Z, [z14.s, #12] : ldff1sb +0x0c(%z14.s)[1byte] %p4/z -> %z12.s +842eb20e : ldff1sb z14.s, p4/Z, [z16.s, #14] : ldff1sb +0x0e(%z16.s)[1byte] %p4/z -> %z14.s +8430b650 : 
ldff1sb z16.s, p5/Z, [z18.s, #16] : ldff1sb +0x10(%z18.s)[1byte] %p5/z -> %z16.s +8431b671 : ldff1sb z17.s, p5/Z, [z19.s, #17] : ldff1sb +0x11(%z19.s)[1byte] %p5/z -> %z17.s +8433b6b3 : ldff1sb z19.s, p5/Z, [z21.s, #19] : ldff1sb +0x13(%z21.s)[1byte] %p5/z -> %z19.s +8435baf5 : ldff1sb z21.s, p6/Z, [z23.s, #21] : ldff1sb +0x15(%z23.s)[1byte] %p6/z -> %z21.s +8437bb37 : ldff1sb z23.s, p6/Z, [z25.s, #23] : ldff1sb +0x17(%z25.s)[1byte] %p6/z -> %z23.s +8439bf79 : ldff1sb z25.s, p7/Z, [z27.s, #25] : ldff1sb +0x19(%z27.s)[1byte] %p7/z -> %z25.s +843bbfbb : ldff1sb z27.s, p7/Z, [z29.s, #27] : ldff1sb +0x1b(%z29.s)[1byte] %p7/z -> %z27.s +843fbfff : ldff1sb z31.s, p7/Z, [z31.s, #31] : ldff1sb +0x1f(%z31.s)[1byte] %p7/z -> %z31.s # LDFF1SB { .D }, /Z, [{, }] (LDFF1SB-Z.P.BR-S64) -a5806000 : ldff1sb z0.d, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[4byte] %p0/z -> %z0.d -a5856482 : ldff1sb z2.d, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[4byte] %p1/z -> %z2.d -a58768c4 : ldff1sb z4.d, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[4byte] %p2/z -> %z4.d -a5896906 : ldff1sb z6.d, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[4byte] %p2/z -> %z6.d -a58b6d48 : ldff1sb z8.d, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[4byte] %p3/z -> %z8.d -a58c6d6a : ldff1sb z10.d, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[4byte] %p3/z -> %z10.d -a58e71ac : ldff1sb z12.d, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[4byte] %p4/z -> %z12.d -a59071ee : ldff1sb z14.d, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[4byte] %p4/z -> %z14.d -a5927630 : ldff1sb z16.d, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[4byte] %p5/z -> %z16.d -a5947671 : ldff1sb z17.d, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[4byte] %p5/z -> %z17.d -a59676b3 : ldff1sb z19.d, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[4byte] %p5/z -> %z19.d -a5987af5 : ldff1sb z21.d, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[4byte] %p6/z -> %z21.d -a5997b17 : ldff1sb z23.d, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[4byte] %p6/z -> %z23.d -a59b7f59 : ldff1sb z25.d, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[4byte] %p7/z -> 
%z25.d -a59d7f9b : ldff1sb z27.d, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[4byte] %p7/z -> %z27.d -a59e7fff : ldff1sb z31.d, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[4byte] %p7/z -> %z31.d +a5806000 : ldff1sb z0.d, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.d +a5856482 : ldff1sb z2.d, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[1byte] %p1/z -> %z2.d +a58768c4 : ldff1sb z4.d, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[1byte] %p2/z -> %z4.d +a5896906 : ldff1sb z6.d, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[1byte] %p2/z -> %z6.d +a58b6d48 : ldff1sb z8.d, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[1byte] %p3/z -> %z8.d +a58c6d6a : ldff1sb z10.d, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[1byte] %p3/z -> %z10.d +a58e71ac : ldff1sb z12.d, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[1byte] %p4/z -> %z12.d +a59071ee : ldff1sb z14.d, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[1byte] %p4/z -> %z14.d +a5927630 : ldff1sb z16.d, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.d +a5947671 : ldff1sb z17.d, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[1byte] %p5/z -> %z17.d +a59676b3 : ldff1sb z19.d, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[1byte] %p5/z -> %z19.d +a5987af5 : ldff1sb z21.d, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[1byte] %p6/z -> %z21.d +a5997b17 : ldff1sb z23.d, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[1byte] %p6/z -> %z23.d +a59b7f59 : ldff1sb z25.d, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[1byte] %p7/z -> %z25.d +a59d7f9b : ldff1sb z27.d, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[1byte] %p7/z -> %z27.d +a59e7fff : ldff1sb z31.d, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.d # LDFF1SB { .S }, /Z, [{, }] (LDFF1SB-Z.P.BR-S32) -a5a06000 : ldff1sb z0.s, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[8byte] %p0/z -> %z0.s -a5a56482 : ldff1sb z2.s, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[8byte] %p1/z -> %z2.s -a5a768c4 : ldff1sb z4.s, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[8byte] %p2/z -> %z4.s -a5a96906 : ldff1sb z6.s, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[8byte] %p2/z -> %z6.s -a5ab6d48 : ldff1sb z8.s, p3/Z, [x10, 
x11] : ldff1sb (%x10,%x11)[8byte] %p3/z -> %z8.s -a5ac6d6a : ldff1sb z10.s, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[8byte] %p3/z -> %z10.s -a5ae71ac : ldff1sb z12.s, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[8byte] %p4/z -> %z12.s -a5b071ee : ldff1sb z14.s, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[8byte] %p4/z -> %z14.s -a5b27630 : ldff1sb z16.s, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[8byte] %p5/z -> %z16.s -a5b47671 : ldff1sb z17.s, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[8byte] %p5/z -> %z17.s -a5b676b3 : ldff1sb z19.s, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[8byte] %p5/z -> %z19.s -a5b87af5 : ldff1sb z21.s, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[8byte] %p6/z -> %z21.s -a5b97b17 : ldff1sb z23.s, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[8byte] %p6/z -> %z23.s -a5bb7f59 : ldff1sb z25.s, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[8byte] %p7/z -> %z25.s -a5bd7f9b : ldff1sb z27.s, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[8byte] %p7/z -> %z27.s -a5be7fff : ldff1sb z31.s, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[8byte] %p7/z -> %z31.s +a5a06000 : ldff1sb z0.s, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.s +a5a56482 : ldff1sb z2.s, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[1byte] %p1/z -> %z2.s +a5a768c4 : ldff1sb z4.s, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[1byte] %p2/z -> %z4.s +a5a96906 : ldff1sb z6.s, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[1byte] %p2/z -> %z6.s +a5ab6d48 : ldff1sb z8.s, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[1byte] %p3/z -> %z8.s +a5ac6d6a : ldff1sb z10.s, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[1byte] %p3/z -> %z10.s +a5ae71ac : ldff1sb z12.s, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[1byte] %p4/z -> %z12.s +a5b071ee : ldff1sb z14.s, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[1byte] %p4/z -> %z14.s +a5b27630 : ldff1sb z16.s, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.s +a5b47671 : ldff1sb z17.s, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[1byte] %p5/z -> %z17.s +a5b676b3 : ldff1sb z19.s, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[1byte] %p5/z -> %z19.s +a5b87af5 : ldff1sb 
z21.s, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[1byte] %p6/z -> %z21.s +a5b97b17 : ldff1sb z23.s, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[1byte] %p6/z -> %z23.s +a5bb7f59 : ldff1sb z25.s, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[1byte] %p7/z -> %z25.s +a5bd7f9b : ldff1sb z27.s, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[1byte] %p7/z -> %z27.s +a5be7fff : ldff1sb z31.s, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.s # LDFF1SB { .H }, /Z, [{, }] (LDFF1SB-Z.P.BR-S16) -a5c06000 : ldff1sb z0.h, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[16byte] %p0/z -> %z0.h -a5c56482 : ldff1sb z2.h, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[16byte] %p1/z -> %z2.h -a5c768c4 : ldff1sb z4.h, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[16byte] %p2/z -> %z4.h -a5c96906 : ldff1sb z6.h, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[16byte] %p2/z -> %z6.h -a5cb6d48 : ldff1sb z8.h, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[16byte] %p3/z -> %z8.h -a5cc6d6a : ldff1sb z10.h, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[16byte] %p3/z -> %z10.h -a5ce71ac : ldff1sb z12.h, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[16byte] %p4/z -> %z12.h -a5d071ee : ldff1sb z14.h, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[16byte] %p4/z -> %z14.h -a5d27630 : ldff1sb z16.h, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[16byte] %p5/z -> %z16.h -a5d47671 : ldff1sb z17.h, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[16byte] %p5/z -> %z17.h -a5d676b3 : ldff1sb z19.h, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[16byte] %p5/z -> %z19.h -a5d87af5 : ldff1sb z21.h, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[16byte] %p6/z -> %z21.h -a5d97b17 : ldff1sb z23.h, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[16byte] %p6/z -> %z23.h -a5db7f59 : ldff1sb z25.h, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[16byte] %p7/z -> %z25.h -a5dd7f9b : ldff1sb z27.h, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[16byte] %p7/z -> %z27.h -a5de7fff : ldff1sb z31.h, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[16byte] %p7/z -> %z31.h +a5c06000 : ldff1sb z0.h, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.h +a5c56482 : ldff1sb z2.h, p1/Z, 
[x4, x5] : ldff1sb (%x4,%x5)[1byte] %p1/z -> %z2.h +a5c768c4 : ldff1sb z4.h, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[1byte] %p2/z -> %z4.h +a5c96906 : ldff1sb z6.h, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[1byte] %p2/z -> %z6.h +a5cb6d48 : ldff1sb z8.h, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[1byte] %p3/z -> %z8.h +a5cc6d6a : ldff1sb z10.h, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[1byte] %p3/z -> %z10.h +a5ce71ac : ldff1sb z12.h, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[1byte] %p4/z -> %z12.h +a5d071ee : ldff1sb z14.h, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[1byte] %p4/z -> %z14.h +a5d27630 : ldff1sb z16.h, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.h +a5d47671 : ldff1sb z17.h, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[1byte] %p5/z -> %z17.h +a5d676b3 : ldff1sb z19.h, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[1byte] %p5/z -> %z19.h +a5d87af5 : ldff1sb z21.h, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[1byte] %p6/z -> %z21.h +a5d97b17 : ldff1sb z23.h, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[1byte] %p6/z -> %z23.h +a5db7f59 : ldff1sb z25.h, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[1byte] %p7/z -> %z25.h +a5dd7f9b : ldff1sb z27.h, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[1byte] %p7/z -> %z27.h +a5de7fff : ldff1sb z31.h, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.h # LDFF1SB { .D }, /Z, [, .D, ] (LDFF1SB-Z.P.BZ-D.x32.unscaled) -c4002000 : ldff1sb z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sb (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d -c4052482 : ldff1sb z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sb (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d -c40728c4 : ldff1sb z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sb (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d -c4092906 : ldff1sb z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sb (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d -c40b2d48 : ldff1sb z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sb (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d -c40d2d6a : ldff1sb z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sb (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d -c40f31ac : ldff1sb z12.d, p4/Z, [x13, z15.d, UXTW] : 
ldff1sb (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d -c41131ee : ldff1sb z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sb (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d -c4133630 : ldff1sb z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sb (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d -c4143671 : ldff1sb z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sb (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d -c41636b3 : ldff1sb z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sb (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d -c4183af5 : ldff1sb z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sb (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d -c41a3b17 : ldff1sb z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sb (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d -c41c3f59 : ldff1sb z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1sb (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d -c41e3f9b : ldff1sb z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sb (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d -c41f3fff : ldff1sb z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sb (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d -c4402000 : ldff1sb z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sb (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d -c4452482 : ldff1sb z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sb (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d -c44728c4 : ldff1sb z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sb (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d -c4492906 : ldff1sb z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sb (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d -c44b2d48 : ldff1sb z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sb (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d -c44d2d6a : ldff1sb z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sb (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d -c44f31ac : ldff1sb z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1sb (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d -c45131ee : ldff1sb z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sb (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d -c4533630 : ldff1sb z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sb (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d -c4543671 : ldff1sb z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sb (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d 
-c45636b3 : ldff1sb z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1sb (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d -c4583af5 : ldff1sb z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sb (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d -c45a3b17 : ldff1sb z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sb (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d -c45c3f59 : ldff1sb z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sb (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d -c45e3f9b : ldff1sb z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sb (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d -c45f3fff : ldff1sb z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sb (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d +c4002000 : ldff1sb z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sb (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d +c4052482 : ldff1sb z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sb (%x4,%z5.d,uxtw)[1byte] %p1/z -> %z2.d +c40728c4 : ldff1sb z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sb (%x6,%z7.d,uxtw)[1byte] %p2/z -> %z4.d +c4092906 : ldff1sb z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sb (%x8,%z9.d,uxtw)[1byte] %p2/z -> %z6.d +c40b2d48 : ldff1sb z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sb (%x10,%z11.d,uxtw)[1byte] %p3/z -> %z8.d +c40d2d6a : ldff1sb z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sb (%x11,%z13.d,uxtw)[1byte] %p3/z -> %z10.d +c40f31ac : ldff1sb z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1sb (%x13,%z15.d,uxtw)[1byte] %p4/z -> %z12.d +c41131ee : ldff1sb z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sb (%x15,%z17.d,uxtw)[1byte] %p4/z -> %z14.d +c4133630 : ldff1sb z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sb (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d +c4143671 : ldff1sb z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sb (%x19,%z20.d,uxtw)[1byte] %p5/z -> %z17.d +c41636b3 : ldff1sb z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sb (%x21,%z22.d,uxtw)[1byte] %p5/z -> %z19.d +c4183af5 : ldff1sb z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sb (%x23,%z24.d,uxtw)[1byte] %p6/z -> %z21.d +c41a3b17 : ldff1sb z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sb (%x24,%z26.d,uxtw)[1byte] %p6/z -> %z23.d +c41c3f59 : ldff1sb z25.d, p7/Z, [x26, z28.d, UXTW] 
: ldff1sb (%x26,%z28.d,uxtw)[1byte] %p7/z -> %z25.d +c41e3f9b : ldff1sb z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sb (%x28,%z30.d,uxtw)[1byte] %p7/z -> %z27.d +c41f3fff : ldff1sb z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sb (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d +c4402000 : ldff1sb z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sb (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d +c4452482 : ldff1sb z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sb (%x4,%z5.d,sxtw)[1byte] %p1/z -> %z2.d +c44728c4 : ldff1sb z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sb (%x6,%z7.d,sxtw)[1byte] %p2/z -> %z4.d +c4492906 : ldff1sb z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sb (%x8,%z9.d,sxtw)[1byte] %p2/z -> %z6.d +c44b2d48 : ldff1sb z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sb (%x10,%z11.d,sxtw)[1byte] %p3/z -> %z8.d +c44d2d6a : ldff1sb z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sb (%x11,%z13.d,sxtw)[1byte] %p3/z -> %z10.d +c44f31ac : ldff1sb z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1sb (%x13,%z15.d,sxtw)[1byte] %p4/z -> %z12.d +c45131ee : ldff1sb z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sb (%x15,%z17.d,sxtw)[1byte] %p4/z -> %z14.d +c4533630 : ldff1sb z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sb (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d +c4543671 : ldff1sb z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sb (%x19,%z20.d,sxtw)[1byte] %p5/z -> %z17.d +c45636b3 : ldff1sb z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1sb (%x21,%z22.d,sxtw)[1byte] %p5/z -> %z19.d +c4583af5 : ldff1sb z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sb (%x23,%z24.d,sxtw)[1byte] %p6/z -> %z21.d +c45a3b17 : ldff1sb z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sb (%x24,%z26.d,sxtw)[1byte] %p6/z -> %z23.d +c45c3f59 : ldff1sb z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sb (%x26,%z28.d,sxtw)[1byte] %p7/z -> %z25.d +c45e3f9b : ldff1sb z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sb (%x28,%z30.d,sxtw)[1byte] %p7/z -> %z27.d +c45f3fff : ldff1sb z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sb (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d # LDFF1SB { .D }, /Z, [.D{, #}] (LDFF1SB-Z.P.AI-D) -c420a000 : ldff1sb z0.d, p0/Z, [z0.d, #0] : ldff1sb 
(%z0.d)[4byte] %p0/z -> %z0.d -c422a482 : ldff1sb z2.d, p1/Z, [z4.d, #2] : ldff1sb +0x02(%z4.d)[4byte] %p1/z -> %z2.d -c424a8c4 : ldff1sb z4.d, p2/Z, [z6.d, #4] : ldff1sb +0x04(%z6.d)[4byte] %p2/z -> %z4.d -c426a906 : ldff1sb z6.d, p2/Z, [z8.d, #6] : ldff1sb +0x06(%z8.d)[4byte] %p2/z -> %z6.d -c428ad48 : ldff1sb z8.d, p3/Z, [z10.d, #8] : ldff1sb +0x08(%z10.d)[4byte] %p3/z -> %z8.d -c42aad8a : ldff1sb z10.d, p3/Z, [z12.d, #10] : ldff1sb +0x0a(%z12.d)[4byte] %p3/z -> %z10.d -c42cb1cc : ldff1sb z12.d, p4/Z, [z14.d, #12] : ldff1sb +0x0c(%z14.d)[4byte] %p4/z -> %z12.d -c42eb20e : ldff1sb z14.d, p4/Z, [z16.d, #14] : ldff1sb +0x0e(%z16.d)[4byte] %p4/z -> %z14.d -c430b650 : ldff1sb z16.d, p5/Z, [z18.d, #16] : ldff1sb +0x10(%z18.d)[4byte] %p5/z -> %z16.d -c431b671 : ldff1sb z17.d, p5/Z, [z19.d, #17] : ldff1sb +0x11(%z19.d)[4byte] %p5/z -> %z17.d -c433b6b3 : ldff1sb z19.d, p5/Z, [z21.d, #19] : ldff1sb +0x13(%z21.d)[4byte] %p5/z -> %z19.d -c435baf5 : ldff1sb z21.d, p6/Z, [z23.d, #21] : ldff1sb +0x15(%z23.d)[4byte] %p6/z -> %z21.d -c437bb37 : ldff1sb z23.d, p6/Z, [z25.d, #23] : ldff1sb +0x17(%z25.d)[4byte] %p6/z -> %z23.d -c439bf79 : ldff1sb z25.d, p7/Z, [z27.d, #25] : ldff1sb +0x19(%z27.d)[4byte] %p7/z -> %z25.d -c43bbfbb : ldff1sb z27.d, p7/Z, [z29.d, #27] : ldff1sb +0x1b(%z29.d)[4byte] %p7/z -> %z27.d -c43fbfff : ldff1sb z31.d, p7/Z, [z31.d, #31] : ldff1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d +c420a000 : ldff1sb z0.d, p0/Z, [z0.d, #0] : ldff1sb (%z0.d)[1byte] %p0/z -> %z0.d +c422a482 : ldff1sb z2.d, p1/Z, [z4.d, #2] : ldff1sb +0x02(%z4.d)[1byte] %p1/z -> %z2.d +c424a8c4 : ldff1sb z4.d, p2/Z, [z6.d, #4] : ldff1sb +0x04(%z6.d)[1byte] %p2/z -> %z4.d +c426a906 : ldff1sb z6.d, p2/Z, [z8.d, #6] : ldff1sb +0x06(%z8.d)[1byte] %p2/z -> %z6.d +c428ad48 : ldff1sb z8.d, p3/Z, [z10.d, #8] : ldff1sb +0x08(%z10.d)[1byte] %p3/z -> %z8.d +c42aad8a : ldff1sb z10.d, p3/Z, [z12.d, #10] : ldff1sb +0x0a(%z12.d)[1byte] %p3/z -> %z10.d +c42cb1cc : ldff1sb z12.d, p4/Z, [z14.d, #12] : ldff1sb 
+0x0c(%z14.d)[1byte] %p4/z -> %z12.d +c42eb20e : ldff1sb z14.d, p4/Z, [z16.d, #14] : ldff1sb +0x0e(%z16.d)[1byte] %p4/z -> %z14.d +c430b650 : ldff1sb z16.d, p5/Z, [z18.d, #16] : ldff1sb +0x10(%z18.d)[1byte] %p5/z -> %z16.d +c431b671 : ldff1sb z17.d, p5/Z, [z19.d, #17] : ldff1sb +0x11(%z19.d)[1byte] %p5/z -> %z17.d +c433b6b3 : ldff1sb z19.d, p5/Z, [z21.d, #19] : ldff1sb +0x13(%z21.d)[1byte] %p5/z -> %z19.d +c435baf5 : ldff1sb z21.d, p6/Z, [z23.d, #21] : ldff1sb +0x15(%z23.d)[1byte] %p6/z -> %z21.d +c437bb37 : ldff1sb z23.d, p6/Z, [z25.d, #23] : ldff1sb +0x17(%z25.d)[1byte] %p6/z -> %z23.d +c439bf79 : ldff1sb z25.d, p7/Z, [z27.d, #25] : ldff1sb +0x19(%z27.d)[1byte] %p7/z -> %z25.d +c43bbfbb : ldff1sb z27.d, p7/Z, [z29.d, #27] : ldff1sb +0x1b(%z29.d)[1byte] %p7/z -> %z27.d +c43fbfff : ldff1sb z31.d, p7/Z, [z31.d, #31] : ldff1sb +0x1f(%z31.d)[1byte] %p7/z -> %z31.d # LDFF1SB { .D }, /Z, [, .D] (LDFF1SB-Z.P.BZ-D.64.unscaled) -c440a000 : ldff1sb z0.d, p0/Z, [x0, z0.d] : ldff1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d -c445a482 : ldff1sb z2.d, p1/Z, [x4, z5.d] : ldff1sb (%x4,%z5.d)[4byte] %p1/z -> %z2.d -c447a8c4 : ldff1sb z4.d, p2/Z, [x6, z7.d] : ldff1sb (%x6,%z7.d)[4byte] %p2/z -> %z4.d -c449a906 : ldff1sb z6.d, p2/Z, [x8, z9.d] : ldff1sb (%x8,%z9.d)[4byte] %p2/z -> %z6.d -c44bad48 : ldff1sb z8.d, p3/Z, [x10, z11.d] : ldff1sb (%x10,%z11.d)[4byte] %p3/z -> %z8.d -c44dad6a : ldff1sb z10.d, p3/Z, [x11, z13.d] : ldff1sb (%x11,%z13.d)[4byte] %p3/z -> %z10.d -c44fb1ac : ldff1sb z12.d, p4/Z, [x13, z15.d] : ldff1sb (%x13,%z15.d)[4byte] %p4/z -> %z12.d -c451b1ee : ldff1sb z14.d, p4/Z, [x15, z17.d] : ldff1sb (%x15,%z17.d)[4byte] %p4/z -> %z14.d -c453b630 : ldff1sb z16.d, p5/Z, [x17, z19.d] : ldff1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d -c454b671 : ldff1sb z17.d, p5/Z, [x19, z20.d] : ldff1sb (%x19,%z20.d)[4byte] %p5/z -> %z17.d -c456b6b3 : ldff1sb z19.d, p5/Z, [x21, z22.d] : ldff1sb (%x21,%z22.d)[4byte] %p5/z -> %z19.d -c458baf5 : ldff1sb z21.d, p6/Z, [x23, z24.d] : ldff1sb 
(%x23,%z24.d)[4byte] %p6/z -> %z21.d -c45abb17 : ldff1sb z23.d, p6/Z, [x24, z26.d] : ldff1sb (%x24,%z26.d)[4byte] %p6/z -> %z23.d -c45cbf59 : ldff1sb z25.d, p7/Z, [x26, z28.d] : ldff1sb (%x26,%z28.d)[4byte] %p7/z -> %z25.d -c45ebf9b : ldff1sb z27.d, p7/Z, [x28, z30.d] : ldff1sb (%x28,%z30.d)[4byte] %p7/z -> %z27.d -c45fbfff : ldff1sb z31.d, p7/Z, [sp, z31.d] : ldff1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d +c440a000 : ldff1sb z0.d, p0/Z, [x0, z0.d] : ldff1sb (%x0,%z0.d)[1byte] %p0/z -> %z0.d +c445a482 : ldff1sb z2.d, p1/Z, [x4, z5.d] : ldff1sb (%x4,%z5.d)[1byte] %p1/z -> %z2.d +c447a8c4 : ldff1sb z4.d, p2/Z, [x6, z7.d] : ldff1sb (%x6,%z7.d)[1byte] %p2/z -> %z4.d +c449a906 : ldff1sb z6.d, p2/Z, [x8, z9.d] : ldff1sb (%x8,%z9.d)[1byte] %p2/z -> %z6.d +c44bad48 : ldff1sb z8.d, p3/Z, [x10, z11.d] : ldff1sb (%x10,%z11.d)[1byte] %p3/z -> %z8.d +c44dad6a : ldff1sb z10.d, p3/Z, [x11, z13.d] : ldff1sb (%x11,%z13.d)[1byte] %p3/z -> %z10.d +c44fb1ac : ldff1sb z12.d, p4/Z, [x13, z15.d] : ldff1sb (%x13,%z15.d)[1byte] %p4/z -> %z12.d +c451b1ee : ldff1sb z14.d, p4/Z, [x15, z17.d] : ldff1sb (%x15,%z17.d)[1byte] %p4/z -> %z14.d +c453b630 : ldff1sb z16.d, p5/Z, [x17, z19.d] : ldff1sb (%x17,%z19.d)[1byte] %p5/z -> %z16.d +c454b671 : ldff1sb z17.d, p5/Z, [x19, z20.d] : ldff1sb (%x19,%z20.d)[1byte] %p5/z -> %z17.d +c456b6b3 : ldff1sb z19.d, p5/Z, [x21, z22.d] : ldff1sb (%x21,%z22.d)[1byte] %p5/z -> %z19.d +c458baf5 : ldff1sb z21.d, p6/Z, [x23, z24.d] : ldff1sb (%x23,%z24.d)[1byte] %p6/z -> %z21.d +c45abb17 : ldff1sb z23.d, p6/Z, [x24, z26.d] : ldff1sb (%x24,%z26.d)[1byte] %p6/z -> %z23.d +c45cbf59 : ldff1sb z25.d, p7/Z, [x26, z28.d] : ldff1sb (%x26,%z28.d)[1byte] %p7/z -> %z25.d +c45ebf9b : ldff1sb z27.d, p7/Z, [x28, z30.d] : ldff1sb (%x28,%z30.d)[1byte] %p7/z -> %z27.d +c45fbfff : ldff1sb z31.d, p7/Z, [sp, z31.d] : ldff1sb (%sp,%z31.d)[1byte] %p7/z -> %z31.d # LDFF1SH { .S }, /Z, [, .S, ] (LDFF1SH-Z.P.BZ-S.x32.unscaled) -84802000 : ldff1sh z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1sh 
(%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s -84852482 : ldff1sh z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1sh (%x4,%z5.s,uxtw)[16byte] %p1/z -> %z2.s -848728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1sh (%x6,%z7.s,uxtw)[16byte] %p2/z -> %z4.s -84892906 : ldff1sh z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1sh (%x8,%z9.s,uxtw)[16byte] %p2/z -> %z6.s -848b2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1sh (%x10,%z11.s,uxtw)[16byte] %p3/z -> %z8.s -848d2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1sh (%x11,%z13.s,uxtw)[16byte] %p3/z -> %z10.s -848f31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1sh (%x13,%z15.s,uxtw)[16byte] %p4/z -> %z12.s -849131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1sh (%x15,%z17.s,uxtw)[16byte] %p4/z -> %z14.s -84933630 : ldff1sh z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1sh (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s -84943671 : ldff1sh z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1sh (%x19,%z20.s,uxtw)[16byte] %p5/z -> %z17.s -849636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1sh (%x21,%z22.s,uxtw)[16byte] %p5/z -> %z19.s -84983af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1sh (%x23,%z24.s,uxtw)[16byte] %p6/z -> %z21.s -849a3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1sh (%x24,%z26.s,uxtw)[16byte] %p6/z -> %z23.s -849c3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1sh (%x26,%z28.s,uxtw)[16byte] %p7/z -> %z25.s -849e3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1sh (%x28,%z30.s,uxtw)[16byte] %p7/z -> %z27.s -849f3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1sh (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s -84c02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1sh (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s -84c52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1sh (%x4,%z5.s,sxtw)[16byte] %p1/z -> %z2.s -84c728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1sh (%x6,%z7.s,sxtw)[16byte] %p2/z -> %z4.s -84c92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1sh (%x8,%z9.s,sxtw)[16byte] %p2/z -> %z6.s -84cb2d48 
: ldff1sh z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1sh (%x10,%z11.s,sxtw)[16byte] %p3/z -> %z8.s -84cd2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1sh (%x11,%z13.s,sxtw)[16byte] %p3/z -> %z10.s -84cf31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1sh (%x13,%z15.s,sxtw)[16byte] %p4/z -> %z12.s -84d131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1sh (%x15,%z17.s,sxtw)[16byte] %p4/z -> %z14.s -84d33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1sh (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s -84d43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1sh (%x19,%z20.s,sxtw)[16byte] %p5/z -> %z17.s -84d636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1sh (%x21,%z22.s,sxtw)[16byte] %p5/z -> %z19.s -84d83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1sh (%x23,%z24.s,sxtw)[16byte] %p6/z -> %z21.s -84da3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1sh (%x24,%z26.s,sxtw)[16byte] %p6/z -> %z23.s -84dc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1sh (%x26,%z28.s,sxtw)[16byte] %p7/z -> %z25.s -84de3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1sh (%x28,%z30.s,sxtw)[16byte] %p7/z -> %z27.s -84df3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1sh (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s +84802000 : ldff1sh z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1sh (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s +84852482 : ldff1sh z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1sh (%x4,%z5.s,uxtw)[2byte] %p1/z -> %z2.s +848728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1sh (%x6,%z7.s,uxtw)[2byte] %p2/z -> %z4.s +84892906 : ldff1sh z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1sh (%x8,%z9.s,uxtw)[2byte] %p2/z -> %z6.s +848b2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1sh (%x10,%z11.s,uxtw)[2byte] %p3/z -> %z8.s +848d2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1sh (%x11,%z13.s,uxtw)[2byte] %p3/z -> %z10.s +848f31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1sh (%x13,%z15.s,uxtw)[2byte] %p4/z -> %z12.s +849131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, UXTW] 
: ldff1sh (%x15,%z17.s,uxtw)[2byte] %p4/z -> %z14.s +84933630 : ldff1sh z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1sh (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s +84943671 : ldff1sh z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1sh (%x19,%z20.s,uxtw)[2byte] %p5/z -> %z17.s +849636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1sh (%x21,%z22.s,uxtw)[2byte] %p5/z -> %z19.s +84983af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1sh (%x23,%z24.s,uxtw)[2byte] %p6/z -> %z21.s +849a3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1sh (%x24,%z26.s,uxtw)[2byte] %p6/z -> %z23.s +849c3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1sh (%x26,%z28.s,uxtw)[2byte] %p7/z -> %z25.s +849e3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1sh (%x28,%z30.s,uxtw)[2byte] %p7/z -> %z27.s +849f3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1sh (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s +84c02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1sh (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s +84c52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1sh (%x4,%z5.s,sxtw)[2byte] %p1/z -> %z2.s +84c728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1sh (%x6,%z7.s,sxtw)[2byte] %p2/z -> %z4.s +84c92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1sh (%x8,%z9.s,sxtw)[2byte] %p2/z -> %z6.s +84cb2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1sh (%x10,%z11.s,sxtw)[2byte] %p3/z -> %z8.s +84cd2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1sh (%x11,%z13.s,sxtw)[2byte] %p3/z -> %z10.s +84cf31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1sh (%x13,%z15.s,sxtw)[2byte] %p4/z -> %z12.s +84d131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1sh (%x15,%z17.s,sxtw)[2byte] %p4/z -> %z14.s +84d33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1sh (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s +84d43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1sh (%x19,%z20.s,sxtw)[2byte] %p5/z -> %z17.s +84d636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1sh (%x21,%z22.s,sxtw)[2byte] %p5/z -> %z19.s 
+84d83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1sh (%x23,%z24.s,sxtw)[2byte] %p6/z -> %z21.s +84da3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1sh (%x24,%z26.s,sxtw)[2byte] %p6/z -> %z23.s +84dc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1sh (%x26,%z28.s,sxtw)[2byte] %p7/z -> %z25.s +84de3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1sh (%x28,%z30.s,sxtw)[2byte] %p7/z -> %z27.s +84df3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1sh (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s # LDFF1SH { .S }, /Z, [, .S, #1] (LDFF1SH-Z.P.BZ-S.x32.scaled) -84a02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, UXTW #1] : ldff1sh (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s -84a52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, UXTW #1] : ldff1sh (%x4,%z5.s,uxtw #1)[16byte] %p1/z -> %z2.s -84a728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, UXTW #1] : ldff1sh (%x6,%z7.s,uxtw #1)[16byte] %p2/z -> %z4.s -84a92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, UXTW #1] : ldff1sh (%x8,%z9.s,uxtw #1)[16byte] %p2/z -> %z6.s -84ab2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, UXTW #1] : ldff1sh (%x10,%z11.s,uxtw #1)[16byte] %p3/z -> %z8.s -84ad2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, UXTW #1] : ldff1sh (%x11,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s -84af31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, UXTW #1] : ldff1sh (%x13,%z15.s,uxtw #1)[16byte] %p4/z -> %z12.s -84b131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, UXTW #1] : ldff1sh (%x15,%z17.s,uxtw #1)[16byte] %p4/z -> %z14.s -84b33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, UXTW #1] : ldff1sh (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s -84b43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, UXTW #1] : ldff1sh (%x19,%z20.s,uxtw #1)[16byte] %p5/z -> %z17.s -84b636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, UXTW #1] : ldff1sh (%x21,%z22.s,uxtw #1)[16byte] %p5/z -> %z19.s -84b83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, UXTW #1] : ldff1sh (%x23,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s -84ba3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, UXTW #1] : ldff1sh (%x24,%z26.s,uxtw #1)[16byte] %p6/z -> %z23.s 
-84bc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, UXTW #1] : ldff1sh (%x26,%z28.s,uxtw #1)[16byte] %p7/z -> %z25.s -84be3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, UXTW #1] : ldff1sh (%x28,%z30.s,uxtw #1)[16byte] %p7/z -> %z27.s -84bf3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, UXTW #1] : ldff1sh (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s -84e02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, SXTW #1] : ldff1sh (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s -84e52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, SXTW #1] : ldff1sh (%x4,%z5.s,sxtw #1)[16byte] %p1/z -> %z2.s -84e728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, SXTW #1] : ldff1sh (%x6,%z7.s,sxtw #1)[16byte] %p2/z -> %z4.s -84e92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, SXTW #1] : ldff1sh (%x8,%z9.s,sxtw #1)[16byte] %p2/z -> %z6.s -84eb2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, SXTW #1] : ldff1sh (%x10,%z11.s,sxtw #1)[16byte] %p3/z -> %z8.s -84ed2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, SXTW #1] : ldff1sh (%x11,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s -84ef31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, SXTW #1] : ldff1sh (%x13,%z15.s,sxtw #1)[16byte] %p4/z -> %z12.s -84f131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, SXTW #1] : ldff1sh (%x15,%z17.s,sxtw #1)[16byte] %p4/z -> %z14.s -84f33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, SXTW #1] : ldff1sh (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s -84f43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, SXTW #1] : ldff1sh (%x19,%z20.s,sxtw #1)[16byte] %p5/z -> %z17.s -84f636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, SXTW #1] : ldff1sh (%x21,%z22.s,sxtw #1)[16byte] %p5/z -> %z19.s -84f83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, SXTW #1] : ldff1sh (%x23,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s -84fa3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, SXTW #1] : ldff1sh (%x24,%z26.s,sxtw #1)[16byte] %p6/z -> %z23.s -84fc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, SXTW #1] : ldff1sh (%x26,%z28.s,sxtw #1)[16byte] %p7/z -> %z25.s -84fe3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, SXTW #1] : ldff1sh (%x28,%z30.s,sxtw #1)[16byte] %p7/z -> %z27.s -84ff3fff : ldff1sh z31.s, 
p7/Z, [sp, z31.s, SXTW #1] : ldff1sh (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s +84a02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, UXTW #1] : ldff1sh (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s +84a52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, UXTW #1] : ldff1sh (%x4,%z5.s,uxtw #1)[2byte] %p1/z -> %z2.s +84a728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, UXTW #1] : ldff1sh (%x6,%z7.s,uxtw #1)[2byte] %p2/z -> %z4.s +84a92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, UXTW #1] : ldff1sh (%x8,%z9.s,uxtw #1)[2byte] %p2/z -> %z6.s +84ab2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, UXTW #1] : ldff1sh (%x10,%z11.s,uxtw #1)[2byte] %p3/z -> %z8.s +84ad2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, UXTW #1] : ldff1sh (%x11,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s +84af31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, UXTW #1] : ldff1sh (%x13,%z15.s,uxtw #1)[2byte] %p4/z -> %z12.s +84b131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, UXTW #1] : ldff1sh (%x15,%z17.s,uxtw #1)[2byte] %p4/z -> %z14.s +84b33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, UXTW #1] : ldff1sh (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s +84b43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, UXTW #1] : ldff1sh (%x19,%z20.s,uxtw #1)[2byte] %p5/z -> %z17.s +84b636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, UXTW #1] : ldff1sh (%x21,%z22.s,uxtw #1)[2byte] %p5/z -> %z19.s +84b83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, UXTW #1] : ldff1sh (%x23,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s +84ba3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, UXTW #1] : ldff1sh (%x24,%z26.s,uxtw #1)[2byte] %p6/z -> %z23.s +84bc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, UXTW #1] : ldff1sh (%x26,%z28.s,uxtw #1)[2byte] %p7/z -> %z25.s +84be3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, UXTW #1] : ldff1sh (%x28,%z30.s,uxtw #1)[2byte] %p7/z -> %z27.s +84bf3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, UXTW #1] : ldff1sh (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s +84e02000 : ldff1sh z0.s, p0/Z, [x0, z0.s, SXTW #1] : ldff1sh (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s +84e52482 : ldff1sh z2.s, p1/Z, [x4, z5.s, SXTW #1] : ldff1sh (%x4,%z5.s,sxtw 
#1)[2byte] %p1/z -> %z2.s +84e728c4 : ldff1sh z4.s, p2/Z, [x6, z7.s, SXTW #1] : ldff1sh (%x6,%z7.s,sxtw #1)[2byte] %p2/z -> %z4.s +84e92906 : ldff1sh z6.s, p2/Z, [x8, z9.s, SXTW #1] : ldff1sh (%x8,%z9.s,sxtw #1)[2byte] %p2/z -> %z6.s +84eb2d48 : ldff1sh z8.s, p3/Z, [x10, z11.s, SXTW #1] : ldff1sh (%x10,%z11.s,sxtw #1)[2byte] %p3/z -> %z8.s +84ed2d6a : ldff1sh z10.s, p3/Z, [x11, z13.s, SXTW #1] : ldff1sh (%x11,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s +84ef31ac : ldff1sh z12.s, p4/Z, [x13, z15.s, SXTW #1] : ldff1sh (%x13,%z15.s,sxtw #1)[2byte] %p4/z -> %z12.s +84f131ee : ldff1sh z14.s, p4/Z, [x15, z17.s, SXTW #1] : ldff1sh (%x15,%z17.s,sxtw #1)[2byte] %p4/z -> %z14.s +84f33630 : ldff1sh z16.s, p5/Z, [x17, z19.s, SXTW #1] : ldff1sh (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s +84f43671 : ldff1sh z17.s, p5/Z, [x19, z20.s, SXTW #1] : ldff1sh (%x19,%z20.s,sxtw #1)[2byte] %p5/z -> %z17.s +84f636b3 : ldff1sh z19.s, p5/Z, [x21, z22.s, SXTW #1] : ldff1sh (%x21,%z22.s,sxtw #1)[2byte] %p5/z -> %z19.s +84f83af5 : ldff1sh z21.s, p6/Z, [x23, z24.s, SXTW #1] : ldff1sh (%x23,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s +84fa3b17 : ldff1sh z23.s, p6/Z, [x24, z26.s, SXTW #1] : ldff1sh (%x24,%z26.s,sxtw #1)[2byte] %p6/z -> %z23.s +84fc3f59 : ldff1sh z25.s, p7/Z, [x26, z28.s, SXTW #1] : ldff1sh (%x26,%z28.s,sxtw #1)[2byte] %p7/z -> %z25.s +84fe3f9b : ldff1sh z27.s, p7/Z, [x28, z30.s, SXTW #1] : ldff1sh (%x28,%z30.s,sxtw #1)[2byte] %p7/z -> %z27.s +84ff3fff : ldff1sh z31.s, p7/Z, [sp, z31.s, SXTW #1] : ldff1sh (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s # LDFF1SH { .S }, /Z, [.S{, #}] (LDFF1SH-Z.P.AI-S) -84a0a000 : ldff1sh z0.s, p0/Z, [z0.s, #0] : ldff1sh (%z0.s)[16byte] %p0/z -> %z0.s -84a2a482 : ldff1sh z2.s, p1/Z, [z4.s, #4] : ldff1sh +0x04(%z4.s)[16byte] %p1/z -> %z2.s -84a4a8c4 : ldff1sh z4.s, p2/Z, [z6.s, #8] : ldff1sh +0x08(%z6.s)[16byte] %p2/z -> %z4.s -84a6a906 : ldff1sh z6.s, p2/Z, [z8.s, #12] : ldff1sh +0x0c(%z8.s)[16byte] %p2/z -> %z6.s -84a8ad48 : ldff1sh z8.s, p3/Z, [z10.s, #16] : 
ldff1sh +0x10(%z10.s)[16byte] %p3/z -> %z8.s -84aaad8a : ldff1sh z10.s, p3/Z, [z12.s, #20] : ldff1sh +0x14(%z12.s)[16byte] %p3/z -> %z10.s -84acb1cc : ldff1sh z12.s, p4/Z, [z14.s, #24] : ldff1sh +0x18(%z14.s)[16byte] %p4/z -> %z12.s -84aeb20e : ldff1sh z14.s, p4/Z, [z16.s, #28] : ldff1sh +0x1c(%z16.s)[16byte] %p4/z -> %z14.s -84b0b650 : ldff1sh z16.s, p5/Z, [z18.s, #32] : ldff1sh +0x20(%z18.s)[16byte] %p5/z -> %z16.s -84b1b671 : ldff1sh z17.s, p5/Z, [z19.s, #34] : ldff1sh +0x22(%z19.s)[16byte] %p5/z -> %z17.s -84b3b6b3 : ldff1sh z19.s, p5/Z, [z21.s, #38] : ldff1sh +0x26(%z21.s)[16byte] %p5/z -> %z19.s -84b5baf5 : ldff1sh z21.s, p6/Z, [z23.s, #42] : ldff1sh +0x2a(%z23.s)[16byte] %p6/z -> %z21.s -84b7bb37 : ldff1sh z23.s, p6/Z, [z25.s, #46] : ldff1sh +0x2e(%z25.s)[16byte] %p6/z -> %z23.s -84b9bf79 : ldff1sh z25.s, p7/Z, [z27.s, #50] : ldff1sh +0x32(%z27.s)[16byte] %p7/z -> %z25.s -84bbbfbb : ldff1sh z27.s, p7/Z, [z29.s, #54] : ldff1sh +0x36(%z29.s)[16byte] %p7/z -> %z27.s -84bfbfff : ldff1sh z31.s, p7/Z, [z31.s, #62] : ldff1sh +0x3e(%z31.s)[16byte] %p7/z -> %z31.s +84a0a000 : ldff1sh z0.s, p0/Z, [z0.s, #0] : ldff1sh (%z0.s)[2byte] %p0/z -> %z0.s +84a2a482 : ldff1sh z2.s, p1/Z, [z4.s, #4] : ldff1sh +0x04(%z4.s)[2byte] %p1/z -> %z2.s +84a4a8c4 : ldff1sh z4.s, p2/Z, [z6.s, #8] : ldff1sh +0x08(%z6.s)[2byte] %p2/z -> %z4.s +84a6a906 : ldff1sh z6.s, p2/Z, [z8.s, #12] : ldff1sh +0x0c(%z8.s)[2byte] %p2/z -> %z6.s +84a8ad48 : ldff1sh z8.s, p3/Z, [z10.s, #16] : ldff1sh +0x10(%z10.s)[2byte] %p3/z -> %z8.s +84aaad8a : ldff1sh z10.s, p3/Z, [z12.s, #20] : ldff1sh +0x14(%z12.s)[2byte] %p3/z -> %z10.s +84acb1cc : ldff1sh z12.s, p4/Z, [z14.s, #24] : ldff1sh +0x18(%z14.s)[2byte] %p4/z -> %z12.s +84aeb20e : ldff1sh z14.s, p4/Z, [z16.s, #28] : ldff1sh +0x1c(%z16.s)[2byte] %p4/z -> %z14.s +84b0b650 : ldff1sh z16.s, p5/Z, [z18.s, #32] : ldff1sh +0x20(%z18.s)[2byte] %p5/z -> %z16.s +84b1b671 : ldff1sh z17.s, p5/Z, [z19.s, #34] : ldff1sh +0x22(%z19.s)[2byte] %p5/z -> %z17.s +84b3b6b3 : 
ldff1sh z19.s, p5/Z, [z21.s, #38] : ldff1sh +0x26(%z21.s)[2byte] %p5/z -> %z19.s +84b5baf5 : ldff1sh z21.s, p6/Z, [z23.s, #42] : ldff1sh +0x2a(%z23.s)[2byte] %p6/z -> %z21.s +84b7bb37 : ldff1sh z23.s, p6/Z, [z25.s, #46] : ldff1sh +0x2e(%z25.s)[2byte] %p6/z -> %z23.s +84b9bf79 : ldff1sh z25.s, p7/Z, [z27.s, #50] : ldff1sh +0x32(%z27.s)[2byte] %p7/z -> %z25.s +84bbbfbb : ldff1sh z27.s, p7/Z, [z29.s, #54] : ldff1sh +0x36(%z29.s)[2byte] %p7/z -> %z27.s +84bfbfff : ldff1sh z31.s, p7/Z, [z31.s, #62] : ldff1sh +0x3e(%z31.s)[2byte] %p7/z -> %z31.s # LDFF1SH { .D }, /Z, [{, , LSL #1}] (LDFF1SH-Z.P.BR-S64) -a5006000 : ldff1sh z0.d, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d -a5056482 : ldff1sh z2.d, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d -a50768c4 : ldff1sh z4.d, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d -a5096906 : ldff1sh z6.d, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d -a50b6d48 : ldff1sh z8.d, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d -a50c6d6a : ldff1sh z10.d, p3/Z, [x11, x12, LSL #1] : ldff1sh (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d -a50e71ac : ldff1sh z12.d, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[8byte] %p4/z -> %z12.d -a51071ee : ldff1sh z14.d, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d -a5127630 : ldff1sh z16.d, p5/Z, [x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d -a5147671 : ldff1sh z17.d, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d -a51676b3 : ldff1sh z19.d, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d -a5187af5 : ldff1sh z21.d, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d -a5197b17 : ldff1sh z23.d, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d -a51b7f59 : ldff1sh z25.d, p7/Z, [x26, x27, LSL #1] : ldff1sh 
(%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d -a51d7f9b : ldff1sh z27.d, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d -a51e7fff : ldff1sh z31.d, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d +a5006000 : ldff1sh z0.d, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d +a5056482 : ldff1sh z2.d, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.d +a50768c4 : ldff1sh z4.d, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.d +a5096906 : ldff1sh z6.d, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.d +a50b6d48 : ldff1sh z8.d, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.d +a50c6d6a : ldff1sh z10.d, p3/Z, [x11, x12, LSL #1] : ldff1sh (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.d +a50e71ac : ldff1sh z12.d, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.d +a51071ee : ldff1sh z14.d, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.d +a5127630 : ldff1sh z16.d, p5/Z, [x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d +a5147671 : ldff1sh z17.d, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.d +a51676b3 : ldff1sh z19.d, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.d +a5187af5 : ldff1sh z21.d, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.d +a5197b17 : ldff1sh z23.d, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.d +a51b7f59 : ldff1sh z25.d, p7/Z, [x26, x27, LSL #1] : ldff1sh (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.d +a51d7f9b : ldff1sh z27.d, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.d +a51e7fff : ldff1sh z31.d, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d # LDFF1SH { .S }, /Z, [{, , LSL #1}] (LDFF1SH-Z.P.BR-S32) -a5206000 : ldff1sh z0.s, p0/Z, [x0, x0, LSL #1] : 
ldff1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s -a5256482 : ldff1sh z2.s, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s -a52768c4 : ldff1sh z4.s, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.s -a5296906 : ldff1sh z6.s, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s -a52b6d48 : ldff1sh z8.s, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s -a52c6d6a : ldff1sh z10.s, p3/Z, [x11, x12, LSL #1] : ldff1sh (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s -a52e71ac : ldff1sh z12.s, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s -a53071ee : ldff1sh z14.s, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s -a5327630 : ldff1sh z16.s, p5/Z, [x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s -a5347671 : ldff1sh z17.s, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s -a53676b3 : ldff1sh z19.s, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s -a5387af5 : ldff1sh z21.s, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s -a5397b17 : ldff1sh z23.s, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s -a53b7f59 : ldff1sh z25.s, p7/Z, [x26, x27, LSL #1] : ldff1sh (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.s -a53d7f9b : ldff1sh z27.s, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s -a53e7fff : ldff1sh z31.s, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s +a5206000 : ldff1sh z0.s, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s +a5256482 : ldff1sh z2.s, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.s +a52768c4 : ldff1sh z4.s, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.s +a5296906 : ldff1sh z6.s, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.s 
+a52b6d48 : ldff1sh z8.s, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.s +a52c6d6a : ldff1sh z10.s, p3/Z, [x11, x12, LSL #1] : ldff1sh (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.s +a52e71ac : ldff1sh z12.s, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.s +a53071ee : ldff1sh z14.s, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.s +a5327630 : ldff1sh z16.s, p5/Z, [x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s +a5347671 : ldff1sh z17.s, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.s +a53676b3 : ldff1sh z19.s, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.s +a5387af5 : ldff1sh z21.s, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.s +a5397b17 : ldff1sh z23.s, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.s +a53b7f59 : ldff1sh z25.s, p7/Z, [x26, x27, LSL #1] : ldff1sh (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.s +a53d7f9b : ldff1sh z27.s, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.s +a53e7fff : ldff1sh z31.s, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s # LDFF1SH { .D }, /Z, [, .D, ] (LDFF1SH-Z.P.BZ-D.x32.unscaled) -c4802000 : ldff1sh z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sh (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d -c4852482 : ldff1sh z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sh (%x4,%z5.d,uxtw)[8byte] %p1/z -> %z2.d -c48728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sh (%x6,%z7.d,uxtw)[8byte] %p2/z -> %z4.d -c4892906 : ldff1sh z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sh (%x8,%z9.d,uxtw)[8byte] %p2/z -> %z6.d -c48b2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sh (%x10,%z11.d,uxtw)[8byte] %p3/z -> %z8.d -c48d2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sh (%x11,%z13.d,uxtw)[8byte] %p3/z -> %z10.d -c48f31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1sh (%x13,%z15.d,uxtw)[8byte] %p4/z 
-> %z12.d -c49131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sh (%x15,%z17.d,uxtw)[8byte] %p4/z -> %z14.d -c4933630 : ldff1sh z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sh (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d -c4943671 : ldff1sh z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sh (%x19,%z20.d,uxtw)[8byte] %p5/z -> %z17.d -c49636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sh (%x21,%z22.d,uxtw)[8byte] %p5/z -> %z19.d -c4983af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sh (%x23,%z24.d,uxtw)[8byte] %p6/z -> %z21.d -c49a3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sh (%x24,%z26.d,uxtw)[8byte] %p6/z -> %z23.d -c49c3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1sh (%x26,%z28.d,uxtw)[8byte] %p7/z -> %z25.d -c49e3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sh (%x28,%z30.d,uxtw)[8byte] %p7/z -> %z27.d -c49f3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sh (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d -c4c02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sh (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d -c4c52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sh (%x4,%z5.d,sxtw)[8byte] %p1/z -> %z2.d -c4c728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sh (%x6,%z7.d,sxtw)[8byte] %p2/z -> %z4.d -c4c92906 : ldff1sh z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sh (%x8,%z9.d,sxtw)[8byte] %p2/z -> %z6.d -c4cb2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sh (%x10,%z11.d,sxtw)[8byte] %p3/z -> %z8.d -c4cd2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sh (%x11,%z13.d,sxtw)[8byte] %p3/z -> %z10.d -c4cf31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1sh (%x13,%z15.d,sxtw)[8byte] %p4/z -> %z12.d -c4d131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sh (%x15,%z17.d,sxtw)[8byte] %p4/z -> %z14.d -c4d33630 : ldff1sh z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sh (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d -c4d43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sh (%x19,%z20.d,sxtw)[8byte] %p5/z -> %z17.d -c4d636b3 : ldff1sh z19.d, p5/Z, [x21, 
z22.d, SXTW] : ldff1sh (%x21,%z22.d,sxtw)[8byte] %p5/z -> %z19.d -c4d83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sh (%x23,%z24.d,sxtw)[8byte] %p6/z -> %z21.d -c4da3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sh (%x24,%z26.d,sxtw)[8byte] %p6/z -> %z23.d -c4dc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sh (%x26,%z28.d,sxtw)[8byte] %p7/z -> %z25.d -c4de3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sh (%x28,%z30.d,sxtw)[8byte] %p7/z -> %z27.d -c4df3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sh (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d +c4802000 : ldff1sh z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sh (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d +c4852482 : ldff1sh z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sh (%x4,%z5.d,uxtw)[2byte] %p1/z -> %z2.d +c48728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sh (%x6,%z7.d,uxtw)[2byte] %p2/z -> %z4.d +c4892906 : ldff1sh z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sh (%x8,%z9.d,uxtw)[2byte] %p2/z -> %z6.d +c48b2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sh (%x10,%z11.d,uxtw)[2byte] %p3/z -> %z8.d +c48d2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sh (%x11,%z13.d,uxtw)[2byte] %p3/z -> %z10.d +c48f31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1sh (%x13,%z15.d,uxtw)[2byte] %p4/z -> %z12.d +c49131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sh (%x15,%z17.d,uxtw)[2byte] %p4/z -> %z14.d +c4933630 : ldff1sh z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sh (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d +c4943671 : ldff1sh z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sh (%x19,%z20.d,uxtw)[2byte] %p5/z -> %z17.d +c49636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sh (%x21,%z22.d,uxtw)[2byte] %p5/z -> %z19.d +c4983af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sh (%x23,%z24.d,uxtw)[2byte] %p6/z -> %z21.d +c49a3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sh (%x24,%z26.d,uxtw)[2byte] %p6/z -> %z23.d +c49c3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1sh (%x26,%z28.d,uxtw)[2byte] 
%p7/z -> %z25.d +c49e3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sh (%x28,%z30.d,uxtw)[2byte] %p7/z -> %z27.d +c49f3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sh (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d +c4c02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sh (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d +c4c52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sh (%x4,%z5.d,sxtw)[2byte] %p1/z -> %z2.d +c4c728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sh (%x6,%z7.d,sxtw)[2byte] %p2/z -> %z4.d +c4c92906 : ldff1sh z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sh (%x8,%z9.d,sxtw)[2byte] %p2/z -> %z6.d +c4cb2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sh (%x10,%z11.d,sxtw)[2byte] %p3/z -> %z8.d +c4cd2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sh (%x11,%z13.d,sxtw)[2byte] %p3/z -> %z10.d +c4cf31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1sh (%x13,%z15.d,sxtw)[2byte] %p4/z -> %z12.d +c4d131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sh (%x15,%z17.d,sxtw)[2byte] %p4/z -> %z14.d +c4d33630 : ldff1sh z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sh (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d +c4d43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sh (%x19,%z20.d,sxtw)[2byte] %p5/z -> %z17.d +c4d636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1sh (%x21,%z22.d,sxtw)[2byte] %p5/z -> %z19.d +c4d83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sh (%x23,%z24.d,sxtw)[2byte] %p6/z -> %z21.d +c4da3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sh (%x24,%z26.d,sxtw)[2byte] %p6/z -> %z23.d +c4dc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sh (%x26,%z28.d,sxtw)[2byte] %p7/z -> %z25.d +c4de3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sh (%x28,%z30.d,sxtw)[2byte] %p7/z -> %z27.d +c4df3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sh (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d # LDFF1SH { .D }, /Z, [, .D, #1] (LDFF1SH-Z.P.BZ-D.x32.scaled) -c4a02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, UXTW #1] : ldff1sh (%x0,%z0.d,uxtw 
#1)[8byte] %p0/z -> %z0.d -c4a52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, UXTW #1] : ldff1sh (%x4,%z5.d,uxtw #1)[8byte] %p1/z -> %z2.d -c4a728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, UXTW #1] : ldff1sh (%x6,%z7.d,uxtw #1)[8byte] %p2/z -> %z4.d -c4a92906 : ldff1sh z6.d, p2/Z, [x8, z9.d, UXTW #1] : ldff1sh (%x8,%z9.d,uxtw #1)[8byte] %p2/z -> %z6.d -c4ab2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, UXTW #1] : ldff1sh (%x10,%z11.d,uxtw #1)[8byte] %p3/z -> %z8.d -c4ad2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, UXTW #1] : ldff1sh (%x11,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d -c4af31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, UXTW #1] : ldff1sh (%x13,%z15.d,uxtw #1)[8byte] %p4/z -> %z12.d -c4b131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, UXTW #1] : ldff1sh (%x15,%z17.d,uxtw #1)[8byte] %p4/z -> %z14.d -c4b33630 : ldff1sh z16.d, p5/Z, [x17, z19.d, UXTW #1] : ldff1sh (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d -c4b43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, UXTW #1] : ldff1sh (%x19,%z20.d,uxtw #1)[8byte] %p5/z -> %z17.d -c4b636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, UXTW #1] : ldff1sh (%x21,%z22.d,uxtw #1)[8byte] %p5/z -> %z19.d -c4b83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, UXTW #1] : ldff1sh (%x23,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d -c4ba3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, UXTW #1] : ldff1sh (%x24,%z26.d,uxtw #1)[8byte] %p6/z -> %z23.d -c4bc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, UXTW #1] : ldff1sh (%x26,%z28.d,uxtw #1)[8byte] %p7/z -> %z25.d -c4be3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, UXTW #1] : ldff1sh (%x28,%z30.d,uxtw #1)[8byte] %p7/z -> %z27.d -c4bf3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, UXTW #1] : ldff1sh (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d -c4e02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, SXTW #1] : ldff1sh (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d -c4e52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, SXTW #1] : ldff1sh (%x4,%z5.d,sxtw #1)[8byte] %p1/z -> %z2.d -c4e728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, SXTW #1] : ldff1sh (%x6,%z7.d,sxtw #1)[8byte] %p2/z -> %z4.d -c4e92906 : ldff1sh z6.d, p2/Z, 
[x8, z9.d, SXTW #1] : ldff1sh (%x8,%z9.d,sxtw #1)[8byte] %p2/z -> %z6.d -c4eb2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, SXTW #1] : ldff1sh (%x10,%z11.d,sxtw #1)[8byte] %p3/z -> %z8.d -c4ed2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, SXTW #1] : ldff1sh (%x11,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d -c4ef31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, SXTW #1] : ldff1sh (%x13,%z15.d,sxtw #1)[8byte] %p4/z -> %z12.d -c4f131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, SXTW #1] : ldff1sh (%x15,%z17.d,sxtw #1)[8byte] %p4/z -> %z14.d -c4f33630 : ldff1sh z16.d, p5/Z, [x17, z19.d, SXTW #1] : ldff1sh (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d -c4f43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, SXTW #1] : ldff1sh (%x19,%z20.d,sxtw #1)[8byte] %p5/z -> %z17.d -c4f636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, SXTW #1] : ldff1sh (%x21,%z22.d,sxtw #1)[8byte] %p5/z -> %z19.d -c4f83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, SXTW #1] : ldff1sh (%x23,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d -c4fa3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, SXTW #1] : ldff1sh (%x24,%z26.d,sxtw #1)[8byte] %p6/z -> %z23.d -c4fc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, SXTW #1] : ldff1sh (%x26,%z28.d,sxtw #1)[8byte] %p7/z -> %z25.d -c4fe3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, SXTW #1] : ldff1sh (%x28,%z30.d,sxtw #1)[8byte] %p7/z -> %z27.d -c4ff3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, SXTW #1] : ldff1sh (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d +c4a02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, UXTW #1] : ldff1sh (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d +c4a52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, UXTW #1] : ldff1sh (%x4,%z5.d,uxtw #1)[2byte] %p1/z -> %z2.d +c4a728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, UXTW #1] : ldff1sh (%x6,%z7.d,uxtw #1)[2byte] %p2/z -> %z4.d +c4a92906 : ldff1sh z6.d, p2/Z, [x8, z9.d, UXTW #1] : ldff1sh (%x8,%z9.d,uxtw #1)[2byte] %p2/z -> %z6.d +c4ab2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, UXTW #1] : ldff1sh (%x10,%z11.d,uxtw #1)[2byte] %p3/z -> %z8.d +c4ad2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, UXTW #1] : ldff1sh (%x11,%z13.d,uxtw 
#1)[2byte] %p3/z -> %z10.d +c4af31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, UXTW #1] : ldff1sh (%x13,%z15.d,uxtw #1)[2byte] %p4/z -> %z12.d +c4b131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, UXTW #1] : ldff1sh (%x15,%z17.d,uxtw #1)[2byte] %p4/z -> %z14.d +c4b33630 : ldff1sh z16.d, p5/Z, [x17, z19.d, UXTW #1] : ldff1sh (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d +c4b43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, UXTW #1] : ldff1sh (%x19,%z20.d,uxtw #1)[2byte] %p5/z -> %z17.d +c4b636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, UXTW #1] : ldff1sh (%x21,%z22.d,uxtw #1)[2byte] %p5/z -> %z19.d +c4b83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, UXTW #1] : ldff1sh (%x23,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d +c4ba3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, UXTW #1] : ldff1sh (%x24,%z26.d,uxtw #1)[2byte] %p6/z -> %z23.d +c4bc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, UXTW #1] : ldff1sh (%x26,%z28.d,uxtw #1)[2byte] %p7/z -> %z25.d +c4be3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, UXTW #1] : ldff1sh (%x28,%z30.d,uxtw #1)[2byte] %p7/z -> %z27.d +c4bf3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, UXTW #1] : ldff1sh (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d +c4e02000 : ldff1sh z0.d, p0/Z, [x0, z0.d, SXTW #1] : ldff1sh (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d +c4e52482 : ldff1sh z2.d, p1/Z, [x4, z5.d, SXTW #1] : ldff1sh (%x4,%z5.d,sxtw #1)[2byte] %p1/z -> %z2.d +c4e728c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, SXTW #1] : ldff1sh (%x6,%z7.d,sxtw #1)[2byte] %p2/z -> %z4.d +c4e92906 : ldff1sh z6.d, p2/Z, [x8, z9.d, SXTW #1] : ldff1sh (%x8,%z9.d,sxtw #1)[2byte] %p2/z -> %z6.d +c4eb2d48 : ldff1sh z8.d, p3/Z, [x10, z11.d, SXTW #1] : ldff1sh (%x10,%z11.d,sxtw #1)[2byte] %p3/z -> %z8.d +c4ed2d6a : ldff1sh z10.d, p3/Z, [x11, z13.d, SXTW #1] : ldff1sh (%x11,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d +c4ef31ac : ldff1sh z12.d, p4/Z, [x13, z15.d, SXTW #1] : ldff1sh (%x13,%z15.d,sxtw #1)[2byte] %p4/z -> %z12.d +c4f131ee : ldff1sh z14.d, p4/Z, [x15, z17.d, SXTW #1] : ldff1sh (%x15,%z17.d,sxtw #1)[2byte] %p4/z -> %z14.d +c4f33630 : ldff1sh 
z16.d, p5/Z, [x17, z19.d, SXTW #1] : ldff1sh (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d +c4f43671 : ldff1sh z17.d, p5/Z, [x19, z20.d, SXTW #1] : ldff1sh (%x19,%z20.d,sxtw #1)[2byte] %p5/z -> %z17.d +c4f636b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, SXTW #1] : ldff1sh (%x21,%z22.d,sxtw #1)[2byte] %p5/z -> %z19.d +c4f83af5 : ldff1sh z21.d, p6/Z, [x23, z24.d, SXTW #1] : ldff1sh (%x23,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d +c4fa3b17 : ldff1sh z23.d, p6/Z, [x24, z26.d, SXTW #1] : ldff1sh (%x24,%z26.d,sxtw #1)[2byte] %p6/z -> %z23.d +c4fc3f59 : ldff1sh z25.d, p7/Z, [x26, z28.d, SXTW #1] : ldff1sh (%x26,%z28.d,sxtw #1)[2byte] %p7/z -> %z25.d +c4fe3f9b : ldff1sh z27.d, p7/Z, [x28, z30.d, SXTW #1] : ldff1sh (%x28,%z30.d,sxtw #1)[2byte] %p7/z -> %z27.d +c4ff3fff : ldff1sh z31.d, p7/Z, [sp, z31.d, SXTW #1] : ldff1sh (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d # LDFF1SH { .D }, /Z, [.D{, #}] (LDFF1SH-Z.P.AI-D) -c4a0a000 : ldff1sh z0.d, p0/Z, [z0.d, #0] : ldff1sh (%z0.d)[8byte] %p0/z -> %z0.d -c4a2a482 : ldff1sh z2.d, p1/Z, [z4.d, #4] : ldff1sh +0x04(%z4.d)[8byte] %p1/z -> %z2.d -c4a4a8c4 : ldff1sh z4.d, p2/Z, [z6.d, #8] : ldff1sh +0x08(%z6.d)[8byte] %p2/z -> %z4.d -c4a6a906 : ldff1sh z6.d, p2/Z, [z8.d, #12] : ldff1sh +0x0c(%z8.d)[8byte] %p2/z -> %z6.d -c4a8ad48 : ldff1sh z8.d, p3/Z, [z10.d, #16] : ldff1sh +0x10(%z10.d)[8byte] %p3/z -> %z8.d -c4aaad8a : ldff1sh z10.d, p3/Z, [z12.d, #20] : ldff1sh +0x14(%z12.d)[8byte] %p3/z -> %z10.d -c4acb1cc : ldff1sh z12.d, p4/Z, [z14.d, #24] : ldff1sh +0x18(%z14.d)[8byte] %p4/z -> %z12.d -c4aeb20e : ldff1sh z14.d, p4/Z, [z16.d, #28] : ldff1sh +0x1c(%z16.d)[8byte] %p4/z -> %z14.d -c4b0b650 : ldff1sh z16.d, p5/Z, [z18.d, #32] : ldff1sh +0x20(%z18.d)[8byte] %p5/z -> %z16.d -c4b1b671 : ldff1sh z17.d, p5/Z, [z19.d, #34] : ldff1sh +0x22(%z19.d)[8byte] %p5/z -> %z17.d -c4b3b6b3 : ldff1sh z19.d, p5/Z, [z21.d, #38] : ldff1sh +0x26(%z21.d)[8byte] %p5/z -> %z19.d -c4b5baf5 : ldff1sh z21.d, p6/Z, [z23.d, #42] : ldff1sh +0x2a(%z23.d)[8byte] %p6/z -> %z21.d 
-c4b7bb37 : ldff1sh z23.d, p6/Z, [z25.d, #46] : ldff1sh +0x2e(%z25.d)[8byte] %p6/z -> %z23.d -c4b9bf79 : ldff1sh z25.d, p7/Z, [z27.d, #50] : ldff1sh +0x32(%z27.d)[8byte] %p7/z -> %z25.d -c4bbbfbb : ldff1sh z27.d, p7/Z, [z29.d, #54] : ldff1sh +0x36(%z29.d)[8byte] %p7/z -> %z27.d -c4bfbfff : ldff1sh z31.d, p7/Z, [z31.d, #62] : ldff1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +c4a0a000 : ldff1sh z0.d, p0/Z, [z0.d, #0] : ldff1sh (%z0.d)[2byte] %p0/z -> %z0.d +c4a2a482 : ldff1sh z2.d, p1/Z, [z4.d, #4] : ldff1sh +0x04(%z4.d)[2byte] %p1/z -> %z2.d +c4a4a8c4 : ldff1sh z4.d, p2/Z, [z6.d, #8] : ldff1sh +0x08(%z6.d)[2byte] %p2/z -> %z4.d +c4a6a906 : ldff1sh z6.d, p2/Z, [z8.d, #12] : ldff1sh +0x0c(%z8.d)[2byte] %p2/z -> %z6.d +c4a8ad48 : ldff1sh z8.d, p3/Z, [z10.d, #16] : ldff1sh +0x10(%z10.d)[2byte] %p3/z -> %z8.d +c4aaad8a : ldff1sh z10.d, p3/Z, [z12.d, #20] : ldff1sh +0x14(%z12.d)[2byte] %p3/z -> %z10.d +c4acb1cc : ldff1sh z12.d, p4/Z, [z14.d, #24] : ldff1sh +0x18(%z14.d)[2byte] %p4/z -> %z12.d +c4aeb20e : ldff1sh z14.d, p4/Z, [z16.d, #28] : ldff1sh +0x1c(%z16.d)[2byte] %p4/z -> %z14.d +c4b0b650 : ldff1sh z16.d, p5/Z, [z18.d, #32] : ldff1sh +0x20(%z18.d)[2byte] %p5/z -> %z16.d +c4b1b671 : ldff1sh z17.d, p5/Z, [z19.d, #34] : ldff1sh +0x22(%z19.d)[2byte] %p5/z -> %z17.d +c4b3b6b3 : ldff1sh z19.d, p5/Z, [z21.d, #38] : ldff1sh +0x26(%z21.d)[2byte] %p5/z -> %z19.d +c4b5baf5 : ldff1sh z21.d, p6/Z, [z23.d, #42] : ldff1sh +0x2a(%z23.d)[2byte] %p6/z -> %z21.d +c4b7bb37 : ldff1sh z23.d, p6/Z, [z25.d, #46] : ldff1sh +0x2e(%z25.d)[2byte] %p6/z -> %z23.d +c4b9bf79 : ldff1sh z25.d, p7/Z, [z27.d, #50] : ldff1sh +0x32(%z27.d)[2byte] %p7/z -> %z25.d +c4bbbfbb : ldff1sh z27.d, p7/Z, [z29.d, #54] : ldff1sh +0x36(%z29.d)[2byte] %p7/z -> %z27.d +c4bfbfff : ldff1sh z31.d, p7/Z, [z31.d, #62] : ldff1sh +0x3e(%z31.d)[2byte] %p7/z -> %z31.d # LDFF1SH { .D }, /Z, [, .D] (LDFF1SH-Z.P.BZ-D.64.unscaled) -c4c0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d] : ldff1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d -c4c5a482 : 
ldff1sh z2.d, p1/Z, [x4, z5.d] : ldff1sh (%x4,%z5.d)[8byte] %p1/z -> %z2.d -c4c7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d] : ldff1sh (%x6,%z7.d)[8byte] %p2/z -> %z4.d -c4c9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d] : ldff1sh (%x8,%z9.d)[8byte] %p2/z -> %z6.d -c4cbad48 : ldff1sh z8.d, p3/Z, [x10, z11.d] : ldff1sh (%x10,%z11.d)[8byte] %p3/z -> %z8.d -c4cdad6a : ldff1sh z10.d, p3/Z, [x11, z13.d] : ldff1sh (%x11,%z13.d)[8byte] %p3/z -> %z10.d -c4cfb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d] : ldff1sh (%x13,%z15.d)[8byte] %p4/z -> %z12.d -c4d1b1ee : ldff1sh z14.d, p4/Z, [x15, z17.d] : ldff1sh (%x15,%z17.d)[8byte] %p4/z -> %z14.d -c4d3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d] : ldff1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d -c4d4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d] : ldff1sh (%x19,%z20.d)[8byte] %p5/z -> %z17.d -c4d6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d] : ldff1sh (%x21,%z22.d)[8byte] %p5/z -> %z19.d -c4d8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d] : ldff1sh (%x23,%z24.d)[8byte] %p6/z -> %z21.d -c4dabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d] : ldff1sh (%x24,%z26.d)[8byte] %p6/z -> %z23.d -c4dcbf59 : ldff1sh z25.d, p7/Z, [x26, z28.d] : ldff1sh (%x26,%z28.d)[8byte] %p7/z -> %z25.d -c4debf9b : ldff1sh z27.d, p7/Z, [x28, z30.d] : ldff1sh (%x28,%z30.d)[8byte] %p7/z -> %z27.d -c4dfbfff : ldff1sh z31.d, p7/Z, [sp, z31.d] : ldff1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d +c4c0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d] : ldff1sh (%x0,%z0.d)[2byte] %p0/z -> %z0.d +c4c5a482 : ldff1sh z2.d, p1/Z, [x4, z5.d] : ldff1sh (%x4,%z5.d)[2byte] %p1/z -> %z2.d +c4c7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d] : ldff1sh (%x6,%z7.d)[2byte] %p2/z -> %z4.d +c4c9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d] : ldff1sh (%x8,%z9.d)[2byte] %p2/z -> %z6.d +c4cbad48 : ldff1sh z8.d, p3/Z, [x10, z11.d] : ldff1sh (%x10,%z11.d)[2byte] %p3/z -> %z8.d +c4cdad6a : ldff1sh z10.d, p3/Z, [x11, z13.d] : ldff1sh (%x11,%z13.d)[2byte] %p3/z -> %z10.d +c4cfb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d] : ldff1sh (%x13,%z15.d)[2byte] %p4/z -> %z12.d +c4d1b1ee : 
ldff1sh z14.d, p4/Z, [x15, z17.d] : ldff1sh (%x15,%z17.d)[2byte] %p4/z -> %z14.d +c4d3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d] : ldff1sh (%x17,%z19.d)[2byte] %p5/z -> %z16.d +c4d4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d] : ldff1sh (%x19,%z20.d)[2byte] %p5/z -> %z17.d +c4d6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d] : ldff1sh (%x21,%z22.d)[2byte] %p5/z -> %z19.d +c4d8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d] : ldff1sh (%x23,%z24.d)[2byte] %p6/z -> %z21.d +c4dabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d] : ldff1sh (%x24,%z26.d)[2byte] %p6/z -> %z23.d +c4dcbf59 : ldff1sh z25.d, p7/Z, [x26, z28.d] : ldff1sh (%x26,%z28.d)[2byte] %p7/z -> %z25.d +c4debf9b : ldff1sh z27.d, p7/Z, [x28, z30.d] : ldff1sh (%x28,%z30.d)[2byte] %p7/z -> %z27.d +c4dfbfff : ldff1sh z31.d, p7/Z, [sp, z31.d] : ldff1sh (%sp,%z31.d)[2byte] %p7/z -> %z31.d # LDFF1SH { .D }, /Z, [, .D, LSL #1] (LDFF1SH-Z.P.BZ-D.64.scaled) -c4e0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d -c4e5a482 : ldff1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1sh (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d -c4e7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1sh (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d -c4e9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1sh (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d -c4ebad48 : ldff1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1sh (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d -c4edad6a : ldff1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ldff1sh (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d -c4efb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1sh (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d -c4f1b1ee : ldff1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1sh (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d -c4f3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d -c4f4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1sh (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d -c4f6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : 
ldff1sh (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d -c4f8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1sh (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d -c4fabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1sh (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d -c4fcbf59 : ldff1sh z25.d, p7/Z, [x26, z28.d, LSL #1] : ldff1sh (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d -c4febf9b : ldff1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1sh (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d -c4ffbfff : ldff1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d +c4e0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1sh (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d +c4e5a482 : ldff1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1sh (%x4,%z5.d,lsl #1)[2byte] %p1/z -> %z2.d +c4e7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1sh (%x6,%z7.d,lsl #1)[2byte] %p2/z -> %z4.d +c4e9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1sh (%x8,%z9.d,lsl #1)[2byte] %p2/z -> %z6.d +c4ebad48 : ldff1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1sh (%x10,%z11.d,lsl #1)[2byte] %p3/z -> %z8.d +c4edad6a : ldff1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ldff1sh (%x11,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d +c4efb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1sh (%x13,%z15.d,lsl #1)[2byte] %p4/z -> %z12.d +c4f1b1ee : ldff1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1sh (%x15,%z17.d,lsl #1)[2byte] %p4/z -> %z14.d +c4f3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1sh (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d +c4f4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1sh (%x19,%z20.d,lsl #1)[2byte] %p5/z -> %z17.d +c4f6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : ldff1sh (%x21,%z22.d,lsl #1)[2byte] %p5/z -> %z19.d +c4f8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1sh (%x23,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d +c4fabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1sh (%x24,%z26.d,lsl #1)[2byte] %p6/z -> %z23.d +c4fcbf59 : ldff1sh z25.d, 
p7/Z, [x26, z28.d, LSL #1] : ldff1sh (%x26,%z28.d,lsl #1)[2byte] %p7/z -> %z25.d +c4febf9b : ldff1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1sh (%x28,%z30.d,lsl #1)[2byte] %p7/z -> %z27.d +c4ffbfff : ldff1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1sh (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [{, , LSL #2}] (LDFF1SW-Z.P.BR-S64) -a4806000 : ldff1sw z0.d, p0/Z, [x0, x0, LSL #2] : ldff1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d -a4856482 : ldff1sw z2.d, p1/Z, [x4, x5, LSL #2] : ldff1sw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d -a48768c4 : ldff1sw z4.d, p2/Z, [x6, x7, LSL #2] : ldff1sw (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d -a4896906 : ldff1sw z6.d, p2/Z, [x8, x9, LSL #2] : ldff1sw (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d -a48b6d48 : ldff1sw z8.d, p3/Z, [x10, x11, LSL #2] : ldff1sw (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d -a48c6d6a : ldff1sw z10.d, p3/Z, [x11, x12, LSL #2] : ldff1sw (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d -a48e71ac : ldff1sw z12.d, p4/Z, [x13, x14, LSL #2] : ldff1sw (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d -a49071ee : ldff1sw z14.d, p4/Z, [x15, x16, LSL #2] : ldff1sw (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d -a4927630 : ldff1sw z16.d, p5/Z, [x17, x18, LSL #2] : ldff1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d -a4947671 : ldff1sw z17.d, p5/Z, [x19, x20, LSL #2] : ldff1sw (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d -a49676b3 : ldff1sw z19.d, p5/Z, [x21, x22, LSL #2] : ldff1sw (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d -a4987af5 : ldff1sw z21.d, p6/Z, [x23, x24, LSL #2] : ldff1sw (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d -a4997b17 : ldff1sw z23.d, p6/Z, [x24, x25, LSL #2] : ldff1sw (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d -a49b7f59 : ldff1sw z25.d, p7/Z, [x26, x27, LSL #2] : ldff1sw (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d -a49d7f9b : ldff1sw z27.d, p7/Z, [x28, x29, LSL #2] : ldff1sw (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d -a49e7fff : ldff1sw z31.d, p7/Z, [sp, x30, LSL #2] : ldff1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> 
%z31.d +a4806000 : ldff1sw z0.d, p0/Z, [x0, x0, LSL #2] : ldff1sw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d +a4856482 : ldff1sw z2.d, p1/Z, [x4, x5, LSL #2] : ldff1sw (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.d +a48768c4 : ldff1sw z4.d, p2/Z, [x6, x7, LSL #2] : ldff1sw (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.d +a4896906 : ldff1sw z6.d, p2/Z, [x8, x9, LSL #2] : ldff1sw (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.d +a48b6d48 : ldff1sw z8.d, p3/Z, [x10, x11, LSL #2] : ldff1sw (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.d +a48c6d6a : ldff1sw z10.d, p3/Z, [x11, x12, LSL #2] : ldff1sw (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.d +a48e71ac : ldff1sw z12.d, p4/Z, [x13, x14, LSL #2] : ldff1sw (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.d +a49071ee : ldff1sw z14.d, p4/Z, [x15, x16, LSL #2] : ldff1sw (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.d +a4927630 : ldff1sw z16.d, p5/Z, [x17, x18, LSL #2] : ldff1sw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d +a4947671 : ldff1sw z17.d, p5/Z, [x19, x20, LSL #2] : ldff1sw (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.d +a49676b3 : ldff1sw z19.d, p5/Z, [x21, x22, LSL #2] : ldff1sw (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.d +a4987af5 : ldff1sw z21.d, p6/Z, [x23, x24, LSL #2] : ldff1sw (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.d +a4997b17 : ldff1sw z23.d, p6/Z, [x24, x25, LSL #2] : ldff1sw (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.d +a49b7f59 : ldff1sw z25.d, p7/Z, [x26, x27, LSL #2] : ldff1sw (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.d +a49d7f9b : ldff1sw z27.d, p7/Z, [x28, x29, LSL #2] : ldff1sw (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.d +a49e7fff : ldff1sw z31.d, p7/Z, [sp, x30, LSL #2] : ldff1sw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [, .D, ] (LDFF1SW-Z.P.BZ-D.x32.unscaled) -c5002000 : ldff1sw z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sw (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d -c5052482 : ldff1sw z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sw (%x4,%z5.d,uxtw)[16byte] %p1/z -> %z2.d -c50728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sw (%x6,%z7.d,uxtw)[16byte] %p2/z -> 
%z4.d -c5092906 : ldff1sw z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sw (%x8,%z9.d,uxtw)[16byte] %p2/z -> %z6.d -c50b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sw (%x10,%z11.d,uxtw)[16byte] %p3/z -> %z8.d -c50d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sw (%x11,%z13.d,uxtw)[16byte] %p3/z -> %z10.d -c50f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1sw (%x13,%z15.d,uxtw)[16byte] %p4/z -> %z12.d -c51131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sw (%x15,%z17.d,uxtw)[16byte] %p4/z -> %z14.d -c5133630 : ldff1sw z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sw (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d -c5143671 : ldff1sw z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sw (%x19,%z20.d,uxtw)[16byte] %p5/z -> %z17.d -c51636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sw (%x21,%z22.d,uxtw)[16byte] %p5/z -> %z19.d -c5183af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sw (%x23,%z24.d,uxtw)[16byte] %p6/z -> %z21.d -c51a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sw (%x24,%z26.d,uxtw)[16byte] %p6/z -> %z23.d -c51c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1sw (%x26,%z28.d,uxtw)[16byte] %p7/z -> %z25.d -c51e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sw (%x28,%z30.d,uxtw)[16byte] %p7/z -> %z27.d -c51f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sw (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d -c5402000 : ldff1sw z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sw (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d -c5452482 : ldff1sw z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sw (%x4,%z5.d,sxtw)[16byte] %p1/z -> %z2.d -c54728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sw (%x6,%z7.d,sxtw)[16byte] %p2/z -> %z4.d -c5492906 : ldff1sw z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sw (%x8,%z9.d,sxtw)[16byte] %p2/z -> %z6.d -c54b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sw (%x10,%z11.d,sxtw)[16byte] %p3/z -> %z8.d -c54d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sw (%x11,%z13.d,sxtw)[16byte] %p3/z -> %z10.d -c54f31ac : ldff1sw z12.d, p4/Z, 
[x13, z15.d, SXTW] : ldff1sw (%x13,%z15.d,sxtw)[16byte] %p4/z -> %z12.d -c55131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sw (%x15,%z17.d,sxtw)[16byte] %p4/z -> %z14.d -c5533630 : ldff1sw z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sw (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d -c5543671 : ldff1sw z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sw (%x19,%z20.d,sxtw)[16byte] %p5/z -> %z17.d -c55636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1sw (%x21,%z22.d,sxtw)[16byte] %p5/z -> %z19.d -c5583af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sw (%x23,%z24.d,sxtw)[16byte] %p6/z -> %z21.d -c55a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sw (%x24,%z26.d,sxtw)[16byte] %p6/z -> %z23.d -c55c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sw (%x26,%z28.d,sxtw)[16byte] %p7/z -> %z25.d -c55e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sw (%x28,%z30.d,sxtw)[16byte] %p7/z -> %z27.d -c55f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sw (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d +c5002000 : ldff1sw z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1sw (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d +c5052482 : ldff1sw z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1sw (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d +c50728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1sw (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d +c5092906 : ldff1sw z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1sw (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d +c50b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1sw (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d +c50d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1sw (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d +c50f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1sw (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d +c51131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1sw (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d +c5133630 : ldff1sw z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1sw (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d +c5143671 : ldff1sw z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1sw 
(%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d +c51636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1sw (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d +c5183af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1sw (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d +c51a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1sw (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d +c51c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1sw (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d +c51e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1sw (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d +c51f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1sw (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d +c5402000 : ldff1sw z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1sw (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d +c5452482 : ldff1sw z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1sw (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d +c54728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1sw (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d +c5492906 : ldff1sw z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1sw (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d +c54b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1sw (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d +c54d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1sw (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d +c54f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1sw (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d +c55131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1sw (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d +c5533630 : ldff1sw z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1sw (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d +c5543671 : ldff1sw z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1sw (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d +c55636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1sw (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d +c5583af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1sw (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d +c55a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1sw (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d +c55c3f59 
: ldff1sw z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1sw (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d +c55e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1sw (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d +c55f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1sw (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [, .D, #2] (LDFF1SW-Z.P.BZ-D.x32.scaled) -c5202000 : ldff1sw z0.d, p0/Z, [x0, z0.d, UXTW #2] : ldff1sw (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d -c5252482 : ldff1sw z2.d, p1/Z, [x4, z5.d, UXTW #2] : ldff1sw (%x4,%z5.d,uxtw #2)[16byte] %p1/z -> %z2.d -c52728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, UXTW #2] : ldff1sw (%x6,%z7.d,uxtw #2)[16byte] %p2/z -> %z4.d -c5292906 : ldff1sw z6.d, p2/Z, [x8, z9.d, UXTW #2] : ldff1sw (%x8,%z9.d,uxtw #2)[16byte] %p2/z -> %z6.d -c52b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, UXTW #2] : ldff1sw (%x10,%z11.d,uxtw #2)[16byte] %p3/z -> %z8.d -c52d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, UXTW #2] : ldff1sw (%x11,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d -c52f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, UXTW #2] : ldff1sw (%x13,%z15.d,uxtw #2)[16byte] %p4/z -> %z12.d -c53131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, UXTW #2] : ldff1sw (%x15,%z17.d,uxtw #2)[16byte] %p4/z -> %z14.d -c5333630 : ldff1sw z16.d, p5/Z, [x17, z19.d, UXTW #2] : ldff1sw (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d -c5343671 : ldff1sw z17.d, p5/Z, [x19, z20.d, UXTW #2] : ldff1sw (%x19,%z20.d,uxtw #2)[16byte] %p5/z -> %z17.d -c53636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, UXTW #2] : ldff1sw (%x21,%z22.d,uxtw #2)[16byte] %p5/z -> %z19.d -c5383af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, UXTW #2] : ldff1sw (%x23,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d -c53a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, UXTW #2] : ldff1sw (%x24,%z26.d,uxtw #2)[16byte] %p6/z -> %z23.d -c53c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, UXTW #2] : ldff1sw (%x26,%z28.d,uxtw #2)[16byte] %p7/z -> %z25.d -c53e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, UXTW #2] : ldff1sw (%x28,%z30.d,uxtw #2)[16byte] %p7/z -> 
%z27.d -c53f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, UXTW #2] : ldff1sw (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d -c5602000 : ldff1sw z0.d, p0/Z, [x0, z0.d, SXTW #2] : ldff1sw (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d -c5652482 : ldff1sw z2.d, p1/Z, [x4, z5.d, SXTW #2] : ldff1sw (%x4,%z5.d,sxtw #2)[16byte] %p1/z -> %z2.d -c56728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, SXTW #2] : ldff1sw (%x6,%z7.d,sxtw #2)[16byte] %p2/z -> %z4.d -c5692906 : ldff1sw z6.d, p2/Z, [x8, z9.d, SXTW #2] : ldff1sw (%x8,%z9.d,sxtw #2)[16byte] %p2/z -> %z6.d -c56b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, SXTW #2] : ldff1sw (%x10,%z11.d,sxtw #2)[16byte] %p3/z -> %z8.d -c56d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, SXTW #2] : ldff1sw (%x11,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d -c56f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, SXTW #2] : ldff1sw (%x13,%z15.d,sxtw #2)[16byte] %p4/z -> %z12.d -c57131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, SXTW #2] : ldff1sw (%x15,%z17.d,sxtw #2)[16byte] %p4/z -> %z14.d -c5733630 : ldff1sw z16.d, p5/Z, [x17, z19.d, SXTW #2] : ldff1sw (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d -c5743671 : ldff1sw z17.d, p5/Z, [x19, z20.d, SXTW #2] : ldff1sw (%x19,%z20.d,sxtw #2)[16byte] %p5/z -> %z17.d -c57636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, SXTW #2] : ldff1sw (%x21,%z22.d,sxtw #2)[16byte] %p5/z -> %z19.d -c5783af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, SXTW #2] : ldff1sw (%x23,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d -c57a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, SXTW #2] : ldff1sw (%x24,%z26.d,sxtw #2)[16byte] %p6/z -> %z23.d -c57c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, SXTW #2] : ldff1sw (%x26,%z28.d,sxtw #2)[16byte] %p7/z -> %z25.d -c57e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, SXTW #2] : ldff1sw (%x28,%z30.d,sxtw #2)[16byte] %p7/z -> %z27.d -c57f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, SXTW #2] : ldff1sw (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d +c5202000 : ldff1sw z0.d, p0/Z, [x0, z0.d, UXTW #2] : ldff1sw (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d +c5252482 : ldff1sw z2.d, p1/Z, 
[x4, z5.d, UXTW #2] : ldff1sw (%x4,%z5.d,uxtw #2)[4byte] %p1/z -> %z2.d +c52728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, UXTW #2] : ldff1sw (%x6,%z7.d,uxtw #2)[4byte] %p2/z -> %z4.d +c5292906 : ldff1sw z6.d, p2/Z, [x8, z9.d, UXTW #2] : ldff1sw (%x8,%z9.d,uxtw #2)[4byte] %p2/z -> %z6.d +c52b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, UXTW #2] : ldff1sw (%x10,%z11.d,uxtw #2)[4byte] %p3/z -> %z8.d +c52d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, UXTW #2] : ldff1sw (%x11,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d +c52f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, UXTW #2] : ldff1sw (%x13,%z15.d,uxtw #2)[4byte] %p4/z -> %z12.d +c53131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, UXTW #2] : ldff1sw (%x15,%z17.d,uxtw #2)[4byte] %p4/z -> %z14.d +c5333630 : ldff1sw z16.d, p5/Z, [x17, z19.d, UXTW #2] : ldff1sw (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d +c5343671 : ldff1sw z17.d, p5/Z, [x19, z20.d, UXTW #2] : ldff1sw (%x19,%z20.d,uxtw #2)[4byte] %p5/z -> %z17.d +c53636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, UXTW #2] : ldff1sw (%x21,%z22.d,uxtw #2)[4byte] %p5/z -> %z19.d +c5383af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, UXTW #2] : ldff1sw (%x23,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d +c53a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, UXTW #2] : ldff1sw (%x24,%z26.d,uxtw #2)[4byte] %p6/z -> %z23.d +c53c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, UXTW #2] : ldff1sw (%x26,%z28.d,uxtw #2)[4byte] %p7/z -> %z25.d +c53e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, UXTW #2] : ldff1sw (%x28,%z30.d,uxtw #2)[4byte] %p7/z -> %z27.d +c53f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, UXTW #2] : ldff1sw (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d +c5602000 : ldff1sw z0.d, p0/Z, [x0, z0.d, SXTW #2] : ldff1sw (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d +c5652482 : ldff1sw z2.d, p1/Z, [x4, z5.d, SXTW #2] : ldff1sw (%x4,%z5.d,sxtw #2)[4byte] %p1/z -> %z2.d +c56728c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, SXTW #2] : ldff1sw (%x6,%z7.d,sxtw #2)[4byte] %p2/z -> %z4.d +c5692906 : ldff1sw z6.d, p2/Z, [x8, z9.d, SXTW #2] : ldff1sw (%x8,%z9.d,sxtw #2)[4byte] 
%p2/z -> %z6.d +c56b2d48 : ldff1sw z8.d, p3/Z, [x10, z11.d, SXTW #2] : ldff1sw (%x10,%z11.d,sxtw #2)[4byte] %p3/z -> %z8.d +c56d2d6a : ldff1sw z10.d, p3/Z, [x11, z13.d, SXTW #2] : ldff1sw (%x11,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d +c56f31ac : ldff1sw z12.d, p4/Z, [x13, z15.d, SXTW #2] : ldff1sw (%x13,%z15.d,sxtw #2)[4byte] %p4/z -> %z12.d +c57131ee : ldff1sw z14.d, p4/Z, [x15, z17.d, SXTW #2] : ldff1sw (%x15,%z17.d,sxtw #2)[4byte] %p4/z -> %z14.d +c5733630 : ldff1sw z16.d, p5/Z, [x17, z19.d, SXTW #2] : ldff1sw (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d +c5743671 : ldff1sw z17.d, p5/Z, [x19, z20.d, SXTW #2] : ldff1sw (%x19,%z20.d,sxtw #2)[4byte] %p5/z -> %z17.d +c57636b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, SXTW #2] : ldff1sw (%x21,%z22.d,sxtw #2)[4byte] %p5/z -> %z19.d +c5783af5 : ldff1sw z21.d, p6/Z, [x23, z24.d, SXTW #2] : ldff1sw (%x23,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d +c57a3b17 : ldff1sw z23.d, p6/Z, [x24, z26.d, SXTW #2] : ldff1sw (%x24,%z26.d,sxtw #2)[4byte] %p6/z -> %z23.d +c57c3f59 : ldff1sw z25.d, p7/Z, [x26, z28.d, SXTW #2] : ldff1sw (%x26,%z28.d,sxtw #2)[4byte] %p7/z -> %z25.d +c57e3f9b : ldff1sw z27.d, p7/Z, [x28, z30.d, SXTW #2] : ldff1sw (%x28,%z30.d,sxtw #2)[4byte] %p7/z -> %z27.d +c57f3fff : ldff1sw z31.d, p7/Z, [sp, z31.d, SXTW #2] : ldff1sw (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [.D{, #}] (LDFF1SW-Z.P.AI-D) -c520a000 : ldff1sw z0.d, p0/Z, [z0.d, #0] : ldff1sw (%z0.d)[16byte] %p0/z -> %z0.d -c522a482 : ldff1sw z2.d, p1/Z, [z4.d, #8] : ldff1sw +0x08(%z4.d)[16byte] %p1/z -> %z2.d -c524a8c4 : ldff1sw z4.d, p2/Z, [z6.d, #16] : ldff1sw +0x10(%z6.d)[16byte] %p2/z -> %z4.d -c526a906 : ldff1sw z6.d, p2/Z, [z8.d, #24] : ldff1sw +0x18(%z8.d)[16byte] %p2/z -> %z6.d -c528ad48 : ldff1sw z8.d, p3/Z, [z10.d, #32] : ldff1sw +0x20(%z10.d)[16byte] %p3/z -> %z8.d -c52aad8a : ldff1sw z10.d, p3/Z, [z12.d, #40] : ldff1sw +0x28(%z12.d)[16byte] %p3/z -> %z10.d -c52cb1cc : ldff1sw z12.d, p4/Z, [z14.d, #48] : ldff1sw +0x30(%z14.d)[16byte] 
%p4/z -> %z12.d -c52eb20e : ldff1sw z14.d, p4/Z, [z16.d, #56] : ldff1sw +0x38(%z16.d)[16byte] %p4/z -> %z14.d -c530b650 : ldff1sw z16.d, p5/Z, [z18.d, #64] : ldff1sw +0x40(%z18.d)[16byte] %p5/z -> %z16.d -c531b671 : ldff1sw z17.d, p5/Z, [z19.d, #68] : ldff1sw +0x44(%z19.d)[16byte] %p5/z -> %z17.d -c533b6b3 : ldff1sw z19.d, p5/Z, [z21.d, #76] : ldff1sw +0x4c(%z21.d)[16byte] %p5/z -> %z19.d -c535baf5 : ldff1sw z21.d, p6/Z, [z23.d, #84] : ldff1sw +0x54(%z23.d)[16byte] %p6/z -> %z21.d -c537bb37 : ldff1sw z23.d, p6/Z, [z25.d, #92] : ldff1sw +0x5c(%z25.d)[16byte] %p6/z -> %z23.d -c539bf79 : ldff1sw z25.d, p7/Z, [z27.d, #100] : ldff1sw +0x64(%z27.d)[16byte] %p7/z -> %z25.d -c53bbfbb : ldff1sw z27.d, p7/Z, [z29.d, #108] : ldff1sw +0x6c(%z29.d)[16byte] %p7/z -> %z27.d -c53fbfff : ldff1sw z31.d, p7/Z, [z31.d, #124] : ldff1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +c520a000 : ldff1sw z0.d, p0/Z, [z0.d, #0] : ldff1sw (%z0.d)[4byte] %p0/z -> %z0.d +c522a482 : ldff1sw z2.d, p1/Z, [z4.d, #8] : ldff1sw +0x08(%z4.d)[4byte] %p1/z -> %z2.d +c524a8c4 : ldff1sw z4.d, p2/Z, [z6.d, #16] : ldff1sw +0x10(%z6.d)[4byte] %p2/z -> %z4.d +c526a906 : ldff1sw z6.d, p2/Z, [z8.d, #24] : ldff1sw +0x18(%z8.d)[4byte] %p2/z -> %z6.d +c528ad48 : ldff1sw z8.d, p3/Z, [z10.d, #32] : ldff1sw +0x20(%z10.d)[4byte] %p3/z -> %z8.d +c52aad8a : ldff1sw z10.d, p3/Z, [z12.d, #40] : ldff1sw +0x28(%z12.d)[4byte] %p3/z -> %z10.d +c52cb1cc : ldff1sw z12.d, p4/Z, [z14.d, #48] : ldff1sw +0x30(%z14.d)[4byte] %p4/z -> %z12.d +c52eb20e : ldff1sw z14.d, p4/Z, [z16.d, #56] : ldff1sw +0x38(%z16.d)[4byte] %p4/z -> %z14.d +c530b650 : ldff1sw z16.d, p5/Z, [z18.d, #64] : ldff1sw +0x40(%z18.d)[4byte] %p5/z -> %z16.d +c531b671 : ldff1sw z17.d, p5/Z, [z19.d, #68] : ldff1sw +0x44(%z19.d)[4byte] %p5/z -> %z17.d +c533b6b3 : ldff1sw z19.d, p5/Z, [z21.d, #76] : ldff1sw +0x4c(%z21.d)[4byte] %p5/z -> %z19.d +c535baf5 : ldff1sw z21.d, p6/Z, [z23.d, #84] : ldff1sw +0x54(%z23.d)[4byte] %p6/z -> %z21.d +c537bb37 : ldff1sw z23.d, p6/Z, [z25.d, 
#92] : ldff1sw +0x5c(%z25.d)[4byte] %p6/z -> %z23.d +c539bf79 : ldff1sw z25.d, p7/Z, [z27.d, #100] : ldff1sw +0x64(%z27.d)[4byte] %p7/z -> %z25.d +c53bbfbb : ldff1sw z27.d, p7/Z, [z29.d, #108] : ldff1sw +0x6c(%z29.d)[4byte] %p7/z -> %z27.d +c53fbfff : ldff1sw z31.d, p7/Z, [z31.d, #124] : ldff1sw +0x7c(%z31.d)[4byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [, .D] (LDFF1SW-Z.P.BZ-D.64.unscaled) -c540a000 : ldff1sw z0.d, p0/Z, [x0, z0.d] : ldff1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d -c545a482 : ldff1sw z2.d, p1/Z, [x4, z5.d] : ldff1sw (%x4,%z5.d)[16byte] %p1/z -> %z2.d -c547a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d] : ldff1sw (%x6,%z7.d)[16byte] %p2/z -> %z4.d -c549a906 : ldff1sw z6.d, p2/Z, [x8, z9.d] : ldff1sw (%x8,%z9.d)[16byte] %p2/z -> %z6.d -c54bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d] : ldff1sw (%x10,%z11.d)[16byte] %p3/z -> %z8.d -c54dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d] : ldff1sw (%x11,%z13.d)[16byte] %p3/z -> %z10.d -c54fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d] : ldff1sw (%x13,%z15.d)[16byte] %p4/z -> %z12.d -c551b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d] : ldff1sw (%x15,%z17.d)[16byte] %p4/z -> %z14.d -c553b630 : ldff1sw z16.d, p5/Z, [x17, z19.d] : ldff1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d -c554b671 : ldff1sw z17.d, p5/Z, [x19, z20.d] : ldff1sw (%x19,%z20.d)[16byte] %p5/z -> %z17.d -c556b6b3 : ldff1sw z19.d, p5/Z, [x21, z22.d] : ldff1sw (%x21,%z22.d)[16byte] %p5/z -> %z19.d -c558baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d] : ldff1sw (%x23,%z24.d)[16byte] %p6/z -> %z21.d -c55abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d] : ldff1sw (%x24,%z26.d)[16byte] %p6/z -> %z23.d -c55cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d] : ldff1sw (%x26,%z28.d)[16byte] %p7/z -> %z25.d -c55ebf9b : ldff1sw z27.d, p7/Z, [x28, z30.d] : ldff1sw (%x28,%z30.d)[16byte] %p7/z -> %z27.d -c55fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d] : ldff1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d +c540a000 : ldff1sw z0.d, p0/Z, [x0, z0.d] : ldff1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c545a482 : ldff1sw z2.d, p1/Z, [x4, z5.d] 
: ldff1sw (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c547a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d] : ldff1sw (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c549a906 : ldff1sw z6.d, p2/Z, [x8, z9.d] : ldff1sw (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c54bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d] : ldff1sw (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c54dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d] : ldff1sw (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c54fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d] : ldff1sw (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c551b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d] : ldff1sw (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c553b630 : ldff1sw z16.d, p5/Z, [x17, z19.d] : ldff1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c554b671 : ldff1sw z17.d, p5/Z, [x19, z20.d] : ldff1sw (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c556b6b3 : ldff1sw z19.d, p5/Z, [x21, z22.d] : ldff1sw (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c558baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d] : ldff1sw (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c55abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d] : ldff1sw (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c55cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d] : ldff1sw (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c55ebf9b : ldff1sw z27.d, p7/Z, [x28, z30.d] : ldff1sw (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c55fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d] : ldff1sw (%sp,%z31.d)[4byte] %p7/z -> %z31.d # LDFF1SW { .D }, /Z, [, .D, LSL #2] (LDFF1SW-Z.P.BZ-D.64.scaled) -c560a000 : ldff1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d -c565a482 : ldff1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1sw (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d -c567a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1sw (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d -c569a906 : ldff1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1sw (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d -c56bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1sw (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d -c56dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1sw (%x11,%z13.d,lsl 
#2)[16byte] %p3/z -> %z10.d -c56fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1sw (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d -c571b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1sw (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d -c573b630 : ldff1sw z16.d, p5/Z, [x17, z19.d, LSL #2] : ldff1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d -c574b671 : ldff1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1sw (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d -c576b6b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, LSL #2] : ldff1sw (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d -c578baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ldff1sw (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d -c57abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1sw (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d -c57cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1sw (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d -c57ebf9b : ldff1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1sw (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d -c57fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d +c560a000 : ldff1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1sw (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d +c565a482 : ldff1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1sw (%x4,%z5.d,lsl #2)[4byte] %p1/z -> %z2.d +c567a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1sw (%x6,%z7.d,lsl #2)[4byte] %p2/z -> %z4.d +c569a906 : ldff1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1sw (%x8,%z9.d,lsl #2)[4byte] %p2/z -> %z6.d +c56bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1sw (%x10,%z11.d,lsl #2)[4byte] %p3/z -> %z8.d +c56dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1sw (%x11,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d +c56fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1sw (%x13,%z15.d,lsl #2)[4byte] %p4/z -> %z12.d +c571b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1sw (%x15,%z17.d,lsl #2)[4byte] %p4/z -> %z14.d +c573b630 : ldff1sw z16.d, p5/Z, [x17, z19.d, 
LSL #2] : ldff1sw (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d +c574b671 : ldff1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1sw (%x19,%z20.d,lsl #2)[4byte] %p5/z -> %z17.d +c576b6b3 : ldff1sw z19.d, p5/Z, [x21, z22.d, LSL #2] : ldff1sw (%x21,%z22.d,lsl #2)[4byte] %p5/z -> %z19.d +c578baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ldff1sw (%x23,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d +c57abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1sw (%x24,%z26.d,lsl #2)[4byte] %p6/z -> %z23.d +c57cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1sw (%x26,%z28.d,lsl #2)[4byte] %p7/z -> %z25.d +c57ebf9b : ldff1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1sw (%x28,%z30.d,lsl #2)[4byte] %p7/z -> %z27.d +c57fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1sw (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d # LDFF1W { .S }, /Z, [, .S, ] (LDFF1W-Z.P.BZ-S.x32.unscaled) -85006000 : ldff1w z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1w (%x0,%z0.s,uxtw)[32byte] %p0/z -> %z0.s -85056482 : ldff1w z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1w (%x4,%z5.s,uxtw)[32byte] %p1/z -> %z2.s -850768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1w (%x6,%z7.s,uxtw)[32byte] %p2/z -> %z4.s -85096906 : ldff1w z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1w (%x8,%z9.s,uxtw)[32byte] %p2/z -> %z6.s -850b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1w (%x10,%z11.s,uxtw)[32byte] %p3/z -> %z8.s -850d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1w (%x11,%z13.s,uxtw)[32byte] %p3/z -> %z10.s -850f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1w (%x13,%z15.s,uxtw)[32byte] %p4/z -> %z12.s -851171ee : ldff1w z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1w (%x15,%z17.s,uxtw)[32byte] %p4/z -> %z14.s -85137630 : ldff1w z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1w (%x17,%z19.s,uxtw)[32byte] %p5/z -> %z16.s -85147671 : ldff1w z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1w (%x19,%z20.s,uxtw)[32byte] %p5/z -> %z17.s -851676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1w (%x21,%z22.s,uxtw)[32byte] %p5/z -> %z19.s -85187af5 : 
ldff1w z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1w (%x23,%z24.s,uxtw)[32byte] %p6/z -> %z21.s -851a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1w (%x24,%z26.s,uxtw)[32byte] %p6/z -> %z23.s -851c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1w (%x26,%z28.s,uxtw)[32byte] %p7/z -> %z25.s -851e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1w (%x28,%z30.s,uxtw)[32byte] %p7/z -> %z27.s -851f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1w (%sp,%z31.s,uxtw)[32byte] %p7/z -> %z31.s -85406000 : ldff1w z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1w (%x0,%z0.s,sxtw)[32byte] %p0/z -> %z0.s -85456482 : ldff1w z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1w (%x4,%z5.s,sxtw)[32byte] %p1/z -> %z2.s -854768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1w (%x6,%z7.s,sxtw)[32byte] %p2/z -> %z4.s -85496906 : ldff1w z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1w (%x8,%z9.s,sxtw)[32byte] %p2/z -> %z6.s -854b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1w (%x10,%z11.s,sxtw)[32byte] %p3/z -> %z8.s -854d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1w (%x11,%z13.s,sxtw)[32byte] %p3/z -> %z10.s -854f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1w (%x13,%z15.s,sxtw)[32byte] %p4/z -> %z12.s -855171ee : ldff1w z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1w (%x15,%z17.s,sxtw)[32byte] %p4/z -> %z14.s -85537630 : ldff1w z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1w (%x17,%z19.s,sxtw)[32byte] %p5/z -> %z16.s -85547671 : ldff1w z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1w (%x19,%z20.s,sxtw)[32byte] %p5/z -> %z17.s -855676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1w (%x21,%z22.s,sxtw)[32byte] %p5/z -> %z19.s -85587af5 : ldff1w z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1w (%x23,%z24.s,sxtw)[32byte] %p6/z -> %z21.s -855a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1w (%x24,%z26.s,sxtw)[32byte] %p6/z -> %z23.s -855c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1w (%x26,%z28.s,sxtw)[32byte] %p7/z -> %z25.s -855e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1w 
(%x28,%z30.s,sxtw)[32byte] %p7/z -> %z27.s -855f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1w (%sp,%z31.s,sxtw)[32byte] %p7/z -> %z31.s +85006000 : ldff1w z0.s, p0/Z, [x0, z0.s, UXTW] : ldff1w (%x0,%z0.s,uxtw)[4byte] %p0/z -> %z0.s +85056482 : ldff1w z2.s, p1/Z, [x4, z5.s, UXTW] : ldff1w (%x4,%z5.s,uxtw)[4byte] %p1/z -> %z2.s +850768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, UXTW] : ldff1w (%x6,%z7.s,uxtw)[4byte] %p2/z -> %z4.s +85096906 : ldff1w z6.s, p2/Z, [x8, z9.s, UXTW] : ldff1w (%x8,%z9.s,uxtw)[4byte] %p2/z -> %z6.s +850b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, UXTW] : ldff1w (%x10,%z11.s,uxtw)[4byte] %p3/z -> %z8.s +850d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, UXTW] : ldff1w (%x11,%z13.s,uxtw)[4byte] %p3/z -> %z10.s +850f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, UXTW] : ldff1w (%x13,%z15.s,uxtw)[4byte] %p4/z -> %z12.s +851171ee : ldff1w z14.s, p4/Z, [x15, z17.s, UXTW] : ldff1w (%x15,%z17.s,uxtw)[4byte] %p4/z -> %z14.s +85137630 : ldff1w z16.s, p5/Z, [x17, z19.s, UXTW] : ldff1w (%x17,%z19.s,uxtw)[4byte] %p5/z -> %z16.s +85147671 : ldff1w z17.s, p5/Z, [x19, z20.s, UXTW] : ldff1w (%x19,%z20.s,uxtw)[4byte] %p5/z -> %z17.s +851676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, UXTW] : ldff1w (%x21,%z22.s,uxtw)[4byte] %p5/z -> %z19.s +85187af5 : ldff1w z21.s, p6/Z, [x23, z24.s, UXTW] : ldff1w (%x23,%z24.s,uxtw)[4byte] %p6/z -> %z21.s +851a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, UXTW] : ldff1w (%x24,%z26.s,uxtw)[4byte] %p6/z -> %z23.s +851c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, UXTW] : ldff1w (%x26,%z28.s,uxtw)[4byte] %p7/z -> %z25.s +851e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, UXTW] : ldff1w (%x28,%z30.s,uxtw)[4byte] %p7/z -> %z27.s +851f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, UXTW] : ldff1w (%sp,%z31.s,uxtw)[4byte] %p7/z -> %z31.s +85406000 : ldff1w z0.s, p0/Z, [x0, z0.s, SXTW] : ldff1w (%x0,%z0.s,sxtw)[4byte] %p0/z -> %z0.s +85456482 : ldff1w z2.s, p1/Z, [x4, z5.s, SXTW] : ldff1w (%x4,%z5.s,sxtw)[4byte] %p1/z -> %z2.s +854768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, SXTW] : ldff1w 
(%x6,%z7.s,sxtw)[4byte] %p2/z -> %z4.s +85496906 : ldff1w z6.s, p2/Z, [x8, z9.s, SXTW] : ldff1w (%x8,%z9.s,sxtw)[4byte] %p2/z -> %z6.s +854b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, SXTW] : ldff1w (%x10,%z11.s,sxtw)[4byte] %p3/z -> %z8.s +854d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, SXTW] : ldff1w (%x11,%z13.s,sxtw)[4byte] %p3/z -> %z10.s +854f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, SXTW] : ldff1w (%x13,%z15.s,sxtw)[4byte] %p4/z -> %z12.s +855171ee : ldff1w z14.s, p4/Z, [x15, z17.s, SXTW] : ldff1w (%x15,%z17.s,sxtw)[4byte] %p4/z -> %z14.s +85537630 : ldff1w z16.s, p5/Z, [x17, z19.s, SXTW] : ldff1w (%x17,%z19.s,sxtw)[4byte] %p5/z -> %z16.s +85547671 : ldff1w z17.s, p5/Z, [x19, z20.s, SXTW] : ldff1w (%x19,%z20.s,sxtw)[4byte] %p5/z -> %z17.s +855676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, SXTW] : ldff1w (%x21,%z22.s,sxtw)[4byte] %p5/z -> %z19.s +85587af5 : ldff1w z21.s, p6/Z, [x23, z24.s, SXTW] : ldff1w (%x23,%z24.s,sxtw)[4byte] %p6/z -> %z21.s +855a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, SXTW] : ldff1w (%x24,%z26.s,sxtw)[4byte] %p6/z -> %z23.s +855c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, SXTW] : ldff1w (%x26,%z28.s,sxtw)[4byte] %p7/z -> %z25.s +855e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, SXTW] : ldff1w (%x28,%z30.s,sxtw)[4byte] %p7/z -> %z27.s +855f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, SXTW] : ldff1w (%sp,%z31.s,sxtw)[4byte] %p7/z -> %z31.s # LDFF1W { .S }, /Z, [, .S, #2] (LDFF1W-Z.P.BZ-S.x32.scaled) -85206000 : ldff1w z0.s, p0/Z, [x0, z0.s, UXTW #2] : ldff1w (%x0,%z0.s,uxtw #2)[32byte] %p0/z -> %z0.s -85256482 : ldff1w z2.s, p1/Z, [x4, z5.s, UXTW #2] : ldff1w (%x4,%z5.s,uxtw #2)[32byte] %p1/z -> %z2.s -852768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, UXTW #2] : ldff1w (%x6,%z7.s,uxtw #2)[32byte] %p2/z -> %z4.s -85296906 : ldff1w z6.s, p2/Z, [x8, z9.s, UXTW #2] : ldff1w (%x8,%z9.s,uxtw #2)[32byte] %p2/z -> %z6.s -852b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, UXTW #2] : ldff1w (%x10,%z11.s,uxtw #2)[32byte] %p3/z -> %z8.s -852d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, UXTW #2] : ldff1w 
(%x11,%z13.s,uxtw #2)[32byte] %p3/z -> %z10.s -852f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, UXTW #2] : ldff1w (%x13,%z15.s,uxtw #2)[32byte] %p4/z -> %z12.s -853171ee : ldff1w z14.s, p4/Z, [x15, z17.s, UXTW #2] : ldff1w (%x15,%z17.s,uxtw #2)[32byte] %p4/z -> %z14.s -85337630 : ldff1w z16.s, p5/Z, [x17, z19.s, UXTW #2] : ldff1w (%x17,%z19.s,uxtw #2)[32byte] %p5/z -> %z16.s -85347671 : ldff1w z17.s, p5/Z, [x19, z20.s, UXTW #2] : ldff1w (%x19,%z20.s,uxtw #2)[32byte] %p5/z -> %z17.s -853676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, UXTW #2] : ldff1w (%x21,%z22.s,uxtw #2)[32byte] %p5/z -> %z19.s -85387af5 : ldff1w z21.s, p6/Z, [x23, z24.s, UXTW #2] : ldff1w (%x23,%z24.s,uxtw #2)[32byte] %p6/z -> %z21.s -853a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, UXTW #2] : ldff1w (%x24,%z26.s,uxtw #2)[32byte] %p6/z -> %z23.s -853c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, UXTW #2] : ldff1w (%x26,%z28.s,uxtw #2)[32byte] %p7/z -> %z25.s -853e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, UXTW #2] : ldff1w (%x28,%z30.s,uxtw #2)[32byte] %p7/z -> %z27.s -853f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, UXTW #2] : ldff1w (%sp,%z31.s,uxtw #2)[32byte] %p7/z -> %z31.s -85606000 : ldff1w z0.s, p0/Z, [x0, z0.s, SXTW #2] : ldff1w (%x0,%z0.s,sxtw #2)[32byte] %p0/z -> %z0.s -85656482 : ldff1w z2.s, p1/Z, [x4, z5.s, SXTW #2] : ldff1w (%x4,%z5.s,sxtw #2)[32byte] %p1/z -> %z2.s -856768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, SXTW #2] : ldff1w (%x6,%z7.s,sxtw #2)[32byte] %p2/z -> %z4.s -85696906 : ldff1w z6.s, p2/Z, [x8, z9.s, SXTW #2] : ldff1w (%x8,%z9.s,sxtw #2)[32byte] %p2/z -> %z6.s -856b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, SXTW #2] : ldff1w (%x10,%z11.s,sxtw #2)[32byte] %p3/z -> %z8.s -856d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, SXTW #2] : ldff1w (%x11,%z13.s,sxtw #2)[32byte] %p3/z -> %z10.s -856f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, SXTW #2] : ldff1w (%x13,%z15.s,sxtw #2)[32byte] %p4/z -> %z12.s -857171ee : ldff1w z14.s, p4/Z, [x15, z17.s, SXTW #2] : ldff1w (%x15,%z17.s,sxtw #2)[32byte] %p4/z -> %z14.s -85737630 : ldff1w 
z16.s, p5/Z, [x17, z19.s, SXTW #2] : ldff1w (%x17,%z19.s,sxtw #2)[32byte] %p5/z -> %z16.s -85747671 : ldff1w z17.s, p5/Z, [x19, z20.s, SXTW #2] : ldff1w (%x19,%z20.s,sxtw #2)[32byte] %p5/z -> %z17.s -857676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, SXTW #2] : ldff1w (%x21,%z22.s,sxtw #2)[32byte] %p5/z -> %z19.s -85787af5 : ldff1w z21.s, p6/Z, [x23, z24.s, SXTW #2] : ldff1w (%x23,%z24.s,sxtw #2)[32byte] %p6/z -> %z21.s -857a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, SXTW #2] : ldff1w (%x24,%z26.s,sxtw #2)[32byte] %p6/z -> %z23.s -857c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, SXTW #2] : ldff1w (%x26,%z28.s,sxtw #2)[32byte] %p7/z -> %z25.s -857e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, SXTW #2] : ldff1w (%x28,%z30.s,sxtw #2)[32byte] %p7/z -> %z27.s -857f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, SXTW #2] : ldff1w (%sp,%z31.s,sxtw #2)[32byte] %p7/z -> %z31.s +85206000 : ldff1w z0.s, p0/Z, [x0, z0.s, UXTW #2] : ldff1w (%x0,%z0.s,uxtw #2)[4byte] %p0/z -> %z0.s +85256482 : ldff1w z2.s, p1/Z, [x4, z5.s, UXTW #2] : ldff1w (%x4,%z5.s,uxtw #2)[4byte] %p1/z -> %z2.s +852768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, UXTW #2] : ldff1w (%x6,%z7.s,uxtw #2)[4byte] %p2/z -> %z4.s +85296906 : ldff1w z6.s, p2/Z, [x8, z9.s, UXTW #2] : ldff1w (%x8,%z9.s,uxtw #2)[4byte] %p2/z -> %z6.s +852b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, UXTW #2] : ldff1w (%x10,%z11.s,uxtw #2)[4byte] %p3/z -> %z8.s +852d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, UXTW #2] : ldff1w (%x11,%z13.s,uxtw #2)[4byte] %p3/z -> %z10.s +852f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, UXTW #2] : ldff1w (%x13,%z15.s,uxtw #2)[4byte] %p4/z -> %z12.s +853171ee : ldff1w z14.s, p4/Z, [x15, z17.s, UXTW #2] : ldff1w (%x15,%z17.s,uxtw #2)[4byte] %p4/z -> %z14.s +85337630 : ldff1w z16.s, p5/Z, [x17, z19.s, UXTW #2] : ldff1w (%x17,%z19.s,uxtw #2)[4byte] %p5/z -> %z16.s +85347671 : ldff1w z17.s, p5/Z, [x19, z20.s, UXTW #2] : ldff1w (%x19,%z20.s,uxtw #2)[4byte] %p5/z -> %z17.s +853676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, UXTW #2] : ldff1w (%x21,%z22.s,uxtw #2)[4byte] 
%p5/z -> %z19.s +85387af5 : ldff1w z21.s, p6/Z, [x23, z24.s, UXTW #2] : ldff1w (%x23,%z24.s,uxtw #2)[4byte] %p6/z -> %z21.s +853a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, UXTW #2] : ldff1w (%x24,%z26.s,uxtw #2)[4byte] %p6/z -> %z23.s +853c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, UXTW #2] : ldff1w (%x26,%z28.s,uxtw #2)[4byte] %p7/z -> %z25.s +853e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, UXTW #2] : ldff1w (%x28,%z30.s,uxtw #2)[4byte] %p7/z -> %z27.s +853f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, UXTW #2] : ldff1w (%sp,%z31.s,uxtw #2)[4byte] %p7/z -> %z31.s +85606000 : ldff1w z0.s, p0/Z, [x0, z0.s, SXTW #2] : ldff1w (%x0,%z0.s,sxtw #2)[4byte] %p0/z -> %z0.s +85656482 : ldff1w z2.s, p1/Z, [x4, z5.s, SXTW #2] : ldff1w (%x4,%z5.s,sxtw #2)[4byte] %p1/z -> %z2.s +856768c4 : ldff1w z4.s, p2/Z, [x6, z7.s, SXTW #2] : ldff1w (%x6,%z7.s,sxtw #2)[4byte] %p2/z -> %z4.s +85696906 : ldff1w z6.s, p2/Z, [x8, z9.s, SXTW #2] : ldff1w (%x8,%z9.s,sxtw #2)[4byte] %p2/z -> %z6.s +856b6d48 : ldff1w z8.s, p3/Z, [x10, z11.s, SXTW #2] : ldff1w (%x10,%z11.s,sxtw #2)[4byte] %p3/z -> %z8.s +856d6d6a : ldff1w z10.s, p3/Z, [x11, z13.s, SXTW #2] : ldff1w (%x11,%z13.s,sxtw #2)[4byte] %p3/z -> %z10.s +856f71ac : ldff1w z12.s, p4/Z, [x13, z15.s, SXTW #2] : ldff1w (%x13,%z15.s,sxtw #2)[4byte] %p4/z -> %z12.s +857171ee : ldff1w z14.s, p4/Z, [x15, z17.s, SXTW #2] : ldff1w (%x15,%z17.s,sxtw #2)[4byte] %p4/z -> %z14.s +85737630 : ldff1w z16.s, p5/Z, [x17, z19.s, SXTW #2] : ldff1w (%x17,%z19.s,sxtw #2)[4byte] %p5/z -> %z16.s +85747671 : ldff1w z17.s, p5/Z, [x19, z20.s, SXTW #2] : ldff1w (%x19,%z20.s,sxtw #2)[4byte] %p5/z -> %z17.s +857676b3 : ldff1w z19.s, p5/Z, [x21, z22.s, SXTW #2] : ldff1w (%x21,%z22.s,sxtw #2)[4byte] %p5/z -> %z19.s +85787af5 : ldff1w z21.s, p6/Z, [x23, z24.s, SXTW #2] : ldff1w (%x23,%z24.s,sxtw #2)[4byte] %p6/z -> %z21.s +857a7b17 : ldff1w z23.s, p6/Z, [x24, z26.s, SXTW #2] : ldff1w (%x24,%z26.s,sxtw #2)[4byte] %p6/z -> %z23.s +857c7f59 : ldff1w z25.s, p7/Z, [x26, z28.s, SXTW #2] : ldff1w 
(%x26,%z28.s,sxtw #2)[4byte] %p7/z -> %z25.s +857e7f9b : ldff1w z27.s, p7/Z, [x28, z30.s, SXTW #2] : ldff1w (%x28,%z30.s,sxtw #2)[4byte] %p7/z -> %z27.s +857f7fff : ldff1w z31.s, p7/Z, [sp, z31.s, SXTW #2] : ldff1w (%sp,%z31.s,sxtw #2)[4byte] %p7/z -> %z31.s # LDFF1W { .S }, /Z, [.S{, #}] (LDFF1W-Z.P.AI-S) -8520e000 : ldff1w z0.s, p0/Z, [z0.s, #0] : ldff1w (%z0.s)[32byte] %p0/z -> %z0.s -8522e482 : ldff1w z2.s, p1/Z, [z4.s, #8] : ldff1w +0x08(%z4.s)[32byte] %p1/z -> %z2.s -8524e8c4 : ldff1w z4.s, p2/Z, [z6.s, #16] : ldff1w +0x10(%z6.s)[32byte] %p2/z -> %z4.s -8526e906 : ldff1w z6.s, p2/Z, [z8.s, #24] : ldff1w +0x18(%z8.s)[32byte] %p2/z -> %z6.s -8528ed48 : ldff1w z8.s, p3/Z, [z10.s, #32] : ldff1w +0x20(%z10.s)[32byte] %p3/z -> %z8.s -852aed8a : ldff1w z10.s, p3/Z, [z12.s, #40] : ldff1w +0x28(%z12.s)[32byte] %p3/z -> %z10.s -852cf1cc : ldff1w z12.s, p4/Z, [z14.s, #48] : ldff1w +0x30(%z14.s)[32byte] %p4/z -> %z12.s -852ef20e : ldff1w z14.s, p4/Z, [z16.s, #56] : ldff1w +0x38(%z16.s)[32byte] %p4/z -> %z14.s -8530f650 : ldff1w z16.s, p5/Z, [z18.s, #64] : ldff1w +0x40(%z18.s)[32byte] %p5/z -> %z16.s -8531f671 : ldff1w z17.s, p5/Z, [z19.s, #68] : ldff1w +0x44(%z19.s)[32byte] %p5/z -> %z17.s -8533f6b3 : ldff1w z19.s, p5/Z, [z21.s, #76] : ldff1w +0x4c(%z21.s)[32byte] %p5/z -> %z19.s -8535faf5 : ldff1w z21.s, p6/Z, [z23.s, #84] : ldff1w +0x54(%z23.s)[32byte] %p6/z -> %z21.s -8537fb37 : ldff1w z23.s, p6/Z, [z25.s, #92] : ldff1w +0x5c(%z25.s)[32byte] %p6/z -> %z23.s -8539ff79 : ldff1w z25.s, p7/Z, [z27.s, #100] : ldff1w +0x64(%z27.s)[32byte] %p7/z -> %z25.s -853bffbb : ldff1w z27.s, p7/Z, [z29.s, #108] : ldff1w +0x6c(%z29.s)[32byte] %p7/z -> %z27.s -853fffff : ldff1w z31.s, p7/Z, [z31.s, #124] : ldff1w +0x7c(%z31.s)[32byte] %p7/z -> %z31.s +8520e000 : ldff1w z0.s, p0/Z, [z0.s, #0] : ldff1w (%z0.s)[4byte] %p0/z -> %z0.s +8522e482 : ldff1w z2.s, p1/Z, [z4.s, #8] : ldff1w +0x08(%z4.s)[4byte] %p1/z -> %z2.s +8524e8c4 : ldff1w z4.s, p2/Z, [z6.s, #16] : ldff1w +0x10(%z6.s)[4byte] 
%p2/z -> %z4.s +8526e906 : ldff1w z6.s, p2/Z, [z8.s, #24] : ldff1w +0x18(%z8.s)[4byte] %p2/z -> %z6.s +8528ed48 : ldff1w z8.s, p3/Z, [z10.s, #32] : ldff1w +0x20(%z10.s)[4byte] %p3/z -> %z8.s +852aed8a : ldff1w z10.s, p3/Z, [z12.s, #40] : ldff1w +0x28(%z12.s)[4byte] %p3/z -> %z10.s +852cf1cc : ldff1w z12.s, p4/Z, [z14.s, #48] : ldff1w +0x30(%z14.s)[4byte] %p4/z -> %z12.s +852ef20e : ldff1w z14.s, p4/Z, [z16.s, #56] : ldff1w +0x38(%z16.s)[4byte] %p4/z -> %z14.s +8530f650 : ldff1w z16.s, p5/Z, [z18.s, #64] : ldff1w +0x40(%z18.s)[4byte] %p5/z -> %z16.s +8531f671 : ldff1w z17.s, p5/Z, [z19.s, #68] : ldff1w +0x44(%z19.s)[4byte] %p5/z -> %z17.s +8533f6b3 : ldff1w z19.s, p5/Z, [z21.s, #76] : ldff1w +0x4c(%z21.s)[4byte] %p5/z -> %z19.s +8535faf5 : ldff1w z21.s, p6/Z, [z23.s, #84] : ldff1w +0x54(%z23.s)[4byte] %p6/z -> %z21.s +8537fb37 : ldff1w z23.s, p6/Z, [z25.s, #92] : ldff1w +0x5c(%z25.s)[4byte] %p6/z -> %z23.s +8539ff79 : ldff1w z25.s, p7/Z, [z27.s, #100] : ldff1w +0x64(%z27.s)[4byte] %p7/z -> %z25.s +853bffbb : ldff1w z27.s, p7/Z, [z29.s, #108] : ldff1w +0x6c(%z29.s)[4byte] %p7/z -> %z27.s +853fffff : ldff1w z31.s, p7/Z, [z31.s, #124] : ldff1w +0x7c(%z31.s)[4byte] %p7/z -> %z31.s # LDFF1W { .S }, /Z, [{, , LSL #2}] (LDFF1W-Z.P.BR-U32) -a5406000 : ldff1w z0.s, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s -a5456482 : ldff1w z2.s, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[32byte] %p1/z -> %z2.s -a54768c4 : ldff1w z4.s, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[32byte] %p2/z -> %z4.s -a5496906 : ldff1w z6.s, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[32byte] %p2/z -> %z6.s -a54b6d48 : ldff1w z8.s, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[32byte] %p3/z -> %z8.s -a54c6d6a : ldff1w z10.s, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[32byte] %p3/z -> %z10.s -a54e71ac : ldff1w z12.s, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[32byte] %p4/z -> %z12.s -a55071ee : ldff1w z14.s, p4/Z, [x15, x16, LSL #2] : 
ldff1w (%x15,%x16,lsl #2)[32byte] %p4/z -> %z14.s -a5527630 : ldff1w z16.s, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s -a5547671 : ldff1w z17.s, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[32byte] %p5/z -> %z17.s -a55676b3 : ldff1w z19.s, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[32byte] %p5/z -> %z19.s -a5587af5 : ldff1w z21.s, p6/Z, [x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[32byte] %p6/z -> %z21.s -a5597b17 : ldff1w z23.s, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[32byte] %p6/z -> %z23.s -a55b7f59 : ldff1w z25.s, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[32byte] %p7/z -> %z25.s -a55d7f9b : ldff1w z27.s, p7/Z, [x28, x29, LSL #2] : ldff1w (%x28,%x29,lsl #2)[32byte] %p7/z -> %z27.s -a55e7fff : ldff1w z31.s, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s +a5406000 : ldff1w z0.s, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s +a5456482 : ldff1w z2.s, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s +a54768c4 : ldff1w z4.s, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s +a5496906 : ldff1w z6.s, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s +a54b6d48 : ldff1w z8.s, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s +a54c6d6a : ldff1w z10.s, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s +a54e71ac : ldff1w z12.s, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s +a55071ee : ldff1w z14.s, p4/Z, [x15, x16, LSL #2] : ldff1w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s +a5527630 : ldff1w z16.s, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s +a5547671 : ldff1w z17.s, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s +a55676b3 : ldff1w z19.s, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s +a5587af5 : ldff1w z21.s, p6/Z, 
[x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s +a5597b17 : ldff1w z23.s, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s +a55b7f59 : ldff1w z25.s, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s +a55d7f9b : ldff1w z27.s, p7/Z, [x28, x29, LSL #2] : ldff1w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s +a55e7fff : ldff1w z31.s, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s # LDFF1W { .D }, /Z, [{, , LSL #2}] (LDFF1W-Z.P.BR-U64) -a5606000 : ldff1w z0.d, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d -a5656482 : ldff1w z2.d, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d -a56768c4 : ldff1w z4.d, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d -a5696906 : ldff1w z6.d, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d -a56b6d48 : ldff1w z8.d, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d -a56c6d6a : ldff1w z10.d, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d -a56e71ac : ldff1w z12.d, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d -a57071ee : ldff1w z14.d, p4/Z, [x15, x16, LSL #2] : ldff1w (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d -a5727630 : ldff1w z16.d, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d -a5747671 : ldff1w z17.d, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d -a57676b3 : ldff1w z19.d, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d -a5787af5 : ldff1w z21.d, p6/Z, [x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d -a5797b17 : ldff1w z23.d, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d -a57b7f59 : ldff1w z25.d, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d -a57d7f9b : ldff1w z27.d, p7/Z, [x28, x29, LSL #2] 
: ldff1w (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d -a57e7fff : ldff1w z31.d, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d +a5606000 : ldff1w z0.d, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d +a5656482 : ldff1w z2.d, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.d +a56768c4 : ldff1w z4.d, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.d +a5696906 : ldff1w z6.d, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.d +a56b6d48 : ldff1w z8.d, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.d +a56c6d6a : ldff1w z10.d, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.d +a56e71ac : ldff1w z12.d, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.d +a57071ee : ldff1w z14.d, p4/Z, [x15, x16, LSL #2] : ldff1w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.d +a5727630 : ldff1w z16.d, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d +a5747671 : ldff1w z17.d, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.d +a57676b3 : ldff1w z19.d, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.d +a5787af5 : ldff1w z21.d, p6/Z, [x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.d +a5797b17 : ldff1w z23.d, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.d +a57b7f59 : ldff1w z25.d, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.d +a57d7f9b : ldff1w z27.d, p7/Z, [x28, x29, LSL #2] : ldff1w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.d +a57e7fff : ldff1w z31.d, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d # LDFF1W { .D }, /Z, [, .D, ] (LDFF1W-Z.P.BZ-D.x32.unscaled) -c5006000 : ldff1w z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1w (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d -c5056482 : ldff1w z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1w (%x4,%z5.d,uxtw)[16byte] 
%p1/z -> %z2.d -c50768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1w (%x6,%z7.d,uxtw)[16byte] %p2/z -> %z4.d -c5096906 : ldff1w z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1w (%x8,%z9.d,uxtw)[16byte] %p2/z -> %z6.d -c50b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1w (%x10,%z11.d,uxtw)[16byte] %p3/z -> %z8.d -c50d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1w (%x11,%z13.d,uxtw)[16byte] %p3/z -> %z10.d -c50f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1w (%x13,%z15.d,uxtw)[16byte] %p4/z -> %z12.d -c51171ee : ldff1w z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1w (%x15,%z17.d,uxtw)[16byte] %p4/z -> %z14.d -c5137630 : ldff1w z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1w (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d -c5147671 : ldff1w z17.d, p5/Z, [x19, z20.d, UXTW] : ldff1w (%x19,%z20.d,uxtw)[16byte] %p5/z -> %z17.d -c51676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1w (%x21,%z22.d,uxtw)[16byte] %p5/z -> %z19.d -c5187af5 : ldff1w z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1w (%x23,%z24.d,uxtw)[16byte] %p6/z -> %z21.d -c51a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1w (%x24,%z26.d,uxtw)[16byte] %p6/z -> %z23.d -c51c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1w (%x26,%z28.d,uxtw)[16byte] %p7/z -> %z25.d -c51e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1w (%x28,%z30.d,uxtw)[16byte] %p7/z -> %z27.d -c51f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1w (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d -c5406000 : ldff1w z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1w (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d -c5456482 : ldff1w z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1w (%x4,%z5.d,sxtw)[16byte] %p1/z -> %z2.d -c54768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1w (%x6,%z7.d,sxtw)[16byte] %p2/z -> %z4.d -c5496906 : ldff1w z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1w (%x8,%z9.d,sxtw)[16byte] %p2/z -> %z6.d -c54b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1w (%x10,%z11.d,sxtw)[16byte] %p3/z -> %z8.d -c54d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1w 
(%x11,%z13.d,sxtw)[16byte] %p3/z -> %z10.d -c54f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1w (%x13,%z15.d,sxtw)[16byte] %p4/z -> %z12.d -c55171ee : ldff1w z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1w (%x15,%z17.d,sxtw)[16byte] %p4/z -> %z14.d -c5537630 : ldff1w z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1w (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d -c5547671 : ldff1w z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1w (%x19,%z20.d,sxtw)[16byte] %p5/z -> %z17.d -c55676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1w (%x21,%z22.d,sxtw)[16byte] %p5/z -> %z19.d -c5587af5 : ldff1w z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1w (%x23,%z24.d,sxtw)[16byte] %p6/z -> %z21.d -c55a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1w (%x24,%z26.d,sxtw)[16byte] %p6/z -> %z23.d -c55c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1w (%x26,%z28.d,sxtw)[16byte] %p7/z -> %z25.d -c55e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1w (%x28,%z30.d,sxtw)[16byte] %p7/z -> %z27.d -c55f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1w (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d +c5006000 : ldff1w z0.d, p0/Z, [x0, z0.d, UXTW] : ldff1w (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d +c5056482 : ldff1w z2.d, p1/Z, [x4, z5.d, UXTW] : ldff1w (%x4,%z5.d,uxtw)[4byte] %p1/z -> %z2.d +c50768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, UXTW] : ldff1w (%x6,%z7.d,uxtw)[4byte] %p2/z -> %z4.d +c5096906 : ldff1w z6.d, p2/Z, [x8, z9.d, UXTW] : ldff1w (%x8,%z9.d,uxtw)[4byte] %p2/z -> %z6.d +c50b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, UXTW] : ldff1w (%x10,%z11.d,uxtw)[4byte] %p3/z -> %z8.d +c50d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, UXTW] : ldff1w (%x11,%z13.d,uxtw)[4byte] %p3/z -> %z10.d +c50f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, UXTW] : ldff1w (%x13,%z15.d,uxtw)[4byte] %p4/z -> %z12.d +c51171ee : ldff1w z14.d, p4/Z, [x15, z17.d, UXTW] : ldff1w (%x15,%z17.d,uxtw)[4byte] %p4/z -> %z14.d +c5137630 : ldff1w z16.d, p5/Z, [x17, z19.d, UXTW] : ldff1w (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d +c5147671 : ldff1w z17.d, p5/Z, 
[x19, z20.d, UXTW] : ldff1w (%x19,%z20.d,uxtw)[4byte] %p5/z -> %z17.d +c51676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, UXTW] : ldff1w (%x21,%z22.d,uxtw)[4byte] %p5/z -> %z19.d +c5187af5 : ldff1w z21.d, p6/Z, [x23, z24.d, UXTW] : ldff1w (%x23,%z24.d,uxtw)[4byte] %p6/z -> %z21.d +c51a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, UXTW] : ldff1w (%x24,%z26.d,uxtw)[4byte] %p6/z -> %z23.d +c51c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, UXTW] : ldff1w (%x26,%z28.d,uxtw)[4byte] %p7/z -> %z25.d +c51e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, UXTW] : ldff1w (%x28,%z30.d,uxtw)[4byte] %p7/z -> %z27.d +c51f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, UXTW] : ldff1w (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d +c5406000 : ldff1w z0.d, p0/Z, [x0, z0.d, SXTW] : ldff1w (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d +c5456482 : ldff1w z2.d, p1/Z, [x4, z5.d, SXTW] : ldff1w (%x4,%z5.d,sxtw)[4byte] %p1/z -> %z2.d +c54768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, SXTW] : ldff1w (%x6,%z7.d,sxtw)[4byte] %p2/z -> %z4.d +c5496906 : ldff1w z6.d, p2/Z, [x8, z9.d, SXTW] : ldff1w (%x8,%z9.d,sxtw)[4byte] %p2/z -> %z6.d +c54b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, SXTW] : ldff1w (%x10,%z11.d,sxtw)[4byte] %p3/z -> %z8.d +c54d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, SXTW] : ldff1w (%x11,%z13.d,sxtw)[4byte] %p3/z -> %z10.d +c54f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, SXTW] : ldff1w (%x13,%z15.d,sxtw)[4byte] %p4/z -> %z12.d +c55171ee : ldff1w z14.d, p4/Z, [x15, z17.d, SXTW] : ldff1w (%x15,%z17.d,sxtw)[4byte] %p4/z -> %z14.d +c5537630 : ldff1w z16.d, p5/Z, [x17, z19.d, SXTW] : ldff1w (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d +c5547671 : ldff1w z17.d, p5/Z, [x19, z20.d, SXTW] : ldff1w (%x19,%z20.d,sxtw)[4byte] %p5/z -> %z17.d +c55676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, SXTW] : ldff1w (%x21,%z22.d,sxtw)[4byte] %p5/z -> %z19.d +c5587af5 : ldff1w z21.d, p6/Z, [x23, z24.d, SXTW] : ldff1w (%x23,%z24.d,sxtw)[4byte] %p6/z -> %z21.d +c55a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, SXTW] : ldff1w (%x24,%z26.d,sxtw)[4byte] %p6/z -> %z23.d +c55c7f59 : ldff1w 
z25.d, p7/Z, [x26, z28.d, SXTW] : ldff1w (%x26,%z28.d,sxtw)[4byte] %p7/z -> %z25.d +c55e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, SXTW] : ldff1w (%x28,%z30.d,sxtw)[4byte] %p7/z -> %z27.d +c55f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, SXTW] : ldff1w (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d # LDFF1W { .D }, /Z, [, .D, #2] (LDFF1W-Z.P.BZ-D.x32.scaled) -c5206000 : ldff1w z0.d, p0/Z, [x0, z0.d, UXTW #2] : ldff1w (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d -c5256482 : ldff1w z2.d, p1/Z, [x4, z5.d, UXTW #2] : ldff1w (%x4,%z5.d,uxtw #2)[16byte] %p1/z -> %z2.d -c52768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, UXTW #2] : ldff1w (%x6,%z7.d,uxtw #2)[16byte] %p2/z -> %z4.d -c5296906 : ldff1w z6.d, p2/Z, [x8, z9.d, UXTW #2] : ldff1w (%x8,%z9.d,uxtw #2)[16byte] %p2/z -> %z6.d -c52b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, UXTW #2] : ldff1w (%x10,%z11.d,uxtw #2)[16byte] %p3/z -> %z8.d -c52d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, UXTW #2] : ldff1w (%x11,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d -c52f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, UXTW #2] : ldff1w (%x13,%z15.d,uxtw #2)[16byte] %p4/z -> %z12.d -c53171ee : ldff1w z14.d, p4/Z, [x15, z17.d, UXTW #2] : ldff1w (%x15,%z17.d,uxtw #2)[16byte] %p4/z -> %z14.d -c5337630 : ldff1w z16.d, p5/Z, [x17, z19.d, UXTW #2] : ldff1w (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d -c5347671 : ldff1w z17.d, p5/Z, [x19, z20.d, UXTW #2] : ldff1w (%x19,%z20.d,uxtw #2)[16byte] %p5/z -> %z17.d -c53676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, UXTW #2] : ldff1w (%x21,%z22.d,uxtw #2)[16byte] %p5/z -> %z19.d -c5387af5 : ldff1w z21.d, p6/Z, [x23, z24.d, UXTW #2] : ldff1w (%x23,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d -c53a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, UXTW #2] : ldff1w (%x24,%z26.d,uxtw #2)[16byte] %p6/z -> %z23.d -c53c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, UXTW #2] : ldff1w (%x26,%z28.d,uxtw #2)[16byte] %p7/z -> %z25.d -c53e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, UXTW #2] : ldff1w (%x28,%z30.d,uxtw #2)[16byte] %p7/z -> %z27.d -c53f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, 
UXTW #2] : ldff1w (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d -c5606000 : ldff1w z0.d, p0/Z, [x0, z0.d, SXTW #2] : ldff1w (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d -c5656482 : ldff1w z2.d, p1/Z, [x4, z5.d, SXTW #2] : ldff1w (%x4,%z5.d,sxtw #2)[16byte] %p1/z -> %z2.d -c56768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, SXTW #2] : ldff1w (%x6,%z7.d,sxtw #2)[16byte] %p2/z -> %z4.d -c5696906 : ldff1w z6.d, p2/Z, [x8, z9.d, SXTW #2] : ldff1w (%x8,%z9.d,sxtw #2)[16byte] %p2/z -> %z6.d -c56b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, SXTW #2] : ldff1w (%x10,%z11.d,sxtw #2)[16byte] %p3/z -> %z8.d -c56d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, SXTW #2] : ldff1w (%x11,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d -c56f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, SXTW #2] : ldff1w (%x13,%z15.d,sxtw #2)[16byte] %p4/z -> %z12.d -c57171ee : ldff1w z14.d, p4/Z, [x15, z17.d, SXTW #2] : ldff1w (%x15,%z17.d,sxtw #2)[16byte] %p4/z -> %z14.d -c5737630 : ldff1w z16.d, p5/Z, [x17, z19.d, SXTW #2] : ldff1w (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d -c5747671 : ldff1w z17.d, p5/Z, [x19, z20.d, SXTW #2] : ldff1w (%x19,%z20.d,sxtw #2)[16byte] %p5/z -> %z17.d -c57676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, SXTW #2] : ldff1w (%x21,%z22.d,sxtw #2)[16byte] %p5/z -> %z19.d -c5787af5 : ldff1w z21.d, p6/Z, [x23, z24.d, SXTW #2] : ldff1w (%x23,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d -c57a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, SXTW #2] : ldff1w (%x24,%z26.d,sxtw #2)[16byte] %p6/z -> %z23.d -c57c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, SXTW #2] : ldff1w (%x26,%z28.d,sxtw #2)[16byte] %p7/z -> %z25.d -c57e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, SXTW #2] : ldff1w (%x28,%z30.d,sxtw #2)[16byte] %p7/z -> %z27.d -c57f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, SXTW #2] : ldff1w (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d +c5206000 : ldff1w z0.d, p0/Z, [x0, z0.d, UXTW #2] : ldff1w (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d +c5256482 : ldff1w z2.d, p1/Z, [x4, z5.d, UXTW #2] : ldff1w (%x4,%z5.d,uxtw #2)[4byte] %p1/z -> %z2.d +c52768c4 : 
ldff1w z4.d, p2/Z, [x6, z7.d, UXTW #2] : ldff1w (%x6,%z7.d,uxtw #2)[4byte] %p2/z -> %z4.d +c5296906 : ldff1w z6.d, p2/Z, [x8, z9.d, UXTW #2] : ldff1w (%x8,%z9.d,uxtw #2)[4byte] %p2/z -> %z6.d +c52b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, UXTW #2] : ldff1w (%x10,%z11.d,uxtw #2)[4byte] %p3/z -> %z8.d +c52d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, UXTW #2] : ldff1w (%x11,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d +c52f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, UXTW #2] : ldff1w (%x13,%z15.d,uxtw #2)[4byte] %p4/z -> %z12.d +c53171ee : ldff1w z14.d, p4/Z, [x15, z17.d, UXTW #2] : ldff1w (%x15,%z17.d,uxtw #2)[4byte] %p4/z -> %z14.d +c5337630 : ldff1w z16.d, p5/Z, [x17, z19.d, UXTW #2] : ldff1w (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d +c5347671 : ldff1w z17.d, p5/Z, [x19, z20.d, UXTW #2] : ldff1w (%x19,%z20.d,uxtw #2)[4byte] %p5/z -> %z17.d +c53676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, UXTW #2] : ldff1w (%x21,%z22.d,uxtw #2)[4byte] %p5/z -> %z19.d +c5387af5 : ldff1w z21.d, p6/Z, [x23, z24.d, UXTW #2] : ldff1w (%x23,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d +c53a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, UXTW #2] : ldff1w (%x24,%z26.d,uxtw #2)[4byte] %p6/z -> %z23.d +c53c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, UXTW #2] : ldff1w (%x26,%z28.d,uxtw #2)[4byte] %p7/z -> %z25.d +c53e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, UXTW #2] : ldff1w (%x28,%z30.d,uxtw #2)[4byte] %p7/z -> %z27.d +c53f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, UXTW #2] : ldff1w (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d +c5606000 : ldff1w z0.d, p0/Z, [x0, z0.d, SXTW #2] : ldff1w (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d +c5656482 : ldff1w z2.d, p1/Z, [x4, z5.d, SXTW #2] : ldff1w (%x4,%z5.d,sxtw #2)[4byte] %p1/z -> %z2.d +c56768c4 : ldff1w z4.d, p2/Z, [x6, z7.d, SXTW #2] : ldff1w (%x6,%z7.d,sxtw #2)[4byte] %p2/z -> %z4.d +c5696906 : ldff1w z6.d, p2/Z, [x8, z9.d, SXTW #2] : ldff1w (%x8,%z9.d,sxtw #2)[4byte] %p2/z -> %z6.d +c56b6d48 : ldff1w z8.d, p3/Z, [x10, z11.d, SXTW #2] : ldff1w (%x10,%z11.d,sxtw #2)[4byte] %p3/z -> %z8.d 
+c56d6d6a : ldff1w z10.d, p3/Z, [x11, z13.d, SXTW #2] : ldff1w (%x11,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d +c56f71ac : ldff1w z12.d, p4/Z, [x13, z15.d, SXTW #2] : ldff1w (%x13,%z15.d,sxtw #2)[4byte] %p4/z -> %z12.d +c57171ee : ldff1w z14.d, p4/Z, [x15, z17.d, SXTW #2] : ldff1w (%x15,%z17.d,sxtw #2)[4byte] %p4/z -> %z14.d +c5737630 : ldff1w z16.d, p5/Z, [x17, z19.d, SXTW #2] : ldff1w (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d +c5747671 : ldff1w z17.d, p5/Z, [x19, z20.d, SXTW #2] : ldff1w (%x19,%z20.d,sxtw #2)[4byte] %p5/z -> %z17.d +c57676b3 : ldff1w z19.d, p5/Z, [x21, z22.d, SXTW #2] : ldff1w (%x21,%z22.d,sxtw #2)[4byte] %p5/z -> %z19.d +c5787af5 : ldff1w z21.d, p6/Z, [x23, z24.d, SXTW #2] : ldff1w (%x23,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d +c57a7b17 : ldff1w z23.d, p6/Z, [x24, z26.d, SXTW #2] : ldff1w (%x24,%z26.d,sxtw #2)[4byte] %p6/z -> %z23.d +c57c7f59 : ldff1w z25.d, p7/Z, [x26, z28.d, SXTW #2] : ldff1w (%x26,%z28.d,sxtw #2)[4byte] %p7/z -> %z25.d +c57e7f9b : ldff1w z27.d, p7/Z, [x28, z30.d, SXTW #2] : ldff1w (%x28,%z30.d,sxtw #2)[4byte] %p7/z -> %z27.d +c57f7fff : ldff1w z31.d, p7/Z, [sp, z31.d, SXTW #2] : ldff1w (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d # LDFF1W { .D }, /Z, [.D{, #}] (LDFF1W-Z.P.AI-D) -c520e000 : ldff1w z0.d, p0/Z, [z0.d, #0] : ldff1w (%z0.d)[16byte] %p0/z -> %z0.d -c522e482 : ldff1w z2.d, p1/Z, [z4.d, #8] : ldff1w +0x08(%z4.d)[16byte] %p1/z -> %z2.d -c524e8c4 : ldff1w z4.d, p2/Z, [z6.d, #16] : ldff1w +0x10(%z6.d)[16byte] %p2/z -> %z4.d -c526e906 : ldff1w z6.d, p2/Z, [z8.d, #24] : ldff1w +0x18(%z8.d)[16byte] %p2/z -> %z6.d -c528ed48 : ldff1w z8.d, p3/Z, [z10.d, #32] : ldff1w +0x20(%z10.d)[16byte] %p3/z -> %z8.d -c52aed8a : ldff1w z10.d, p3/Z, [z12.d, #40] : ldff1w +0x28(%z12.d)[16byte] %p3/z -> %z10.d -c52cf1cc : ldff1w z12.d, p4/Z, [z14.d, #48] : ldff1w +0x30(%z14.d)[16byte] %p4/z -> %z12.d -c52ef20e : ldff1w z14.d, p4/Z, [z16.d, #56] : ldff1w +0x38(%z16.d)[16byte] %p4/z -> %z14.d -c530f650 : ldff1w z16.d, p5/Z, [z18.d, #64] : ldff1w 
+0x40(%z18.d)[16byte] %p5/z -> %z16.d -c531f671 : ldff1w z17.d, p5/Z, [z19.d, #68] : ldff1w +0x44(%z19.d)[16byte] %p5/z -> %z17.d -c533f6b3 : ldff1w z19.d, p5/Z, [z21.d, #76] : ldff1w +0x4c(%z21.d)[16byte] %p5/z -> %z19.d -c535faf5 : ldff1w z21.d, p6/Z, [z23.d, #84] : ldff1w +0x54(%z23.d)[16byte] %p6/z -> %z21.d -c537fb37 : ldff1w z23.d, p6/Z, [z25.d, #92] : ldff1w +0x5c(%z25.d)[16byte] %p6/z -> %z23.d -c539ff79 : ldff1w z25.d, p7/Z, [z27.d, #100] : ldff1w +0x64(%z27.d)[16byte] %p7/z -> %z25.d -c53bffbb : ldff1w z27.d, p7/Z, [z29.d, #108] : ldff1w +0x6c(%z29.d)[16byte] %p7/z -> %z27.d -c53fffff : ldff1w z31.d, p7/Z, [z31.d, #124] : ldff1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +c520e000 : ldff1w z0.d, p0/Z, [z0.d, #0] : ldff1w (%z0.d)[4byte] %p0/z -> %z0.d +c522e482 : ldff1w z2.d, p1/Z, [z4.d, #8] : ldff1w +0x08(%z4.d)[4byte] %p1/z -> %z2.d +c524e8c4 : ldff1w z4.d, p2/Z, [z6.d, #16] : ldff1w +0x10(%z6.d)[4byte] %p2/z -> %z4.d +c526e906 : ldff1w z6.d, p2/Z, [z8.d, #24] : ldff1w +0x18(%z8.d)[4byte] %p2/z -> %z6.d +c528ed48 : ldff1w z8.d, p3/Z, [z10.d, #32] : ldff1w +0x20(%z10.d)[4byte] %p3/z -> %z8.d +c52aed8a : ldff1w z10.d, p3/Z, [z12.d, #40] : ldff1w +0x28(%z12.d)[4byte] %p3/z -> %z10.d +c52cf1cc : ldff1w z12.d, p4/Z, [z14.d, #48] : ldff1w +0x30(%z14.d)[4byte] %p4/z -> %z12.d +c52ef20e : ldff1w z14.d, p4/Z, [z16.d, #56] : ldff1w +0x38(%z16.d)[4byte] %p4/z -> %z14.d +c530f650 : ldff1w z16.d, p5/Z, [z18.d, #64] : ldff1w +0x40(%z18.d)[4byte] %p5/z -> %z16.d +c531f671 : ldff1w z17.d, p5/Z, [z19.d, #68] : ldff1w +0x44(%z19.d)[4byte] %p5/z -> %z17.d +c533f6b3 : ldff1w z19.d, p5/Z, [z21.d, #76] : ldff1w +0x4c(%z21.d)[4byte] %p5/z -> %z19.d +c535faf5 : ldff1w z21.d, p6/Z, [z23.d, #84] : ldff1w +0x54(%z23.d)[4byte] %p6/z -> %z21.d +c537fb37 : ldff1w z23.d, p6/Z, [z25.d, #92] : ldff1w +0x5c(%z25.d)[4byte] %p6/z -> %z23.d +c539ff79 : ldff1w z25.d, p7/Z, [z27.d, #100] : ldff1w +0x64(%z27.d)[4byte] %p7/z -> %z25.d +c53bffbb : ldff1w z27.d, p7/Z, [z29.d, #108] : ldff1w 
+0x6c(%z29.d)[4byte] %p7/z -> %z27.d +c53fffff : ldff1w z31.d, p7/Z, [z31.d, #124] : ldff1w +0x7c(%z31.d)[4byte] %p7/z -> %z31.d # LDFF1W { .D }, /Z, [, .D] (LDFF1W-Z.P.BZ-D.64.unscaled) -c540e000 : ldff1w z0.d, p0/Z, [x0, z0.d] : ldff1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d -c545e482 : ldff1w z2.d, p1/Z, [x4, z5.d] : ldff1w (%x4,%z5.d)[16byte] %p1/z -> %z2.d -c547e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d] : ldff1w (%x6,%z7.d)[16byte] %p2/z -> %z4.d -c549e906 : ldff1w z6.d, p2/Z, [x8, z9.d] : ldff1w (%x8,%z9.d)[16byte] %p2/z -> %z6.d -c54bed48 : ldff1w z8.d, p3/Z, [x10, z11.d] : ldff1w (%x10,%z11.d)[16byte] %p3/z -> %z8.d -c54ded6a : ldff1w z10.d, p3/Z, [x11, z13.d] : ldff1w (%x11,%z13.d)[16byte] %p3/z -> %z10.d -c54ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d] : ldff1w (%x13,%z15.d)[16byte] %p4/z -> %z12.d -c551f1ee : ldff1w z14.d, p4/Z, [x15, z17.d] : ldff1w (%x15,%z17.d)[16byte] %p4/z -> %z14.d -c553f630 : ldff1w z16.d, p5/Z, [x17, z19.d] : ldff1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d -c554f671 : ldff1w z17.d, p5/Z, [x19, z20.d] : ldff1w (%x19,%z20.d)[16byte] %p5/z -> %z17.d -c556f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d] : ldff1w (%x21,%z22.d)[16byte] %p5/z -> %z19.d -c558faf5 : ldff1w z21.d, p6/Z, [x23, z24.d] : ldff1w (%x23,%z24.d)[16byte] %p6/z -> %z21.d -c55afb17 : ldff1w z23.d, p6/Z, [x24, z26.d] : ldff1w (%x24,%z26.d)[16byte] %p6/z -> %z23.d -c55cff59 : ldff1w z25.d, p7/Z, [x26, z28.d] : ldff1w (%x26,%z28.d)[16byte] %p7/z -> %z25.d -c55eff9b : ldff1w z27.d, p7/Z, [x28, z30.d] : ldff1w (%x28,%z30.d)[16byte] %p7/z -> %z27.d -c55fffff : ldff1w z31.d, p7/Z, [sp, z31.d] : ldff1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d +c540e000 : ldff1w z0.d, p0/Z, [x0, z0.d] : ldff1w (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c545e482 : ldff1w z2.d, p1/Z, [x4, z5.d] : ldff1w (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c547e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d] : ldff1w (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c549e906 : ldff1w z6.d, p2/Z, [x8, z9.d] : ldff1w (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c54bed48 : ldff1w z8.d, 
p3/Z, [x10, z11.d] : ldff1w (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c54ded6a : ldff1w z10.d, p3/Z, [x11, z13.d] : ldff1w (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c54ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d] : ldff1w (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c551f1ee : ldff1w z14.d, p4/Z, [x15, z17.d] : ldff1w (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c553f630 : ldff1w z16.d, p5/Z, [x17, z19.d] : ldff1w (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c554f671 : ldff1w z17.d, p5/Z, [x19, z20.d] : ldff1w (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c556f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d] : ldff1w (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c558faf5 : ldff1w z21.d, p6/Z, [x23, z24.d] : ldff1w (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c55afb17 : ldff1w z23.d, p6/Z, [x24, z26.d] : ldff1w (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c55cff59 : ldff1w z25.d, p7/Z, [x26, z28.d] : ldff1w (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c55eff9b : ldff1w z27.d, p7/Z, [x28, z30.d] : ldff1w (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c55fffff : ldff1w z31.d, p7/Z, [sp, z31.d] : ldff1w (%sp,%z31.d)[4byte] %p7/z -> %z31.d # LDFF1W { .D }, /Z, [, .D, LSL #2] (LDFF1W-Z.P.BZ-D.64.scaled) -c560e000 : ldff1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d -c565e482 : ldff1w z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1w (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d -c567e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1w (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d -c569e906 : ldff1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1w (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d -c56bed48 : ldff1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1w (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d -c56ded6a : ldff1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1w (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d -c56ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1w (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d -c571f1ee : ldff1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1w (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d -c573f630 : ldff1w z16.d, p5/Z, [x17, 
z19.d, LSL #2] : ldff1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d -c574f671 : ldff1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1w (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d -c576f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ldff1w (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d -c578faf5 : ldff1w z21.d, p6/Z, [x23, z24.d, LSL #2] : ldff1w (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d -c57afb17 : ldff1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1w (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d -c57cff59 : ldff1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1w (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d -c57eff9b : ldff1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1w (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d -c57fffff : ldff1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d +c560e000 : ldff1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1w (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d +c565e482 : ldff1w z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1w (%x4,%z5.d,lsl #2)[4byte] %p1/z -> %z2.d +c567e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1w (%x6,%z7.d,lsl #2)[4byte] %p2/z -> %z4.d +c569e906 : ldff1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1w (%x8,%z9.d,lsl #2)[4byte] %p2/z -> %z6.d +c56bed48 : ldff1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1w (%x10,%z11.d,lsl #2)[4byte] %p3/z -> %z8.d +c56ded6a : ldff1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1w (%x11,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d +c56ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1w (%x13,%z15.d,lsl #2)[4byte] %p4/z -> %z12.d +c571f1ee : ldff1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1w (%x15,%z17.d,lsl #2)[4byte] %p4/z -> %z14.d +c573f630 : ldff1w z16.d, p5/Z, [x17, z19.d, LSL #2] : ldff1w (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d +c574f671 : ldff1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1w (%x19,%z20.d,lsl #2)[4byte] %p5/z -> %z17.d +c576f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ldff1w (%x21,%z22.d,lsl #2)[4byte] %p5/z -> %z19.d +c578faf5 : ldff1w z21.d, p6/Z, [x23, 
z24.d, LSL #2] : ldff1w (%x23,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d +c57afb17 : ldff1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1w (%x24,%z26.d,lsl #2)[4byte] %p6/z -> %z23.d +c57cff59 : ldff1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1w (%x26,%z28.d,lsl #2)[4byte] %p7/z -> %z25.d +c57eff9b : ldff1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1w (%x28,%z30.d,lsl #2)[4byte] %p7/z -> %z27.d +c57fffff : ldff1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1w (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d # LDNF1B { .B }, /Z, [{, #, MUL VL}] (LDNF1B-Z.P.BI-U8) -a418a000 : ldnf1b z0.b, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x0100(%x0)[32byte] %p0/z -> %z0.b -a419a482 : ldnf1b z2.b, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0xe0(%x4)[32byte] %p1/z -> %z2.b -a41aa8c4 : ldnf1b z4.b, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0xc0(%x6)[32byte] %p2/z -> %z4.b -a41ba906 : ldnf1b z6.b, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0xa0(%x8)[32byte] %p2/z -> %z6.b -a41cad48 : ldnf1b z8.b, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x80(%x10)[32byte] %p3/z -> %z8.b -a41dad6a : ldnf1b z10.b, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x60(%x11)[32byte] %p3/z -> %z10.b -a41eb1ac : ldnf1b z12.b, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x40(%x13)[32byte] %p4/z -> %z12.b -a41fb1ee : ldnf1b z14.b, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x20(%x15)[32byte] %p4/z -> %z14.b -a410b630 : ldnf1b z16.b, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[32byte] %p5/z -> %z16.b -a410b671 : ldnf1b z17.b, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[32byte] %p5/z -> %z17.b -a411b6b3 : ldnf1b z19.b, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x20(%x21)[32byte] %p5/z -> %z19.b -a412baf5 : ldnf1b z21.b, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x40(%x23)[32byte] %p6/z -> %z21.b -a413bb17 : ldnf1b z23.b, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x60(%x24)[32byte] %p6/z -> %z23.b -a414bf59 : ldnf1b z25.b, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x80(%x26)[32byte] %p7/z -> %z25.b -a415bf9b : ldnf1b z27.b, p7/Z, [x28, #5, MUL VL] : ldnf1b +0xa0(%x28)[32byte] %p7/z -> %z27.b -a417bfff : ldnf1b z31.b, p7/Z, [sp, #7, MUL 
VL] : ldnf1b +0xe0(%sp)[32byte] %p7/z -> %z31.b +a418a000 : ldnf1b z0.b, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x0100(%x0)[1byte] %p0/z -> %z0.b +a419a482 : ldnf1b z2.b, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0xe0(%x4)[1byte] %p1/z -> %z2.b +a41aa8c4 : ldnf1b z4.b, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0xc0(%x6)[1byte] %p2/z -> %z4.b +a41ba906 : ldnf1b z6.b, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0xa0(%x8)[1byte] %p2/z -> %z6.b +a41cad48 : ldnf1b z8.b, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x80(%x10)[1byte] %p3/z -> %z8.b +a41dad6a : ldnf1b z10.b, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x60(%x11)[1byte] %p3/z -> %z10.b +a41eb1ac : ldnf1b z12.b, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x40(%x13)[1byte] %p4/z -> %z12.b +a41fb1ee : ldnf1b z14.b, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x20(%x15)[1byte] %p4/z -> %z14.b +a410b630 : ldnf1b z16.b, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[1byte] %p5/z -> %z16.b +a410b671 : ldnf1b z17.b, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[1byte] %p5/z -> %z17.b +a411b6b3 : ldnf1b z19.b, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x20(%x21)[1byte] %p5/z -> %z19.b +a412baf5 : ldnf1b z21.b, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x40(%x23)[1byte] %p6/z -> %z21.b +a413bb17 : ldnf1b z23.b, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x60(%x24)[1byte] %p6/z -> %z23.b +a414bf59 : ldnf1b z25.b, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x80(%x26)[1byte] %p7/z -> %z25.b +a415bf9b : ldnf1b z27.b, p7/Z, [x28, #5, MUL VL] : ldnf1b +0xa0(%x28)[1byte] %p7/z -> %z27.b +a417bfff : ldnf1b z31.b, p7/Z, [sp, #7, MUL VL] : ldnf1b +0xe0(%sp)[1byte] %p7/z -> %z31.b # LDNF1B { .H }, /Z, [{, #, MUL VL}] (LDNF1B-Z.P.BI-U16) -a438a000 : ldnf1b z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x80(%x0)[16byte] %p0/z -> %z0.h -a439a482 : ldnf1b z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x70(%x4)[16byte] %p1/z -> %z2.h -a43aa8c4 : ldnf1b z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x60(%x6)[16byte] %p2/z -> %z4.h -a43ba906 : ldnf1b z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0x50(%x8)[16byte] %p2/z -> %z6.h -a43cad48 : ldnf1b z8.h, p3/Z, [x10, #-4, 
MUL VL] : ldnf1b -0x40(%x10)[16byte] %p3/z -> %z8.h -a43dad6a : ldnf1b z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x30(%x11)[16byte] %p3/z -> %z10.h -a43eb1ac : ldnf1b z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x20(%x13)[16byte] %p4/z -> %z12.h -a43fb1ee : ldnf1b z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x10(%x15)[16byte] %p4/z -> %z14.h -a430b630 : ldnf1b z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[16byte] %p5/z -> %z16.h -a430b671 : ldnf1b z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[16byte] %p5/z -> %z17.h -a431b6b3 : ldnf1b z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x10(%x21)[16byte] %p5/z -> %z19.h -a432baf5 : ldnf1b z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x20(%x23)[16byte] %p6/z -> %z21.h -a433bb17 : ldnf1b z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x30(%x24)[16byte] %p6/z -> %z23.h -a434bf59 : ldnf1b z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x40(%x26)[16byte] %p7/z -> %z25.h -a435bf9b : ldnf1b z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1b +0x50(%x28)[16byte] %p7/z -> %z27.h -a437bfff : ldnf1b z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x70(%sp)[16byte] %p7/z -> %z31.h +a438a000 : ldnf1b z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x80(%x0)[1byte] %p0/z -> %z0.h +a439a482 : ldnf1b z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x70(%x4)[1byte] %p1/z -> %z2.h +a43aa8c4 : ldnf1b z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x60(%x6)[1byte] %p2/z -> %z4.h +a43ba906 : ldnf1b z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0x50(%x8)[1byte] %p2/z -> %z6.h +a43cad48 : ldnf1b z8.h, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x40(%x10)[1byte] %p3/z -> %z8.h +a43dad6a : ldnf1b z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x30(%x11)[1byte] %p3/z -> %z10.h +a43eb1ac : ldnf1b z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x20(%x13)[1byte] %p4/z -> %z12.h +a43fb1ee : ldnf1b z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x10(%x15)[1byte] %p4/z -> %z14.h +a430b630 : ldnf1b z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[1byte] %p5/z -> %z16.h +a430b671 : ldnf1b z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[1byte] %p5/z 
-> %z17.h +a431b6b3 : ldnf1b z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x10(%x21)[1byte] %p5/z -> %z19.h +a432baf5 : ldnf1b z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x20(%x23)[1byte] %p6/z -> %z21.h +a433bb17 : ldnf1b z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x30(%x24)[1byte] %p6/z -> %z23.h +a434bf59 : ldnf1b z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x40(%x26)[1byte] %p7/z -> %z25.h +a435bf9b : ldnf1b z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1b +0x50(%x28)[1byte] %p7/z -> %z27.h +a437bfff : ldnf1b z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x70(%sp)[1byte] %p7/z -> %z31.h # LDNF1B { .S }, /Z, [{, #, MUL VL}] (LDNF1B-Z.P.BI-U32) -a458a000 : ldnf1b z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x40(%x0)[8byte] %p0/z -> %z0.s -a459a482 : ldnf1b z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x38(%x4)[8byte] %p1/z -> %z2.s -a45aa8c4 : ldnf1b z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x30(%x6)[8byte] %p2/z -> %z4.s -a45ba906 : ldnf1b z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0x28(%x8)[8byte] %p2/z -> %z6.s -a45cad48 : ldnf1b z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x20(%x10)[8byte] %p3/z -> %z8.s -a45dad6a : ldnf1b z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x18(%x11)[8byte] %p3/z -> %z10.s -a45eb1ac : ldnf1b z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x10(%x13)[8byte] %p4/z -> %z12.s -a45fb1ee : ldnf1b z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x08(%x15)[8byte] %p4/z -> %z14.s -a450b630 : ldnf1b z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[8byte] %p5/z -> %z16.s -a450b671 : ldnf1b z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[8byte] %p5/z -> %z17.s -a451b6b3 : ldnf1b z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x08(%x21)[8byte] %p5/z -> %z19.s -a452baf5 : ldnf1b z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x10(%x23)[8byte] %p6/z -> %z21.s -a453bb17 : ldnf1b z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x18(%x24)[8byte] %p6/z -> %z23.s -a454bf59 : ldnf1b z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x20(%x26)[8byte] %p7/z -> %z25.s -a455bf9b : ldnf1b z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1b 
+0x28(%x28)[8byte] %p7/z -> %z27.s -a457bfff : ldnf1b z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x38(%sp)[8byte] %p7/z -> %z31.s +a458a000 : ldnf1b z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x40(%x0)[1byte] %p0/z -> %z0.s +a459a482 : ldnf1b z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x38(%x4)[1byte] %p1/z -> %z2.s +a45aa8c4 : ldnf1b z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x30(%x6)[1byte] %p2/z -> %z4.s +a45ba906 : ldnf1b z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0x28(%x8)[1byte] %p2/z -> %z6.s +a45cad48 : ldnf1b z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x20(%x10)[1byte] %p3/z -> %z8.s +a45dad6a : ldnf1b z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x18(%x11)[1byte] %p3/z -> %z10.s +a45eb1ac : ldnf1b z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x10(%x13)[1byte] %p4/z -> %z12.s +a45fb1ee : ldnf1b z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x08(%x15)[1byte] %p4/z -> %z14.s +a450b630 : ldnf1b z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[1byte] %p5/z -> %z16.s +a450b671 : ldnf1b z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[1byte] %p5/z -> %z17.s +a451b6b3 : ldnf1b z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x08(%x21)[1byte] %p5/z -> %z19.s +a452baf5 : ldnf1b z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x10(%x23)[1byte] %p6/z -> %z21.s +a453bb17 : ldnf1b z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x18(%x24)[1byte] %p6/z -> %z23.s +a454bf59 : ldnf1b z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x20(%x26)[1byte] %p7/z -> %z25.s +a455bf9b : ldnf1b z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1b +0x28(%x28)[1byte] %p7/z -> %z27.s +a457bfff : ldnf1b z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x38(%sp)[1byte] %p7/z -> %z31.s # LDNF1B { .D }, /Z, [{, #, MUL VL}] (LDNF1B-Z.P.BI-U64) -a478a000 : ldnf1b z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x20(%x0)[4byte] %p0/z -> %z0.d -a479a482 : ldnf1b z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x1c(%x4)[4byte] %p1/z -> %z2.d -a47aa8c4 : ldnf1b z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x18(%x6)[4byte] %p2/z -> %z4.d -a47ba906 : ldnf1b z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1b 
-0x14(%x8)[4byte] %p2/z -> %z6.d -a47cad48 : ldnf1b z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x10(%x10)[4byte] %p3/z -> %z8.d -a47dad6a : ldnf1b z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x0c(%x11)[4byte] %p3/z -> %z10.d -a47eb1ac : ldnf1b z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x08(%x13)[4byte] %p4/z -> %z12.d -a47fb1ee : ldnf1b z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x04(%x15)[4byte] %p4/z -> %z14.d -a470b630 : ldnf1b z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[4byte] %p5/z -> %z16.d -a470b671 : ldnf1b z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[4byte] %p5/z -> %z17.d -a471b6b3 : ldnf1b z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x04(%x21)[4byte] %p5/z -> %z19.d -a472baf5 : ldnf1b z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x08(%x23)[4byte] %p6/z -> %z21.d -a473bb17 : ldnf1b z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x0c(%x24)[4byte] %p6/z -> %z23.d -a474bf59 : ldnf1b z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x10(%x26)[4byte] %p7/z -> %z25.d -a475bf9b : ldnf1b z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1b +0x14(%x28)[4byte] %p7/z -> %z27.d -a477bfff : ldnf1b z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x1c(%sp)[4byte] %p7/z -> %z31.d +a478a000 : ldnf1b z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1b -0x20(%x0)[1byte] %p0/z -> %z0.d +a479a482 : ldnf1b z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1b -0x1c(%x4)[1byte] %p1/z -> %z2.d +a47aa8c4 : ldnf1b z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1b -0x18(%x6)[1byte] %p2/z -> %z4.d +a47ba906 : ldnf1b z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1b -0x14(%x8)[1byte] %p2/z -> %z6.d +a47cad48 : ldnf1b z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1b -0x10(%x10)[1byte] %p3/z -> %z8.d +a47dad6a : ldnf1b z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1b -0x0c(%x11)[1byte] %p3/z -> %z10.d +a47eb1ac : ldnf1b z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1b -0x08(%x13)[1byte] %p4/z -> %z12.d +a47fb1ee : ldnf1b z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1b -0x04(%x15)[1byte] %p4/z -> %z14.d +a470b630 : ldnf1b z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1b (%x17)[1byte] %p5/z -> %z16.d +a470b671 : 
ldnf1b z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1b (%x19)[1byte] %p5/z -> %z17.d +a471b6b3 : ldnf1b z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1b +0x04(%x21)[1byte] %p5/z -> %z19.d +a472baf5 : ldnf1b z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1b +0x08(%x23)[1byte] %p6/z -> %z21.d +a473bb17 : ldnf1b z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1b +0x0c(%x24)[1byte] %p6/z -> %z23.d +a474bf59 : ldnf1b z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1b +0x10(%x26)[1byte] %p7/z -> %z25.d +a475bf9b : ldnf1b z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1b +0x14(%x28)[1byte] %p7/z -> %z27.d +a477bfff : ldnf1b z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1b +0x1c(%sp)[1byte] %p7/z -> %z31.d # LDNF1D { .D }, /Z, [{, #, MUL VL}] (LDNF1D-Z.P.BI-U64) -a5f8a000 : ldnf1d z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1d -0x0100(%x0)[32byte] %p0/z -> %z0.d -a5f9a482 : ldnf1d z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1d -0xe0(%x4)[32byte] %p1/z -> %z2.d -a5faa8c4 : ldnf1d z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1d -0xc0(%x6)[32byte] %p2/z -> %z4.d -a5fba906 : ldnf1d z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1d -0xa0(%x8)[32byte] %p2/z -> %z6.d -a5fcad48 : ldnf1d z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1d -0x80(%x10)[32byte] %p3/z -> %z8.d -a5fdad6a : ldnf1d z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1d -0x60(%x11)[32byte] %p3/z -> %z10.d -a5feb1ac : ldnf1d z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1d -0x40(%x13)[32byte] %p4/z -> %z12.d -a5ffb1ee : ldnf1d z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1d -0x20(%x15)[32byte] %p4/z -> %z14.d -a5f0b630 : ldnf1d z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1d (%x17)[32byte] %p5/z -> %z16.d -a5f0b671 : ldnf1d z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1d (%x19)[32byte] %p5/z -> %z17.d -a5f1b6b3 : ldnf1d z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1d +0x20(%x21)[32byte] %p5/z -> %z19.d -a5f2baf5 : ldnf1d z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1d +0x40(%x23)[32byte] %p6/z -> %z21.d -a5f3bb17 : ldnf1d z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1d +0x60(%x24)[32byte] %p6/z -> %z23.d -a5f4bf59 : ldnf1d z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1d +0x80(%x26)[32byte] %p7/z 
-> %z25.d -a5f5bf9b : ldnf1d z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1d +0xa0(%x28)[32byte] %p7/z -> %z27.d -a5f7bfff : ldnf1d z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1d +0xe0(%sp)[32byte] %p7/z -> %z31.d +a5f8a000 : ldnf1d z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1d -0x0100(%x0)[8byte] %p0/z -> %z0.d +a5f9a482 : ldnf1d z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1d -0xe0(%x4)[8byte] %p1/z -> %z2.d +a5faa8c4 : ldnf1d z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1d -0xc0(%x6)[8byte] %p2/z -> %z4.d +a5fba906 : ldnf1d z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1d -0xa0(%x8)[8byte] %p2/z -> %z6.d +a5fcad48 : ldnf1d z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1d -0x80(%x10)[8byte] %p3/z -> %z8.d +a5fdad6a : ldnf1d z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1d -0x60(%x11)[8byte] %p3/z -> %z10.d +a5feb1ac : ldnf1d z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1d -0x40(%x13)[8byte] %p4/z -> %z12.d +a5ffb1ee : ldnf1d z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1d -0x20(%x15)[8byte] %p4/z -> %z14.d +a5f0b630 : ldnf1d z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1d (%x17)[8byte] %p5/z -> %z16.d +a5f0b671 : ldnf1d z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1d (%x19)[8byte] %p5/z -> %z17.d +a5f1b6b3 : ldnf1d z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1d +0x20(%x21)[8byte] %p5/z -> %z19.d +a5f2baf5 : ldnf1d z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1d +0x40(%x23)[8byte] %p6/z -> %z21.d +a5f3bb17 : ldnf1d z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1d +0x60(%x24)[8byte] %p6/z -> %z23.d +a5f4bf59 : ldnf1d z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1d +0x80(%x26)[8byte] %p7/z -> %z25.d +a5f5bf9b : ldnf1d z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1d +0xa0(%x28)[8byte] %p7/z -> %z27.d +a5f7bfff : ldnf1d z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1d +0xe0(%sp)[8byte] %p7/z -> %z31.d # LDNF1H { .H }, /Z, [{, #, MUL VL}] (LDNF1H-Z.P.BI-U16) -a4b8a000 : ldnf1h z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x0100(%x0)[32byte] %p0/z -> %z0.h -a4b9a482 : ldnf1h z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1h -0xe0(%x4)[32byte] %p1/z -> %z2.h -a4baa8c4 : ldnf1h z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1h 
-0xc0(%x6)[32byte] %p2/z -> %z4.h -a4bba906 : ldnf1h z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0xa0(%x8)[32byte] %p2/z -> %z6.h -a4bcad48 : ldnf1h z8.h, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x80(%x10)[32byte] %p3/z -> %z8.h -a4bdad6a : ldnf1h z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x60(%x11)[32byte] %p3/z -> %z10.h -a4beb1ac : ldnf1h z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x40(%x13)[32byte] %p4/z -> %z12.h -a4bfb1ee : ldnf1h z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x20(%x15)[32byte] %p4/z -> %z14.h -a4b0b630 : ldnf1h z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[32byte] %p5/z -> %z16.h -a4b0b671 : ldnf1h z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[32byte] %p5/z -> %z17.h -a4b1b6b3 : ldnf1h z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x20(%x21)[32byte] %p5/z -> %z19.h -a4b2baf5 : ldnf1h z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x40(%x23)[32byte] %p6/z -> %z21.h -a4b3bb17 : ldnf1h z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x60(%x24)[32byte] %p6/z -> %z23.h -a4b4bf59 : ldnf1h z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x80(%x26)[32byte] %p7/z -> %z25.h -a4b5bf9b : ldnf1h z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1h +0xa0(%x28)[32byte] %p7/z -> %z27.h -a4b7bfff : ldnf1h z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1h +0xe0(%sp)[32byte] %p7/z -> %z31.h +a4b8a000 : ldnf1h z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x0100(%x0)[2byte] %p0/z -> %z0.h +a4b9a482 : ldnf1h z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1h -0xe0(%x4)[2byte] %p1/z -> %z2.h +a4baa8c4 : ldnf1h z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1h -0xc0(%x6)[2byte] %p2/z -> %z4.h +a4bba906 : ldnf1h z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0xa0(%x8)[2byte] %p2/z -> %z6.h +a4bcad48 : ldnf1h z8.h, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x80(%x10)[2byte] %p3/z -> %z8.h +a4bdad6a : ldnf1h z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x60(%x11)[2byte] %p3/z -> %z10.h +a4beb1ac : ldnf1h z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x40(%x13)[2byte] %p4/z -> %z12.h +a4bfb1ee : ldnf1h z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x20(%x15)[2byte] %p4/z -> %z14.h 
+a4b0b630 : ldnf1h z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[2byte] %p5/z -> %z16.h +a4b0b671 : ldnf1h z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[2byte] %p5/z -> %z17.h +a4b1b6b3 : ldnf1h z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x20(%x21)[2byte] %p5/z -> %z19.h +a4b2baf5 : ldnf1h z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x40(%x23)[2byte] %p6/z -> %z21.h +a4b3bb17 : ldnf1h z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x60(%x24)[2byte] %p6/z -> %z23.h +a4b4bf59 : ldnf1h z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x80(%x26)[2byte] %p7/z -> %z25.h +a4b5bf9b : ldnf1h z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1h +0xa0(%x28)[2byte] %p7/z -> %z27.h +a4b7bfff : ldnf1h z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1h +0xe0(%sp)[2byte] %p7/z -> %z31.h # LDNF1H { .S }, /Z, [{, #, MUL VL}] (LDNF1H-Z.P.BI-U32) -a4d8a000 : ldnf1h z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x80(%x0)[16byte] %p0/z -> %z0.s -a4d9a482 : ldnf1h z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1h -0x70(%x4)[16byte] %p1/z -> %z2.s -a4daa8c4 : ldnf1h z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1h -0x60(%x6)[16byte] %p2/z -> %z4.s -a4dba906 : ldnf1h z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0x50(%x8)[16byte] %p2/z -> %z6.s -a4dcad48 : ldnf1h z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x40(%x10)[16byte] %p3/z -> %z8.s -a4ddad6a : ldnf1h z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x30(%x11)[16byte] %p3/z -> %z10.s -a4deb1ac : ldnf1h z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x20(%x13)[16byte] %p4/z -> %z12.s -a4dfb1ee : ldnf1h z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x10(%x15)[16byte] %p4/z -> %z14.s -a4d0b630 : ldnf1h z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[16byte] %p5/z -> %z16.s -a4d0b671 : ldnf1h z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[16byte] %p5/z -> %z17.s -a4d1b6b3 : ldnf1h z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x10(%x21)[16byte] %p5/z -> %z19.s -a4d2baf5 : ldnf1h z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x20(%x23)[16byte] %p6/z -> %z21.s -a4d3bb17 : ldnf1h z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x30(%x24)[16byte] 
%p6/z -> %z23.s -a4d4bf59 : ldnf1h z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x40(%x26)[16byte] %p7/z -> %z25.s -a4d5bf9b : ldnf1h z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1h +0x50(%x28)[16byte] %p7/z -> %z27.s -a4d7bfff : ldnf1h z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1h +0x70(%sp)[16byte] %p7/z -> %z31.s +a4d8a000 : ldnf1h z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x80(%x0)[2byte] %p0/z -> %z0.s +a4d9a482 : ldnf1h z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1h -0x70(%x4)[2byte] %p1/z -> %z2.s +a4daa8c4 : ldnf1h z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1h -0x60(%x6)[2byte] %p2/z -> %z4.s +a4dba906 : ldnf1h z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0x50(%x8)[2byte] %p2/z -> %z6.s +a4dcad48 : ldnf1h z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x40(%x10)[2byte] %p3/z -> %z8.s +a4ddad6a : ldnf1h z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x30(%x11)[2byte] %p3/z -> %z10.s +a4deb1ac : ldnf1h z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x20(%x13)[2byte] %p4/z -> %z12.s +a4dfb1ee : ldnf1h z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x10(%x15)[2byte] %p4/z -> %z14.s +a4d0b630 : ldnf1h z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[2byte] %p5/z -> %z16.s +a4d0b671 : ldnf1h z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[2byte] %p5/z -> %z17.s +a4d1b6b3 : ldnf1h z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x10(%x21)[2byte] %p5/z -> %z19.s +a4d2baf5 : ldnf1h z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x20(%x23)[2byte] %p6/z -> %z21.s +a4d3bb17 : ldnf1h z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x30(%x24)[2byte] %p6/z -> %z23.s +a4d4bf59 : ldnf1h z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x40(%x26)[2byte] %p7/z -> %z25.s +a4d5bf9b : ldnf1h z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1h +0x50(%x28)[2byte] %p7/z -> %z27.s +a4d7bfff : ldnf1h z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1h +0x70(%sp)[2byte] %p7/z -> %z31.s # LDNF1H { .D }, /Z, [{, #, MUL VL}] (LDNF1H-Z.P.BI-U64) -a4f8a000 : ldnf1h z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x40(%x0)[8byte] %p0/z -> %z0.d -a4f9a482 : ldnf1h z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1h 
-0x38(%x4)[8byte] %p1/z -> %z2.d -a4faa8c4 : ldnf1h z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1h -0x30(%x6)[8byte] %p2/z -> %z4.d -a4fba906 : ldnf1h z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0x28(%x8)[8byte] %p2/z -> %z6.d -a4fcad48 : ldnf1h z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x20(%x10)[8byte] %p3/z -> %z8.d -a4fdad6a : ldnf1h z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x18(%x11)[8byte] %p3/z -> %z10.d -a4feb1ac : ldnf1h z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x10(%x13)[8byte] %p4/z -> %z12.d -a4ffb1ee : ldnf1h z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x08(%x15)[8byte] %p4/z -> %z14.d -a4f0b630 : ldnf1h z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[8byte] %p5/z -> %z16.d -a4f0b671 : ldnf1h z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[8byte] %p5/z -> %z17.d -a4f1b6b3 : ldnf1h z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x08(%x21)[8byte] %p5/z -> %z19.d -a4f2baf5 : ldnf1h z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x10(%x23)[8byte] %p6/z -> %z21.d -a4f3bb17 : ldnf1h z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x18(%x24)[8byte] %p6/z -> %z23.d -a4f4bf59 : ldnf1h z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x20(%x26)[8byte] %p7/z -> %z25.d -a4f5bf9b : ldnf1h z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1h +0x28(%x28)[8byte] %p7/z -> %z27.d -a4f7bfff : ldnf1h z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1h +0x38(%sp)[8byte] %p7/z -> %z31.d +a4f8a000 : ldnf1h z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1h -0x40(%x0)[2byte] %p0/z -> %z0.d +a4f9a482 : ldnf1h z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1h -0x38(%x4)[2byte] %p1/z -> %z2.d +a4faa8c4 : ldnf1h z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1h -0x30(%x6)[2byte] %p2/z -> %z4.d +a4fba906 : ldnf1h z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1h -0x28(%x8)[2byte] %p2/z -> %z6.d +a4fcad48 : ldnf1h z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1h -0x20(%x10)[2byte] %p3/z -> %z8.d +a4fdad6a : ldnf1h z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1h -0x18(%x11)[2byte] %p3/z -> %z10.d +a4feb1ac : ldnf1h z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1h -0x10(%x13)[2byte] %p4/z -> %z12.d +a4ffb1ee : ldnf1h 
z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1h -0x08(%x15)[2byte] %p4/z -> %z14.d +a4f0b630 : ldnf1h z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1h (%x17)[2byte] %p5/z -> %z16.d +a4f0b671 : ldnf1h z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1h (%x19)[2byte] %p5/z -> %z17.d +a4f1b6b3 : ldnf1h z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1h +0x08(%x21)[2byte] %p5/z -> %z19.d +a4f2baf5 : ldnf1h z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1h +0x10(%x23)[2byte] %p6/z -> %z21.d +a4f3bb17 : ldnf1h z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1h +0x18(%x24)[2byte] %p6/z -> %z23.d +a4f4bf59 : ldnf1h z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1h +0x20(%x26)[2byte] %p7/z -> %z25.d +a4f5bf9b : ldnf1h z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1h +0x28(%x28)[2byte] %p7/z -> %z27.d +a4f7bfff : ldnf1h z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1h +0x38(%sp)[2byte] %p7/z -> %z31.d # LDNF1SB { .D }, /Z, [{, #, MUL VL}] (LDNF1SB-Z.P.BI-S64) -a598a000 : ldnf1sb z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sb -0x20(%x0)[4byte] %p0/z -> %z0.d -a599a482 : ldnf1sb z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x1c(%x4)[4byte] %p1/z -> %z2.d -a59aa8c4 : ldnf1sb z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x18(%x6)[4byte] %p2/z -> %z4.d -a59ba906 : ldnf1sb z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x14(%x8)[4byte] %p2/z -> %z6.d -a59cad48 : ldnf1sb z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x10(%x10)[4byte] %p3/z -> %z8.d -a59dad6a : ldnf1sb z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sb -0x0c(%x11)[4byte] %p3/z -> %z10.d -a59eb1ac : ldnf1sb z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x08(%x13)[4byte] %p4/z -> %z12.d -a59fb1ee : ldnf1sb z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x04(%x15)[4byte] %p4/z -> %z14.d -a590b630 : ldnf1sb z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[4byte] %p5/z -> %z16.d -a590b671 : ldnf1sb z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[4byte] %p5/z -> %z17.d -a591b6b3 : ldnf1sb z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x04(%x21)[4byte] %p5/z -> %z19.d -a592baf5 : ldnf1sb z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x08(%x23)[4byte] %p6/z -> 
%z21.d -a593bb17 : ldnf1sb z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x0c(%x24)[4byte] %p6/z -> %z23.d -a594bf59 : ldnf1sb z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x10(%x26)[4byte] %p7/z -> %z25.d -a595bf9b : ldnf1sb z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x14(%x28)[4byte] %p7/z -> %z27.d -a597bfff : ldnf1sb z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x1c(%sp)[4byte] %p7/z -> %z31.d +a598a000 : ldnf1sb z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sb -0x20(%x0)[1byte] %p0/z -> %z0.d +a599a482 : ldnf1sb z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x1c(%x4)[1byte] %p1/z -> %z2.d +a59aa8c4 : ldnf1sb z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x18(%x6)[1byte] %p2/z -> %z4.d +a59ba906 : ldnf1sb z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x14(%x8)[1byte] %p2/z -> %z6.d +a59cad48 : ldnf1sb z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x10(%x10)[1byte] %p3/z -> %z8.d +a59dad6a : ldnf1sb z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sb -0x0c(%x11)[1byte] %p3/z -> %z10.d +a59eb1ac : ldnf1sb z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x08(%x13)[1byte] %p4/z -> %z12.d +a59fb1ee : ldnf1sb z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x04(%x15)[1byte] %p4/z -> %z14.d +a590b630 : ldnf1sb z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[1byte] %p5/z -> %z16.d +a590b671 : ldnf1sb z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[1byte] %p5/z -> %z17.d +a591b6b3 : ldnf1sb z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x04(%x21)[1byte] %p5/z -> %z19.d +a592baf5 : ldnf1sb z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x08(%x23)[1byte] %p6/z -> %z21.d +a593bb17 : ldnf1sb z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x0c(%x24)[1byte] %p6/z -> %z23.d +a594bf59 : ldnf1sb z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x10(%x26)[1byte] %p7/z -> %z25.d +a595bf9b : ldnf1sb z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x14(%x28)[1byte] %p7/z -> %z27.d +a597bfff : ldnf1sb z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x1c(%sp)[1byte] %p7/z -> %z31.d # LDNF1SB { .S }, /Z, [{, #, MUL VL}] (LDNF1SB-Z.P.BI-S32) -a5b8a000 : ldnf1sb z0.s, p0/Z, [x0, 
#-8, MUL VL] : ldnf1sb -0x40(%x0)[8byte] %p0/z -> %z0.s -a5b9a482 : ldnf1sb z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x38(%x4)[8byte] %p1/z -> %z2.s -a5baa8c4 : ldnf1sb z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x30(%x6)[8byte] %p2/z -> %z4.s -a5bba906 : ldnf1sb z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x28(%x8)[8byte] %p2/z -> %z6.s -a5bcad48 : ldnf1sb z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x20(%x10)[8byte] %p3/z -> %z8.s -a5bdad6a : ldnf1sb z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1sb -0x18(%x11)[8byte] %p3/z -> %z10.s -a5beb1ac : ldnf1sb z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x10(%x13)[8byte] %p4/z -> %z12.s -a5bfb1ee : ldnf1sb z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x08(%x15)[8byte] %p4/z -> %z14.s -a5b0b630 : ldnf1sb z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[8byte] %p5/z -> %z16.s -a5b0b671 : ldnf1sb z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[8byte] %p5/z -> %z17.s -a5b1b6b3 : ldnf1sb z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x08(%x21)[8byte] %p5/z -> %z19.s -a5b2baf5 : ldnf1sb z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x10(%x23)[8byte] %p6/z -> %z21.s -a5b3bb17 : ldnf1sb z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x18(%x24)[8byte] %p6/z -> %z23.s -a5b4bf59 : ldnf1sb z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x20(%x26)[8byte] %p7/z -> %z25.s -a5b5bf9b : ldnf1sb z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x28(%x28)[8byte] %p7/z -> %z27.s -a5b7bfff : ldnf1sb z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x38(%sp)[8byte] %p7/z -> %z31.s +a5b8a000 : ldnf1sb z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1sb -0x40(%x0)[1byte] %p0/z -> %z0.s +a5b9a482 : ldnf1sb z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x38(%x4)[1byte] %p1/z -> %z2.s +a5baa8c4 : ldnf1sb z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x30(%x6)[1byte] %p2/z -> %z4.s +a5bba906 : ldnf1sb z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x28(%x8)[1byte] %p2/z -> %z6.s +a5bcad48 : ldnf1sb z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x20(%x10)[1byte] %p3/z -> %z8.s +a5bdad6a : ldnf1sb z10.s, p3/Z, [x11, #-3, MUL VL] : 
ldnf1sb -0x18(%x11)[1byte] %p3/z -> %z10.s +a5beb1ac : ldnf1sb z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x10(%x13)[1byte] %p4/z -> %z12.s +a5bfb1ee : ldnf1sb z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x08(%x15)[1byte] %p4/z -> %z14.s +a5b0b630 : ldnf1sb z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[1byte] %p5/z -> %z16.s +a5b0b671 : ldnf1sb z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[1byte] %p5/z -> %z17.s +a5b1b6b3 : ldnf1sb z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x08(%x21)[1byte] %p5/z -> %z19.s +a5b2baf5 : ldnf1sb z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x10(%x23)[1byte] %p6/z -> %z21.s +a5b3bb17 : ldnf1sb z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x18(%x24)[1byte] %p6/z -> %z23.s +a5b4bf59 : ldnf1sb z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x20(%x26)[1byte] %p7/z -> %z25.s +a5b5bf9b : ldnf1sb z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x28(%x28)[1byte] %p7/z -> %z27.s +a5b7bfff : ldnf1sb z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x38(%sp)[1byte] %p7/z -> %z31.s # LDNF1SB { .H }, /Z, [{, #, MUL VL}] (LDNF1SB-Z.P.BI-S16) -a5d8a000 : ldnf1sb z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1sb -0x80(%x0)[16byte] %p0/z -> %z0.h -a5d9a482 : ldnf1sb z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x70(%x4)[16byte] %p1/z -> %z2.h -a5daa8c4 : ldnf1sb z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x60(%x6)[16byte] %p2/z -> %z4.h -a5dba906 : ldnf1sb z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x50(%x8)[16byte] %p2/z -> %z6.h -a5dcad48 : ldnf1sb z8.h, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x40(%x10)[16byte] %p3/z -> %z8.h -a5ddad6a : ldnf1sb z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1sb -0x30(%x11)[16byte] %p3/z -> %z10.h -a5deb1ac : ldnf1sb z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x20(%x13)[16byte] %p4/z -> %z12.h -a5dfb1ee : ldnf1sb z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x10(%x15)[16byte] %p4/z -> %z14.h -a5d0b630 : ldnf1sb z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[16byte] %p5/z -> %z16.h -a5d0b671 : ldnf1sb z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[16byte] %p5/z -> 
%z17.h -a5d1b6b3 : ldnf1sb z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x10(%x21)[16byte] %p5/z -> %z19.h -a5d2baf5 : ldnf1sb z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x20(%x23)[16byte] %p6/z -> %z21.h -a5d3bb17 : ldnf1sb z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x30(%x24)[16byte] %p6/z -> %z23.h -a5d4bf59 : ldnf1sb z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x40(%x26)[16byte] %p7/z -> %z25.h -a5d5bf9b : ldnf1sb z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x50(%x28)[16byte] %p7/z -> %z27.h -a5d7bfff : ldnf1sb z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x70(%sp)[16byte] %p7/z -> %z31.h +a5d8a000 : ldnf1sb z0.h, p0/Z, [x0, #-8, MUL VL] : ldnf1sb -0x80(%x0)[1byte] %p0/z -> %z0.h +a5d9a482 : ldnf1sb z2.h, p1/Z, [x4, #-7, MUL VL] : ldnf1sb -0x70(%x4)[1byte] %p1/z -> %z2.h +a5daa8c4 : ldnf1sb z4.h, p2/Z, [x6, #-6, MUL VL] : ldnf1sb -0x60(%x6)[1byte] %p2/z -> %z4.h +a5dba906 : ldnf1sb z6.h, p2/Z, [x8, #-5, MUL VL] : ldnf1sb -0x50(%x8)[1byte] %p2/z -> %z6.h +a5dcad48 : ldnf1sb z8.h, p3/Z, [x10, #-4, MUL VL] : ldnf1sb -0x40(%x10)[1byte] %p3/z -> %z8.h +a5ddad6a : ldnf1sb z10.h, p3/Z, [x11, #-3, MUL VL] : ldnf1sb -0x30(%x11)[1byte] %p3/z -> %z10.h +a5deb1ac : ldnf1sb z12.h, p4/Z, [x13, #-2, MUL VL] : ldnf1sb -0x20(%x13)[1byte] %p4/z -> %z12.h +a5dfb1ee : ldnf1sb z14.h, p4/Z, [x15, #-1, MUL VL] : ldnf1sb -0x10(%x15)[1byte] %p4/z -> %z14.h +a5d0b630 : ldnf1sb z16.h, p5/Z, [x17, #0, MUL VL] : ldnf1sb (%x17)[1byte] %p5/z -> %z16.h +a5d0b671 : ldnf1sb z17.h, p5/Z, [x19, #0, MUL VL] : ldnf1sb (%x19)[1byte] %p5/z -> %z17.h +a5d1b6b3 : ldnf1sb z19.h, p5/Z, [x21, #1, MUL VL] : ldnf1sb +0x10(%x21)[1byte] %p5/z -> %z19.h +a5d2baf5 : ldnf1sb z21.h, p6/Z, [x23, #2, MUL VL] : ldnf1sb +0x20(%x23)[1byte] %p6/z -> %z21.h +a5d3bb17 : ldnf1sb z23.h, p6/Z, [x24, #3, MUL VL] : ldnf1sb +0x30(%x24)[1byte] %p6/z -> %z23.h +a5d4bf59 : ldnf1sb z25.h, p7/Z, [x26, #4, MUL VL] : ldnf1sb +0x40(%x26)[1byte] %p7/z -> %z25.h +a5d5bf9b : ldnf1sb z27.h, p7/Z, [x28, #5, MUL VL] : ldnf1sb +0x50(%x28)[1byte] %p7/z -> 
%z27.h +a5d7bfff : ldnf1sb z31.h, p7/Z, [sp, #7, MUL VL] : ldnf1sb +0x70(%sp)[1byte] %p7/z -> %z31.h # LDNF1SH { .D }, /Z, [{, #, MUL VL}] (LDNF1SH-Z.P.BI-S64) -a518a000 : ldnf1sh z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sh -0x40(%x0)[8byte] %p0/z -> %z0.d -a519a482 : ldnf1sh z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sh -0x38(%x4)[8byte] %p1/z -> %z2.d -a51aa8c4 : ldnf1sh z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sh -0x30(%x6)[8byte] %p2/z -> %z4.d -a51ba906 : ldnf1sh z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1sh -0x28(%x8)[8byte] %p2/z -> %z6.d -a51cad48 : ldnf1sh z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sh -0x20(%x10)[8byte] %p3/z -> %z8.d -a51dad6a : ldnf1sh z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sh -0x18(%x11)[8byte] %p3/z -> %z10.d -a51eb1ac : ldnf1sh z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sh -0x10(%x13)[8byte] %p4/z -> %z12.d -a51fb1ee : ldnf1sh z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sh -0x08(%x15)[8byte] %p4/z -> %z14.d -a510b630 : ldnf1sh z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sh (%x17)[8byte] %p5/z -> %z16.d -a510b671 : ldnf1sh z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sh (%x19)[8byte] %p5/z -> %z17.d -a511b6b3 : ldnf1sh z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sh +0x08(%x21)[8byte] %p5/z -> %z19.d -a512baf5 : ldnf1sh z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sh +0x10(%x23)[8byte] %p6/z -> %z21.d -a513bb17 : ldnf1sh z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sh +0x18(%x24)[8byte] %p6/z -> %z23.d -a514bf59 : ldnf1sh z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sh +0x20(%x26)[8byte] %p7/z -> %z25.d -a515bf9b : ldnf1sh z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sh +0x28(%x28)[8byte] %p7/z -> %z27.d -a517bfff : ldnf1sh z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sh +0x38(%sp)[8byte] %p7/z -> %z31.d +a518a000 : ldnf1sh z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sh -0x40(%x0)[2byte] %p0/z -> %z0.d +a519a482 : ldnf1sh z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sh -0x38(%x4)[2byte] %p1/z -> %z2.d +a51aa8c4 : ldnf1sh z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sh -0x30(%x6)[2byte] %p2/z -> %z4.d +a51ba906 : ldnf1sh z6.d, p2/Z, [x8, #-5, MUL 
VL] : ldnf1sh -0x28(%x8)[2byte] %p2/z -> %z6.d +a51cad48 : ldnf1sh z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sh -0x20(%x10)[2byte] %p3/z -> %z8.d +a51dad6a : ldnf1sh z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sh -0x18(%x11)[2byte] %p3/z -> %z10.d +a51eb1ac : ldnf1sh z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sh -0x10(%x13)[2byte] %p4/z -> %z12.d +a51fb1ee : ldnf1sh z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sh -0x08(%x15)[2byte] %p4/z -> %z14.d +a510b630 : ldnf1sh z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sh (%x17)[2byte] %p5/z -> %z16.d +a510b671 : ldnf1sh z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sh (%x19)[2byte] %p5/z -> %z17.d +a511b6b3 : ldnf1sh z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sh +0x08(%x21)[2byte] %p5/z -> %z19.d +a512baf5 : ldnf1sh z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sh +0x10(%x23)[2byte] %p6/z -> %z21.d +a513bb17 : ldnf1sh z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sh +0x18(%x24)[2byte] %p6/z -> %z23.d +a514bf59 : ldnf1sh z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sh +0x20(%x26)[2byte] %p7/z -> %z25.d +a515bf9b : ldnf1sh z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sh +0x28(%x28)[2byte] %p7/z -> %z27.d +a517bfff : ldnf1sh z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sh +0x38(%sp)[2byte] %p7/z -> %z31.d # LDNF1SH { .S }, /Z, [{, #, MUL VL}] (LDNF1SH-Z.P.BI-S32) -a538a000 : ldnf1sh z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1sh -0x80(%x0)[16byte] %p0/z -> %z0.s -a539a482 : ldnf1sh z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1sh -0x70(%x4)[16byte] %p1/z -> %z2.s -a53aa8c4 : ldnf1sh z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1sh -0x60(%x6)[16byte] %p2/z -> %z4.s -a53ba906 : ldnf1sh z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1sh -0x50(%x8)[16byte] %p2/z -> %z6.s -a53cad48 : ldnf1sh z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1sh -0x40(%x10)[16byte] %p3/z -> %z8.s -a53dad6a : ldnf1sh z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1sh -0x30(%x11)[16byte] %p3/z -> %z10.s -a53eb1ac : ldnf1sh z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1sh -0x20(%x13)[16byte] %p4/z -> %z12.s -a53fb1ee : ldnf1sh z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1sh -0x10(%x15)[16byte] 
%p4/z -> %z14.s -a530b630 : ldnf1sh z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1sh (%x17)[16byte] %p5/z -> %z16.s -a530b671 : ldnf1sh z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1sh (%x19)[16byte] %p5/z -> %z17.s -a531b6b3 : ldnf1sh z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1sh +0x10(%x21)[16byte] %p5/z -> %z19.s -a532baf5 : ldnf1sh z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1sh +0x20(%x23)[16byte] %p6/z -> %z21.s -a533bb17 : ldnf1sh z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1sh +0x30(%x24)[16byte] %p6/z -> %z23.s -a534bf59 : ldnf1sh z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1sh +0x40(%x26)[16byte] %p7/z -> %z25.s -a535bf9b : ldnf1sh z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1sh +0x50(%x28)[16byte] %p7/z -> %z27.s -a537bfff : ldnf1sh z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1sh +0x70(%sp)[16byte] %p7/z -> %z31.s +a538a000 : ldnf1sh z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1sh -0x80(%x0)[2byte] %p0/z -> %z0.s +a539a482 : ldnf1sh z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1sh -0x70(%x4)[2byte] %p1/z -> %z2.s +a53aa8c4 : ldnf1sh z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1sh -0x60(%x6)[2byte] %p2/z -> %z4.s +a53ba906 : ldnf1sh z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1sh -0x50(%x8)[2byte] %p2/z -> %z6.s +a53cad48 : ldnf1sh z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1sh -0x40(%x10)[2byte] %p3/z -> %z8.s +a53dad6a : ldnf1sh z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1sh -0x30(%x11)[2byte] %p3/z -> %z10.s +a53eb1ac : ldnf1sh z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1sh -0x20(%x13)[2byte] %p4/z -> %z12.s +a53fb1ee : ldnf1sh z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1sh -0x10(%x15)[2byte] %p4/z -> %z14.s +a530b630 : ldnf1sh z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1sh (%x17)[2byte] %p5/z -> %z16.s +a530b671 : ldnf1sh z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1sh (%x19)[2byte] %p5/z -> %z17.s +a531b6b3 : ldnf1sh z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1sh +0x10(%x21)[2byte] %p5/z -> %z19.s +a532baf5 : ldnf1sh z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1sh +0x20(%x23)[2byte] %p6/z -> %z21.s +a533bb17 : ldnf1sh z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1sh +0x30(%x24)[2byte] %p6/z 
-> %z23.s +a534bf59 : ldnf1sh z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1sh +0x40(%x26)[2byte] %p7/z -> %z25.s +a535bf9b : ldnf1sh z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1sh +0x50(%x28)[2byte] %p7/z -> %z27.s +a537bfff : ldnf1sh z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1sh +0x70(%sp)[2byte] %p7/z -> %z31.s # LDNF1SW { .D }, /Z, [{, #, MUL VL}] (LDNF1SW-Z.P.BI-S64) -a498a000 : ldnf1sw z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sw -0x80(%x0)[16byte] %p0/z -> %z0.d -a499a482 : ldnf1sw z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sw -0x70(%x4)[16byte] %p1/z -> %z2.d -a49aa8c4 : ldnf1sw z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sw -0x60(%x6)[16byte] %p2/z -> %z4.d -a49ba906 : ldnf1sw z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1sw -0x50(%x8)[16byte] %p2/z -> %z6.d -a49cad48 : ldnf1sw z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sw -0x40(%x10)[16byte] %p3/z -> %z8.d -a49dad6a : ldnf1sw z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sw -0x30(%x11)[16byte] %p3/z -> %z10.d -a49eb1ac : ldnf1sw z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sw -0x20(%x13)[16byte] %p4/z -> %z12.d -a49fb1ee : ldnf1sw z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sw -0x10(%x15)[16byte] %p4/z -> %z14.d -a490b630 : ldnf1sw z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sw (%x17)[16byte] %p5/z -> %z16.d -a490b671 : ldnf1sw z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sw (%x19)[16byte] %p5/z -> %z17.d -a491b6b3 : ldnf1sw z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sw +0x10(%x21)[16byte] %p5/z -> %z19.d -a492baf5 : ldnf1sw z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sw +0x20(%x23)[16byte] %p6/z -> %z21.d -a493bb17 : ldnf1sw z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sw +0x30(%x24)[16byte] %p6/z -> %z23.d -a494bf59 : ldnf1sw z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sw +0x40(%x26)[16byte] %p7/z -> %z25.d -a495bf9b : ldnf1sw z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sw +0x50(%x28)[16byte] %p7/z -> %z27.d -a497bfff : ldnf1sw z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sw +0x70(%sp)[16byte] %p7/z -> %z31.d +a498a000 : ldnf1sw z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1sw -0x80(%x0)[4byte] %p0/z -> %z0.d +a499a482 : ldnf1sw 
z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1sw -0x70(%x4)[4byte] %p1/z -> %z2.d +a49aa8c4 : ldnf1sw z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1sw -0x60(%x6)[4byte] %p2/z -> %z4.d +a49ba906 : ldnf1sw z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1sw -0x50(%x8)[4byte] %p2/z -> %z6.d +a49cad48 : ldnf1sw z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1sw -0x40(%x10)[4byte] %p3/z -> %z8.d +a49dad6a : ldnf1sw z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1sw -0x30(%x11)[4byte] %p3/z -> %z10.d +a49eb1ac : ldnf1sw z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1sw -0x20(%x13)[4byte] %p4/z -> %z12.d +a49fb1ee : ldnf1sw z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1sw -0x10(%x15)[4byte] %p4/z -> %z14.d +a490b630 : ldnf1sw z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1sw (%x17)[4byte] %p5/z -> %z16.d +a490b671 : ldnf1sw z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1sw (%x19)[4byte] %p5/z -> %z17.d +a491b6b3 : ldnf1sw z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1sw +0x10(%x21)[4byte] %p5/z -> %z19.d +a492baf5 : ldnf1sw z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1sw +0x20(%x23)[4byte] %p6/z -> %z21.d +a493bb17 : ldnf1sw z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1sw +0x30(%x24)[4byte] %p6/z -> %z23.d +a494bf59 : ldnf1sw z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1sw +0x40(%x26)[4byte] %p7/z -> %z25.d +a495bf9b : ldnf1sw z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1sw +0x50(%x28)[4byte] %p7/z -> %z27.d +a497bfff : ldnf1sw z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1sw +0x70(%sp)[4byte] %p7/z -> %z31.d # LDNF1W { .S }, /Z, [{, #, MUL VL}] (LDNF1W-Z.P.BI-U32) -a558a000 : ldnf1w z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1w -0x0100(%x0)[32byte] %p0/z -> %z0.s -a559a482 : ldnf1w z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1w -0xe0(%x4)[32byte] %p1/z -> %z2.s -a55aa8c4 : ldnf1w z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1w -0xc0(%x6)[32byte] %p2/z -> %z4.s -a55ba906 : ldnf1w z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1w -0xa0(%x8)[32byte] %p2/z -> %z6.s -a55cad48 : ldnf1w z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1w -0x80(%x10)[32byte] %p3/z -> %z8.s -a55dad6a : ldnf1w z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1w 
-0x60(%x11)[32byte] %p3/z -> %z10.s -a55eb1ac : ldnf1w z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1w -0x40(%x13)[32byte] %p4/z -> %z12.s -a55fb1ee : ldnf1w z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1w -0x20(%x15)[32byte] %p4/z -> %z14.s -a550b630 : ldnf1w z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1w (%x17)[32byte] %p5/z -> %z16.s -a550b671 : ldnf1w z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1w (%x19)[32byte] %p5/z -> %z17.s -a551b6b3 : ldnf1w z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1w +0x20(%x21)[32byte] %p5/z -> %z19.s -a552baf5 : ldnf1w z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1w +0x40(%x23)[32byte] %p6/z -> %z21.s -a553bb17 : ldnf1w z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1w +0x60(%x24)[32byte] %p6/z -> %z23.s -a554bf59 : ldnf1w z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1w +0x80(%x26)[32byte] %p7/z -> %z25.s -a555bf9b : ldnf1w z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1w +0xa0(%x28)[32byte] %p7/z -> %z27.s -a557bfff : ldnf1w z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1w +0xe0(%sp)[32byte] %p7/z -> %z31.s +a558a000 : ldnf1w z0.s, p0/Z, [x0, #-8, MUL VL] : ldnf1w -0x0100(%x0)[4byte] %p0/z -> %z0.s +a559a482 : ldnf1w z2.s, p1/Z, [x4, #-7, MUL VL] : ldnf1w -0xe0(%x4)[4byte] %p1/z -> %z2.s +a55aa8c4 : ldnf1w z4.s, p2/Z, [x6, #-6, MUL VL] : ldnf1w -0xc0(%x6)[4byte] %p2/z -> %z4.s +a55ba906 : ldnf1w z6.s, p2/Z, [x8, #-5, MUL VL] : ldnf1w -0xa0(%x8)[4byte] %p2/z -> %z6.s +a55cad48 : ldnf1w z8.s, p3/Z, [x10, #-4, MUL VL] : ldnf1w -0x80(%x10)[4byte] %p3/z -> %z8.s +a55dad6a : ldnf1w z10.s, p3/Z, [x11, #-3, MUL VL] : ldnf1w -0x60(%x11)[4byte] %p3/z -> %z10.s +a55eb1ac : ldnf1w z12.s, p4/Z, [x13, #-2, MUL VL] : ldnf1w -0x40(%x13)[4byte] %p4/z -> %z12.s +a55fb1ee : ldnf1w z14.s, p4/Z, [x15, #-1, MUL VL] : ldnf1w -0x20(%x15)[4byte] %p4/z -> %z14.s +a550b630 : ldnf1w z16.s, p5/Z, [x17, #0, MUL VL] : ldnf1w (%x17)[4byte] %p5/z -> %z16.s +a550b671 : ldnf1w z17.s, p5/Z, [x19, #0, MUL VL] : ldnf1w (%x19)[4byte] %p5/z -> %z17.s +a551b6b3 : ldnf1w z19.s, p5/Z, [x21, #1, MUL VL] : ldnf1w +0x20(%x21)[4byte] %p5/z -> %z19.s 
+a552baf5 : ldnf1w z21.s, p6/Z, [x23, #2, MUL VL] : ldnf1w +0x40(%x23)[4byte] %p6/z -> %z21.s +a553bb17 : ldnf1w z23.s, p6/Z, [x24, #3, MUL VL] : ldnf1w +0x60(%x24)[4byte] %p6/z -> %z23.s +a554bf59 : ldnf1w z25.s, p7/Z, [x26, #4, MUL VL] : ldnf1w +0x80(%x26)[4byte] %p7/z -> %z25.s +a555bf9b : ldnf1w z27.s, p7/Z, [x28, #5, MUL VL] : ldnf1w +0xa0(%x28)[4byte] %p7/z -> %z27.s +a557bfff : ldnf1w z31.s, p7/Z, [sp, #7, MUL VL] : ldnf1w +0xe0(%sp)[4byte] %p7/z -> %z31.s # LDNF1W { .D }, /Z, [{, #, MUL VL}] (LDNF1W-Z.P.BI-U64) -a578a000 : ldnf1w z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1w -0x80(%x0)[16byte] %p0/z -> %z0.d -a579a482 : ldnf1w z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1w -0x70(%x4)[16byte] %p1/z -> %z2.d -a57aa8c4 : ldnf1w z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1w -0x60(%x6)[16byte] %p2/z -> %z4.d -a57ba906 : ldnf1w z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1w -0x50(%x8)[16byte] %p2/z -> %z6.d -a57cad48 : ldnf1w z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1w -0x40(%x10)[16byte] %p3/z -> %z8.d -a57dad6a : ldnf1w z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1w -0x30(%x11)[16byte] %p3/z -> %z10.d -a57eb1ac : ldnf1w z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1w -0x20(%x13)[16byte] %p4/z -> %z12.d -a57fb1ee : ldnf1w z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1w -0x10(%x15)[16byte] %p4/z -> %z14.d -a570b630 : ldnf1w z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1w (%x17)[16byte] %p5/z -> %z16.d -a570b671 : ldnf1w z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1w (%x19)[16byte] %p5/z -> %z17.d -a571b6b3 : ldnf1w z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1w +0x10(%x21)[16byte] %p5/z -> %z19.d -a572baf5 : ldnf1w z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1w +0x20(%x23)[16byte] %p6/z -> %z21.d -a573bb17 : ldnf1w z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1w +0x30(%x24)[16byte] %p6/z -> %z23.d -a574bf59 : ldnf1w z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1w +0x40(%x26)[16byte] %p7/z -> %z25.d -a575bf9b : ldnf1w z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1w +0x50(%x28)[16byte] %p7/z -> %z27.d -a577bfff : ldnf1w z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1w 
+0x70(%sp)[16byte] %p7/z -> %z31.d +a578a000 : ldnf1w z0.d, p0/Z, [x0, #-8, MUL VL] : ldnf1w -0x80(%x0)[4byte] %p0/z -> %z0.d +a579a482 : ldnf1w z2.d, p1/Z, [x4, #-7, MUL VL] : ldnf1w -0x70(%x4)[4byte] %p1/z -> %z2.d +a57aa8c4 : ldnf1w z4.d, p2/Z, [x6, #-6, MUL VL] : ldnf1w -0x60(%x6)[4byte] %p2/z -> %z4.d +a57ba906 : ldnf1w z6.d, p2/Z, [x8, #-5, MUL VL] : ldnf1w -0x50(%x8)[4byte] %p2/z -> %z6.d +a57cad48 : ldnf1w z8.d, p3/Z, [x10, #-4, MUL VL] : ldnf1w -0x40(%x10)[4byte] %p3/z -> %z8.d +a57dad6a : ldnf1w z10.d, p3/Z, [x11, #-3, MUL VL] : ldnf1w -0x30(%x11)[4byte] %p3/z -> %z10.d +a57eb1ac : ldnf1w z12.d, p4/Z, [x13, #-2, MUL VL] : ldnf1w -0x20(%x13)[4byte] %p4/z -> %z12.d +a57fb1ee : ldnf1w z14.d, p4/Z, [x15, #-1, MUL VL] : ldnf1w -0x10(%x15)[4byte] %p4/z -> %z14.d +a570b630 : ldnf1w z16.d, p5/Z, [x17, #0, MUL VL] : ldnf1w (%x17)[4byte] %p5/z -> %z16.d +a570b671 : ldnf1w z17.d, p5/Z, [x19, #0, MUL VL] : ldnf1w (%x19)[4byte] %p5/z -> %z17.d +a571b6b3 : ldnf1w z19.d, p5/Z, [x21, #1, MUL VL] : ldnf1w +0x10(%x21)[4byte] %p5/z -> %z19.d +a572baf5 : ldnf1w z21.d, p6/Z, [x23, #2, MUL VL] : ldnf1w +0x20(%x23)[4byte] %p6/z -> %z21.d +a573bb17 : ldnf1w z23.d, p6/Z, [x24, #3, MUL VL] : ldnf1w +0x30(%x24)[4byte] %p6/z -> %z23.d +a574bf59 : ldnf1w z25.d, p7/Z, [x26, #4, MUL VL] : ldnf1w +0x40(%x26)[4byte] %p7/z -> %z25.d +a575bf9b : ldnf1w z27.d, p7/Z, [x28, #5, MUL VL] : ldnf1w +0x50(%x28)[4byte] %p7/z -> %z27.d +a577bfff : ldnf1w z31.d, p7/Z, [sp, #7, MUL VL] : ldnf1w +0x70(%sp)[4byte] %p7/z -> %z31.d # LDNT1B { .B }, /Z, [, ] (LDNT1B-Z.P.BR-Contiguous) -a400c000 : ldnt1b z0.b, p0/Z, [x0, x0] : ldnt1b (%x0,%x0)[32byte] %p0/z -> %z0.b -a405c482 : ldnt1b z2.b, p1/Z, [x4, x5] : ldnt1b (%x4,%x5)[32byte] %p1/z -> %z2.b -a407c8c4 : ldnt1b z4.b, p2/Z, [x6, x7] : ldnt1b (%x6,%x7)[32byte] %p2/z -> %z4.b -a409c906 : ldnt1b z6.b, p2/Z, [x8, x9] : ldnt1b (%x8,%x9)[32byte] %p2/z -> %z6.b -a40bcd48 : ldnt1b z8.b, p3/Z, [x10, x11] : ldnt1b (%x10,%x11)[32byte] %p3/z -> %z8.b -a40ccd6a : 
ldnt1b z10.b, p3/Z, [x11, x12] : ldnt1b (%x11,%x12)[32byte] %p3/z -> %z10.b -a40ed1ac : ldnt1b z12.b, p4/Z, [x13, x14] : ldnt1b (%x13,%x14)[32byte] %p4/z -> %z12.b -a410d1ee : ldnt1b z14.b, p4/Z, [x15, x16] : ldnt1b (%x15,%x16)[32byte] %p4/z -> %z14.b -a412d630 : ldnt1b z16.b, p5/Z, [x17, x18] : ldnt1b (%x17,%x18)[32byte] %p5/z -> %z16.b -a414d671 : ldnt1b z17.b, p5/Z, [x19, x20] : ldnt1b (%x19,%x20)[32byte] %p5/z -> %z17.b -a416d6b3 : ldnt1b z19.b, p5/Z, [x21, x22] : ldnt1b (%x21,%x22)[32byte] %p5/z -> %z19.b -a418daf5 : ldnt1b z21.b, p6/Z, [x23, x24] : ldnt1b (%x23,%x24)[32byte] %p6/z -> %z21.b -a419db17 : ldnt1b z23.b, p6/Z, [x24, x25] : ldnt1b (%x24,%x25)[32byte] %p6/z -> %z23.b -a41bdf59 : ldnt1b z25.b, p7/Z, [x26, x27] : ldnt1b (%x26,%x27)[32byte] %p7/z -> %z25.b -a41ddf9b : ldnt1b z27.b, p7/Z, [x28, x29] : ldnt1b (%x28,%x29)[32byte] %p7/z -> %z27.b -a41edfff : ldnt1b z31.b, p7/Z, [sp, x30] : ldnt1b (%sp,%x30)[32byte] %p7/z -> %z31.b +a400c000 : ldnt1b z0.b, p0/Z, [x0, x0] : ldnt1b (%x0,%x0)[1byte] %p0/z -> %z0.b +a405c482 : ldnt1b z2.b, p1/Z, [x4, x5] : ldnt1b (%x4,%x5)[1byte] %p1/z -> %z2.b +a407c8c4 : ldnt1b z4.b, p2/Z, [x6, x7] : ldnt1b (%x6,%x7)[1byte] %p2/z -> %z4.b +a409c906 : ldnt1b z6.b, p2/Z, [x8, x9] : ldnt1b (%x8,%x9)[1byte] %p2/z -> %z6.b +a40bcd48 : ldnt1b z8.b, p3/Z, [x10, x11] : ldnt1b (%x10,%x11)[1byte] %p3/z -> %z8.b +a40ccd6a : ldnt1b z10.b, p3/Z, [x11, x12] : ldnt1b (%x11,%x12)[1byte] %p3/z -> %z10.b +a40ed1ac : ldnt1b z12.b, p4/Z, [x13, x14] : ldnt1b (%x13,%x14)[1byte] %p4/z -> %z12.b +a410d1ee : ldnt1b z14.b, p4/Z, [x15, x16] : ldnt1b (%x15,%x16)[1byte] %p4/z -> %z14.b +a412d630 : ldnt1b z16.b, p5/Z, [x17, x18] : ldnt1b (%x17,%x18)[1byte] %p5/z -> %z16.b +a414d671 : ldnt1b z17.b, p5/Z, [x19, x20] : ldnt1b (%x19,%x20)[1byte] %p5/z -> %z17.b +a416d6b3 : ldnt1b z19.b, p5/Z, [x21, x22] : ldnt1b (%x21,%x22)[1byte] %p5/z -> %z19.b +a418daf5 : ldnt1b z21.b, p6/Z, [x23, x24] : ldnt1b (%x23,%x24)[1byte] %p6/z -> %z21.b +a419db17 : ldnt1b z23.b, 
p6/Z, [x24, x25] : ldnt1b (%x24,%x25)[1byte] %p6/z -> %z23.b +a41bdf59 : ldnt1b z25.b, p7/Z, [x26, x27] : ldnt1b (%x26,%x27)[1byte] %p7/z -> %z25.b +a41ddf9b : ldnt1b z27.b, p7/Z, [x28, x29] : ldnt1b (%x28,%x29)[1byte] %p7/z -> %z27.b +a41edfff : ldnt1b z31.b, p7/Z, [sp, x30] : ldnt1b (%sp,%x30)[1byte] %p7/z -> %z31.b # LDNT1B { .B }, /Z, [{, #, MUL VL}] (LDNT1B-Z.P.BI-Contiguous) -a408e000 : ldnt1b z0.b, p0/Z, [x0, #-8, MUL VL] : ldnt1b -0x0100(%x0)[32byte] %p0/z -> %z0.b -a409e482 : ldnt1b z2.b, p1/Z, [x4, #-7, MUL VL] : ldnt1b -0xe0(%x4)[32byte] %p1/z -> %z2.b -a40ae8c4 : ldnt1b z4.b, p2/Z, [x6, #-6, MUL VL] : ldnt1b -0xc0(%x6)[32byte] %p2/z -> %z4.b -a40be906 : ldnt1b z6.b, p2/Z, [x8, #-5, MUL VL] : ldnt1b -0xa0(%x8)[32byte] %p2/z -> %z6.b -a40ced48 : ldnt1b z8.b, p3/Z, [x10, #-4, MUL VL] : ldnt1b -0x80(%x10)[32byte] %p3/z -> %z8.b -a40ded6a : ldnt1b z10.b, p3/Z, [x11, #-3, MUL VL] : ldnt1b -0x60(%x11)[32byte] %p3/z -> %z10.b -a40ef1ac : ldnt1b z12.b, p4/Z, [x13, #-2, MUL VL] : ldnt1b -0x40(%x13)[32byte] %p4/z -> %z12.b -a40ff1ee : ldnt1b z14.b, p4/Z, [x15, #-1, MUL VL] : ldnt1b -0x20(%x15)[32byte] %p4/z -> %z14.b -a400f630 : ldnt1b z16.b, p5/Z, [x17, #0, MUL VL] : ldnt1b (%x17)[32byte] %p5/z -> %z16.b -a400f671 : ldnt1b z17.b, p5/Z, [x19, #0, MUL VL] : ldnt1b (%x19)[32byte] %p5/z -> %z17.b -a401f6b3 : ldnt1b z19.b, p5/Z, [x21, #1, MUL VL] : ldnt1b +0x20(%x21)[32byte] %p5/z -> %z19.b -a402faf5 : ldnt1b z21.b, p6/Z, [x23, #2, MUL VL] : ldnt1b +0x40(%x23)[32byte] %p6/z -> %z21.b -a403fb17 : ldnt1b z23.b, p6/Z, [x24, #3, MUL VL] : ldnt1b +0x60(%x24)[32byte] %p6/z -> %z23.b -a404ff59 : ldnt1b z25.b, p7/Z, [x26, #4, MUL VL] : ldnt1b +0x80(%x26)[32byte] %p7/z -> %z25.b -a405ff9b : ldnt1b z27.b, p7/Z, [x28, #5, MUL VL] : ldnt1b +0xa0(%x28)[32byte] %p7/z -> %z27.b -a407ffff : ldnt1b z31.b, p7/Z, [sp, #7, MUL VL] : ldnt1b +0xe0(%sp)[32byte] %p7/z -> %z31.b +a408e000 : ldnt1b z0.b, p0/Z, [x0, #-8, MUL VL] : ldnt1b -0x0100(%x0)[1byte] %p0/z -> %z0.b +a409e482 : ldnt1b 
z2.b, p1/Z, [x4, #-7, MUL VL] : ldnt1b -0xe0(%x4)[1byte] %p1/z -> %z2.b +a40ae8c4 : ldnt1b z4.b, p2/Z, [x6, #-6, MUL VL] : ldnt1b -0xc0(%x6)[1byte] %p2/z -> %z4.b +a40be906 : ldnt1b z6.b, p2/Z, [x8, #-5, MUL VL] : ldnt1b -0xa0(%x8)[1byte] %p2/z -> %z6.b +a40ced48 : ldnt1b z8.b, p3/Z, [x10, #-4, MUL VL] : ldnt1b -0x80(%x10)[1byte] %p3/z -> %z8.b +a40ded6a : ldnt1b z10.b, p3/Z, [x11, #-3, MUL VL] : ldnt1b -0x60(%x11)[1byte] %p3/z -> %z10.b +a40ef1ac : ldnt1b z12.b, p4/Z, [x13, #-2, MUL VL] : ldnt1b -0x40(%x13)[1byte] %p4/z -> %z12.b +a40ff1ee : ldnt1b z14.b, p4/Z, [x15, #-1, MUL VL] : ldnt1b -0x20(%x15)[1byte] %p4/z -> %z14.b +a400f630 : ldnt1b z16.b, p5/Z, [x17, #0, MUL VL] : ldnt1b (%x17)[1byte] %p5/z -> %z16.b +a400f671 : ldnt1b z17.b, p5/Z, [x19, #0, MUL VL] : ldnt1b (%x19)[1byte] %p5/z -> %z17.b +a401f6b3 : ldnt1b z19.b, p5/Z, [x21, #1, MUL VL] : ldnt1b +0x20(%x21)[1byte] %p5/z -> %z19.b +a402faf5 : ldnt1b z21.b, p6/Z, [x23, #2, MUL VL] : ldnt1b +0x40(%x23)[1byte] %p6/z -> %z21.b +a403fb17 : ldnt1b z23.b, p6/Z, [x24, #3, MUL VL] : ldnt1b +0x60(%x24)[1byte] %p6/z -> %z23.b +a404ff59 : ldnt1b z25.b, p7/Z, [x26, #4, MUL VL] : ldnt1b +0x80(%x26)[1byte] %p7/z -> %z25.b +a405ff9b : ldnt1b z27.b, p7/Z, [x28, #5, MUL VL] : ldnt1b +0xa0(%x28)[1byte] %p7/z -> %z27.b +a407ffff : ldnt1b z31.b, p7/Z, [sp, #7, MUL VL] : ldnt1b +0xe0(%sp)[1byte] %p7/z -> %z31.b # LDNT1D { .D }, /Z, [, , LSL #3] (LDNT1D-Z.P.BR-Contiguous) -a580c000 : ldnt1d z0.d, p0/Z, [x0, x0, LSL #3] : ldnt1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d -a585c482 : ldnt1d z2.d, p1/Z, [x4, x5, LSL #3] : ldnt1d (%x4,%x5,lsl #3)[32byte] %p1/z -> %z2.d -a587c8c4 : ldnt1d z4.d, p2/Z, [x6, x7, LSL #3] : ldnt1d (%x6,%x7,lsl #3)[32byte] %p2/z -> %z4.d -a589c906 : ldnt1d z6.d, p2/Z, [x8, x9, LSL #3] : ldnt1d (%x8,%x9,lsl #3)[32byte] %p2/z -> %z6.d -a58bcd48 : ldnt1d z8.d, p3/Z, [x10, x11, LSL #3] : ldnt1d (%x10,%x11,lsl #3)[32byte] %p3/z -> %z8.d -a58ccd6a : ldnt1d z10.d, p3/Z, [x11, x12, LSL #3] : ldnt1d (%x11,%x12,lsl 
#3)[32byte] %p3/z -> %z10.d -a58ed1ac : ldnt1d z12.d, p4/Z, [x13, x14, LSL #3] : ldnt1d (%x13,%x14,lsl #3)[32byte] %p4/z -> %z12.d -a590d1ee : ldnt1d z14.d, p4/Z, [x15, x16, LSL #3] : ldnt1d (%x15,%x16,lsl #3)[32byte] %p4/z -> %z14.d -a592d630 : ldnt1d z16.d, p5/Z, [x17, x18, LSL #3] : ldnt1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d -a594d671 : ldnt1d z17.d, p5/Z, [x19, x20, LSL #3] : ldnt1d (%x19,%x20,lsl #3)[32byte] %p5/z -> %z17.d -a596d6b3 : ldnt1d z19.d, p5/Z, [x21, x22, LSL #3] : ldnt1d (%x21,%x22,lsl #3)[32byte] %p5/z -> %z19.d -a598daf5 : ldnt1d z21.d, p6/Z, [x23, x24, LSL #3] : ldnt1d (%x23,%x24,lsl #3)[32byte] %p6/z -> %z21.d -a599db17 : ldnt1d z23.d, p6/Z, [x24, x25, LSL #3] : ldnt1d (%x24,%x25,lsl #3)[32byte] %p6/z -> %z23.d -a59bdf59 : ldnt1d z25.d, p7/Z, [x26, x27, LSL #3] : ldnt1d (%x26,%x27,lsl #3)[32byte] %p7/z -> %z25.d -a59ddf9b : ldnt1d z27.d, p7/Z, [x28, x29, LSL #3] : ldnt1d (%x28,%x29,lsl #3)[32byte] %p7/z -> %z27.d -a59edfff : ldnt1d z31.d, p7/Z, [sp, x30, LSL #3] : ldnt1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d +a580c000 : ldnt1d z0.d, p0/Z, [x0, x0, LSL #3] : ldnt1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d +a585c482 : ldnt1d z2.d, p1/Z, [x4, x5, LSL #3] : ldnt1d (%x4,%x5,lsl #3)[8byte] %p1/z -> %z2.d +a587c8c4 : ldnt1d z4.d, p2/Z, [x6, x7, LSL #3] : ldnt1d (%x6,%x7,lsl #3)[8byte] %p2/z -> %z4.d +a589c906 : ldnt1d z6.d, p2/Z, [x8, x9, LSL #3] : ldnt1d (%x8,%x9,lsl #3)[8byte] %p2/z -> %z6.d +a58bcd48 : ldnt1d z8.d, p3/Z, [x10, x11, LSL #3] : ldnt1d (%x10,%x11,lsl #3)[8byte] %p3/z -> %z8.d +a58ccd6a : ldnt1d z10.d, p3/Z, [x11, x12, LSL #3] : ldnt1d (%x11,%x12,lsl #3)[8byte] %p3/z -> %z10.d +a58ed1ac : ldnt1d z12.d, p4/Z, [x13, x14, LSL #3] : ldnt1d (%x13,%x14,lsl #3)[8byte] %p4/z -> %z12.d +a590d1ee : ldnt1d z14.d, p4/Z, [x15, x16, LSL #3] : ldnt1d (%x15,%x16,lsl #3)[8byte] %p4/z -> %z14.d +a592d630 : ldnt1d z16.d, p5/Z, [x17, x18, LSL #3] : ldnt1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d +a594d671 : ldnt1d z17.d, p5/Z, [x19, x20, LSL #3] 
: ldnt1d (%x19,%x20,lsl #3)[8byte] %p5/z -> %z17.d +a596d6b3 : ldnt1d z19.d, p5/Z, [x21, x22, LSL #3] : ldnt1d (%x21,%x22,lsl #3)[8byte] %p5/z -> %z19.d +a598daf5 : ldnt1d z21.d, p6/Z, [x23, x24, LSL #3] : ldnt1d (%x23,%x24,lsl #3)[8byte] %p6/z -> %z21.d +a599db17 : ldnt1d z23.d, p6/Z, [x24, x25, LSL #3] : ldnt1d (%x24,%x25,lsl #3)[8byte] %p6/z -> %z23.d +a59bdf59 : ldnt1d z25.d, p7/Z, [x26, x27, LSL #3] : ldnt1d (%x26,%x27,lsl #3)[8byte] %p7/z -> %z25.d +a59ddf9b : ldnt1d z27.d, p7/Z, [x28, x29, LSL #3] : ldnt1d (%x28,%x29,lsl #3)[8byte] %p7/z -> %z27.d +a59edfff : ldnt1d z31.d, p7/Z, [sp, x30, LSL #3] : ldnt1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d # LDNT1D { .D }, /Z, [{, #, MUL VL}] (LDNT1D-Z.P.BI-Contiguous) -a588e000 : ldnt1d z0.d, p0/Z, [x0, #-8, MUL VL] : ldnt1d -0x0100(%x0)[32byte] %p0/z -> %z0.d -a589e482 : ldnt1d z2.d, p1/Z, [x4, #-7, MUL VL] : ldnt1d -0xe0(%x4)[32byte] %p1/z -> %z2.d -a58ae8c4 : ldnt1d z4.d, p2/Z, [x6, #-6, MUL VL] : ldnt1d -0xc0(%x6)[32byte] %p2/z -> %z4.d -a58be906 : ldnt1d z6.d, p2/Z, [x8, #-5, MUL VL] : ldnt1d -0xa0(%x8)[32byte] %p2/z -> %z6.d -a58ced48 : ldnt1d z8.d, p3/Z, [x10, #-4, MUL VL] : ldnt1d -0x80(%x10)[32byte] %p3/z -> %z8.d -a58ded6a : ldnt1d z10.d, p3/Z, [x11, #-3, MUL VL] : ldnt1d -0x60(%x11)[32byte] %p3/z -> %z10.d -a58ef1ac : ldnt1d z12.d, p4/Z, [x13, #-2, MUL VL] : ldnt1d -0x40(%x13)[32byte] %p4/z -> %z12.d -a58ff1ee : ldnt1d z14.d, p4/Z, [x15, #-1, MUL VL] : ldnt1d -0x20(%x15)[32byte] %p4/z -> %z14.d -a580f630 : ldnt1d z16.d, p5/Z, [x17, #0, MUL VL] : ldnt1d (%x17)[32byte] %p5/z -> %z16.d -a580f671 : ldnt1d z17.d, p5/Z, [x19, #0, MUL VL] : ldnt1d (%x19)[32byte] %p5/z -> %z17.d -a581f6b3 : ldnt1d z19.d, p5/Z, [x21, #1, MUL VL] : ldnt1d +0x20(%x21)[32byte] %p5/z -> %z19.d -a582faf5 : ldnt1d z21.d, p6/Z, [x23, #2, MUL VL] : ldnt1d +0x40(%x23)[32byte] %p6/z -> %z21.d -a583fb17 : ldnt1d z23.d, p6/Z, [x24, #3, MUL VL] : ldnt1d +0x60(%x24)[32byte] %p6/z -> %z23.d -a584ff59 : ldnt1d z25.d, p7/Z, [x26, #4, MUL VL] : 
ldnt1d +0x80(%x26)[32byte] %p7/z -> %z25.d -a585ff9b : ldnt1d z27.d, p7/Z, [x28, #5, MUL VL] : ldnt1d +0xa0(%x28)[32byte] %p7/z -> %z27.d -a587ffff : ldnt1d z31.d, p7/Z, [sp, #7, MUL VL] : ldnt1d +0xe0(%sp)[32byte] %p7/z -> %z31.d +a588e000 : ldnt1d z0.d, p0/Z, [x0, #-8, MUL VL] : ldnt1d -0x0100(%x0)[8byte] %p0/z -> %z0.d +a589e482 : ldnt1d z2.d, p1/Z, [x4, #-7, MUL VL] : ldnt1d -0xe0(%x4)[8byte] %p1/z -> %z2.d +a58ae8c4 : ldnt1d z4.d, p2/Z, [x6, #-6, MUL VL] : ldnt1d -0xc0(%x6)[8byte] %p2/z -> %z4.d +a58be906 : ldnt1d z6.d, p2/Z, [x8, #-5, MUL VL] : ldnt1d -0xa0(%x8)[8byte] %p2/z -> %z6.d +a58ced48 : ldnt1d z8.d, p3/Z, [x10, #-4, MUL VL] : ldnt1d -0x80(%x10)[8byte] %p3/z -> %z8.d +a58ded6a : ldnt1d z10.d, p3/Z, [x11, #-3, MUL VL] : ldnt1d -0x60(%x11)[8byte] %p3/z -> %z10.d +a58ef1ac : ldnt1d z12.d, p4/Z, [x13, #-2, MUL VL] : ldnt1d -0x40(%x13)[8byte] %p4/z -> %z12.d +a58ff1ee : ldnt1d z14.d, p4/Z, [x15, #-1, MUL VL] : ldnt1d -0x20(%x15)[8byte] %p4/z -> %z14.d +a580f630 : ldnt1d z16.d, p5/Z, [x17, #0, MUL VL] : ldnt1d (%x17)[8byte] %p5/z -> %z16.d +a580f671 : ldnt1d z17.d, p5/Z, [x19, #0, MUL VL] : ldnt1d (%x19)[8byte] %p5/z -> %z17.d +a581f6b3 : ldnt1d z19.d, p5/Z, [x21, #1, MUL VL] : ldnt1d +0x20(%x21)[8byte] %p5/z -> %z19.d +a582faf5 : ldnt1d z21.d, p6/Z, [x23, #2, MUL VL] : ldnt1d +0x40(%x23)[8byte] %p6/z -> %z21.d +a583fb17 : ldnt1d z23.d, p6/Z, [x24, #3, MUL VL] : ldnt1d +0x60(%x24)[8byte] %p6/z -> %z23.d +a584ff59 : ldnt1d z25.d, p7/Z, [x26, #4, MUL VL] : ldnt1d +0x80(%x26)[8byte] %p7/z -> %z25.d +a585ff9b : ldnt1d z27.d, p7/Z, [x28, #5, MUL VL] : ldnt1d +0xa0(%x28)[8byte] %p7/z -> %z27.d +a587ffff : ldnt1d z31.d, p7/Z, [sp, #7, MUL VL] : ldnt1d +0xe0(%sp)[8byte] %p7/z -> %z31.d # LDNT1H { .H }, /Z, [, , LSL #1] (LDNT1H-Z.P.BR-Contiguous) -a480c000 : ldnt1h z0.h, p0/Z, [x0, x0, LSL #1] : ldnt1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h -a485c482 : ldnt1h z2.h, p1/Z, [x4, x5, LSL #1] : ldnt1h (%x4,%x5,lsl #1)[32byte] %p1/z -> %z2.h -a487c8c4 : ldnt1h z4.h, 
p2/Z, [x6, x7, LSL #1] : ldnt1h (%x6,%x7,lsl #1)[32byte] %p2/z -> %z4.h -a489c906 : ldnt1h z6.h, p2/Z, [x8, x9, LSL #1] : ldnt1h (%x8,%x9,lsl #1)[32byte] %p2/z -> %z6.h -a48bcd48 : ldnt1h z8.h, p3/Z, [x10, x11, LSL #1] : ldnt1h (%x10,%x11,lsl #1)[32byte] %p3/z -> %z8.h -a48ccd6a : ldnt1h z10.h, p3/Z, [x11, x12, LSL #1] : ldnt1h (%x11,%x12,lsl #1)[32byte] %p3/z -> %z10.h -a48ed1ac : ldnt1h z12.h, p4/Z, [x13, x14, LSL #1] : ldnt1h (%x13,%x14,lsl #1)[32byte] %p4/z -> %z12.h -a490d1ee : ldnt1h z14.h, p4/Z, [x15, x16, LSL #1] : ldnt1h (%x15,%x16,lsl #1)[32byte] %p4/z -> %z14.h -a492d630 : ldnt1h z16.h, p5/Z, [x17, x18, LSL #1] : ldnt1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h -a494d671 : ldnt1h z17.h, p5/Z, [x19, x20, LSL #1] : ldnt1h (%x19,%x20,lsl #1)[32byte] %p5/z -> %z17.h -a496d6b3 : ldnt1h z19.h, p5/Z, [x21, x22, LSL #1] : ldnt1h (%x21,%x22,lsl #1)[32byte] %p5/z -> %z19.h -a498daf5 : ldnt1h z21.h, p6/Z, [x23, x24, LSL #1] : ldnt1h (%x23,%x24,lsl #1)[32byte] %p6/z -> %z21.h -a499db17 : ldnt1h z23.h, p6/Z, [x24, x25, LSL #1] : ldnt1h (%x24,%x25,lsl #1)[32byte] %p6/z -> %z23.h -a49bdf59 : ldnt1h z25.h, p7/Z, [x26, x27, LSL #1] : ldnt1h (%x26,%x27,lsl #1)[32byte] %p7/z -> %z25.h -a49ddf9b : ldnt1h z27.h, p7/Z, [x28, x29, LSL #1] : ldnt1h (%x28,%x29,lsl #1)[32byte] %p7/z -> %z27.h -a49edfff : ldnt1h z31.h, p7/Z, [sp, x30, LSL #1] : ldnt1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h +a480c000 : ldnt1h z0.h, p0/Z, [x0, x0, LSL #1] : ldnt1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h +a485c482 : ldnt1h z2.h, p1/Z, [x4, x5, LSL #1] : ldnt1h (%x4,%x5,lsl #1)[2byte] %p1/z -> %z2.h +a487c8c4 : ldnt1h z4.h, p2/Z, [x6, x7, LSL #1] : ldnt1h (%x6,%x7,lsl #1)[2byte] %p2/z -> %z4.h +a489c906 : ldnt1h z6.h, p2/Z, [x8, x9, LSL #1] : ldnt1h (%x8,%x9,lsl #1)[2byte] %p2/z -> %z6.h +a48bcd48 : ldnt1h z8.h, p3/Z, [x10, x11, LSL #1] : ldnt1h (%x10,%x11,lsl #1)[2byte] %p3/z -> %z8.h +a48ccd6a : ldnt1h z10.h, p3/Z, [x11, x12, LSL #1] : ldnt1h (%x11,%x12,lsl #1)[2byte] %p3/z -> %z10.h +a48ed1ac : 
ldnt1h z12.h, p4/Z, [x13, x14, LSL #1] : ldnt1h (%x13,%x14,lsl #1)[2byte] %p4/z -> %z12.h +a490d1ee : ldnt1h z14.h, p4/Z, [x15, x16, LSL #1] : ldnt1h (%x15,%x16,lsl #1)[2byte] %p4/z -> %z14.h +a492d630 : ldnt1h z16.h, p5/Z, [x17, x18, LSL #1] : ldnt1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h +a494d671 : ldnt1h z17.h, p5/Z, [x19, x20, LSL #1] : ldnt1h (%x19,%x20,lsl #1)[2byte] %p5/z -> %z17.h +a496d6b3 : ldnt1h z19.h, p5/Z, [x21, x22, LSL #1] : ldnt1h (%x21,%x22,lsl #1)[2byte] %p5/z -> %z19.h +a498daf5 : ldnt1h z21.h, p6/Z, [x23, x24, LSL #1] : ldnt1h (%x23,%x24,lsl #1)[2byte] %p6/z -> %z21.h +a499db17 : ldnt1h z23.h, p6/Z, [x24, x25, LSL #1] : ldnt1h (%x24,%x25,lsl #1)[2byte] %p6/z -> %z23.h +a49bdf59 : ldnt1h z25.h, p7/Z, [x26, x27, LSL #1] : ldnt1h (%x26,%x27,lsl #1)[2byte] %p7/z -> %z25.h +a49ddf9b : ldnt1h z27.h, p7/Z, [x28, x29, LSL #1] : ldnt1h (%x28,%x29,lsl #1)[2byte] %p7/z -> %z27.h +a49edfff : ldnt1h z31.h, p7/Z, [sp, x30, LSL #1] : ldnt1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h # LDNT1H { .H }, /Z, [{, #, MUL VL}] (LDNT1H-Z.P.BI-Contiguous) -a488e000 : ldnt1h z0.h, p0/Z, [x0, #-8, MUL VL] : ldnt1h -0x0100(%x0)[32byte] %p0/z -> %z0.h -a489e482 : ldnt1h z2.h, p1/Z, [x4, #-7, MUL VL] : ldnt1h -0xe0(%x4)[32byte] %p1/z -> %z2.h -a48ae8c4 : ldnt1h z4.h, p2/Z, [x6, #-6, MUL VL] : ldnt1h -0xc0(%x6)[32byte] %p2/z -> %z4.h -a48be906 : ldnt1h z6.h, p2/Z, [x8, #-5, MUL VL] : ldnt1h -0xa0(%x8)[32byte] %p2/z -> %z6.h -a48ced48 : ldnt1h z8.h, p3/Z, [x10, #-4, MUL VL] : ldnt1h -0x80(%x10)[32byte] %p3/z -> %z8.h -a48ded6a : ldnt1h z10.h, p3/Z, [x11, #-3, MUL VL] : ldnt1h -0x60(%x11)[32byte] %p3/z -> %z10.h -a48ef1ac : ldnt1h z12.h, p4/Z, [x13, #-2, MUL VL] : ldnt1h -0x40(%x13)[32byte] %p4/z -> %z12.h -a48ff1ee : ldnt1h z14.h, p4/Z, [x15, #-1, MUL VL] : ldnt1h -0x20(%x15)[32byte] %p4/z -> %z14.h -a480f630 : ldnt1h z16.h, p5/Z, [x17, #0, MUL VL] : ldnt1h (%x17)[32byte] %p5/z -> %z16.h -a480f671 : ldnt1h z17.h, p5/Z, [x19, #0, MUL VL] : ldnt1h (%x19)[32byte] %p5/z -> 
%z17.h -a481f6b3 : ldnt1h z19.h, p5/Z, [x21, #1, MUL VL] : ldnt1h +0x20(%x21)[32byte] %p5/z -> %z19.h -a482faf5 : ldnt1h z21.h, p6/Z, [x23, #2, MUL VL] : ldnt1h +0x40(%x23)[32byte] %p6/z -> %z21.h -a483fb17 : ldnt1h z23.h, p6/Z, [x24, #3, MUL VL] : ldnt1h +0x60(%x24)[32byte] %p6/z -> %z23.h -a484ff59 : ldnt1h z25.h, p7/Z, [x26, #4, MUL VL] : ldnt1h +0x80(%x26)[32byte] %p7/z -> %z25.h -a485ff9b : ldnt1h z27.h, p7/Z, [x28, #5, MUL VL] : ldnt1h +0xa0(%x28)[32byte] %p7/z -> %z27.h -a487ffff : ldnt1h z31.h, p7/Z, [sp, #7, MUL VL] : ldnt1h +0xe0(%sp)[32byte] %p7/z -> %z31.h +a488e000 : ldnt1h z0.h, p0/Z, [x0, #-8, MUL VL] : ldnt1h -0x0100(%x0)[2byte] %p0/z -> %z0.h +a489e482 : ldnt1h z2.h, p1/Z, [x4, #-7, MUL VL] : ldnt1h -0xe0(%x4)[2byte] %p1/z -> %z2.h +a48ae8c4 : ldnt1h z4.h, p2/Z, [x6, #-6, MUL VL] : ldnt1h -0xc0(%x6)[2byte] %p2/z -> %z4.h +a48be906 : ldnt1h z6.h, p2/Z, [x8, #-5, MUL VL] : ldnt1h -0xa0(%x8)[2byte] %p2/z -> %z6.h +a48ced48 : ldnt1h z8.h, p3/Z, [x10, #-4, MUL VL] : ldnt1h -0x80(%x10)[2byte] %p3/z -> %z8.h +a48ded6a : ldnt1h z10.h, p3/Z, [x11, #-3, MUL VL] : ldnt1h -0x60(%x11)[2byte] %p3/z -> %z10.h +a48ef1ac : ldnt1h z12.h, p4/Z, [x13, #-2, MUL VL] : ldnt1h -0x40(%x13)[2byte] %p4/z -> %z12.h +a48ff1ee : ldnt1h z14.h, p4/Z, [x15, #-1, MUL VL] : ldnt1h -0x20(%x15)[2byte] %p4/z -> %z14.h +a480f630 : ldnt1h z16.h, p5/Z, [x17, #0, MUL VL] : ldnt1h (%x17)[2byte] %p5/z -> %z16.h +a480f671 : ldnt1h z17.h, p5/Z, [x19, #0, MUL VL] : ldnt1h (%x19)[2byte] %p5/z -> %z17.h +a481f6b3 : ldnt1h z19.h, p5/Z, [x21, #1, MUL VL] : ldnt1h +0x20(%x21)[2byte] %p5/z -> %z19.h +a482faf5 : ldnt1h z21.h, p6/Z, [x23, #2, MUL VL] : ldnt1h +0x40(%x23)[2byte] %p6/z -> %z21.h +a483fb17 : ldnt1h z23.h, p6/Z, [x24, #3, MUL VL] : ldnt1h +0x60(%x24)[2byte] %p6/z -> %z23.h +a484ff59 : ldnt1h z25.h, p7/Z, [x26, #4, MUL VL] : ldnt1h +0x80(%x26)[2byte] %p7/z -> %z25.h +a485ff9b : ldnt1h z27.h, p7/Z, [x28, #5, MUL VL] : ldnt1h +0xa0(%x28)[2byte] %p7/z -> %z27.h +a487ffff : ldnt1h z31.h, p7/Z, 
[sp, #7, MUL VL] : ldnt1h +0xe0(%sp)[2byte] %p7/z -> %z31.h # LDNT1W { .S }, /Z, [, , LSL #2] (LDNT1W-Z.P.BR-Contiguous) -a500c000 : ldnt1w z0.s, p0/Z, [x0, x0, LSL #2] : ldnt1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s -a505c482 : ldnt1w z2.s, p1/Z, [x4, x5, LSL #2] : ldnt1w (%x4,%x5,lsl #2)[32byte] %p1/z -> %z2.s -a507c8c4 : ldnt1w z4.s, p2/Z, [x6, x7, LSL #2] : ldnt1w (%x6,%x7,lsl #2)[32byte] %p2/z -> %z4.s -a509c906 : ldnt1w z6.s, p2/Z, [x8, x9, LSL #2] : ldnt1w (%x8,%x9,lsl #2)[32byte] %p2/z -> %z6.s -a50bcd48 : ldnt1w z8.s, p3/Z, [x10, x11, LSL #2] : ldnt1w (%x10,%x11,lsl #2)[32byte] %p3/z -> %z8.s -a50ccd6a : ldnt1w z10.s, p3/Z, [x11, x12, LSL #2] : ldnt1w (%x11,%x12,lsl #2)[32byte] %p3/z -> %z10.s -a50ed1ac : ldnt1w z12.s, p4/Z, [x13, x14, LSL #2] : ldnt1w (%x13,%x14,lsl #2)[32byte] %p4/z -> %z12.s -a510d1ee : ldnt1w z14.s, p4/Z, [x15, x16, LSL #2] : ldnt1w (%x15,%x16,lsl #2)[32byte] %p4/z -> %z14.s -a512d630 : ldnt1w z16.s, p5/Z, [x17, x18, LSL #2] : ldnt1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s -a514d671 : ldnt1w z17.s, p5/Z, [x19, x20, LSL #2] : ldnt1w (%x19,%x20,lsl #2)[32byte] %p5/z -> %z17.s -a516d6b3 : ldnt1w z19.s, p5/Z, [x21, x22, LSL #2] : ldnt1w (%x21,%x22,lsl #2)[32byte] %p5/z -> %z19.s -a518daf5 : ldnt1w z21.s, p6/Z, [x23, x24, LSL #2] : ldnt1w (%x23,%x24,lsl #2)[32byte] %p6/z -> %z21.s -a519db17 : ldnt1w z23.s, p6/Z, [x24, x25, LSL #2] : ldnt1w (%x24,%x25,lsl #2)[32byte] %p6/z -> %z23.s -a51bdf59 : ldnt1w z25.s, p7/Z, [x26, x27, LSL #2] : ldnt1w (%x26,%x27,lsl #2)[32byte] %p7/z -> %z25.s -a51ddf9b : ldnt1w z27.s, p7/Z, [x28, x29, LSL #2] : ldnt1w (%x28,%x29,lsl #2)[32byte] %p7/z -> %z27.s -a51edfff : ldnt1w z31.s, p7/Z, [sp, x30, LSL #2] : ldnt1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s +a500c000 : ldnt1w z0.s, p0/Z, [x0, x0, LSL #2] : ldnt1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s +a505c482 : ldnt1w z2.s, p1/Z, [x4, x5, LSL #2] : ldnt1w (%x4,%x5,lsl #2)[4byte] %p1/z -> %z2.s +a507c8c4 : ldnt1w z4.s, p2/Z, [x6, x7, LSL #2] : ldnt1w 
(%x6,%x7,lsl #2)[4byte] %p2/z -> %z4.s +a509c906 : ldnt1w z6.s, p2/Z, [x8, x9, LSL #2] : ldnt1w (%x8,%x9,lsl #2)[4byte] %p2/z -> %z6.s +a50bcd48 : ldnt1w z8.s, p3/Z, [x10, x11, LSL #2] : ldnt1w (%x10,%x11,lsl #2)[4byte] %p3/z -> %z8.s +a50ccd6a : ldnt1w z10.s, p3/Z, [x11, x12, LSL #2] : ldnt1w (%x11,%x12,lsl #2)[4byte] %p3/z -> %z10.s +a50ed1ac : ldnt1w z12.s, p4/Z, [x13, x14, LSL #2] : ldnt1w (%x13,%x14,lsl #2)[4byte] %p4/z -> %z12.s +a510d1ee : ldnt1w z14.s, p4/Z, [x15, x16, LSL #2] : ldnt1w (%x15,%x16,lsl #2)[4byte] %p4/z -> %z14.s +a512d630 : ldnt1w z16.s, p5/Z, [x17, x18, LSL #2] : ldnt1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s +a514d671 : ldnt1w z17.s, p5/Z, [x19, x20, LSL #2] : ldnt1w (%x19,%x20,lsl #2)[4byte] %p5/z -> %z17.s +a516d6b3 : ldnt1w z19.s, p5/Z, [x21, x22, LSL #2] : ldnt1w (%x21,%x22,lsl #2)[4byte] %p5/z -> %z19.s +a518daf5 : ldnt1w z21.s, p6/Z, [x23, x24, LSL #2] : ldnt1w (%x23,%x24,lsl #2)[4byte] %p6/z -> %z21.s +a519db17 : ldnt1w z23.s, p6/Z, [x24, x25, LSL #2] : ldnt1w (%x24,%x25,lsl #2)[4byte] %p6/z -> %z23.s +a51bdf59 : ldnt1w z25.s, p7/Z, [x26, x27, LSL #2] : ldnt1w (%x26,%x27,lsl #2)[4byte] %p7/z -> %z25.s +a51ddf9b : ldnt1w z27.s, p7/Z, [x28, x29, LSL #2] : ldnt1w (%x28,%x29,lsl #2)[4byte] %p7/z -> %z27.s +a51edfff : ldnt1w z31.s, p7/Z, [sp, x30, LSL #2] : ldnt1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s # LDNT1W { .S }, /Z, [{, #, MUL VL}] (LDNT1W-Z.P.BI-Contiguous) -a508e000 : ldnt1w z0.s, p0/Z, [x0, #-8, MUL VL] : ldnt1w -0x0100(%x0)[32byte] %p0/z -> %z0.s -a509e482 : ldnt1w z2.s, p1/Z, [x4, #-7, MUL VL] : ldnt1w -0xe0(%x4)[32byte] %p1/z -> %z2.s -a50ae8c4 : ldnt1w z4.s, p2/Z, [x6, #-6, MUL VL] : ldnt1w -0xc0(%x6)[32byte] %p2/z -> %z4.s -a50be906 : ldnt1w z6.s, p2/Z, [x8, #-5, MUL VL] : ldnt1w -0xa0(%x8)[32byte] %p2/z -> %z6.s -a50ced48 : ldnt1w z8.s, p3/Z, [x10, #-4, MUL VL] : ldnt1w -0x80(%x10)[32byte] %p3/z -> %z8.s -a50ded6a : ldnt1w z10.s, p3/Z, [x11, #-3, MUL VL] : ldnt1w -0x60(%x11)[32byte] %p3/z -> %z10.s -a50ef1ac : ldnt1w 
z12.s, p4/Z, [x13, #-2, MUL VL] : ldnt1w -0x40(%x13)[32byte] %p4/z -> %z12.s -a50ff1ee : ldnt1w z14.s, p4/Z, [x15, #-1, MUL VL] : ldnt1w -0x20(%x15)[32byte] %p4/z -> %z14.s -a500f630 : ldnt1w z16.s, p5/Z, [x17, #0, MUL VL] : ldnt1w (%x17)[32byte] %p5/z -> %z16.s -a500f671 : ldnt1w z17.s, p5/Z, [x19, #0, MUL VL] : ldnt1w (%x19)[32byte] %p5/z -> %z17.s -a501f6b3 : ldnt1w z19.s, p5/Z, [x21, #1, MUL VL] : ldnt1w +0x20(%x21)[32byte] %p5/z -> %z19.s -a502faf5 : ldnt1w z21.s, p6/Z, [x23, #2, MUL VL] : ldnt1w +0x40(%x23)[32byte] %p6/z -> %z21.s -a503fb17 : ldnt1w z23.s, p6/Z, [x24, #3, MUL VL] : ldnt1w +0x60(%x24)[32byte] %p6/z -> %z23.s -a504ff59 : ldnt1w z25.s, p7/Z, [x26, #4, MUL VL] : ldnt1w +0x80(%x26)[32byte] %p7/z -> %z25.s -a505ff9b : ldnt1w z27.s, p7/Z, [x28, #5, MUL VL] : ldnt1w +0xa0(%x28)[32byte] %p7/z -> %z27.s -a507ffff : ldnt1w z31.s, p7/Z, [sp, #7, MUL VL] : ldnt1w +0xe0(%sp)[32byte] %p7/z -> %z31.s +a508e000 : ldnt1w z0.s, p0/Z, [x0, #-8, MUL VL] : ldnt1w -0x0100(%x0)[4byte] %p0/z -> %z0.s +a509e482 : ldnt1w z2.s, p1/Z, [x4, #-7, MUL VL] : ldnt1w -0xe0(%x4)[4byte] %p1/z -> %z2.s +a50ae8c4 : ldnt1w z4.s, p2/Z, [x6, #-6, MUL VL] : ldnt1w -0xc0(%x6)[4byte] %p2/z -> %z4.s +a50be906 : ldnt1w z6.s, p2/Z, [x8, #-5, MUL VL] : ldnt1w -0xa0(%x8)[4byte] %p2/z -> %z6.s +a50ced48 : ldnt1w z8.s, p3/Z, [x10, #-4, MUL VL] : ldnt1w -0x80(%x10)[4byte] %p3/z -> %z8.s +a50ded6a : ldnt1w z10.s, p3/Z, [x11, #-3, MUL VL] : ldnt1w -0x60(%x11)[4byte] %p3/z -> %z10.s +a50ef1ac : ldnt1w z12.s, p4/Z, [x13, #-2, MUL VL] : ldnt1w -0x40(%x13)[4byte] %p4/z -> %z12.s +a50ff1ee : ldnt1w z14.s, p4/Z, [x15, #-1, MUL VL] : ldnt1w -0x20(%x15)[4byte] %p4/z -> %z14.s +a500f630 : ldnt1w z16.s, p5/Z, [x17, #0, MUL VL] : ldnt1w (%x17)[4byte] %p5/z -> %z16.s +a500f671 : ldnt1w z17.s, p5/Z, [x19, #0, MUL VL] : ldnt1w (%x19)[4byte] %p5/z -> %z17.s +a501f6b3 : ldnt1w z19.s, p5/Z, [x21, #1, MUL VL] : ldnt1w +0x20(%x21)[4byte] %p5/z -> %z19.s +a502faf5 : ldnt1w z21.s, p6/Z, [x23, #2, MUL VL] : ldnt1w 
+0x40(%x23)[4byte] %p6/z -> %z21.s +a503fb17 : ldnt1w z23.s, p6/Z, [x24, #3, MUL VL] : ldnt1w +0x60(%x24)[4byte] %p6/z -> %z23.s +a504ff59 : ldnt1w z25.s, p7/Z, [x26, #4, MUL VL] : ldnt1w +0x80(%x26)[4byte] %p7/z -> %z25.s +a505ff9b : ldnt1w z27.s, p7/Z, [x28, #5, MUL VL] : ldnt1w +0xa0(%x28)[4byte] %p7/z -> %z27.s +a507ffff : ldnt1w z31.s, p7/Z, [sp, #7, MUL VL] : ldnt1w +0xe0(%sp)[4byte] %p7/z -> %z31.s # LDR , [{, #, MUL VL}] 858003c0 : ldr p0, [x30] : ldr (%x30)[4byte] -> %p0 @@ -20872,1576 +20872,1576 @@ c51fffef : prfw 15, p7, [z31.d, #124] : prfw $0x0f %p7 +0x7c(%z 25e6dffe : sqsub z30.d, z30.d, #0xff, lsl #0 : sqsub %z30.d $0xff lsl $0x00 -> %z30.d # ST1B { . }, , [, ] (ST1B-Z.P.BR-_) -e4004000 : st1b z0.b, p0, [x0, x0] : st1b %z0.b %p0 -> (%x0,%x0)[32byte] -e4054482 : st1b z2.b, p1, [x4, x5] : st1b %z2.b %p1 -> (%x4,%x5)[32byte] -e40748c4 : st1b z4.b, p2, [x6, x7] : st1b %z4.b %p2 -> (%x6,%x7)[32byte] -e4094906 : st1b z6.b, p2, [x8, x9] : st1b %z6.b %p2 -> (%x8,%x9)[32byte] -e40b4d48 : st1b z8.b, p3, [x10, x11] : st1b %z8.b %p3 -> (%x10,%x11)[32byte] -e40c4d6a : st1b z10.b, p3, [x11, x12] : st1b %z10.b %p3 -> (%x11,%x12)[32byte] -e40e51ac : st1b z12.b, p4, [x13, x14] : st1b %z12.b %p4 -> (%x13,%x14)[32byte] -e41051ee : st1b z14.b, p4, [x15, x16] : st1b %z14.b %p4 -> (%x15,%x16)[32byte] -e4125630 : st1b z16.b, p5, [x17, x18] : st1b %z16.b %p5 -> (%x17,%x18)[32byte] -e4145671 : st1b z17.b, p5, [x19, x20] : st1b %z17.b %p5 -> (%x19,%x20)[32byte] -e41656b3 : st1b z19.b, p5, [x21, x22] : st1b %z19.b %p5 -> (%x21,%x22)[32byte] -e4185af5 : st1b z21.b, p6, [x23, x24] : st1b %z21.b %p6 -> (%x23,%x24)[32byte] -e4195b17 : st1b z23.b, p6, [x24, x25] : st1b %z23.b %p6 -> (%x24,%x25)[32byte] -e41b5f59 : st1b z25.b, p7, [x26, x27] : st1b %z25.b %p7 -> (%x26,%x27)[32byte] -e41d5f9b : st1b z27.b, p7, [x28, x29] : st1b %z27.b %p7 -> (%x28,%x29)[32byte] -e41e5fff : st1b z31.b, p7, [sp, x30] : st1b %z31.b %p7 -> (%sp,%x30)[32byte] -e4204000 : st1b z0.h, p0, [x0, x0] : st1b 
%z0.h %p0 -> (%x0,%x0)[16byte] -e4254482 : st1b z2.h, p1, [x4, x5] : st1b %z2.h %p1 -> (%x4,%x5)[16byte] -e42748c4 : st1b z4.h, p2, [x6, x7] : st1b %z4.h %p2 -> (%x6,%x7)[16byte] -e4294906 : st1b z6.h, p2, [x8, x9] : st1b %z6.h %p2 -> (%x8,%x9)[16byte] -e42b4d48 : st1b z8.h, p3, [x10, x11] : st1b %z8.h %p3 -> (%x10,%x11)[16byte] -e42c4d6a : st1b z10.h, p3, [x11, x12] : st1b %z10.h %p3 -> (%x11,%x12)[16byte] -e42e51ac : st1b z12.h, p4, [x13, x14] : st1b %z12.h %p4 -> (%x13,%x14)[16byte] -e43051ee : st1b z14.h, p4, [x15, x16] : st1b %z14.h %p4 -> (%x15,%x16)[16byte] -e4325630 : st1b z16.h, p5, [x17, x18] : st1b %z16.h %p5 -> (%x17,%x18)[16byte] -e4345671 : st1b z17.h, p5, [x19, x20] : st1b %z17.h %p5 -> (%x19,%x20)[16byte] -e43656b3 : st1b z19.h, p5, [x21, x22] : st1b %z19.h %p5 -> (%x21,%x22)[16byte] -e4385af5 : st1b z21.h, p6, [x23, x24] : st1b %z21.h %p6 -> (%x23,%x24)[16byte] -e4395b17 : st1b z23.h, p6, [x24, x25] : st1b %z23.h %p6 -> (%x24,%x25)[16byte] -e43b5f59 : st1b z25.h, p7, [x26, x27] : st1b %z25.h %p7 -> (%x26,%x27)[16byte] -e43d5f9b : st1b z27.h, p7, [x28, x29] : st1b %z27.h %p7 -> (%x28,%x29)[16byte] -e43e5fff : st1b z31.h, p7, [sp, x30] : st1b %z31.h %p7 -> (%sp,%x30)[16byte] -e4404000 : st1b z0.s, p0, [x0, x0] : st1b %z0.s %p0 -> (%x0,%x0)[8byte] -e4454482 : st1b z2.s, p1, [x4, x5] : st1b %z2.s %p1 -> (%x4,%x5)[8byte] -e44748c4 : st1b z4.s, p2, [x6, x7] : st1b %z4.s %p2 -> (%x6,%x7)[8byte] -e4494906 : st1b z6.s, p2, [x8, x9] : st1b %z6.s %p2 -> (%x8,%x9)[8byte] -e44b4d48 : st1b z8.s, p3, [x10, x11] : st1b %z8.s %p3 -> (%x10,%x11)[8byte] -e44c4d6a : st1b z10.s, p3, [x11, x12] : st1b %z10.s %p3 -> (%x11,%x12)[8byte] -e44e51ac : st1b z12.s, p4, [x13, x14] : st1b %z12.s %p4 -> (%x13,%x14)[8byte] -e45051ee : st1b z14.s, p4, [x15, x16] : st1b %z14.s %p4 -> (%x15,%x16)[8byte] -e4525630 : st1b z16.s, p5, [x17, x18] : st1b %z16.s %p5 -> (%x17,%x18)[8byte] -e4545671 : st1b z17.s, p5, [x19, x20] : st1b %z17.s %p5 -> (%x19,%x20)[8byte] -e45656b3 : st1b z19.s, 
p5, [x21, x22] : st1b %z19.s %p5 -> (%x21,%x22)[8byte] -e4585af5 : st1b z21.s, p6, [x23, x24] : st1b %z21.s %p6 -> (%x23,%x24)[8byte] -e4595b17 : st1b z23.s, p6, [x24, x25] : st1b %z23.s %p6 -> (%x24,%x25)[8byte] -e45b5f59 : st1b z25.s, p7, [x26, x27] : st1b %z25.s %p7 -> (%x26,%x27)[8byte] -e45d5f9b : st1b z27.s, p7, [x28, x29] : st1b %z27.s %p7 -> (%x28,%x29)[8byte] -e45e5fff : st1b z31.s, p7, [sp, x30] : st1b %z31.s %p7 -> (%sp,%x30)[8byte] -e4604000 : st1b z0.d, p0, [x0, x0] : st1b %z0.d %p0 -> (%x0,%x0)[4byte] -e4654482 : st1b z2.d, p1, [x4, x5] : st1b %z2.d %p1 -> (%x4,%x5)[4byte] -e46748c4 : st1b z4.d, p2, [x6, x7] : st1b %z4.d %p2 -> (%x6,%x7)[4byte] -e4694906 : st1b z6.d, p2, [x8, x9] : st1b %z6.d %p2 -> (%x8,%x9)[4byte] -e46b4d48 : st1b z8.d, p3, [x10, x11] : st1b %z8.d %p3 -> (%x10,%x11)[4byte] -e46c4d6a : st1b z10.d, p3, [x11, x12] : st1b %z10.d %p3 -> (%x11,%x12)[4byte] -e46e51ac : st1b z12.d, p4, [x13, x14] : st1b %z12.d %p4 -> (%x13,%x14)[4byte] -e47051ee : st1b z14.d, p4, [x15, x16] : st1b %z14.d %p4 -> (%x15,%x16)[4byte] -e4725630 : st1b z16.d, p5, [x17, x18] : st1b %z16.d %p5 -> (%x17,%x18)[4byte] -e4745671 : st1b z17.d, p5, [x19, x20] : st1b %z17.d %p5 -> (%x19,%x20)[4byte] -e47656b3 : st1b z19.d, p5, [x21, x22] : st1b %z19.d %p5 -> (%x21,%x22)[4byte] -e4785af5 : st1b z21.d, p6, [x23, x24] : st1b %z21.d %p6 -> (%x23,%x24)[4byte] -e4795b17 : st1b z23.d, p6, [x24, x25] : st1b %z23.d %p6 -> (%x24,%x25)[4byte] -e47b5f59 : st1b z25.d, p7, [x26, x27] : st1b %z25.d %p7 -> (%x26,%x27)[4byte] -e47d5f9b : st1b z27.d, p7, [x28, x29] : st1b %z27.d %p7 -> (%x28,%x29)[4byte] -e47e5fff : st1b z31.d, p7, [sp, x30] : st1b %z31.d %p7 -> (%sp,%x30)[4byte] +e4004000 : st1b z0.b, p0, [x0, x0] : st1b %z0.b %p0 -> (%x0,%x0)[1byte] +e4054482 : st1b z2.b, p1, [x4, x5] : st1b %z2.b %p1 -> (%x4,%x5)[1byte] +e40748c4 : st1b z4.b, p2, [x6, x7] : st1b %z4.b %p2 -> (%x6,%x7)[1byte] +e4094906 : st1b z6.b, p2, [x8, x9] : st1b %z6.b %p2 -> (%x8,%x9)[1byte] +e40b4d48 : st1b z8.b, 
p3, [x10, x11] : st1b %z8.b %p3 -> (%x10,%x11)[1byte] +e40c4d6a : st1b z10.b, p3, [x11, x12] : st1b %z10.b %p3 -> (%x11,%x12)[1byte] +e40e51ac : st1b z12.b, p4, [x13, x14] : st1b %z12.b %p4 -> (%x13,%x14)[1byte] +e41051ee : st1b z14.b, p4, [x15, x16] : st1b %z14.b %p4 -> (%x15,%x16)[1byte] +e4125630 : st1b z16.b, p5, [x17, x18] : st1b %z16.b %p5 -> (%x17,%x18)[1byte] +e4145671 : st1b z17.b, p5, [x19, x20] : st1b %z17.b %p5 -> (%x19,%x20)[1byte] +e41656b3 : st1b z19.b, p5, [x21, x22] : st1b %z19.b %p5 -> (%x21,%x22)[1byte] +e4185af5 : st1b z21.b, p6, [x23, x24] : st1b %z21.b %p6 -> (%x23,%x24)[1byte] +e4195b17 : st1b z23.b, p6, [x24, x25] : st1b %z23.b %p6 -> (%x24,%x25)[1byte] +e41b5f59 : st1b z25.b, p7, [x26, x27] : st1b %z25.b %p7 -> (%x26,%x27)[1byte] +e41d5f9b : st1b z27.b, p7, [x28, x29] : st1b %z27.b %p7 -> (%x28,%x29)[1byte] +e41e5fff : st1b z31.b, p7, [sp, x30] : st1b %z31.b %p7 -> (%sp,%x30)[1byte] +e4204000 : st1b z0.h, p0, [x0, x0] : st1b %z0.h %p0 -> (%x0,%x0)[1byte] +e4254482 : st1b z2.h, p1, [x4, x5] : st1b %z2.h %p1 -> (%x4,%x5)[1byte] +e42748c4 : st1b z4.h, p2, [x6, x7] : st1b %z4.h %p2 -> (%x6,%x7)[1byte] +e4294906 : st1b z6.h, p2, [x8, x9] : st1b %z6.h %p2 -> (%x8,%x9)[1byte] +e42b4d48 : st1b z8.h, p3, [x10, x11] : st1b %z8.h %p3 -> (%x10,%x11)[1byte] +e42c4d6a : st1b z10.h, p3, [x11, x12] : st1b %z10.h %p3 -> (%x11,%x12)[1byte] +e42e51ac : st1b z12.h, p4, [x13, x14] : st1b %z12.h %p4 -> (%x13,%x14)[1byte] +e43051ee : st1b z14.h, p4, [x15, x16] : st1b %z14.h %p4 -> (%x15,%x16)[1byte] +e4325630 : st1b z16.h, p5, [x17, x18] : st1b %z16.h %p5 -> (%x17,%x18)[1byte] +e4345671 : st1b z17.h, p5, [x19, x20] : st1b %z17.h %p5 -> (%x19,%x20)[1byte] +e43656b3 : st1b z19.h, p5, [x21, x22] : st1b %z19.h %p5 -> (%x21,%x22)[1byte] +e4385af5 : st1b z21.h, p6, [x23, x24] : st1b %z21.h %p6 -> (%x23,%x24)[1byte] +e4395b17 : st1b z23.h, p6, [x24, x25] : st1b %z23.h %p6 -> (%x24,%x25)[1byte] +e43b5f59 : st1b z25.h, p7, [x26, x27] : st1b %z25.h %p7 -> 
(%x26,%x27)[1byte] +e43d5f9b : st1b z27.h, p7, [x28, x29] : st1b %z27.h %p7 -> (%x28,%x29)[1byte] +e43e5fff : st1b z31.h, p7, [sp, x30] : st1b %z31.h %p7 -> (%sp,%x30)[1byte] +e4404000 : st1b z0.s, p0, [x0, x0] : st1b %z0.s %p0 -> (%x0,%x0)[1byte] +e4454482 : st1b z2.s, p1, [x4, x5] : st1b %z2.s %p1 -> (%x4,%x5)[1byte] +e44748c4 : st1b z4.s, p2, [x6, x7] : st1b %z4.s %p2 -> (%x6,%x7)[1byte] +e4494906 : st1b z6.s, p2, [x8, x9] : st1b %z6.s %p2 -> (%x8,%x9)[1byte] +e44b4d48 : st1b z8.s, p3, [x10, x11] : st1b %z8.s %p3 -> (%x10,%x11)[1byte] +e44c4d6a : st1b z10.s, p3, [x11, x12] : st1b %z10.s %p3 -> (%x11,%x12)[1byte] +e44e51ac : st1b z12.s, p4, [x13, x14] : st1b %z12.s %p4 -> (%x13,%x14)[1byte] +e45051ee : st1b z14.s, p4, [x15, x16] : st1b %z14.s %p4 -> (%x15,%x16)[1byte] +e4525630 : st1b z16.s, p5, [x17, x18] : st1b %z16.s %p5 -> (%x17,%x18)[1byte] +e4545671 : st1b z17.s, p5, [x19, x20] : st1b %z17.s %p5 -> (%x19,%x20)[1byte] +e45656b3 : st1b z19.s, p5, [x21, x22] : st1b %z19.s %p5 -> (%x21,%x22)[1byte] +e4585af5 : st1b z21.s, p6, [x23, x24] : st1b %z21.s %p6 -> (%x23,%x24)[1byte] +e4595b17 : st1b z23.s, p6, [x24, x25] : st1b %z23.s %p6 -> (%x24,%x25)[1byte] +e45b5f59 : st1b z25.s, p7, [x26, x27] : st1b %z25.s %p7 -> (%x26,%x27)[1byte] +e45d5f9b : st1b z27.s, p7, [x28, x29] : st1b %z27.s %p7 -> (%x28,%x29)[1byte] +e45e5fff : st1b z31.s, p7, [sp, x30] : st1b %z31.s %p7 -> (%sp,%x30)[1byte] +e4604000 : st1b z0.d, p0, [x0, x0] : st1b %z0.d %p0 -> (%x0,%x0)[1byte] +e4654482 : st1b z2.d, p1, [x4, x5] : st1b %z2.d %p1 -> (%x4,%x5)[1byte] +e46748c4 : st1b z4.d, p2, [x6, x7] : st1b %z4.d %p2 -> (%x6,%x7)[1byte] +e4694906 : st1b z6.d, p2, [x8, x9] : st1b %z6.d %p2 -> (%x8,%x9)[1byte] +e46b4d48 : st1b z8.d, p3, [x10, x11] : st1b %z8.d %p3 -> (%x10,%x11)[1byte] +e46c4d6a : st1b z10.d, p3, [x11, x12] : st1b %z10.d %p3 -> (%x11,%x12)[1byte] +e46e51ac : st1b z12.d, p4, [x13, x14] : st1b %z12.d %p4 -> (%x13,%x14)[1byte] +e47051ee : st1b z14.d, p4, [x15, x16] : st1b %z14.d %p4 -> 
(%x15,%x16)[1byte] +e4725630 : st1b z16.d, p5, [x17, x18] : st1b %z16.d %p5 -> (%x17,%x18)[1byte] +e4745671 : st1b z17.d, p5, [x19, x20] : st1b %z17.d %p5 -> (%x19,%x20)[1byte] +e47656b3 : st1b z19.d, p5, [x21, x22] : st1b %z19.d %p5 -> (%x21,%x22)[1byte] +e4785af5 : st1b z21.d, p6, [x23, x24] : st1b %z21.d %p6 -> (%x23,%x24)[1byte] +e4795b17 : st1b z23.d, p6, [x24, x25] : st1b %z23.d %p6 -> (%x24,%x25)[1byte] +e47b5f59 : st1b z25.d, p7, [x26, x27] : st1b %z25.d %p7 -> (%x26,%x27)[1byte] +e47d5f9b : st1b z27.d, p7, [x28, x29] : st1b %z27.d %p7 -> (%x28,%x29)[1byte] +e47e5fff : st1b z31.d, p7, [sp, x30] : st1b %z31.d %p7 -> (%sp,%x30)[1byte] # ST1B { .D }, , [, .D, ] (ST1B-Z.P.BZ-D.x32.unscaled) -e4008000 : st1b z0.d, p0, [x0, z0.d, UXTW] : st1b %z0.d %p0 -> (%x0,%z0.d,uxtw)[4byte] -e4058482 : st1b z2.d, p1, [x4, z5.d, UXTW] : st1b %z2.d %p1 -> (%x4,%z5.d,uxtw)[4byte] -e40788c4 : st1b z4.d, p2, [x6, z7.d, UXTW] : st1b %z4.d %p2 -> (%x6,%z7.d,uxtw)[4byte] -e4098906 : st1b z6.d, p2, [x8, z9.d, UXTW] : st1b %z6.d %p2 -> (%x8,%z9.d,uxtw)[4byte] -e40b8d48 : st1b z8.d, p3, [x10, z11.d, UXTW] : st1b %z8.d %p3 -> (%x10,%z11.d,uxtw)[4byte] -e40d8d6a : st1b z10.d, p3, [x11, z13.d, UXTW] : st1b %z10.d %p3 -> (%x11,%z13.d,uxtw)[4byte] -e40f91ac : st1b z12.d, p4, [x13, z15.d, UXTW] : st1b %z12.d %p4 -> (%x13,%z15.d,uxtw)[4byte] -e41191ee : st1b z14.d, p4, [x15, z17.d, UXTW] : st1b %z14.d %p4 -> (%x15,%z17.d,uxtw)[4byte] -e4139630 : st1b z16.d, p5, [x17, z19.d, UXTW] : st1b %z16.d %p5 -> (%x17,%z19.d,uxtw)[4byte] -e4149671 : st1b z17.d, p5, [x19, z20.d, UXTW] : st1b %z17.d %p5 -> (%x19,%z20.d,uxtw)[4byte] -e41696b3 : st1b z19.d, p5, [x21, z22.d, UXTW] : st1b %z19.d %p5 -> (%x21,%z22.d,uxtw)[4byte] -e4189af5 : st1b z21.d, p6, [x23, z24.d, UXTW] : st1b %z21.d %p6 -> (%x23,%z24.d,uxtw)[4byte] -e41a9b17 : st1b z23.d, p6, [x24, z26.d, UXTW] : st1b %z23.d %p6 -> (%x24,%z26.d,uxtw)[4byte] -e41c9f59 : st1b z25.d, p7, [x26, z28.d, UXTW] : st1b %z25.d %p7 -> (%x26,%z28.d,uxtw)[4byte] 
-e41e9f9b : st1b z27.d, p7, [x28, z30.d, UXTW] : st1b %z27.d %p7 -> (%x28,%z30.d,uxtw)[4byte] -e41f9fff : st1b z31.d, p7, [sp, z31.d, UXTW] : st1b %z31.d %p7 -> (%sp,%z31.d,uxtw)[4byte] -e400c000 : st1b z0.d, p0, [x0, z0.d, SXTW] : st1b %z0.d %p0 -> (%x0,%z0.d,sxtw)[4byte] -e405c482 : st1b z2.d, p1, [x4, z5.d, SXTW] : st1b %z2.d %p1 -> (%x4,%z5.d,sxtw)[4byte] -e407c8c4 : st1b z4.d, p2, [x6, z7.d, SXTW] : st1b %z4.d %p2 -> (%x6,%z7.d,sxtw)[4byte] -e409c906 : st1b z6.d, p2, [x8, z9.d, SXTW] : st1b %z6.d %p2 -> (%x8,%z9.d,sxtw)[4byte] -e40bcd48 : st1b z8.d, p3, [x10, z11.d, SXTW] : st1b %z8.d %p3 -> (%x10,%z11.d,sxtw)[4byte] -e40dcd6a : st1b z10.d, p3, [x11, z13.d, SXTW] : st1b %z10.d %p3 -> (%x11,%z13.d,sxtw)[4byte] -e40fd1ac : st1b z12.d, p4, [x13, z15.d, SXTW] : st1b %z12.d %p4 -> (%x13,%z15.d,sxtw)[4byte] -e411d1ee : st1b z14.d, p4, [x15, z17.d, SXTW] : st1b %z14.d %p4 -> (%x15,%z17.d,sxtw)[4byte] -e413d630 : st1b z16.d, p5, [x17, z19.d, SXTW] : st1b %z16.d %p5 -> (%x17,%z19.d,sxtw)[4byte] -e414d671 : st1b z17.d, p5, [x19, z20.d, SXTW] : st1b %z17.d %p5 -> (%x19,%z20.d,sxtw)[4byte] -e416d6b3 : st1b z19.d, p5, [x21, z22.d, SXTW] : st1b %z19.d %p5 -> (%x21,%z22.d,sxtw)[4byte] -e418daf5 : st1b z21.d, p6, [x23, z24.d, SXTW] : st1b %z21.d %p6 -> (%x23,%z24.d,sxtw)[4byte] -e41adb17 : st1b z23.d, p6, [x24, z26.d, SXTW] : st1b %z23.d %p6 -> (%x24,%z26.d,sxtw)[4byte] -e41cdf59 : st1b z25.d, p7, [x26, z28.d, SXTW] : st1b %z25.d %p7 -> (%x26,%z28.d,sxtw)[4byte] -e41edf9b : st1b z27.d, p7, [x28, z30.d, SXTW] : st1b %z27.d %p7 -> (%x28,%z30.d,sxtw)[4byte] -e41fdfff : st1b z31.d, p7, [sp, z31.d, SXTW] : st1b %z31.d %p7 -> (%sp,%z31.d,sxtw)[4byte] +e4008000 : st1b z0.d, p0, [x0, z0.d, UXTW] : st1b %z0.d %p0 -> (%x0,%z0.d,uxtw)[1byte] +e4058482 : st1b z2.d, p1, [x4, z5.d, UXTW] : st1b %z2.d %p1 -> (%x4,%z5.d,uxtw)[1byte] +e40788c4 : st1b z4.d, p2, [x6, z7.d, UXTW] : st1b %z4.d %p2 -> (%x6,%z7.d,uxtw)[1byte] +e4098906 : st1b z6.d, p2, [x8, z9.d, UXTW] : st1b %z6.d %p2 -> 
(%x8,%z9.d,uxtw)[1byte] +e40b8d48 : st1b z8.d, p3, [x10, z11.d, UXTW] : st1b %z8.d %p3 -> (%x10,%z11.d,uxtw)[1byte] +e40d8d6a : st1b z10.d, p3, [x11, z13.d, UXTW] : st1b %z10.d %p3 -> (%x11,%z13.d,uxtw)[1byte] +e40f91ac : st1b z12.d, p4, [x13, z15.d, UXTW] : st1b %z12.d %p4 -> (%x13,%z15.d,uxtw)[1byte] +e41191ee : st1b z14.d, p4, [x15, z17.d, UXTW] : st1b %z14.d %p4 -> (%x15,%z17.d,uxtw)[1byte] +e4139630 : st1b z16.d, p5, [x17, z19.d, UXTW] : st1b %z16.d %p5 -> (%x17,%z19.d,uxtw)[1byte] +e4149671 : st1b z17.d, p5, [x19, z20.d, UXTW] : st1b %z17.d %p5 -> (%x19,%z20.d,uxtw)[1byte] +e41696b3 : st1b z19.d, p5, [x21, z22.d, UXTW] : st1b %z19.d %p5 -> (%x21,%z22.d,uxtw)[1byte] +e4189af5 : st1b z21.d, p6, [x23, z24.d, UXTW] : st1b %z21.d %p6 -> (%x23,%z24.d,uxtw)[1byte] +e41a9b17 : st1b z23.d, p6, [x24, z26.d, UXTW] : st1b %z23.d %p6 -> (%x24,%z26.d,uxtw)[1byte] +e41c9f59 : st1b z25.d, p7, [x26, z28.d, UXTW] : st1b %z25.d %p7 -> (%x26,%z28.d,uxtw)[1byte] +e41e9f9b : st1b z27.d, p7, [x28, z30.d, UXTW] : st1b %z27.d %p7 -> (%x28,%z30.d,uxtw)[1byte] +e41f9fff : st1b z31.d, p7, [sp, z31.d, UXTW] : st1b %z31.d %p7 -> (%sp,%z31.d,uxtw)[1byte] +e400c000 : st1b z0.d, p0, [x0, z0.d, SXTW] : st1b %z0.d %p0 -> (%x0,%z0.d,sxtw)[1byte] +e405c482 : st1b z2.d, p1, [x4, z5.d, SXTW] : st1b %z2.d %p1 -> (%x4,%z5.d,sxtw)[1byte] +e407c8c4 : st1b z4.d, p2, [x6, z7.d, SXTW] : st1b %z4.d %p2 -> (%x6,%z7.d,sxtw)[1byte] +e409c906 : st1b z6.d, p2, [x8, z9.d, SXTW] : st1b %z6.d %p2 -> (%x8,%z9.d,sxtw)[1byte] +e40bcd48 : st1b z8.d, p3, [x10, z11.d, SXTW] : st1b %z8.d %p3 -> (%x10,%z11.d,sxtw)[1byte] +e40dcd6a : st1b z10.d, p3, [x11, z13.d, SXTW] : st1b %z10.d %p3 -> (%x11,%z13.d,sxtw)[1byte] +e40fd1ac : st1b z12.d, p4, [x13, z15.d, SXTW] : st1b %z12.d %p4 -> (%x13,%z15.d,sxtw)[1byte] +e411d1ee : st1b z14.d, p4, [x15, z17.d, SXTW] : st1b %z14.d %p4 -> (%x15,%z17.d,sxtw)[1byte] +e413d630 : st1b z16.d, p5, [x17, z19.d, SXTW] : st1b %z16.d %p5 -> (%x17,%z19.d,sxtw)[1byte] +e414d671 : st1b z17.d, p5, 
[x19, z20.d, SXTW] : st1b %z17.d %p5 -> (%x19,%z20.d,sxtw)[1byte] +e416d6b3 : st1b z19.d, p5, [x21, z22.d, SXTW] : st1b %z19.d %p5 -> (%x21,%z22.d,sxtw)[1byte] +e418daf5 : st1b z21.d, p6, [x23, z24.d, SXTW] : st1b %z21.d %p6 -> (%x23,%z24.d,sxtw)[1byte] +e41adb17 : st1b z23.d, p6, [x24, z26.d, SXTW] : st1b %z23.d %p6 -> (%x24,%z26.d,sxtw)[1byte] +e41cdf59 : st1b z25.d, p7, [x26, z28.d, SXTW] : st1b %z25.d %p7 -> (%x26,%z28.d,sxtw)[1byte] +e41edf9b : st1b z27.d, p7, [x28, z30.d, SXTW] : st1b %z27.d %p7 -> (%x28,%z30.d,sxtw)[1byte] +e41fdfff : st1b z31.d, p7, [sp, z31.d, SXTW] : st1b %z31.d %p7 -> (%sp,%z31.d,sxtw)[1byte] # ST1B { .D }, , [, .D] (ST1B-Z.P.BZ-D.64.unscaled) -e400a000 : st1b z0.d, p0, [x0, z0.d] : st1b %z0.d %p0 -> (%x0,%z0.d)[4byte] -e405a482 : st1b z2.d, p1, [x4, z5.d] : st1b %z2.d %p1 -> (%x4,%z5.d)[4byte] -e407a8c4 : st1b z4.d, p2, [x6, z7.d] : st1b %z4.d %p2 -> (%x6,%z7.d)[4byte] -e409a906 : st1b z6.d, p2, [x8, z9.d] : st1b %z6.d %p2 -> (%x8,%z9.d)[4byte] -e40bad48 : st1b z8.d, p3, [x10, z11.d] : st1b %z8.d %p3 -> (%x10,%z11.d)[4byte] -e40dad6a : st1b z10.d, p3, [x11, z13.d] : st1b %z10.d %p3 -> (%x11,%z13.d)[4byte] -e40fb1ac : st1b z12.d, p4, [x13, z15.d] : st1b %z12.d %p4 -> (%x13,%z15.d)[4byte] -e411b1ee : st1b z14.d, p4, [x15, z17.d] : st1b %z14.d %p4 -> (%x15,%z17.d)[4byte] -e413b630 : st1b z16.d, p5, [x17, z19.d] : st1b %z16.d %p5 -> (%x17,%z19.d)[4byte] -e414b671 : st1b z17.d, p5, [x19, z20.d] : st1b %z17.d %p5 -> (%x19,%z20.d)[4byte] -e416b6b3 : st1b z19.d, p5, [x21, z22.d] : st1b %z19.d %p5 -> (%x21,%z22.d)[4byte] -e418baf5 : st1b z21.d, p6, [x23, z24.d] : st1b %z21.d %p6 -> (%x23,%z24.d)[4byte] -e41abb17 : st1b z23.d, p6, [x24, z26.d] : st1b %z23.d %p6 -> (%x24,%z26.d)[4byte] -e41cbf59 : st1b z25.d, p7, [x26, z28.d] : st1b %z25.d %p7 -> (%x26,%z28.d)[4byte] -e41ebf9b : st1b z27.d, p7, [x28, z30.d] : st1b %z27.d %p7 -> (%x28,%z30.d)[4byte] -e41fbfff : st1b z31.d, p7, [sp, z31.d] : st1b %z31.d %p7 -> (%sp,%z31.d)[4byte] +e400a000 : st1b 
z0.d, p0, [x0, z0.d] : st1b %z0.d %p0 -> (%x0,%z0.d)[1byte] +e405a482 : st1b z2.d, p1, [x4, z5.d] : st1b %z2.d %p1 -> (%x4,%z5.d)[1byte] +e407a8c4 : st1b z4.d, p2, [x6, z7.d] : st1b %z4.d %p2 -> (%x6,%z7.d)[1byte] +e409a906 : st1b z6.d, p2, [x8, z9.d] : st1b %z6.d %p2 -> (%x8,%z9.d)[1byte] +e40bad48 : st1b z8.d, p3, [x10, z11.d] : st1b %z8.d %p3 -> (%x10,%z11.d)[1byte] +e40dad6a : st1b z10.d, p3, [x11, z13.d] : st1b %z10.d %p3 -> (%x11,%z13.d)[1byte] +e40fb1ac : st1b z12.d, p4, [x13, z15.d] : st1b %z12.d %p4 -> (%x13,%z15.d)[1byte] +e411b1ee : st1b z14.d, p4, [x15, z17.d] : st1b %z14.d %p4 -> (%x15,%z17.d)[1byte] +e413b630 : st1b z16.d, p5, [x17, z19.d] : st1b %z16.d %p5 -> (%x17,%z19.d)[1byte] +e414b671 : st1b z17.d, p5, [x19, z20.d] : st1b %z17.d %p5 -> (%x19,%z20.d)[1byte] +e416b6b3 : st1b z19.d, p5, [x21, z22.d] : st1b %z19.d %p5 -> (%x21,%z22.d)[1byte] +e418baf5 : st1b z21.d, p6, [x23, z24.d] : st1b %z21.d %p6 -> (%x23,%z24.d)[1byte] +e41abb17 : st1b z23.d, p6, [x24, z26.d] : st1b %z23.d %p6 -> (%x24,%z26.d)[1byte] +e41cbf59 : st1b z25.d, p7, [x26, z28.d] : st1b %z25.d %p7 -> (%x26,%z28.d)[1byte] +e41ebf9b : st1b z27.d, p7, [x28, z30.d] : st1b %z27.d %p7 -> (%x28,%z30.d)[1byte] +e41fbfff : st1b z31.d, p7, [sp, z31.d] : st1b %z31.d %p7 -> (%sp,%z31.d)[1byte] # ST1B { . 
}, , [{, #, MUL VL}] (ST1B-Z.P.BI-_) -e408e000 : st1b z0.b, p0, [x0, #-8, MUL VL] : st1b %z0.b %p0 -> -0x0100(%x0)[32byte] -e409e482 : st1b z2.b, p1, [x4, #-7, MUL VL] : st1b %z2.b %p1 -> -0xe0(%x4)[32byte] -e40ae8c4 : st1b z4.b, p2, [x6, #-6, MUL VL] : st1b %z4.b %p2 -> -0xc0(%x6)[32byte] -e40be906 : st1b z6.b, p2, [x8, #-5, MUL VL] : st1b %z6.b %p2 -> -0xa0(%x8)[32byte] -e40ced48 : st1b z8.b, p3, [x10, #-4, MUL VL] : st1b %z8.b %p3 -> -0x80(%x10)[32byte] -e40ded6a : st1b z10.b, p3, [x11, #-3, MUL VL] : st1b %z10.b %p3 -> -0x60(%x11)[32byte] -e40ef1ac : st1b z12.b, p4, [x13, #-2, MUL VL] : st1b %z12.b %p4 -> -0x40(%x13)[32byte] -e40ff1ee : st1b z14.b, p4, [x15, #-1, MUL VL] : st1b %z14.b %p4 -> -0x20(%x15)[32byte] -e400f630 : st1b z16.b, p5, [x17, #0, MUL VL] : st1b %z16.b %p5 -> (%x17)[32byte] -e400f671 : st1b z17.b, p5, [x19, #0, MUL VL] : st1b %z17.b %p5 -> (%x19)[32byte] -e401f6b3 : st1b z19.b, p5, [x21, #1, MUL VL] : st1b %z19.b %p5 -> +0x20(%x21)[32byte] -e402faf5 : st1b z21.b, p6, [x23, #2, MUL VL] : st1b %z21.b %p6 -> +0x40(%x23)[32byte] -e403fb17 : st1b z23.b, p6, [x24, #3, MUL VL] : st1b %z23.b %p6 -> +0x60(%x24)[32byte] -e404ff59 : st1b z25.b, p7, [x26, #4, MUL VL] : st1b %z25.b %p7 -> +0x80(%x26)[32byte] -e405ff9b : st1b z27.b, p7, [x28, #5, MUL VL] : st1b %z27.b %p7 -> +0xa0(%x28)[32byte] -e407ffff : st1b z31.b, p7, [sp, #7, MUL VL] : st1b %z31.b %p7 -> +0xe0(%sp)[32byte] -e428e000 : st1b z0.h, p0, [x0, #-8, MUL VL] : st1b %z0.h %p0 -> -0x80(%x0)[16byte] -e429e482 : st1b z2.h, p1, [x4, #-7, MUL VL] : st1b %z2.h %p1 -> -0x70(%x4)[16byte] -e42ae8c4 : st1b z4.h, p2, [x6, #-6, MUL VL] : st1b %z4.h %p2 -> -0x60(%x6)[16byte] -e42be906 : st1b z6.h, p2, [x8, #-5, MUL VL] : st1b %z6.h %p2 -> -0x50(%x8)[16byte] -e42ced48 : st1b z8.h, p3, [x10, #-4, MUL VL] : st1b %z8.h %p3 -> -0x40(%x10)[16byte] -e42ded6a : st1b z10.h, p3, [x11, #-3, MUL VL] : st1b %z10.h %p3 -> -0x30(%x11)[16byte] -e42ef1ac : st1b z12.h, p4, [x13, #-2, MUL VL] : st1b %z12.h %p4 -> 
-0x20(%x13)[16byte] -e42ff1ee : st1b z14.h, p4, [x15, #-1, MUL VL] : st1b %z14.h %p4 -> -0x10(%x15)[16byte] -e420f630 : st1b z16.h, p5, [x17, #0, MUL VL] : st1b %z16.h %p5 -> (%x17)[16byte] -e420f671 : st1b z17.h, p5, [x19, #0, MUL VL] : st1b %z17.h %p5 -> (%x19)[16byte] -e421f6b3 : st1b z19.h, p5, [x21, #1, MUL VL] : st1b %z19.h %p5 -> +0x10(%x21)[16byte] -e422faf5 : st1b z21.h, p6, [x23, #2, MUL VL] : st1b %z21.h %p6 -> +0x20(%x23)[16byte] -e423fb17 : st1b z23.h, p6, [x24, #3, MUL VL] : st1b %z23.h %p6 -> +0x30(%x24)[16byte] -e424ff59 : st1b z25.h, p7, [x26, #4, MUL VL] : st1b %z25.h %p7 -> +0x40(%x26)[16byte] -e425ff9b : st1b z27.h, p7, [x28, #5, MUL VL] : st1b %z27.h %p7 -> +0x50(%x28)[16byte] -e427ffff : st1b z31.h, p7, [sp, #7, MUL VL] : st1b %z31.h %p7 -> +0x70(%sp)[16byte] -e448e000 : st1b z0.s, p0, [x0, #-8, MUL VL] : st1b %z0.s %p0 -> -0x40(%x0)[8byte] -e449e482 : st1b z2.s, p1, [x4, #-7, MUL VL] : st1b %z2.s %p1 -> -0x38(%x4)[8byte] -e44ae8c4 : st1b z4.s, p2, [x6, #-6, MUL VL] : st1b %z4.s %p2 -> -0x30(%x6)[8byte] -e44be906 : st1b z6.s, p2, [x8, #-5, MUL VL] : st1b %z6.s %p2 -> -0x28(%x8)[8byte] -e44ced48 : st1b z8.s, p3, [x10, #-4, MUL VL] : st1b %z8.s %p3 -> -0x20(%x10)[8byte] -e44ded6a : st1b z10.s, p3, [x11, #-3, MUL VL] : st1b %z10.s %p3 -> -0x18(%x11)[8byte] -e44ef1ac : st1b z12.s, p4, [x13, #-2, MUL VL] : st1b %z12.s %p4 -> -0x10(%x13)[8byte] -e44ff1ee : st1b z14.s, p4, [x15, #-1, MUL VL] : st1b %z14.s %p4 -> -0x08(%x15)[8byte] -e440f630 : st1b z16.s, p5, [x17, #0, MUL VL] : st1b %z16.s %p5 -> (%x17)[8byte] -e440f671 : st1b z17.s, p5, [x19, #0, MUL VL] : st1b %z17.s %p5 -> (%x19)[8byte] -e441f6b3 : st1b z19.s, p5, [x21, #1, MUL VL] : st1b %z19.s %p5 -> +0x08(%x21)[8byte] -e442faf5 : st1b z21.s, p6, [x23, #2, MUL VL] : st1b %z21.s %p6 -> +0x10(%x23)[8byte] -e443fb17 : st1b z23.s, p6, [x24, #3, MUL VL] : st1b %z23.s %p6 -> +0x18(%x24)[8byte] -e444ff59 : st1b z25.s, p7, [x26, #4, MUL VL] : st1b %z25.s %p7 -> +0x20(%x26)[8byte] -e445ff9b : st1b z27.s, 
p7, [x28, #5, MUL VL] : st1b %z27.s %p7 -> +0x28(%x28)[8byte] -e447ffff : st1b z31.s, p7, [sp, #7, MUL VL] : st1b %z31.s %p7 -> +0x38(%sp)[8byte] -e468e000 : st1b z0.d, p0, [x0, #-8, MUL VL] : st1b %z0.d %p0 -> -0x20(%x0)[4byte] -e469e482 : st1b z2.d, p1, [x4, #-7, MUL VL] : st1b %z2.d %p1 -> -0x1c(%x4)[4byte] -e46ae8c4 : st1b z4.d, p2, [x6, #-6, MUL VL] : st1b %z4.d %p2 -> -0x18(%x6)[4byte] -e46be906 : st1b z6.d, p2, [x8, #-5, MUL VL] : st1b %z6.d %p2 -> -0x14(%x8)[4byte] -e46ced48 : st1b z8.d, p3, [x10, #-4, MUL VL] : st1b %z8.d %p3 -> -0x10(%x10)[4byte] -e46ded6a : st1b z10.d, p3, [x11, #-3, MUL VL] : st1b %z10.d %p3 -> -0x0c(%x11)[4byte] -e46ef1ac : st1b z12.d, p4, [x13, #-2, MUL VL] : st1b %z12.d %p4 -> -0x08(%x13)[4byte] -e46ff1ee : st1b z14.d, p4, [x15, #-1, MUL VL] : st1b %z14.d %p4 -> -0x04(%x15)[4byte] -e460f630 : st1b z16.d, p5, [x17, #0, MUL VL] : st1b %z16.d %p5 -> (%x17)[4byte] -e460f671 : st1b z17.d, p5, [x19, #0, MUL VL] : st1b %z17.d %p5 -> (%x19)[4byte] -e461f6b3 : st1b z19.d, p5, [x21, #1, MUL VL] : st1b %z19.d %p5 -> +0x04(%x21)[4byte] -e462faf5 : st1b z21.d, p6, [x23, #2, MUL VL] : st1b %z21.d %p6 -> +0x08(%x23)[4byte] -e463fb17 : st1b z23.d, p6, [x24, #3, MUL VL] : st1b %z23.d %p6 -> +0x0c(%x24)[4byte] -e464ff59 : st1b z25.d, p7, [x26, #4, MUL VL] : st1b %z25.d %p7 -> +0x10(%x26)[4byte] -e465ff9b : st1b z27.d, p7, [x28, #5, MUL VL] : st1b %z27.d %p7 -> +0x14(%x28)[4byte] -e467ffff : st1b z31.d, p7, [sp, #7, MUL VL] : st1b %z31.d %p7 -> +0x1c(%sp)[4byte] +e408e000 : st1b z0.b, p0, [x0, #-8, MUL VL] : st1b %z0.b %p0 -> -0x0100(%x0)[1byte] +e409e482 : st1b z2.b, p1, [x4, #-7, MUL VL] : st1b %z2.b %p1 -> -0xe0(%x4)[1byte] +e40ae8c4 : st1b z4.b, p2, [x6, #-6, MUL VL] : st1b %z4.b %p2 -> -0xc0(%x6)[1byte] +e40be906 : st1b z6.b, p2, [x8, #-5, MUL VL] : st1b %z6.b %p2 -> -0xa0(%x8)[1byte] +e40ced48 : st1b z8.b, p3, [x10, #-4, MUL VL] : st1b %z8.b %p3 -> -0x80(%x10)[1byte] +e40ded6a : st1b z10.b, p3, [x11, #-3, MUL VL] : st1b %z10.b %p3 -> 
-0x60(%x11)[1byte] +e40ef1ac : st1b z12.b, p4, [x13, #-2, MUL VL] : st1b %z12.b %p4 -> -0x40(%x13)[1byte] +e40ff1ee : st1b z14.b, p4, [x15, #-1, MUL VL] : st1b %z14.b %p4 -> -0x20(%x15)[1byte] +e400f630 : st1b z16.b, p5, [x17, #0, MUL VL] : st1b %z16.b %p5 -> (%x17)[1byte] +e400f671 : st1b z17.b, p5, [x19, #0, MUL VL] : st1b %z17.b %p5 -> (%x19)[1byte] +e401f6b3 : st1b z19.b, p5, [x21, #1, MUL VL] : st1b %z19.b %p5 -> +0x20(%x21)[1byte] +e402faf5 : st1b z21.b, p6, [x23, #2, MUL VL] : st1b %z21.b %p6 -> +0x40(%x23)[1byte] +e403fb17 : st1b z23.b, p6, [x24, #3, MUL VL] : st1b %z23.b %p6 -> +0x60(%x24)[1byte] +e404ff59 : st1b z25.b, p7, [x26, #4, MUL VL] : st1b %z25.b %p7 -> +0x80(%x26)[1byte] +e405ff9b : st1b z27.b, p7, [x28, #5, MUL VL] : st1b %z27.b %p7 -> +0xa0(%x28)[1byte] +e407ffff : st1b z31.b, p7, [sp, #7, MUL VL] : st1b %z31.b %p7 -> +0xe0(%sp)[1byte] +e428e000 : st1b z0.h, p0, [x0, #-8, MUL VL] : st1b %z0.h %p0 -> -0x80(%x0)[1byte] +e429e482 : st1b z2.h, p1, [x4, #-7, MUL VL] : st1b %z2.h %p1 -> -0x70(%x4)[1byte] +e42ae8c4 : st1b z4.h, p2, [x6, #-6, MUL VL] : st1b %z4.h %p2 -> -0x60(%x6)[1byte] +e42be906 : st1b z6.h, p2, [x8, #-5, MUL VL] : st1b %z6.h %p2 -> -0x50(%x8)[1byte] +e42ced48 : st1b z8.h, p3, [x10, #-4, MUL VL] : st1b %z8.h %p3 -> -0x40(%x10)[1byte] +e42ded6a : st1b z10.h, p3, [x11, #-3, MUL VL] : st1b %z10.h %p3 -> -0x30(%x11)[1byte] +e42ef1ac : st1b z12.h, p4, [x13, #-2, MUL VL] : st1b %z12.h %p4 -> -0x20(%x13)[1byte] +e42ff1ee : st1b z14.h, p4, [x15, #-1, MUL VL] : st1b %z14.h %p4 -> -0x10(%x15)[1byte] +e420f630 : st1b z16.h, p5, [x17, #0, MUL VL] : st1b %z16.h %p5 -> (%x17)[1byte] +e420f671 : st1b z17.h, p5, [x19, #0, MUL VL] : st1b %z17.h %p5 -> (%x19)[1byte] +e421f6b3 : st1b z19.h, p5, [x21, #1, MUL VL] : st1b %z19.h %p5 -> +0x10(%x21)[1byte] +e422faf5 : st1b z21.h, p6, [x23, #2, MUL VL] : st1b %z21.h %p6 -> +0x20(%x23)[1byte] +e423fb17 : st1b z23.h, p6, [x24, #3, MUL VL] : st1b %z23.h %p6 -> +0x30(%x24)[1byte] +e424ff59 : st1b z25.h, p7, 
[x26, #4, MUL VL] : st1b %z25.h %p7 -> +0x40(%x26)[1byte] +e425ff9b : st1b z27.h, p7, [x28, #5, MUL VL] : st1b %z27.h %p7 -> +0x50(%x28)[1byte] +e427ffff : st1b z31.h, p7, [sp, #7, MUL VL] : st1b %z31.h %p7 -> +0x70(%sp)[1byte] +e448e000 : st1b z0.s, p0, [x0, #-8, MUL VL] : st1b %z0.s %p0 -> -0x40(%x0)[1byte] +e449e482 : st1b z2.s, p1, [x4, #-7, MUL VL] : st1b %z2.s %p1 -> -0x38(%x4)[1byte] +e44ae8c4 : st1b z4.s, p2, [x6, #-6, MUL VL] : st1b %z4.s %p2 -> -0x30(%x6)[1byte] +e44be906 : st1b z6.s, p2, [x8, #-5, MUL VL] : st1b %z6.s %p2 -> -0x28(%x8)[1byte] +e44ced48 : st1b z8.s, p3, [x10, #-4, MUL VL] : st1b %z8.s %p3 -> -0x20(%x10)[1byte] +e44ded6a : st1b z10.s, p3, [x11, #-3, MUL VL] : st1b %z10.s %p3 -> -0x18(%x11)[1byte] +e44ef1ac : st1b z12.s, p4, [x13, #-2, MUL VL] : st1b %z12.s %p4 -> -0x10(%x13)[1byte] +e44ff1ee : st1b z14.s, p4, [x15, #-1, MUL VL] : st1b %z14.s %p4 -> -0x08(%x15)[1byte] +e440f630 : st1b z16.s, p5, [x17, #0, MUL VL] : st1b %z16.s %p5 -> (%x17)[1byte] +e440f671 : st1b z17.s, p5, [x19, #0, MUL VL] : st1b %z17.s %p5 -> (%x19)[1byte] +e441f6b3 : st1b z19.s, p5, [x21, #1, MUL VL] : st1b %z19.s %p5 -> +0x08(%x21)[1byte] +e442faf5 : st1b z21.s, p6, [x23, #2, MUL VL] : st1b %z21.s %p6 -> +0x10(%x23)[1byte] +e443fb17 : st1b z23.s, p6, [x24, #3, MUL VL] : st1b %z23.s %p6 -> +0x18(%x24)[1byte] +e444ff59 : st1b z25.s, p7, [x26, #4, MUL VL] : st1b %z25.s %p7 -> +0x20(%x26)[1byte] +e445ff9b : st1b z27.s, p7, [x28, #5, MUL VL] : st1b %z27.s %p7 -> +0x28(%x28)[1byte] +e447ffff : st1b z31.s, p7, [sp, #7, MUL VL] : st1b %z31.s %p7 -> +0x38(%sp)[1byte] +e468e000 : st1b z0.d, p0, [x0, #-8, MUL VL] : st1b %z0.d %p0 -> -0x20(%x0)[1byte] +e469e482 : st1b z2.d, p1, [x4, #-7, MUL VL] : st1b %z2.d %p1 -> -0x1c(%x4)[1byte] +e46ae8c4 : st1b z4.d, p2, [x6, #-6, MUL VL] : st1b %z4.d %p2 -> -0x18(%x6)[1byte] +e46be906 : st1b z6.d, p2, [x8, #-5, MUL VL] : st1b %z6.d %p2 -> -0x14(%x8)[1byte] +e46ced48 : st1b z8.d, p3, [x10, #-4, MUL VL] : st1b %z8.d %p3 -> -0x10(%x10)[1byte] 
+e46ded6a : st1b z10.d, p3, [x11, #-3, MUL VL] : st1b %z10.d %p3 -> -0x0c(%x11)[1byte] +e46ef1ac : st1b z12.d, p4, [x13, #-2, MUL VL] : st1b %z12.d %p4 -> -0x08(%x13)[1byte] +e46ff1ee : st1b z14.d, p4, [x15, #-1, MUL VL] : st1b %z14.d %p4 -> -0x04(%x15)[1byte] +e460f630 : st1b z16.d, p5, [x17, #0, MUL VL] : st1b %z16.d %p5 -> (%x17)[1byte] +e460f671 : st1b z17.d, p5, [x19, #0, MUL VL] : st1b %z17.d %p5 -> (%x19)[1byte] +e461f6b3 : st1b z19.d, p5, [x21, #1, MUL VL] : st1b %z19.d %p5 -> +0x04(%x21)[1byte] +e462faf5 : st1b z21.d, p6, [x23, #2, MUL VL] : st1b %z21.d %p6 -> +0x08(%x23)[1byte] +e463fb17 : st1b z23.d, p6, [x24, #3, MUL VL] : st1b %z23.d %p6 -> +0x0c(%x24)[1byte] +e464ff59 : st1b z25.d, p7, [x26, #4, MUL VL] : st1b %z25.d %p7 -> +0x10(%x26)[1byte] +e465ff9b : st1b z27.d, p7, [x28, #5, MUL VL] : st1b %z27.d %p7 -> +0x14(%x28)[1byte] +e467ffff : st1b z31.d, p7, [sp, #7, MUL VL] : st1b %z31.d %p7 -> +0x1c(%sp)[1byte] # ST1B { .S }, , [, .S, ] (ST1B-Z.P.BZ-S.x32.unscaled) -e4408000 : st1b z0.s, p0, [x0, z0.s, UXTW] : st1b %z0.s %p0 -> (%x0,%z0.s,uxtw)[8byte] -e4458482 : st1b z2.s, p1, [x4, z5.s, UXTW] : st1b %z2.s %p1 -> (%x4,%z5.s,uxtw)[8byte] -e44788c4 : st1b z4.s, p2, [x6, z7.s, UXTW] : st1b %z4.s %p2 -> (%x6,%z7.s,uxtw)[8byte] -e4498906 : st1b z6.s, p2, [x8, z9.s, UXTW] : st1b %z6.s %p2 -> (%x8,%z9.s,uxtw)[8byte] -e44b8d48 : st1b z8.s, p3, [x10, z11.s, UXTW] : st1b %z8.s %p3 -> (%x10,%z11.s,uxtw)[8byte] -e44d8d6a : st1b z10.s, p3, [x11, z13.s, UXTW] : st1b %z10.s %p3 -> (%x11,%z13.s,uxtw)[8byte] -e44f91ac : st1b z12.s, p4, [x13, z15.s, UXTW] : st1b %z12.s %p4 -> (%x13,%z15.s,uxtw)[8byte] -e45191ee : st1b z14.s, p4, [x15, z17.s, UXTW] : st1b %z14.s %p4 -> (%x15,%z17.s,uxtw)[8byte] -e4539630 : st1b z16.s, p5, [x17, z19.s, UXTW] : st1b %z16.s %p5 -> (%x17,%z19.s,uxtw)[8byte] -e4549671 : st1b z17.s, p5, [x19, z20.s, UXTW] : st1b %z17.s %p5 -> (%x19,%z20.s,uxtw)[8byte] -e45696b3 : st1b z19.s, p5, [x21, z22.s, UXTW] : st1b %z19.s %p5 -> (%x21,%z22.s,uxtw)[8byte] 
-e4589af5 : st1b z21.s, p6, [x23, z24.s, UXTW] : st1b %z21.s %p6 -> (%x23,%z24.s,uxtw)[8byte] -e45a9b17 : st1b z23.s, p6, [x24, z26.s, UXTW] : st1b %z23.s %p6 -> (%x24,%z26.s,uxtw)[8byte] -e45c9f59 : st1b z25.s, p7, [x26, z28.s, UXTW] : st1b %z25.s %p7 -> (%x26,%z28.s,uxtw)[8byte] -e45e9f9b : st1b z27.s, p7, [x28, z30.s, UXTW] : st1b %z27.s %p7 -> (%x28,%z30.s,uxtw)[8byte] -e45f9fff : st1b z31.s, p7, [sp, z31.s, UXTW] : st1b %z31.s %p7 -> (%sp,%z31.s,uxtw)[8byte] -e440c000 : st1b z0.s, p0, [x0, z0.s, SXTW] : st1b %z0.s %p0 -> (%x0,%z0.s,sxtw)[8byte] -e445c482 : st1b z2.s, p1, [x4, z5.s, SXTW] : st1b %z2.s %p1 -> (%x4,%z5.s,sxtw)[8byte] -e447c8c4 : st1b z4.s, p2, [x6, z7.s, SXTW] : st1b %z4.s %p2 -> (%x6,%z7.s,sxtw)[8byte] -e449c906 : st1b z6.s, p2, [x8, z9.s, SXTW] : st1b %z6.s %p2 -> (%x8,%z9.s,sxtw)[8byte] -e44bcd48 : st1b z8.s, p3, [x10, z11.s, SXTW] : st1b %z8.s %p3 -> (%x10,%z11.s,sxtw)[8byte] -e44dcd6a : st1b z10.s, p3, [x11, z13.s, SXTW] : st1b %z10.s %p3 -> (%x11,%z13.s,sxtw)[8byte] -e44fd1ac : st1b z12.s, p4, [x13, z15.s, SXTW] : st1b %z12.s %p4 -> (%x13,%z15.s,sxtw)[8byte] -e451d1ee : st1b z14.s, p4, [x15, z17.s, SXTW] : st1b %z14.s %p4 -> (%x15,%z17.s,sxtw)[8byte] -e453d630 : st1b z16.s, p5, [x17, z19.s, SXTW] : st1b %z16.s %p5 -> (%x17,%z19.s,sxtw)[8byte] -e454d671 : st1b z17.s, p5, [x19, z20.s, SXTW] : st1b %z17.s %p5 -> (%x19,%z20.s,sxtw)[8byte] -e456d6b3 : st1b z19.s, p5, [x21, z22.s, SXTW] : st1b %z19.s %p5 -> (%x21,%z22.s,sxtw)[8byte] -e458daf5 : st1b z21.s, p6, [x23, z24.s, SXTW] : st1b %z21.s %p6 -> (%x23,%z24.s,sxtw)[8byte] -e45adb17 : st1b z23.s, p6, [x24, z26.s, SXTW] : st1b %z23.s %p6 -> (%x24,%z26.s,sxtw)[8byte] -e45cdf59 : st1b z25.s, p7, [x26, z28.s, SXTW] : st1b %z25.s %p7 -> (%x26,%z28.s,sxtw)[8byte] -e45edf9b : st1b z27.s, p7, [x28, z30.s, SXTW] : st1b %z27.s %p7 -> (%x28,%z30.s,sxtw)[8byte] -e45fdfff : st1b z31.s, p7, [sp, z31.s, SXTW] : st1b %z31.s %p7 -> (%sp,%z31.s,sxtw)[8byte] +e4408000 : st1b z0.s, p0, [x0, z0.s, UXTW] : st1b 
%z0.s %p0 -> (%x0,%z0.s,uxtw)[1byte] +e4458482 : st1b z2.s, p1, [x4, z5.s, UXTW] : st1b %z2.s %p1 -> (%x4,%z5.s,uxtw)[1byte] +e44788c4 : st1b z4.s, p2, [x6, z7.s, UXTW] : st1b %z4.s %p2 -> (%x6,%z7.s,uxtw)[1byte] +e4498906 : st1b z6.s, p2, [x8, z9.s, UXTW] : st1b %z6.s %p2 -> (%x8,%z9.s,uxtw)[1byte] +e44b8d48 : st1b z8.s, p3, [x10, z11.s, UXTW] : st1b %z8.s %p3 -> (%x10,%z11.s,uxtw)[1byte] +e44d8d6a : st1b z10.s, p3, [x11, z13.s, UXTW] : st1b %z10.s %p3 -> (%x11,%z13.s,uxtw)[1byte] +e44f91ac : st1b z12.s, p4, [x13, z15.s, UXTW] : st1b %z12.s %p4 -> (%x13,%z15.s,uxtw)[1byte] +e45191ee : st1b z14.s, p4, [x15, z17.s, UXTW] : st1b %z14.s %p4 -> (%x15,%z17.s,uxtw)[1byte] +e4539630 : st1b z16.s, p5, [x17, z19.s, UXTW] : st1b %z16.s %p5 -> (%x17,%z19.s,uxtw)[1byte] +e4549671 : st1b z17.s, p5, [x19, z20.s, UXTW] : st1b %z17.s %p5 -> (%x19,%z20.s,uxtw)[1byte] +e45696b3 : st1b z19.s, p5, [x21, z22.s, UXTW] : st1b %z19.s %p5 -> (%x21,%z22.s,uxtw)[1byte] +e4589af5 : st1b z21.s, p6, [x23, z24.s, UXTW] : st1b %z21.s %p6 -> (%x23,%z24.s,uxtw)[1byte] +e45a9b17 : st1b z23.s, p6, [x24, z26.s, UXTW] : st1b %z23.s %p6 -> (%x24,%z26.s,uxtw)[1byte] +e45c9f59 : st1b z25.s, p7, [x26, z28.s, UXTW] : st1b %z25.s %p7 -> (%x26,%z28.s,uxtw)[1byte] +e45e9f9b : st1b z27.s, p7, [x28, z30.s, UXTW] : st1b %z27.s %p7 -> (%x28,%z30.s,uxtw)[1byte] +e45f9fff : st1b z31.s, p7, [sp, z31.s, UXTW] : st1b %z31.s %p7 -> (%sp,%z31.s,uxtw)[1byte] +e440c000 : st1b z0.s, p0, [x0, z0.s, SXTW] : st1b %z0.s %p0 -> (%x0,%z0.s,sxtw)[1byte] +e445c482 : st1b z2.s, p1, [x4, z5.s, SXTW] : st1b %z2.s %p1 -> (%x4,%z5.s,sxtw)[1byte] +e447c8c4 : st1b z4.s, p2, [x6, z7.s, SXTW] : st1b %z4.s %p2 -> (%x6,%z7.s,sxtw)[1byte] +e449c906 : st1b z6.s, p2, [x8, z9.s, SXTW] : st1b %z6.s %p2 -> (%x8,%z9.s,sxtw)[1byte] +e44bcd48 : st1b z8.s, p3, [x10, z11.s, SXTW] : st1b %z8.s %p3 -> (%x10,%z11.s,sxtw)[1byte] +e44dcd6a : st1b z10.s, p3, [x11, z13.s, SXTW] : st1b %z10.s %p3 -> (%x11,%z13.s,sxtw)[1byte] +e44fd1ac : st1b z12.s, p4, [x13, 
z15.s, SXTW] : st1b %z12.s %p4 -> (%x13,%z15.s,sxtw)[1byte] +e451d1ee : st1b z14.s, p4, [x15, z17.s, SXTW] : st1b %z14.s %p4 -> (%x15,%z17.s,sxtw)[1byte] +e453d630 : st1b z16.s, p5, [x17, z19.s, SXTW] : st1b %z16.s %p5 -> (%x17,%z19.s,sxtw)[1byte] +e454d671 : st1b z17.s, p5, [x19, z20.s, SXTW] : st1b %z17.s %p5 -> (%x19,%z20.s,sxtw)[1byte] +e456d6b3 : st1b z19.s, p5, [x21, z22.s, SXTW] : st1b %z19.s %p5 -> (%x21,%z22.s,sxtw)[1byte] +e458daf5 : st1b z21.s, p6, [x23, z24.s, SXTW] : st1b %z21.s %p6 -> (%x23,%z24.s,sxtw)[1byte] +e45adb17 : st1b z23.s, p6, [x24, z26.s, SXTW] : st1b %z23.s %p6 -> (%x24,%z26.s,sxtw)[1byte] +e45cdf59 : st1b z25.s, p7, [x26, z28.s, SXTW] : st1b %z25.s %p7 -> (%x26,%z28.s,sxtw)[1byte] +e45edf9b : st1b z27.s, p7, [x28, z30.s, SXTW] : st1b %z27.s %p7 -> (%x28,%z30.s,sxtw)[1byte] +e45fdfff : st1b z31.s, p7, [sp, z31.s, SXTW] : st1b %z31.s %p7 -> (%sp,%z31.s,sxtw)[1byte] # ST1B { .D }, , [.D{, #}] (ST1B-Z.P.AI-D) -e440a000 : st1b z0.d, p0, [z0.d, #0] : st1b %z0.d %p0 -> (%z0.d)[4byte] -e442a482 : st1b z2.d, p1, [z4.d, #2] : st1b %z2.d %p1 -> +0x02(%z4.d)[4byte] -e444a8c4 : st1b z4.d, p2, [z6.d, #4] : st1b %z4.d %p2 -> +0x04(%z6.d)[4byte] -e446a906 : st1b z6.d, p2, [z8.d, #6] : st1b %z6.d %p2 -> +0x06(%z8.d)[4byte] -e448ad48 : st1b z8.d, p3, [z10.d, #8] : st1b %z8.d %p3 -> +0x08(%z10.d)[4byte] -e44aad8a : st1b z10.d, p3, [z12.d, #10] : st1b %z10.d %p3 -> +0x0a(%z12.d)[4byte] -e44cb1cc : st1b z12.d, p4, [z14.d, #12] : st1b %z12.d %p4 -> +0x0c(%z14.d)[4byte] -e44eb20e : st1b z14.d, p4, [z16.d, #14] : st1b %z14.d %p4 -> +0x0e(%z16.d)[4byte] -e450b650 : st1b z16.d, p5, [z18.d, #16] : st1b %z16.d %p5 -> +0x10(%z18.d)[4byte] -e451b671 : st1b z17.d, p5, [z19.d, #17] : st1b %z17.d %p5 -> +0x11(%z19.d)[4byte] -e453b6b3 : st1b z19.d, p5, [z21.d, #19] : st1b %z19.d %p5 -> +0x13(%z21.d)[4byte] -e455baf5 : st1b z21.d, p6, [z23.d, #21] : st1b %z21.d %p6 -> +0x15(%z23.d)[4byte] -e457bb37 : st1b z23.d, p6, [z25.d, #23] : st1b %z23.d %p6 -> +0x17(%z25.d)[4byte] 
-e459bf79 : st1b z25.d, p7, [z27.d, #25] : st1b %z25.d %p7 -> +0x19(%z27.d)[4byte] -e45bbfbb : st1b z27.d, p7, [z29.d, #27] : st1b %z27.d %p7 -> +0x1b(%z29.d)[4byte] -e45fbfff : st1b z31.d, p7, [z31.d, #31] : st1b %z31.d %p7 -> +0x1f(%z31.d)[4byte] +e440a000 : st1b z0.d, p0, [z0.d, #0] : st1b %z0.d %p0 -> (%z0.d)[1byte] +e442a482 : st1b z2.d, p1, [z4.d, #2] : st1b %z2.d %p1 -> +0x02(%z4.d)[1byte] +e444a8c4 : st1b z4.d, p2, [z6.d, #4] : st1b %z4.d %p2 -> +0x04(%z6.d)[1byte] +e446a906 : st1b z6.d, p2, [z8.d, #6] : st1b %z6.d %p2 -> +0x06(%z8.d)[1byte] +e448ad48 : st1b z8.d, p3, [z10.d, #8] : st1b %z8.d %p3 -> +0x08(%z10.d)[1byte] +e44aad8a : st1b z10.d, p3, [z12.d, #10] : st1b %z10.d %p3 -> +0x0a(%z12.d)[1byte] +e44cb1cc : st1b z12.d, p4, [z14.d, #12] : st1b %z12.d %p4 -> +0x0c(%z14.d)[1byte] +e44eb20e : st1b z14.d, p4, [z16.d, #14] : st1b %z14.d %p4 -> +0x0e(%z16.d)[1byte] +e450b650 : st1b z16.d, p5, [z18.d, #16] : st1b %z16.d %p5 -> +0x10(%z18.d)[1byte] +e451b671 : st1b z17.d, p5, [z19.d, #17] : st1b %z17.d %p5 -> +0x11(%z19.d)[1byte] +e453b6b3 : st1b z19.d, p5, [z21.d, #19] : st1b %z19.d %p5 -> +0x13(%z21.d)[1byte] +e455baf5 : st1b z21.d, p6, [z23.d, #21] : st1b %z21.d %p6 -> +0x15(%z23.d)[1byte] +e457bb37 : st1b z23.d, p6, [z25.d, #23] : st1b %z23.d %p6 -> +0x17(%z25.d)[1byte] +e459bf79 : st1b z25.d, p7, [z27.d, #25] : st1b %z25.d %p7 -> +0x19(%z27.d)[1byte] +e45bbfbb : st1b z27.d, p7, [z29.d, #27] : st1b %z27.d %p7 -> +0x1b(%z29.d)[1byte] +e45fbfff : st1b z31.d, p7, [z31.d, #31] : st1b %z31.d %p7 -> +0x1f(%z31.d)[1byte] # ST1B { .S }, , [.S{, #}] (ST1B-Z.P.AI-S) -e460a000 : st1b z0.s, p0, [z0.s, #0] : st1b %z0.s %p0 -> (%z0.s)[8byte] -e462a482 : st1b z2.s, p1, [z4.s, #2] : st1b %z2.s %p1 -> +0x02(%z4.s)[8byte] -e464a8c4 : st1b z4.s, p2, [z6.s, #4] : st1b %z4.s %p2 -> +0x04(%z6.s)[8byte] -e466a906 : st1b z6.s, p2, [z8.s, #6] : st1b %z6.s %p2 -> +0x06(%z8.s)[8byte] -e468ad48 : st1b z8.s, p3, [z10.s, #8] : st1b %z8.s %p3 -> +0x08(%z10.s)[8byte] -e46aad8a : st1b 
z10.s, p3, [z12.s, #10] : st1b %z10.s %p3 -> +0x0a(%z12.s)[8byte] -e46cb1cc : st1b z12.s, p4, [z14.s, #12] : st1b %z12.s %p4 -> +0x0c(%z14.s)[8byte] -e46eb20e : st1b z14.s, p4, [z16.s, #14] : st1b %z14.s %p4 -> +0x0e(%z16.s)[8byte] -e470b650 : st1b z16.s, p5, [z18.s, #16] : st1b %z16.s %p5 -> +0x10(%z18.s)[8byte] -e471b671 : st1b z17.s, p5, [z19.s, #17] : st1b %z17.s %p5 -> +0x11(%z19.s)[8byte] -e473b6b3 : st1b z19.s, p5, [z21.s, #19] : st1b %z19.s %p5 -> +0x13(%z21.s)[8byte] -e475baf5 : st1b z21.s, p6, [z23.s, #21] : st1b %z21.s %p6 -> +0x15(%z23.s)[8byte] -e477bb37 : st1b z23.s, p6, [z25.s, #23] : st1b %z23.s %p6 -> +0x17(%z25.s)[8byte] -e479bf79 : st1b z25.s, p7, [z27.s, #25] : st1b %z25.s %p7 -> +0x19(%z27.s)[8byte] -e47bbfbb : st1b z27.s, p7, [z29.s, #27] : st1b %z27.s %p7 -> +0x1b(%z29.s)[8byte] -e47fbfff : st1b z31.s, p7, [z31.s, #31] : st1b %z31.s %p7 -> +0x1f(%z31.s)[8byte] +e460a000 : st1b z0.s, p0, [z0.s, #0] : st1b %z0.s %p0 -> (%z0.s)[1byte] +e462a482 : st1b z2.s, p1, [z4.s, #2] : st1b %z2.s %p1 -> +0x02(%z4.s)[1byte] +e464a8c4 : st1b z4.s, p2, [z6.s, #4] : st1b %z4.s %p2 -> +0x04(%z6.s)[1byte] +e466a906 : st1b z6.s, p2, [z8.s, #6] : st1b %z6.s %p2 -> +0x06(%z8.s)[1byte] +e468ad48 : st1b z8.s, p3, [z10.s, #8] : st1b %z8.s %p3 -> +0x08(%z10.s)[1byte] +e46aad8a : st1b z10.s, p3, [z12.s, #10] : st1b %z10.s %p3 -> +0x0a(%z12.s)[1byte] +e46cb1cc : st1b z12.s, p4, [z14.s, #12] : st1b %z12.s %p4 -> +0x0c(%z14.s)[1byte] +e46eb20e : st1b z14.s, p4, [z16.s, #14] : st1b %z14.s %p4 -> +0x0e(%z16.s)[1byte] +e470b650 : st1b z16.s, p5, [z18.s, #16] : st1b %z16.s %p5 -> +0x10(%z18.s)[1byte] +e471b671 : st1b z17.s, p5, [z19.s, #17] : st1b %z17.s %p5 -> +0x11(%z19.s)[1byte] +e473b6b3 : st1b z19.s, p5, [z21.s, #19] : st1b %z19.s %p5 -> +0x13(%z21.s)[1byte] +e475baf5 : st1b z21.s, p6, [z23.s, #21] : st1b %z21.s %p6 -> +0x15(%z23.s)[1byte] +e477bb37 : st1b z23.s, p6, [z25.s, #23] : st1b %z23.s %p6 -> +0x17(%z25.s)[1byte] +e479bf79 : st1b z25.s, p7, [z27.s, #25] : st1b 
%z25.s %p7 -> +0x19(%z27.s)[1byte] +e47bbfbb : st1b z27.s, p7, [z29.s, #27] : st1b %z27.s %p7 -> +0x1b(%z29.s)[1byte] +e47fbfff : st1b z31.s, p7, [z31.s, #31] : st1b %z31.s %p7 -> +0x1f(%z31.s)[1byte] # ST1D { .D }, , [, .D, ] (ST1D-Z.P.BZ-D.x32.unscaled) -e5808000 : st1d z0.d, p0, [x0, z0.d, UXTW] : st1d %z0.d %p0 -> (%x0,%z0.d,uxtw)[32byte] -e5858482 : st1d z2.d, p1, [x4, z5.d, UXTW] : st1d %z2.d %p1 -> (%x4,%z5.d,uxtw)[32byte] -e58788c4 : st1d z4.d, p2, [x6, z7.d, UXTW] : st1d %z4.d %p2 -> (%x6,%z7.d,uxtw)[32byte] -e5898906 : st1d z6.d, p2, [x8, z9.d, UXTW] : st1d %z6.d %p2 -> (%x8,%z9.d,uxtw)[32byte] -e58b8d48 : st1d z8.d, p3, [x10, z11.d, UXTW] : st1d %z8.d %p3 -> (%x10,%z11.d,uxtw)[32byte] -e58d8d6a : st1d z10.d, p3, [x11, z13.d, UXTW] : st1d %z10.d %p3 -> (%x11,%z13.d,uxtw)[32byte] -e58f91ac : st1d z12.d, p4, [x13, z15.d, UXTW] : st1d %z12.d %p4 -> (%x13,%z15.d,uxtw)[32byte] -e59191ee : st1d z14.d, p4, [x15, z17.d, UXTW] : st1d %z14.d %p4 -> (%x15,%z17.d,uxtw)[32byte] -e5939630 : st1d z16.d, p5, [x17, z19.d, UXTW] : st1d %z16.d %p5 -> (%x17,%z19.d,uxtw)[32byte] -e5949671 : st1d z17.d, p5, [x19, z20.d, UXTW] : st1d %z17.d %p5 -> (%x19,%z20.d,uxtw)[32byte] -e59696b3 : st1d z19.d, p5, [x21, z22.d, UXTW] : st1d %z19.d %p5 -> (%x21,%z22.d,uxtw)[32byte] -e5989af5 : st1d z21.d, p6, [x23, z24.d, UXTW] : st1d %z21.d %p6 -> (%x23,%z24.d,uxtw)[32byte] -e59a9b17 : st1d z23.d, p6, [x24, z26.d, UXTW] : st1d %z23.d %p6 -> (%x24,%z26.d,uxtw)[32byte] -e59c9f59 : st1d z25.d, p7, [x26, z28.d, UXTW] : st1d %z25.d %p7 -> (%x26,%z28.d,uxtw)[32byte] -e59e9f9b : st1d z27.d, p7, [x28, z30.d, UXTW] : st1d %z27.d %p7 -> (%x28,%z30.d,uxtw)[32byte] -e59f9fff : st1d z31.d, p7, [sp, z31.d, UXTW] : st1d %z31.d %p7 -> (%sp,%z31.d,uxtw)[32byte] -e580c000 : st1d z0.d, p0, [x0, z0.d, SXTW] : st1d %z0.d %p0 -> (%x0,%z0.d,sxtw)[32byte] -e585c482 : st1d z2.d, p1, [x4, z5.d, SXTW] : st1d %z2.d %p1 -> (%x4,%z5.d,sxtw)[32byte] -e587c8c4 : st1d z4.d, p2, [x6, z7.d, SXTW] : st1d %z4.d %p2 -> 
(%x6,%z7.d,sxtw)[32byte] -e589c906 : st1d z6.d, p2, [x8, z9.d, SXTW] : st1d %z6.d %p2 -> (%x8,%z9.d,sxtw)[32byte] -e58bcd48 : st1d z8.d, p3, [x10, z11.d, SXTW] : st1d %z8.d %p3 -> (%x10,%z11.d,sxtw)[32byte] -e58dcd6a : st1d z10.d, p3, [x11, z13.d, SXTW] : st1d %z10.d %p3 -> (%x11,%z13.d,sxtw)[32byte] -e58fd1ac : st1d z12.d, p4, [x13, z15.d, SXTW] : st1d %z12.d %p4 -> (%x13,%z15.d,sxtw)[32byte] -e591d1ee : st1d z14.d, p4, [x15, z17.d, SXTW] : st1d %z14.d %p4 -> (%x15,%z17.d,sxtw)[32byte] -e593d630 : st1d z16.d, p5, [x17, z19.d, SXTW] : st1d %z16.d %p5 -> (%x17,%z19.d,sxtw)[32byte] -e594d671 : st1d z17.d, p5, [x19, z20.d, SXTW] : st1d %z17.d %p5 -> (%x19,%z20.d,sxtw)[32byte] -e596d6b3 : st1d z19.d, p5, [x21, z22.d, SXTW] : st1d %z19.d %p5 -> (%x21,%z22.d,sxtw)[32byte] -e598daf5 : st1d z21.d, p6, [x23, z24.d, SXTW] : st1d %z21.d %p6 -> (%x23,%z24.d,sxtw)[32byte] -e59adb17 : st1d z23.d, p6, [x24, z26.d, SXTW] : st1d %z23.d %p6 -> (%x24,%z26.d,sxtw)[32byte] -e59cdf59 : st1d z25.d, p7, [x26, z28.d, SXTW] : st1d %z25.d %p7 -> (%x26,%z28.d,sxtw)[32byte] -e59edf9b : st1d z27.d, p7, [x28, z30.d, SXTW] : st1d %z27.d %p7 -> (%x28,%z30.d,sxtw)[32byte] -e59fdfff : st1d z31.d, p7, [sp, z31.d, SXTW] : st1d %z31.d %p7 -> (%sp,%z31.d,sxtw)[32byte] +e5808000 : st1d z0.d, p0, [x0, z0.d, UXTW] : st1d %z0.d %p0 -> (%x0,%z0.d,uxtw)[8byte] +e5858482 : st1d z2.d, p1, [x4, z5.d, UXTW] : st1d %z2.d %p1 -> (%x4,%z5.d,uxtw)[8byte] +e58788c4 : st1d z4.d, p2, [x6, z7.d, UXTW] : st1d %z4.d %p2 -> (%x6,%z7.d,uxtw)[8byte] +e5898906 : st1d z6.d, p2, [x8, z9.d, UXTW] : st1d %z6.d %p2 -> (%x8,%z9.d,uxtw)[8byte] +e58b8d48 : st1d z8.d, p3, [x10, z11.d, UXTW] : st1d %z8.d %p3 -> (%x10,%z11.d,uxtw)[8byte] +e58d8d6a : st1d z10.d, p3, [x11, z13.d, UXTW] : st1d %z10.d %p3 -> (%x11,%z13.d,uxtw)[8byte] +e58f91ac : st1d z12.d, p4, [x13, z15.d, UXTW] : st1d %z12.d %p4 -> (%x13,%z15.d,uxtw)[8byte] +e59191ee : st1d z14.d, p4, [x15, z17.d, UXTW] : st1d %z14.d %p4 -> (%x15,%z17.d,uxtw)[8byte] +e5939630 : st1d z16.d, 
p5, [x17, z19.d, UXTW] : st1d %z16.d %p5 -> (%x17,%z19.d,uxtw)[8byte] +e5949671 : st1d z17.d, p5, [x19, z20.d, UXTW] : st1d %z17.d %p5 -> (%x19,%z20.d,uxtw)[8byte] +e59696b3 : st1d z19.d, p5, [x21, z22.d, UXTW] : st1d %z19.d %p5 -> (%x21,%z22.d,uxtw)[8byte] +e5989af5 : st1d z21.d, p6, [x23, z24.d, UXTW] : st1d %z21.d %p6 -> (%x23,%z24.d,uxtw)[8byte] +e59a9b17 : st1d z23.d, p6, [x24, z26.d, UXTW] : st1d %z23.d %p6 -> (%x24,%z26.d,uxtw)[8byte] +e59c9f59 : st1d z25.d, p7, [x26, z28.d, UXTW] : st1d %z25.d %p7 -> (%x26,%z28.d,uxtw)[8byte] +e59e9f9b : st1d z27.d, p7, [x28, z30.d, UXTW] : st1d %z27.d %p7 -> (%x28,%z30.d,uxtw)[8byte] +e59f9fff : st1d z31.d, p7, [sp, z31.d, UXTW] : st1d %z31.d %p7 -> (%sp,%z31.d,uxtw)[8byte] +e580c000 : st1d z0.d, p0, [x0, z0.d, SXTW] : st1d %z0.d %p0 -> (%x0,%z0.d,sxtw)[8byte] +e585c482 : st1d z2.d, p1, [x4, z5.d, SXTW] : st1d %z2.d %p1 -> (%x4,%z5.d,sxtw)[8byte] +e587c8c4 : st1d z4.d, p2, [x6, z7.d, SXTW] : st1d %z4.d %p2 -> (%x6,%z7.d,sxtw)[8byte] +e589c906 : st1d z6.d, p2, [x8, z9.d, SXTW] : st1d %z6.d %p2 -> (%x8,%z9.d,sxtw)[8byte] +e58bcd48 : st1d z8.d, p3, [x10, z11.d, SXTW] : st1d %z8.d %p3 -> (%x10,%z11.d,sxtw)[8byte] +e58dcd6a : st1d z10.d, p3, [x11, z13.d, SXTW] : st1d %z10.d %p3 -> (%x11,%z13.d,sxtw)[8byte] +e58fd1ac : st1d z12.d, p4, [x13, z15.d, SXTW] : st1d %z12.d %p4 -> (%x13,%z15.d,sxtw)[8byte] +e591d1ee : st1d z14.d, p4, [x15, z17.d, SXTW] : st1d %z14.d %p4 -> (%x15,%z17.d,sxtw)[8byte] +e593d630 : st1d z16.d, p5, [x17, z19.d, SXTW] : st1d %z16.d %p5 -> (%x17,%z19.d,sxtw)[8byte] +e594d671 : st1d z17.d, p5, [x19, z20.d, SXTW] : st1d %z17.d %p5 -> (%x19,%z20.d,sxtw)[8byte] +e596d6b3 : st1d z19.d, p5, [x21, z22.d, SXTW] : st1d %z19.d %p5 -> (%x21,%z22.d,sxtw)[8byte] +e598daf5 : st1d z21.d, p6, [x23, z24.d, SXTW] : st1d %z21.d %p6 -> (%x23,%z24.d,sxtw)[8byte] +e59adb17 : st1d z23.d, p6, [x24, z26.d, SXTW] : st1d %z23.d %p6 -> (%x24,%z26.d,sxtw)[8byte] +e59cdf59 : st1d z25.d, p7, [x26, z28.d, SXTW] : st1d %z25.d %p7 -> 
(%x26,%z28.d,sxtw)[8byte] +e59edf9b : st1d z27.d, p7, [x28, z30.d, SXTW] : st1d %z27.d %p7 -> (%x28,%z30.d,sxtw)[8byte] +e59fdfff : st1d z31.d, p7, [sp, z31.d, SXTW] : st1d %z31.d %p7 -> (%sp,%z31.d,sxtw)[8byte] # ST1D { .D }, , [, .D] (ST1D-Z.P.BZ-D.64.unscaled) -e580a000 : st1d z0.d, p0, [x0, z0.d] : st1d %z0.d %p0 -> (%x0,%z0.d)[32byte] -e585a482 : st1d z2.d, p1, [x4, z5.d] : st1d %z2.d %p1 -> (%x4,%z5.d)[32byte] -e587a8c4 : st1d z4.d, p2, [x6, z7.d] : st1d %z4.d %p2 -> (%x6,%z7.d)[32byte] -e589a906 : st1d z6.d, p2, [x8, z9.d] : st1d %z6.d %p2 -> (%x8,%z9.d)[32byte] -e58bad48 : st1d z8.d, p3, [x10, z11.d] : st1d %z8.d %p3 -> (%x10,%z11.d)[32byte] -e58dad6a : st1d z10.d, p3, [x11, z13.d] : st1d %z10.d %p3 -> (%x11,%z13.d)[32byte] -e58fb1ac : st1d z12.d, p4, [x13, z15.d] : st1d %z12.d %p4 -> (%x13,%z15.d)[32byte] -e591b1ee : st1d z14.d, p4, [x15, z17.d] : st1d %z14.d %p4 -> (%x15,%z17.d)[32byte] -e593b630 : st1d z16.d, p5, [x17, z19.d] : st1d %z16.d %p5 -> (%x17,%z19.d)[32byte] -e594b671 : st1d z17.d, p5, [x19, z20.d] : st1d %z17.d %p5 -> (%x19,%z20.d)[32byte] -e596b6b3 : st1d z19.d, p5, [x21, z22.d] : st1d %z19.d %p5 -> (%x21,%z22.d)[32byte] -e598baf5 : st1d z21.d, p6, [x23, z24.d] : st1d %z21.d %p6 -> (%x23,%z24.d)[32byte] -e59abb17 : st1d z23.d, p6, [x24, z26.d] : st1d %z23.d %p6 -> (%x24,%z26.d)[32byte] -e59cbf59 : st1d z25.d, p7, [x26, z28.d] : st1d %z25.d %p7 -> (%x26,%z28.d)[32byte] -e59ebf9b : st1d z27.d, p7, [x28, z30.d] : st1d %z27.d %p7 -> (%x28,%z30.d)[32byte] -e59fbfff : st1d z31.d, p7, [sp, z31.d] : st1d %z31.d %p7 -> (%sp,%z31.d)[32byte] +e580a000 : st1d z0.d, p0, [x0, z0.d] : st1d %z0.d %p0 -> (%x0,%z0.d)[8byte] +e585a482 : st1d z2.d, p1, [x4, z5.d] : st1d %z2.d %p1 -> (%x4,%z5.d)[8byte] +e587a8c4 : st1d z4.d, p2, [x6, z7.d] : st1d %z4.d %p2 -> (%x6,%z7.d)[8byte] +e589a906 : st1d z6.d, p2, [x8, z9.d] : st1d %z6.d %p2 -> (%x8,%z9.d)[8byte] +e58bad48 : st1d z8.d, p3, [x10, z11.d] : st1d %z8.d %p3 -> (%x10,%z11.d)[8byte] +e58dad6a : st1d z10.d, p3, 
[x11, z13.d] : st1d %z10.d %p3 -> (%x11,%z13.d)[8byte] +e58fb1ac : st1d z12.d, p4, [x13, z15.d] : st1d %z12.d %p4 -> (%x13,%z15.d)[8byte] +e591b1ee : st1d z14.d, p4, [x15, z17.d] : st1d %z14.d %p4 -> (%x15,%z17.d)[8byte] +e593b630 : st1d z16.d, p5, [x17, z19.d] : st1d %z16.d %p5 -> (%x17,%z19.d)[8byte] +e594b671 : st1d z17.d, p5, [x19, z20.d] : st1d %z17.d %p5 -> (%x19,%z20.d)[8byte] +e596b6b3 : st1d z19.d, p5, [x21, z22.d] : st1d %z19.d %p5 -> (%x21,%z22.d)[8byte] +e598baf5 : st1d z21.d, p6, [x23, z24.d] : st1d %z21.d %p6 -> (%x23,%z24.d)[8byte] +e59abb17 : st1d z23.d, p6, [x24, z26.d] : st1d %z23.d %p6 -> (%x24,%z26.d)[8byte] +e59cbf59 : st1d z25.d, p7, [x26, z28.d] : st1d %z25.d %p7 -> (%x26,%z28.d)[8byte] +e59ebf9b : st1d z27.d, p7, [x28, z30.d] : st1d %z27.d %p7 -> (%x28,%z30.d)[8byte] +e59fbfff : st1d z31.d, p7, [sp, z31.d] : st1d %z31.d %p7 -> (%sp,%z31.d)[8byte] # ST1D { .D }, , [, .D, #3] (ST1D-Z.P.BZ-D.x32.scaled) -e5a08000 : st1d z0.d, p0, [x0, z0.d, UXTW #3] : st1d %z0.d %p0 -> (%x0,%z0.d,uxtw #3)[32byte] -e5a58482 : st1d z2.d, p1, [x4, z5.d, UXTW #3] : st1d %z2.d %p1 -> (%x4,%z5.d,uxtw #3)[32byte] -e5a788c4 : st1d z4.d, p2, [x6, z7.d, UXTW #3] : st1d %z4.d %p2 -> (%x6,%z7.d,uxtw #3)[32byte] -e5a98906 : st1d z6.d, p2, [x8, z9.d, UXTW #3] : st1d %z6.d %p2 -> (%x8,%z9.d,uxtw #3)[32byte] -e5ab8d48 : st1d z8.d, p3, [x10, z11.d, UXTW #3] : st1d %z8.d %p3 -> (%x10,%z11.d,uxtw #3)[32byte] -e5ad8d6a : st1d z10.d, p3, [x11, z13.d, UXTW #3] : st1d %z10.d %p3 -> (%x11,%z13.d,uxtw #3)[32byte] -e5af91ac : st1d z12.d, p4, [x13, z15.d, UXTW #3] : st1d %z12.d %p4 -> (%x13,%z15.d,uxtw #3)[32byte] -e5b191ee : st1d z14.d, p4, [x15, z17.d, UXTW #3] : st1d %z14.d %p4 -> (%x15,%z17.d,uxtw #3)[32byte] -e5b39630 : st1d z16.d, p5, [x17, z19.d, UXTW #3] : st1d %z16.d %p5 -> (%x17,%z19.d,uxtw #3)[32byte] -e5b49671 : st1d z17.d, p5, [x19, z20.d, UXTW #3] : st1d %z17.d %p5 -> (%x19,%z20.d,uxtw #3)[32byte] -e5b696b3 : st1d z19.d, p5, [x21, z22.d, UXTW #3] : st1d %z19.d %p5 -> 
(%x21,%z22.d,uxtw #3)[32byte] -e5b89af5 : st1d z21.d, p6, [x23, z24.d, UXTW #3] : st1d %z21.d %p6 -> (%x23,%z24.d,uxtw #3)[32byte] -e5ba9b17 : st1d z23.d, p6, [x24, z26.d, UXTW #3] : st1d %z23.d %p6 -> (%x24,%z26.d,uxtw #3)[32byte] -e5bc9f59 : st1d z25.d, p7, [x26, z28.d, UXTW #3] : st1d %z25.d %p7 -> (%x26,%z28.d,uxtw #3)[32byte] -e5be9f9b : st1d z27.d, p7, [x28, z30.d, UXTW #3] : st1d %z27.d %p7 -> (%x28,%z30.d,uxtw #3)[32byte] -e5bf9fff : st1d z31.d, p7, [sp, z31.d, UXTW #3] : st1d %z31.d %p7 -> (%sp,%z31.d,uxtw #3)[32byte] -e5a0c000 : st1d z0.d, p0, [x0, z0.d, SXTW #3] : st1d %z0.d %p0 -> (%x0,%z0.d,sxtw #3)[32byte] -e5a5c482 : st1d z2.d, p1, [x4, z5.d, SXTW #3] : st1d %z2.d %p1 -> (%x4,%z5.d,sxtw #3)[32byte] -e5a7c8c4 : st1d z4.d, p2, [x6, z7.d, SXTW #3] : st1d %z4.d %p2 -> (%x6,%z7.d,sxtw #3)[32byte] -e5a9c906 : st1d z6.d, p2, [x8, z9.d, SXTW #3] : st1d %z6.d %p2 -> (%x8,%z9.d,sxtw #3)[32byte] -e5abcd48 : st1d z8.d, p3, [x10, z11.d, SXTW #3] : st1d %z8.d %p3 -> (%x10,%z11.d,sxtw #3)[32byte] -e5adcd6a : st1d z10.d, p3, [x11, z13.d, SXTW #3] : st1d %z10.d %p3 -> (%x11,%z13.d,sxtw #3)[32byte] -e5afd1ac : st1d z12.d, p4, [x13, z15.d, SXTW #3] : st1d %z12.d %p4 -> (%x13,%z15.d,sxtw #3)[32byte] -e5b1d1ee : st1d z14.d, p4, [x15, z17.d, SXTW #3] : st1d %z14.d %p4 -> (%x15,%z17.d,sxtw #3)[32byte] -e5b3d630 : st1d z16.d, p5, [x17, z19.d, SXTW #3] : st1d %z16.d %p5 -> (%x17,%z19.d,sxtw #3)[32byte] -e5b4d671 : st1d z17.d, p5, [x19, z20.d, SXTW #3] : st1d %z17.d %p5 -> (%x19,%z20.d,sxtw #3)[32byte] -e5b6d6b3 : st1d z19.d, p5, [x21, z22.d, SXTW #3] : st1d %z19.d %p5 -> (%x21,%z22.d,sxtw #3)[32byte] -e5b8daf5 : st1d z21.d, p6, [x23, z24.d, SXTW #3] : st1d %z21.d %p6 -> (%x23,%z24.d,sxtw #3)[32byte] -e5badb17 : st1d z23.d, p6, [x24, z26.d, SXTW #3] : st1d %z23.d %p6 -> (%x24,%z26.d,sxtw #3)[32byte] -e5bcdf59 : st1d z25.d, p7, [x26, z28.d, SXTW #3] : st1d %z25.d %p7 -> (%x26,%z28.d,sxtw #3)[32byte] -e5bedf9b : st1d z27.d, p7, [x28, z30.d, SXTW #3] : st1d %z27.d %p7 -> 
(%x28,%z30.d,sxtw #3)[32byte] -e5bfdfff : st1d z31.d, p7, [sp, z31.d, SXTW #3] : st1d %z31.d %p7 -> (%sp,%z31.d,sxtw #3)[32byte] +e5a08000 : st1d z0.d, p0, [x0, z0.d, UXTW #3] : st1d %z0.d %p0 -> (%x0,%z0.d,uxtw #3)[8byte] +e5a58482 : st1d z2.d, p1, [x4, z5.d, UXTW #3] : st1d %z2.d %p1 -> (%x4,%z5.d,uxtw #3)[8byte] +e5a788c4 : st1d z4.d, p2, [x6, z7.d, UXTW #3] : st1d %z4.d %p2 -> (%x6,%z7.d,uxtw #3)[8byte] +e5a98906 : st1d z6.d, p2, [x8, z9.d, UXTW #3] : st1d %z6.d %p2 -> (%x8,%z9.d,uxtw #3)[8byte] +e5ab8d48 : st1d z8.d, p3, [x10, z11.d, UXTW #3] : st1d %z8.d %p3 -> (%x10,%z11.d,uxtw #3)[8byte] +e5ad8d6a : st1d z10.d, p3, [x11, z13.d, UXTW #3] : st1d %z10.d %p3 -> (%x11,%z13.d,uxtw #3)[8byte] +e5af91ac : st1d z12.d, p4, [x13, z15.d, UXTW #3] : st1d %z12.d %p4 -> (%x13,%z15.d,uxtw #3)[8byte] +e5b191ee : st1d z14.d, p4, [x15, z17.d, UXTW #3] : st1d %z14.d %p4 -> (%x15,%z17.d,uxtw #3)[8byte] +e5b39630 : st1d z16.d, p5, [x17, z19.d, UXTW #3] : st1d %z16.d %p5 -> (%x17,%z19.d,uxtw #3)[8byte] +e5b49671 : st1d z17.d, p5, [x19, z20.d, UXTW #3] : st1d %z17.d %p5 -> (%x19,%z20.d,uxtw #3)[8byte] +e5b696b3 : st1d z19.d, p5, [x21, z22.d, UXTW #3] : st1d %z19.d %p5 -> (%x21,%z22.d,uxtw #3)[8byte] +e5b89af5 : st1d z21.d, p6, [x23, z24.d, UXTW #3] : st1d %z21.d %p6 -> (%x23,%z24.d,uxtw #3)[8byte] +e5ba9b17 : st1d z23.d, p6, [x24, z26.d, UXTW #3] : st1d %z23.d %p6 -> (%x24,%z26.d,uxtw #3)[8byte] +e5bc9f59 : st1d z25.d, p7, [x26, z28.d, UXTW #3] : st1d %z25.d %p7 -> (%x26,%z28.d,uxtw #3)[8byte] +e5be9f9b : st1d z27.d, p7, [x28, z30.d, UXTW #3] : st1d %z27.d %p7 -> (%x28,%z30.d,uxtw #3)[8byte] +e5bf9fff : st1d z31.d, p7, [sp, z31.d, UXTW #3] : st1d %z31.d %p7 -> (%sp,%z31.d,uxtw #3)[8byte] +e5a0c000 : st1d z0.d, p0, [x0, z0.d, SXTW #3] : st1d %z0.d %p0 -> (%x0,%z0.d,sxtw #3)[8byte] +e5a5c482 : st1d z2.d, p1, [x4, z5.d, SXTW #3] : st1d %z2.d %p1 -> (%x4,%z5.d,sxtw #3)[8byte] +e5a7c8c4 : st1d z4.d, p2, [x6, z7.d, SXTW #3] : st1d %z4.d %p2 -> (%x6,%z7.d,sxtw #3)[8byte] +e5a9c906 : st1d 
z6.d, p2, [x8, z9.d, SXTW #3] : st1d %z6.d %p2 -> (%x8,%z9.d,sxtw #3)[8byte] +e5abcd48 : st1d z8.d, p3, [x10, z11.d, SXTW #3] : st1d %z8.d %p3 -> (%x10,%z11.d,sxtw #3)[8byte] +e5adcd6a : st1d z10.d, p3, [x11, z13.d, SXTW #3] : st1d %z10.d %p3 -> (%x11,%z13.d,sxtw #3)[8byte] +e5afd1ac : st1d z12.d, p4, [x13, z15.d, SXTW #3] : st1d %z12.d %p4 -> (%x13,%z15.d,sxtw #3)[8byte] +e5b1d1ee : st1d z14.d, p4, [x15, z17.d, SXTW #3] : st1d %z14.d %p4 -> (%x15,%z17.d,sxtw #3)[8byte] +e5b3d630 : st1d z16.d, p5, [x17, z19.d, SXTW #3] : st1d %z16.d %p5 -> (%x17,%z19.d,sxtw #3)[8byte] +e5b4d671 : st1d z17.d, p5, [x19, z20.d, SXTW #3] : st1d %z17.d %p5 -> (%x19,%z20.d,sxtw #3)[8byte] +e5b6d6b3 : st1d z19.d, p5, [x21, z22.d, SXTW #3] : st1d %z19.d %p5 -> (%x21,%z22.d,sxtw #3)[8byte] +e5b8daf5 : st1d z21.d, p6, [x23, z24.d, SXTW #3] : st1d %z21.d %p6 -> (%x23,%z24.d,sxtw #3)[8byte] +e5badb17 : st1d z23.d, p6, [x24, z26.d, SXTW #3] : st1d %z23.d %p6 -> (%x24,%z26.d,sxtw #3)[8byte] +e5bcdf59 : st1d z25.d, p7, [x26, z28.d, SXTW #3] : st1d %z25.d %p7 -> (%x26,%z28.d,sxtw #3)[8byte] +e5bedf9b : st1d z27.d, p7, [x28, z30.d, SXTW #3] : st1d %z27.d %p7 -> (%x28,%z30.d,sxtw #3)[8byte] +e5bfdfff : st1d z31.d, p7, [sp, z31.d, SXTW #3] : st1d %z31.d %p7 -> (%sp,%z31.d,sxtw #3)[8byte] # ST1D { .D }, , [, .D, LSL #3] (ST1D-Z.P.BZ-D.64.scaled) -e5a0a000 : st1d z0.d, p0, [x0, z0.d, LSL #3] : st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[32byte] -e5a5a482 : st1d z2.d, p1, [x4, z5.d, LSL #3] : st1d %z2.d %p1 -> (%x4,%z5.d,lsl #3)[32byte] -e5a7a8c4 : st1d z4.d, p2, [x6, z7.d, LSL #3] : st1d %z4.d %p2 -> (%x6,%z7.d,lsl #3)[32byte] -e5a9a906 : st1d z6.d, p2, [x8, z9.d, LSL #3] : st1d %z6.d %p2 -> (%x8,%z9.d,lsl #3)[32byte] -e5abad48 : st1d z8.d, p3, [x10, z11.d, LSL #3] : st1d %z8.d %p3 -> (%x10,%z11.d,lsl #3)[32byte] -e5adad6a : st1d z10.d, p3, [x11, z13.d, LSL #3] : st1d %z10.d %p3 -> (%x11,%z13.d,lsl #3)[32byte] -e5afb1ac : st1d z12.d, p4, [x13, z15.d, LSL #3] : st1d %z12.d %p4 -> (%x13,%z15.d,lsl #3)[32byte] 
-e5b1b1ee : st1d z14.d, p4, [x15, z17.d, LSL #3] : st1d %z14.d %p4 -> (%x15,%z17.d,lsl #3)[32byte] -e5b3b630 : st1d z16.d, p5, [x17, z19.d, LSL #3] : st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[32byte] -e5b4b671 : st1d z17.d, p5, [x19, z20.d, LSL #3] : st1d %z17.d %p5 -> (%x19,%z20.d,lsl #3)[32byte] -e5b6b6b3 : st1d z19.d, p5, [x21, z22.d, LSL #3] : st1d %z19.d %p5 -> (%x21,%z22.d,lsl #3)[32byte] -e5b8baf5 : st1d z21.d, p6, [x23, z24.d, LSL #3] : st1d %z21.d %p6 -> (%x23,%z24.d,lsl #3)[32byte] -e5babb17 : st1d z23.d, p6, [x24, z26.d, LSL #3] : st1d %z23.d %p6 -> (%x24,%z26.d,lsl #3)[32byte] -e5bcbf59 : st1d z25.d, p7, [x26, z28.d, LSL #3] : st1d %z25.d %p7 -> (%x26,%z28.d,lsl #3)[32byte] -e5bebf9b : st1d z27.d, p7, [x28, z30.d, LSL #3] : st1d %z27.d %p7 -> (%x28,%z30.d,lsl #3)[32byte] -e5bfbfff : st1d z31.d, p7, [sp, z31.d, LSL #3] : st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[32byte] +e5a0a000 : st1d z0.d, p0, [x0, z0.d, LSL #3] : st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[8byte] +e5a5a482 : st1d z2.d, p1, [x4, z5.d, LSL #3] : st1d %z2.d %p1 -> (%x4,%z5.d,lsl #3)[8byte] +e5a7a8c4 : st1d z4.d, p2, [x6, z7.d, LSL #3] : st1d %z4.d %p2 -> (%x6,%z7.d,lsl #3)[8byte] +e5a9a906 : st1d z6.d, p2, [x8, z9.d, LSL #3] : st1d %z6.d %p2 -> (%x8,%z9.d,lsl #3)[8byte] +e5abad48 : st1d z8.d, p3, [x10, z11.d, LSL #3] : st1d %z8.d %p3 -> (%x10,%z11.d,lsl #3)[8byte] +e5adad6a : st1d z10.d, p3, [x11, z13.d, LSL #3] : st1d %z10.d %p3 -> (%x11,%z13.d,lsl #3)[8byte] +e5afb1ac : st1d z12.d, p4, [x13, z15.d, LSL #3] : st1d %z12.d %p4 -> (%x13,%z15.d,lsl #3)[8byte] +e5b1b1ee : st1d z14.d, p4, [x15, z17.d, LSL #3] : st1d %z14.d %p4 -> (%x15,%z17.d,lsl #3)[8byte] +e5b3b630 : st1d z16.d, p5, [x17, z19.d, LSL #3] : st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[8byte] +e5b4b671 : st1d z17.d, p5, [x19, z20.d, LSL #3] : st1d %z17.d %p5 -> (%x19,%z20.d,lsl #3)[8byte] +e5b6b6b3 : st1d z19.d, p5, [x21, z22.d, LSL #3] : st1d %z19.d %p5 -> (%x21,%z22.d,lsl #3)[8byte] +e5b8baf5 : st1d z21.d, p6, [x23, z24.d, LSL #3] : st1d 
%z21.d %p6 -> (%x23,%z24.d,lsl #3)[8byte] +e5babb17 : st1d z23.d, p6, [x24, z26.d, LSL #3] : st1d %z23.d %p6 -> (%x24,%z26.d,lsl #3)[8byte] +e5bcbf59 : st1d z25.d, p7, [x26, z28.d, LSL #3] : st1d %z25.d %p7 -> (%x26,%z28.d,lsl #3)[8byte] +e5bebf9b : st1d z27.d, p7, [x28, z30.d, LSL #3] : st1d %z27.d %p7 -> (%x28,%z30.d,lsl #3)[8byte] +e5bfbfff : st1d z31.d, p7, [sp, z31.d, LSL #3] : st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[8byte] # ST1D { .D }, , [.D{, #}] (ST1D-Z.P.AI-D) -e5c0a000 : st1d z0.d, p0, [z0.d, #0] : st1d %z0.d %p0 -> (%z0.d)[32byte] -e5c2a482 : st1d z2.d, p1, [z4.d, #16] : st1d %z2.d %p1 -> +0x10(%z4.d)[32byte] -e5c4a8c4 : st1d z4.d, p2, [z6.d, #32] : st1d %z4.d %p2 -> +0x20(%z6.d)[32byte] -e5c6a906 : st1d z6.d, p2, [z8.d, #48] : st1d %z6.d %p2 -> +0x30(%z8.d)[32byte] -e5c8ad48 : st1d z8.d, p3, [z10.d, #64] : st1d %z8.d %p3 -> +0x40(%z10.d)[32byte] -e5caad8a : st1d z10.d, p3, [z12.d, #80] : st1d %z10.d %p3 -> +0x50(%z12.d)[32byte] -e5ccb1cc : st1d z12.d, p4, [z14.d, #96] : st1d %z12.d %p4 -> +0x60(%z14.d)[32byte] -e5ceb20e : st1d z14.d, p4, [z16.d, #112] : st1d %z14.d %p4 -> +0x70(%z16.d)[32byte] -e5d0b650 : st1d z16.d, p5, [z18.d, #128] : st1d %z16.d %p5 -> +0x80(%z18.d)[32byte] -e5d1b671 : st1d z17.d, p5, [z19.d, #136] : st1d %z17.d %p5 -> +0x88(%z19.d)[32byte] -e5d3b6b3 : st1d z19.d, p5, [z21.d, #152] : st1d %z19.d %p5 -> +0x98(%z21.d)[32byte] -e5d5baf5 : st1d z21.d, p6, [z23.d, #168] : st1d %z21.d %p6 -> +0xa8(%z23.d)[32byte] -e5d7bb37 : st1d z23.d, p6, [z25.d, #184] : st1d %z23.d %p6 -> +0xb8(%z25.d)[32byte] -e5d9bf79 : st1d z25.d, p7, [z27.d, #200] : st1d %z25.d %p7 -> +0xc8(%z27.d)[32byte] -e5dbbfbb : st1d z27.d, p7, [z29.d, #216] : st1d %z27.d %p7 -> +0xd8(%z29.d)[32byte] -e5dfbfff : st1d z31.d, p7, [z31.d, #248] : st1d %z31.d %p7 -> +0xf8(%z31.d)[32byte] +e5c0a000 : st1d z0.d, p0, [z0.d, #0] : st1d %z0.d %p0 -> (%z0.d)[8byte] +e5c2a482 : st1d z2.d, p1, [z4.d, #16] : st1d %z2.d %p1 -> +0x10(%z4.d)[8byte] +e5c4a8c4 : st1d z4.d, p2, [z6.d, #32] : 
st1d %z4.d %p2 -> +0x20(%z6.d)[8byte] +e5c6a906 : st1d z6.d, p2, [z8.d, #48] : st1d %z6.d %p2 -> +0x30(%z8.d)[8byte] +e5c8ad48 : st1d z8.d, p3, [z10.d, #64] : st1d %z8.d %p3 -> +0x40(%z10.d)[8byte] +e5caad8a : st1d z10.d, p3, [z12.d, #80] : st1d %z10.d %p3 -> +0x50(%z12.d)[8byte] +e5ccb1cc : st1d z12.d, p4, [z14.d, #96] : st1d %z12.d %p4 -> +0x60(%z14.d)[8byte] +e5ceb20e : st1d z14.d, p4, [z16.d, #112] : st1d %z14.d %p4 -> +0x70(%z16.d)[8byte] +e5d0b650 : st1d z16.d, p5, [z18.d, #128] : st1d %z16.d %p5 -> +0x80(%z18.d)[8byte] +e5d1b671 : st1d z17.d, p5, [z19.d, #136] : st1d %z17.d %p5 -> +0x88(%z19.d)[8byte] +e5d3b6b3 : st1d z19.d, p5, [z21.d, #152] : st1d %z19.d %p5 -> +0x98(%z21.d)[8byte] +e5d5baf5 : st1d z21.d, p6, [z23.d, #168] : st1d %z21.d %p6 -> +0xa8(%z23.d)[8byte] +e5d7bb37 : st1d z23.d, p6, [z25.d, #184] : st1d %z23.d %p6 -> +0xb8(%z25.d)[8byte] +e5d9bf79 : st1d z25.d, p7, [z27.d, #200] : st1d %z25.d %p7 -> +0xc8(%z27.d)[8byte] +e5dbbfbb : st1d z27.d, p7, [z29.d, #216] : st1d %z27.d %p7 -> +0xd8(%z29.d)[8byte] +e5dfbfff : st1d z31.d, p7, [z31.d, #248] : st1d %z31.d %p7 -> +0xf8(%z31.d)[8byte] # ST1D { .D }, , [, , LSL #3] (ST1D-Z.P.BR-_) -e5e04000 : st1d z0.d, p0, [x0, x0, LSL #3] : st1d %z0.d %p0 -> (%x0,%x0,lsl #3)[32byte] -e5e54482 : st1d z2.d, p1, [x4, x5, LSL #3] : st1d %z2.d %p1 -> (%x4,%x5,lsl #3)[32byte] -e5e748c4 : st1d z4.d, p2, [x6, x7, LSL #3] : st1d %z4.d %p2 -> (%x6,%x7,lsl #3)[32byte] -e5e94906 : st1d z6.d, p2, [x8, x9, LSL #3] : st1d %z6.d %p2 -> (%x8,%x9,lsl #3)[32byte] -e5eb4d48 : st1d z8.d, p3, [x10, x11, LSL #3] : st1d %z8.d %p3 -> (%x10,%x11,lsl #3)[32byte] -e5ec4d6a : st1d z10.d, p3, [x11, x12, LSL #3] : st1d %z10.d %p3 -> (%x11,%x12,lsl #3)[32byte] -e5ee51ac : st1d z12.d, p4, [x13, x14, LSL #3] : st1d %z12.d %p4 -> (%x13,%x14,lsl #3)[32byte] -e5f051ee : st1d z14.d, p4, [x15, x16, LSL #3] : st1d %z14.d %p4 -> (%x15,%x16,lsl #3)[32byte] -e5f25630 : st1d z16.d, p5, [x17, x18, LSL #3] : st1d %z16.d %p5 -> (%x17,%x18,lsl #3)[32byte] 
-e5f45671 : st1d z17.d, p5, [x19, x20, LSL #3] : st1d %z17.d %p5 -> (%x19,%x20,lsl #3)[32byte] -e5f656b3 : st1d z19.d, p5, [x21, x22, LSL #3] : st1d %z19.d %p5 -> (%x21,%x22,lsl #3)[32byte] -e5f85af5 : st1d z21.d, p6, [x23, x24, LSL #3] : st1d %z21.d %p6 -> (%x23,%x24,lsl #3)[32byte] -e5f95b17 : st1d z23.d, p6, [x24, x25, LSL #3] : st1d %z23.d %p6 -> (%x24,%x25,lsl #3)[32byte] -e5fb5f59 : st1d z25.d, p7, [x26, x27, LSL #3] : st1d %z25.d %p7 -> (%x26,%x27,lsl #3)[32byte] -e5fd5f9b : st1d z27.d, p7, [x28, x29, LSL #3] : st1d %z27.d %p7 -> (%x28,%x29,lsl #3)[32byte] -e5fe5fff : st1d z31.d, p7, [sp, x30, LSL #3] : st1d %z31.d %p7 -> (%sp,%x30,lsl #3)[32byte] +e5e04000 : st1d z0.d, p0, [x0, x0, LSL #3] : st1d %z0.d %p0 -> (%x0,%x0,lsl #3)[8byte] +e5e54482 : st1d z2.d, p1, [x4, x5, LSL #3] : st1d %z2.d %p1 -> (%x4,%x5,lsl #3)[8byte] +e5e748c4 : st1d z4.d, p2, [x6, x7, LSL #3] : st1d %z4.d %p2 -> (%x6,%x7,lsl #3)[8byte] +e5e94906 : st1d z6.d, p2, [x8, x9, LSL #3] : st1d %z6.d %p2 -> (%x8,%x9,lsl #3)[8byte] +e5eb4d48 : st1d z8.d, p3, [x10, x11, LSL #3] : st1d %z8.d %p3 -> (%x10,%x11,lsl #3)[8byte] +e5ec4d6a : st1d z10.d, p3, [x11, x12, LSL #3] : st1d %z10.d %p3 -> (%x11,%x12,lsl #3)[8byte] +e5ee51ac : st1d z12.d, p4, [x13, x14, LSL #3] : st1d %z12.d %p4 -> (%x13,%x14,lsl #3)[8byte] +e5f051ee : st1d z14.d, p4, [x15, x16, LSL #3] : st1d %z14.d %p4 -> (%x15,%x16,lsl #3)[8byte] +e5f25630 : st1d z16.d, p5, [x17, x18, LSL #3] : st1d %z16.d %p5 -> (%x17,%x18,lsl #3)[8byte] +e5f45671 : st1d z17.d, p5, [x19, x20, LSL #3] : st1d %z17.d %p5 -> (%x19,%x20,lsl #3)[8byte] +e5f656b3 : st1d z19.d, p5, [x21, x22, LSL #3] : st1d %z19.d %p5 -> (%x21,%x22,lsl #3)[8byte] +e5f85af5 : st1d z21.d, p6, [x23, x24, LSL #3] : st1d %z21.d %p6 -> (%x23,%x24,lsl #3)[8byte] +e5f95b17 : st1d z23.d, p6, [x24, x25, LSL #3] : st1d %z23.d %p6 -> (%x24,%x25,lsl #3)[8byte] +e5fb5f59 : st1d z25.d, p7, [x26, x27, LSL #3] : st1d %z25.d %p7 -> (%x26,%x27,lsl #3)[8byte] +e5fd5f9b : st1d z27.d, p7, [x28, x29, LSL #3] 
: st1d %z27.d %p7 -> (%x28,%x29,lsl #3)[8byte] +e5fe5fff : st1d z31.d, p7, [sp, x30, LSL #3] : st1d %z31.d %p7 -> (%sp,%x30,lsl #3)[8byte] # ST1D { .D }, , [{, #, MUL VL}] (ST1D-Z.P.BI-_) -e5e8e000 : st1d z0.d, p0, [x0, #-8, MUL VL] : st1d %z0.d %p0 -> -0x0100(%x0)[32byte] -e5e9e482 : st1d z2.d, p1, [x4, #-7, MUL VL] : st1d %z2.d %p1 -> -0xe0(%x4)[32byte] -e5eae8c4 : st1d z4.d, p2, [x6, #-6, MUL VL] : st1d %z4.d %p2 -> -0xc0(%x6)[32byte] -e5ebe906 : st1d z6.d, p2, [x8, #-5, MUL VL] : st1d %z6.d %p2 -> -0xa0(%x8)[32byte] -e5eced48 : st1d z8.d, p3, [x10, #-4, MUL VL] : st1d %z8.d %p3 -> -0x80(%x10)[32byte] -e5eded6a : st1d z10.d, p3, [x11, #-3, MUL VL] : st1d %z10.d %p3 -> -0x60(%x11)[32byte] -e5eef1ac : st1d z12.d, p4, [x13, #-2, MUL VL] : st1d %z12.d %p4 -> -0x40(%x13)[32byte] -e5eff1ee : st1d z14.d, p4, [x15, #-1, MUL VL] : st1d %z14.d %p4 -> -0x20(%x15)[32byte] -e5e0f630 : st1d z16.d, p5, [x17, #0, MUL VL] : st1d %z16.d %p5 -> (%x17)[32byte] -e5e0f671 : st1d z17.d, p5, [x19, #0, MUL VL] : st1d %z17.d %p5 -> (%x19)[32byte] -e5e1f6b3 : st1d z19.d, p5, [x21, #1, MUL VL] : st1d %z19.d %p5 -> +0x20(%x21)[32byte] -e5e2faf5 : st1d z21.d, p6, [x23, #2, MUL VL] : st1d %z21.d %p6 -> +0x40(%x23)[32byte] -e5e3fb17 : st1d z23.d, p6, [x24, #3, MUL VL] : st1d %z23.d %p6 -> +0x60(%x24)[32byte] -e5e4ff59 : st1d z25.d, p7, [x26, #4, MUL VL] : st1d %z25.d %p7 -> +0x80(%x26)[32byte] -e5e5ff9b : st1d z27.d, p7, [x28, #5, MUL VL] : st1d %z27.d %p7 -> +0xa0(%x28)[32byte] -e5e7ffff : st1d z31.d, p7, [sp, #7, MUL VL] : st1d %z31.d %p7 -> +0xe0(%sp)[32byte] +e5e8e000 : st1d z0.d, p0, [x0, #-8, MUL VL] : st1d %z0.d %p0 -> -0x0100(%x0)[8byte] +e5e9e482 : st1d z2.d, p1, [x4, #-7, MUL VL] : st1d %z2.d %p1 -> -0xe0(%x4)[8byte] +e5eae8c4 : st1d z4.d, p2, [x6, #-6, MUL VL] : st1d %z4.d %p2 -> -0xc0(%x6)[8byte] +e5ebe906 : st1d z6.d, p2, [x8, #-5, MUL VL] : st1d %z6.d %p2 -> -0xa0(%x8)[8byte] +e5eced48 : st1d z8.d, p3, [x10, #-4, MUL VL] : st1d %z8.d %p3 -> -0x80(%x10)[8byte] +e5eded6a : st1d 
z10.d, p3, [x11, #-3, MUL VL] : st1d %z10.d %p3 -> -0x60(%x11)[8byte] +e5eef1ac : st1d z12.d, p4, [x13, #-2, MUL VL] : st1d %z12.d %p4 -> -0x40(%x13)[8byte] +e5eff1ee : st1d z14.d, p4, [x15, #-1, MUL VL] : st1d %z14.d %p4 -> -0x20(%x15)[8byte] +e5e0f630 : st1d z16.d, p5, [x17, #0, MUL VL] : st1d %z16.d %p5 -> (%x17)[8byte] +e5e0f671 : st1d z17.d, p5, [x19, #0, MUL VL] : st1d %z17.d %p5 -> (%x19)[8byte] +e5e1f6b3 : st1d z19.d, p5, [x21, #1, MUL VL] : st1d %z19.d %p5 -> +0x20(%x21)[8byte] +e5e2faf5 : st1d z21.d, p6, [x23, #2, MUL VL] : st1d %z21.d %p6 -> +0x40(%x23)[8byte] +e5e3fb17 : st1d z23.d, p6, [x24, #3, MUL VL] : st1d %z23.d %p6 -> +0x60(%x24)[8byte] +e5e4ff59 : st1d z25.d, p7, [x26, #4, MUL VL] : st1d %z25.d %p7 -> +0x80(%x26)[8byte] +e5e5ff9b : st1d z27.d, p7, [x28, #5, MUL VL] : st1d %z27.d %p7 -> +0xa0(%x28)[8byte] +e5e7ffff : st1d z31.d, p7, [sp, #7, MUL VL] : st1d %z31.d %p7 -> +0xe0(%sp)[8byte] # ST1H { .D }, , [, .D, ] (ST1H-Z.P.BZ-D.x32.unscaled) -e4808000 : st1h z0.d, p0, [x0, z0.d, UXTW] : st1h %z0.d %p0 -> (%x0,%z0.d,uxtw)[8byte] -e4858482 : st1h z2.d, p1, [x4, z5.d, UXTW] : st1h %z2.d %p1 -> (%x4,%z5.d,uxtw)[8byte] -e48788c4 : st1h z4.d, p2, [x6, z7.d, UXTW] : st1h %z4.d %p2 -> (%x6,%z7.d,uxtw)[8byte] -e4898906 : st1h z6.d, p2, [x8, z9.d, UXTW] : st1h %z6.d %p2 -> (%x8,%z9.d,uxtw)[8byte] -e48b8d48 : st1h z8.d, p3, [x10, z11.d, UXTW] : st1h %z8.d %p3 -> (%x10,%z11.d,uxtw)[8byte] -e48d8d6a : st1h z10.d, p3, [x11, z13.d, UXTW] : st1h %z10.d %p3 -> (%x11,%z13.d,uxtw)[8byte] -e48f91ac : st1h z12.d, p4, [x13, z15.d, UXTW] : st1h %z12.d %p4 -> (%x13,%z15.d,uxtw)[8byte] -e49191ee : st1h z14.d, p4, [x15, z17.d, UXTW] : st1h %z14.d %p4 -> (%x15,%z17.d,uxtw)[8byte] -e4939630 : st1h z16.d, p5, [x17, z19.d, UXTW] : st1h %z16.d %p5 -> (%x17,%z19.d,uxtw)[8byte] -e4949671 : st1h z17.d, p5, [x19, z20.d, UXTW] : st1h %z17.d %p5 -> (%x19,%z20.d,uxtw)[8byte] -e49696b3 : st1h z19.d, p5, [x21, z22.d, UXTW] : st1h %z19.d %p5 -> (%x21,%z22.d,uxtw)[8byte] -e4989af5 : st1h 
z21.d, p6, [x23, z24.d, UXTW] : st1h %z21.d %p6 -> (%x23,%z24.d,uxtw)[8byte] -e49a9b17 : st1h z23.d, p6, [x24, z26.d, UXTW] : st1h %z23.d %p6 -> (%x24,%z26.d,uxtw)[8byte] -e49c9f59 : st1h z25.d, p7, [x26, z28.d, UXTW] : st1h %z25.d %p7 -> (%x26,%z28.d,uxtw)[8byte] -e49e9f9b : st1h z27.d, p7, [x28, z30.d, UXTW] : st1h %z27.d %p7 -> (%x28,%z30.d,uxtw)[8byte] -e49f9fff : st1h z31.d, p7, [sp, z31.d, UXTW] : st1h %z31.d %p7 -> (%sp,%z31.d,uxtw)[8byte] -e480c000 : st1h z0.d, p0, [x0, z0.d, SXTW] : st1h %z0.d %p0 -> (%x0,%z0.d,sxtw)[8byte] -e485c482 : st1h z2.d, p1, [x4, z5.d, SXTW] : st1h %z2.d %p1 -> (%x4,%z5.d,sxtw)[8byte] -e487c8c4 : st1h z4.d, p2, [x6, z7.d, SXTW] : st1h %z4.d %p2 -> (%x6,%z7.d,sxtw)[8byte] -e489c906 : st1h z6.d, p2, [x8, z9.d, SXTW] : st1h %z6.d %p2 -> (%x8,%z9.d,sxtw)[8byte] -e48bcd48 : st1h z8.d, p3, [x10, z11.d, SXTW] : st1h %z8.d %p3 -> (%x10,%z11.d,sxtw)[8byte] -e48dcd6a : st1h z10.d, p3, [x11, z13.d, SXTW] : st1h %z10.d %p3 -> (%x11,%z13.d,sxtw)[8byte] -e48fd1ac : st1h z12.d, p4, [x13, z15.d, SXTW] : st1h %z12.d %p4 -> (%x13,%z15.d,sxtw)[8byte] -e491d1ee : st1h z14.d, p4, [x15, z17.d, SXTW] : st1h %z14.d %p4 -> (%x15,%z17.d,sxtw)[8byte] -e493d630 : st1h z16.d, p5, [x17, z19.d, SXTW] : st1h %z16.d %p5 -> (%x17,%z19.d,sxtw)[8byte] -e494d671 : st1h z17.d, p5, [x19, z20.d, SXTW] : st1h %z17.d %p5 -> (%x19,%z20.d,sxtw)[8byte] -e496d6b3 : st1h z19.d, p5, [x21, z22.d, SXTW] : st1h %z19.d %p5 -> (%x21,%z22.d,sxtw)[8byte] -e498daf5 : st1h z21.d, p6, [x23, z24.d, SXTW] : st1h %z21.d %p6 -> (%x23,%z24.d,sxtw)[8byte] -e49adb17 : st1h z23.d, p6, [x24, z26.d, SXTW] : st1h %z23.d %p6 -> (%x24,%z26.d,sxtw)[8byte] -e49cdf59 : st1h z25.d, p7, [x26, z28.d, SXTW] : st1h %z25.d %p7 -> (%x26,%z28.d,sxtw)[8byte] -e49edf9b : st1h z27.d, p7, [x28, z30.d, SXTW] : st1h %z27.d %p7 -> (%x28,%z30.d,sxtw)[8byte] -e49fdfff : st1h z31.d, p7, [sp, z31.d, SXTW] : st1h %z31.d %p7 -> (%sp,%z31.d,sxtw)[8byte] +e4808000 : st1h z0.d, p0, [x0, z0.d, UXTW] : st1h %z0.d %p0 -> 
(%x0,%z0.d,uxtw)[2byte] +e4858482 : st1h z2.d, p1, [x4, z5.d, UXTW] : st1h %z2.d %p1 -> (%x4,%z5.d,uxtw)[2byte] +e48788c4 : st1h z4.d, p2, [x6, z7.d, UXTW] : st1h %z4.d %p2 -> (%x6,%z7.d,uxtw)[2byte] +e4898906 : st1h z6.d, p2, [x8, z9.d, UXTW] : st1h %z6.d %p2 -> (%x8,%z9.d,uxtw)[2byte] +e48b8d48 : st1h z8.d, p3, [x10, z11.d, UXTW] : st1h %z8.d %p3 -> (%x10,%z11.d,uxtw)[2byte] +e48d8d6a : st1h z10.d, p3, [x11, z13.d, UXTW] : st1h %z10.d %p3 -> (%x11,%z13.d,uxtw)[2byte] +e48f91ac : st1h z12.d, p4, [x13, z15.d, UXTW] : st1h %z12.d %p4 -> (%x13,%z15.d,uxtw)[2byte] +e49191ee : st1h z14.d, p4, [x15, z17.d, UXTW] : st1h %z14.d %p4 -> (%x15,%z17.d,uxtw)[2byte] +e4939630 : st1h z16.d, p5, [x17, z19.d, UXTW] : st1h %z16.d %p5 -> (%x17,%z19.d,uxtw)[2byte] +e4949671 : st1h z17.d, p5, [x19, z20.d, UXTW] : st1h %z17.d %p5 -> (%x19,%z20.d,uxtw)[2byte] +e49696b3 : st1h z19.d, p5, [x21, z22.d, UXTW] : st1h %z19.d %p5 -> (%x21,%z22.d,uxtw)[2byte] +e4989af5 : st1h z21.d, p6, [x23, z24.d, UXTW] : st1h %z21.d %p6 -> (%x23,%z24.d,uxtw)[2byte] +e49a9b17 : st1h z23.d, p6, [x24, z26.d, UXTW] : st1h %z23.d %p6 -> (%x24,%z26.d,uxtw)[2byte] +e49c9f59 : st1h z25.d, p7, [x26, z28.d, UXTW] : st1h %z25.d %p7 -> (%x26,%z28.d,uxtw)[2byte] +e49e9f9b : st1h z27.d, p7, [x28, z30.d, UXTW] : st1h %z27.d %p7 -> (%x28,%z30.d,uxtw)[2byte] +e49f9fff : st1h z31.d, p7, [sp, z31.d, UXTW] : st1h %z31.d %p7 -> (%sp,%z31.d,uxtw)[2byte] +e480c000 : st1h z0.d, p0, [x0, z0.d, SXTW] : st1h %z0.d %p0 -> (%x0,%z0.d,sxtw)[2byte] +e485c482 : st1h z2.d, p1, [x4, z5.d, SXTW] : st1h %z2.d %p1 -> (%x4,%z5.d,sxtw)[2byte] +e487c8c4 : st1h z4.d, p2, [x6, z7.d, SXTW] : st1h %z4.d %p2 -> (%x6,%z7.d,sxtw)[2byte] +e489c906 : st1h z6.d, p2, [x8, z9.d, SXTW] : st1h %z6.d %p2 -> (%x8,%z9.d,sxtw)[2byte] +e48bcd48 : st1h z8.d, p3, [x10, z11.d, SXTW] : st1h %z8.d %p3 -> (%x10,%z11.d,sxtw)[2byte] +e48dcd6a : st1h z10.d, p3, [x11, z13.d, SXTW] : st1h %z10.d %p3 -> (%x11,%z13.d,sxtw)[2byte] +e48fd1ac : st1h z12.d, p4, [x13, z15.d, SXTW] : 
st1h %z12.d %p4 -> (%x13,%z15.d,sxtw)[2byte] +e491d1ee : st1h z14.d, p4, [x15, z17.d, SXTW] : st1h %z14.d %p4 -> (%x15,%z17.d,sxtw)[2byte] +e493d630 : st1h z16.d, p5, [x17, z19.d, SXTW] : st1h %z16.d %p5 -> (%x17,%z19.d,sxtw)[2byte] +e494d671 : st1h z17.d, p5, [x19, z20.d, SXTW] : st1h %z17.d %p5 -> (%x19,%z20.d,sxtw)[2byte] +e496d6b3 : st1h z19.d, p5, [x21, z22.d, SXTW] : st1h %z19.d %p5 -> (%x21,%z22.d,sxtw)[2byte] +e498daf5 : st1h z21.d, p6, [x23, z24.d, SXTW] : st1h %z21.d %p6 -> (%x23,%z24.d,sxtw)[2byte] +e49adb17 : st1h z23.d, p6, [x24, z26.d, SXTW] : st1h %z23.d %p6 -> (%x24,%z26.d,sxtw)[2byte] +e49cdf59 : st1h z25.d, p7, [x26, z28.d, SXTW] : st1h %z25.d %p7 -> (%x26,%z28.d,sxtw)[2byte] +e49edf9b : st1h z27.d, p7, [x28, z30.d, SXTW] : st1h %z27.d %p7 -> (%x28,%z30.d,sxtw)[2byte] +e49fdfff : st1h z31.d, p7, [sp, z31.d, SXTW] : st1h %z31.d %p7 -> (%sp,%z31.d,sxtw)[2byte] # ST1H { .D }, , [, .D] (ST1H-Z.P.BZ-D.64.unscaled) -e480a000 : st1h z0.d, p0, [x0, z0.d] : st1h %z0.d %p0 -> (%x0,%z0.d)[8byte] -e485a482 : st1h z2.d, p1, [x4, z5.d] : st1h %z2.d %p1 -> (%x4,%z5.d)[8byte] -e487a8c4 : st1h z4.d, p2, [x6, z7.d] : st1h %z4.d %p2 -> (%x6,%z7.d)[8byte] -e489a906 : st1h z6.d, p2, [x8, z9.d] : st1h %z6.d %p2 -> (%x8,%z9.d)[8byte] -e48bad48 : st1h z8.d, p3, [x10, z11.d] : st1h %z8.d %p3 -> (%x10,%z11.d)[8byte] -e48dad6a : st1h z10.d, p3, [x11, z13.d] : st1h %z10.d %p3 -> (%x11,%z13.d)[8byte] -e48fb1ac : st1h z12.d, p4, [x13, z15.d] : st1h %z12.d %p4 -> (%x13,%z15.d)[8byte] -e491b1ee : st1h z14.d, p4, [x15, z17.d] : st1h %z14.d %p4 -> (%x15,%z17.d)[8byte] -e493b630 : st1h z16.d, p5, [x17, z19.d] : st1h %z16.d %p5 -> (%x17,%z19.d)[8byte] -e494b671 : st1h z17.d, p5, [x19, z20.d] : st1h %z17.d %p5 -> (%x19,%z20.d)[8byte] -e496b6b3 : st1h z19.d, p5, [x21, z22.d] : st1h %z19.d %p5 -> (%x21,%z22.d)[8byte] -e498baf5 : st1h z21.d, p6, [x23, z24.d] : st1h %z21.d %p6 -> (%x23,%z24.d)[8byte] -e49abb17 : st1h z23.d, p6, [x24, z26.d] : st1h %z23.d %p6 -> (%x24,%z26.d)[8byte] 
-e49cbf59 : st1h z25.d, p7, [x26, z28.d] : st1h %z25.d %p7 -> (%x26,%z28.d)[8byte] -e49ebf9b : st1h z27.d, p7, [x28, z30.d] : st1h %z27.d %p7 -> (%x28,%z30.d)[8byte] -e49fbfff : st1h z31.d, p7, [sp, z31.d] : st1h %z31.d %p7 -> (%sp,%z31.d)[8byte] +e480a000 : st1h z0.d, p0, [x0, z0.d] : st1h %z0.d %p0 -> (%x0,%z0.d)[2byte] +e485a482 : st1h z2.d, p1, [x4, z5.d] : st1h %z2.d %p1 -> (%x4,%z5.d)[2byte] +e487a8c4 : st1h z4.d, p2, [x6, z7.d] : st1h %z4.d %p2 -> (%x6,%z7.d)[2byte] +e489a906 : st1h z6.d, p2, [x8, z9.d] : st1h %z6.d %p2 -> (%x8,%z9.d)[2byte] +e48bad48 : st1h z8.d, p3, [x10, z11.d] : st1h %z8.d %p3 -> (%x10,%z11.d)[2byte] +e48dad6a : st1h z10.d, p3, [x11, z13.d] : st1h %z10.d %p3 -> (%x11,%z13.d)[2byte] +e48fb1ac : st1h z12.d, p4, [x13, z15.d] : st1h %z12.d %p4 -> (%x13,%z15.d)[2byte] +e491b1ee : st1h z14.d, p4, [x15, z17.d] : st1h %z14.d %p4 -> (%x15,%z17.d)[2byte] +e493b630 : st1h z16.d, p5, [x17, z19.d] : st1h %z16.d %p5 -> (%x17,%z19.d)[2byte] +e494b671 : st1h z17.d, p5, [x19, z20.d] : st1h %z17.d %p5 -> (%x19,%z20.d)[2byte] +e496b6b3 : st1h z19.d, p5, [x21, z22.d] : st1h %z19.d %p5 -> (%x21,%z22.d)[2byte] +e498baf5 : st1h z21.d, p6, [x23, z24.d] : st1h %z21.d %p6 -> (%x23,%z24.d)[2byte] +e49abb17 : st1h z23.d, p6, [x24, z26.d] : st1h %z23.d %p6 -> (%x24,%z26.d)[2byte] +e49cbf59 : st1h z25.d, p7, [x26, z28.d] : st1h %z25.d %p7 -> (%x26,%z28.d)[2byte] +e49ebf9b : st1h z27.d, p7, [x28, z30.d] : st1h %z27.d %p7 -> (%x28,%z30.d)[2byte] +e49fbfff : st1h z31.d, p7, [sp, z31.d] : st1h %z31.d %p7 -> (%sp,%z31.d)[2byte] # ST1H { . 
}, , [, , LSL #1] (ST1H-Z.P.BR-_) -e4a04000 : st1h z0.h, p0, [x0, x0, LSL #1] : st1h %z0.h %p0 -> (%x0,%x0,lsl #1)[32byte] -e4a54482 : st1h z2.h, p1, [x4, x5, LSL #1] : st1h %z2.h %p1 -> (%x4,%x5,lsl #1)[32byte] -e4a748c4 : st1h z4.h, p2, [x6, x7, LSL #1] : st1h %z4.h %p2 -> (%x6,%x7,lsl #1)[32byte] -e4a94906 : st1h z6.h, p2, [x8, x9, LSL #1] : st1h %z6.h %p2 -> (%x8,%x9,lsl #1)[32byte] -e4ab4d48 : st1h z8.h, p3, [x10, x11, LSL #1] : st1h %z8.h %p3 -> (%x10,%x11,lsl #1)[32byte] -e4ac4d6a : st1h z10.h, p3, [x11, x12, LSL #1] : st1h %z10.h %p3 -> (%x11,%x12,lsl #1)[32byte] -e4ae51ac : st1h z12.h, p4, [x13, x14, LSL #1] : st1h %z12.h %p4 -> (%x13,%x14,lsl #1)[32byte] -e4b051ee : st1h z14.h, p4, [x15, x16, LSL #1] : st1h %z14.h %p4 -> (%x15,%x16,lsl #1)[32byte] -e4b25630 : st1h z16.h, p5, [x17, x18, LSL #1] : st1h %z16.h %p5 -> (%x17,%x18,lsl #1)[32byte] -e4b45671 : st1h z17.h, p5, [x19, x20, LSL #1] : st1h %z17.h %p5 -> (%x19,%x20,lsl #1)[32byte] -e4b656b3 : st1h z19.h, p5, [x21, x22, LSL #1] : st1h %z19.h %p5 -> (%x21,%x22,lsl #1)[32byte] -e4b85af5 : st1h z21.h, p6, [x23, x24, LSL #1] : st1h %z21.h %p6 -> (%x23,%x24,lsl #1)[32byte] -e4b95b17 : st1h z23.h, p6, [x24, x25, LSL #1] : st1h %z23.h %p6 -> (%x24,%x25,lsl #1)[32byte] -e4bb5f59 : st1h z25.h, p7, [x26, x27, LSL #1] : st1h %z25.h %p7 -> (%x26,%x27,lsl #1)[32byte] -e4bd5f9b : st1h z27.h, p7, [x28, x29, LSL #1] : st1h %z27.h %p7 -> (%x28,%x29,lsl #1)[32byte] -e4be5fff : st1h z31.h, p7, [sp, x30, LSL #1] : st1h %z31.h %p7 -> (%sp,%x30,lsl #1)[32byte] -e4c04000 : st1h z0.s, p0, [x0, x0, LSL #1] : st1h %z0.s %p0 -> (%x0,%x0,lsl #1)[16byte] -e4c54482 : st1h z2.s, p1, [x4, x5, LSL #1] : st1h %z2.s %p1 -> (%x4,%x5,lsl #1)[16byte] -e4c748c4 : st1h z4.s, p2, [x6, x7, LSL #1] : st1h %z4.s %p2 -> (%x6,%x7,lsl #1)[16byte] -e4c94906 : st1h z6.s, p2, [x8, x9, LSL #1] : st1h %z6.s %p2 -> (%x8,%x9,lsl #1)[16byte] -e4cb4d48 : st1h z8.s, p3, [x10, x11, LSL #1] : st1h %z8.s %p3 -> (%x10,%x11,lsl #1)[16byte] -e4cc4d6a : st1h z10.s, 
p3, [x11, x12, LSL #1] : st1h %z10.s %p3 -> (%x11,%x12,lsl #1)[16byte] -e4ce51ac : st1h z12.s, p4, [x13, x14, LSL #1] : st1h %z12.s %p4 -> (%x13,%x14,lsl #1)[16byte] -e4d051ee : st1h z14.s, p4, [x15, x16, LSL #1] : st1h %z14.s %p4 -> (%x15,%x16,lsl #1)[16byte] -e4d25630 : st1h z16.s, p5, [x17, x18, LSL #1] : st1h %z16.s %p5 -> (%x17,%x18,lsl #1)[16byte] -e4d45671 : st1h z17.s, p5, [x19, x20, LSL #1] : st1h %z17.s %p5 -> (%x19,%x20,lsl #1)[16byte] -e4d656b3 : st1h z19.s, p5, [x21, x22, LSL #1] : st1h %z19.s %p5 -> (%x21,%x22,lsl #1)[16byte] -e4d85af5 : st1h z21.s, p6, [x23, x24, LSL #1] : st1h %z21.s %p6 -> (%x23,%x24,lsl #1)[16byte] -e4d95b17 : st1h z23.s, p6, [x24, x25, LSL #1] : st1h %z23.s %p6 -> (%x24,%x25,lsl #1)[16byte] -e4db5f59 : st1h z25.s, p7, [x26, x27, LSL #1] : st1h %z25.s %p7 -> (%x26,%x27,lsl #1)[16byte] -e4dd5f9b : st1h z27.s, p7, [x28, x29, LSL #1] : st1h %z27.s %p7 -> (%x28,%x29,lsl #1)[16byte] -e4de5fff : st1h z31.s, p7, [sp, x30, LSL #1] : st1h %z31.s %p7 -> (%sp,%x30,lsl #1)[16byte] -e4e04000 : st1h z0.d, p0, [x0, x0, LSL #1] : st1h %z0.d %p0 -> (%x0,%x0,lsl #1)[8byte] -e4e54482 : st1h z2.d, p1, [x4, x5, LSL #1] : st1h %z2.d %p1 -> (%x4,%x5,lsl #1)[8byte] -e4e748c4 : st1h z4.d, p2, [x6, x7, LSL #1] : st1h %z4.d %p2 -> (%x6,%x7,lsl #1)[8byte] -e4e94906 : st1h z6.d, p2, [x8, x9, LSL #1] : st1h %z6.d %p2 -> (%x8,%x9,lsl #1)[8byte] -e4eb4d48 : st1h z8.d, p3, [x10, x11, LSL #1] : st1h %z8.d %p3 -> (%x10,%x11,lsl #1)[8byte] -e4ec4d6a : st1h z10.d, p3, [x11, x12, LSL #1] : st1h %z10.d %p3 -> (%x11,%x12,lsl #1)[8byte] -e4ee51ac : st1h z12.d, p4, [x13, x14, LSL #1] : st1h %z12.d %p4 -> (%x13,%x14,lsl #1)[8byte] -e4f051ee : st1h z14.d, p4, [x15, x16, LSL #1] : st1h %z14.d %p4 -> (%x15,%x16,lsl #1)[8byte] -e4f25630 : st1h z16.d, p5, [x17, x18, LSL #1] : st1h %z16.d %p5 -> (%x17,%x18,lsl #1)[8byte] -e4f45671 : st1h z17.d, p5, [x19, x20, LSL #1] : st1h %z17.d %p5 -> (%x19,%x20,lsl #1)[8byte] -e4f656b3 : st1h z19.d, p5, [x21, x22, LSL #1] : st1h %z19.d %p5 
-> (%x21,%x22,lsl #1)[8byte] -e4f85af5 : st1h z21.d, p6, [x23, x24, LSL #1] : st1h %z21.d %p6 -> (%x23,%x24,lsl #1)[8byte] -e4f95b17 : st1h z23.d, p6, [x24, x25, LSL #1] : st1h %z23.d %p6 -> (%x24,%x25,lsl #1)[8byte] -e4fb5f59 : st1h z25.d, p7, [x26, x27, LSL #1] : st1h %z25.d %p7 -> (%x26,%x27,lsl #1)[8byte] -e4fd5f9b : st1h z27.d, p7, [x28, x29, LSL #1] : st1h %z27.d %p7 -> (%x28,%x29,lsl #1)[8byte] -e4fe5fff : st1h z31.d, p7, [sp, x30, LSL #1] : st1h %z31.d %p7 -> (%sp,%x30,lsl #1)[8byte] +e4a04000 : st1h z0.h, p0, [x0, x0, LSL #1] : st1h %z0.h %p0 -> (%x0,%x0,lsl #1)[2byte] +e4a54482 : st1h z2.h, p1, [x4, x5, LSL #1] : st1h %z2.h %p1 -> (%x4,%x5,lsl #1)[2byte] +e4a748c4 : st1h z4.h, p2, [x6, x7, LSL #1] : st1h %z4.h %p2 -> (%x6,%x7,lsl #1)[2byte] +e4a94906 : st1h z6.h, p2, [x8, x9, LSL #1] : st1h %z6.h %p2 -> (%x8,%x9,lsl #1)[2byte] +e4ab4d48 : st1h z8.h, p3, [x10, x11, LSL #1] : st1h %z8.h %p3 -> (%x10,%x11,lsl #1)[2byte] +e4ac4d6a : st1h z10.h, p3, [x11, x12, LSL #1] : st1h %z10.h %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ae51ac : st1h z12.h, p4, [x13, x14, LSL #1] : st1h %z12.h %p4 -> (%x13,%x14,lsl #1)[2byte] +e4b051ee : st1h z14.h, p4, [x15, x16, LSL #1] : st1h %z14.h %p4 -> (%x15,%x16,lsl #1)[2byte] +e4b25630 : st1h z16.h, p5, [x17, x18, LSL #1] : st1h %z16.h %p5 -> (%x17,%x18,lsl #1)[2byte] +e4b45671 : st1h z17.h, p5, [x19, x20, LSL #1] : st1h %z17.h %p5 -> (%x19,%x20,lsl #1)[2byte] +e4b656b3 : st1h z19.h, p5, [x21, x22, LSL #1] : st1h %z19.h %p5 -> (%x21,%x22,lsl #1)[2byte] +e4b85af5 : st1h z21.h, p6, [x23, x24, LSL #1] : st1h %z21.h %p6 -> (%x23,%x24,lsl #1)[2byte] +e4b95b17 : st1h z23.h, p6, [x24, x25, LSL #1] : st1h %z23.h %p6 -> (%x24,%x25,lsl #1)[2byte] +e4bb5f59 : st1h z25.h, p7, [x26, x27, LSL #1] : st1h %z25.h %p7 -> (%x26,%x27,lsl #1)[2byte] +e4bd5f9b : st1h z27.h, p7, [x28, x29, LSL #1] : st1h %z27.h %p7 -> (%x28,%x29,lsl #1)[2byte] +e4be5fff : st1h z31.h, p7, [sp, x30, LSL #1] : st1h %z31.h %p7 -> (%sp,%x30,lsl #1)[2byte] +e4c04000 : st1h z0.s, p0, 
[x0, x0, LSL #1] : st1h %z0.s %p0 -> (%x0,%x0,lsl #1)[2byte] +e4c54482 : st1h z2.s, p1, [x4, x5, LSL #1] : st1h %z2.s %p1 -> (%x4,%x5,lsl #1)[2byte] +e4c748c4 : st1h z4.s, p2, [x6, x7, LSL #1] : st1h %z4.s %p2 -> (%x6,%x7,lsl #1)[2byte] +e4c94906 : st1h z6.s, p2, [x8, x9, LSL #1] : st1h %z6.s %p2 -> (%x8,%x9,lsl #1)[2byte] +e4cb4d48 : st1h z8.s, p3, [x10, x11, LSL #1] : st1h %z8.s %p3 -> (%x10,%x11,lsl #1)[2byte] +e4cc4d6a : st1h z10.s, p3, [x11, x12, LSL #1] : st1h %z10.s %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ce51ac : st1h z12.s, p4, [x13, x14, LSL #1] : st1h %z12.s %p4 -> (%x13,%x14,lsl #1)[2byte] +e4d051ee : st1h z14.s, p4, [x15, x16, LSL #1] : st1h %z14.s %p4 -> (%x15,%x16,lsl #1)[2byte] +e4d25630 : st1h z16.s, p5, [x17, x18, LSL #1] : st1h %z16.s %p5 -> (%x17,%x18,lsl #1)[2byte] +e4d45671 : st1h z17.s, p5, [x19, x20, LSL #1] : st1h %z17.s %p5 -> (%x19,%x20,lsl #1)[2byte] +e4d656b3 : st1h z19.s, p5, [x21, x22, LSL #1] : st1h %z19.s %p5 -> (%x21,%x22,lsl #1)[2byte] +e4d85af5 : st1h z21.s, p6, [x23, x24, LSL #1] : st1h %z21.s %p6 -> (%x23,%x24,lsl #1)[2byte] +e4d95b17 : st1h z23.s, p6, [x24, x25, LSL #1] : st1h %z23.s %p6 -> (%x24,%x25,lsl #1)[2byte] +e4db5f59 : st1h z25.s, p7, [x26, x27, LSL #1] : st1h %z25.s %p7 -> (%x26,%x27,lsl #1)[2byte] +e4dd5f9b : st1h z27.s, p7, [x28, x29, LSL #1] : st1h %z27.s %p7 -> (%x28,%x29,lsl #1)[2byte] +e4de5fff : st1h z31.s, p7, [sp, x30, LSL #1] : st1h %z31.s %p7 -> (%sp,%x30,lsl #1)[2byte] +e4e04000 : st1h z0.d, p0, [x0, x0, LSL #1] : st1h %z0.d %p0 -> (%x0,%x0,lsl #1)[2byte] +e4e54482 : st1h z2.d, p1, [x4, x5, LSL #1] : st1h %z2.d %p1 -> (%x4,%x5,lsl #1)[2byte] +e4e748c4 : st1h z4.d, p2, [x6, x7, LSL #1] : st1h %z4.d %p2 -> (%x6,%x7,lsl #1)[2byte] +e4e94906 : st1h z6.d, p2, [x8, x9, LSL #1] : st1h %z6.d %p2 -> (%x8,%x9,lsl #1)[2byte] +e4eb4d48 : st1h z8.d, p3, [x10, x11, LSL #1] : st1h %z8.d %p3 -> (%x10,%x11,lsl #1)[2byte] +e4ec4d6a : st1h z10.d, p3, [x11, x12, LSL #1] : st1h %z10.d %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ee51ac : 
st1h z12.d, p4, [x13, x14, LSL #1] : st1h %z12.d %p4 -> (%x13,%x14,lsl #1)[2byte] +e4f051ee : st1h z14.d, p4, [x15, x16, LSL #1] : st1h %z14.d %p4 -> (%x15,%x16,lsl #1)[2byte] +e4f25630 : st1h z16.d, p5, [x17, x18, LSL #1] : st1h %z16.d %p5 -> (%x17,%x18,lsl #1)[2byte] +e4f45671 : st1h z17.d, p5, [x19, x20, LSL #1] : st1h %z17.d %p5 -> (%x19,%x20,lsl #1)[2byte] +e4f656b3 : st1h z19.d, p5, [x21, x22, LSL #1] : st1h %z19.d %p5 -> (%x21,%x22,lsl #1)[2byte] +e4f85af5 : st1h z21.d, p6, [x23, x24, LSL #1] : st1h %z21.d %p6 -> (%x23,%x24,lsl #1)[2byte] +e4f95b17 : st1h z23.d, p6, [x24, x25, LSL #1] : st1h %z23.d %p6 -> (%x24,%x25,lsl #1)[2byte] +e4fb5f59 : st1h z25.d, p7, [x26, x27, LSL #1] : st1h %z25.d %p7 -> (%x26,%x27,lsl #1)[2byte] +e4fd5f9b : st1h z27.d, p7, [x28, x29, LSL #1] : st1h %z27.d %p7 -> (%x28,%x29,lsl #1)[2byte] +e4fe5fff : st1h z31.d, p7, [sp, x30, LSL #1] : st1h %z31.d %p7 -> (%sp,%x30,lsl #1)[2byte] # ST1H { .D }, , [, .D, #1] (ST1H-Z.P.BZ-D.x32.scaled) -e4a08000 : st1h z0.d, p0, [x0, z0.d, UXTW #1] : st1h %z0.d %p0 -> (%x0,%z0.d,uxtw #1)[8byte] -e4a58482 : st1h z2.d, p1, [x4, z5.d, UXTW #1] : st1h %z2.d %p1 -> (%x4,%z5.d,uxtw #1)[8byte] -e4a788c4 : st1h z4.d, p2, [x6, z7.d, UXTW #1] : st1h %z4.d %p2 -> (%x6,%z7.d,uxtw #1)[8byte] -e4a98906 : st1h z6.d, p2, [x8, z9.d, UXTW #1] : st1h %z6.d %p2 -> (%x8,%z9.d,uxtw #1)[8byte] -e4ab8d48 : st1h z8.d, p3, [x10, z11.d, UXTW #1] : st1h %z8.d %p3 -> (%x10,%z11.d,uxtw #1)[8byte] -e4ad8d6a : st1h z10.d, p3, [x11, z13.d, UXTW #1] : st1h %z10.d %p3 -> (%x11,%z13.d,uxtw #1)[8byte] -e4af91ac : st1h z12.d, p4, [x13, z15.d, UXTW #1] : st1h %z12.d %p4 -> (%x13,%z15.d,uxtw #1)[8byte] -e4b191ee : st1h z14.d, p4, [x15, z17.d, UXTW #1] : st1h %z14.d %p4 -> (%x15,%z17.d,uxtw #1)[8byte] -e4b39630 : st1h z16.d, p5, [x17, z19.d, UXTW #1] : st1h %z16.d %p5 -> (%x17,%z19.d,uxtw #1)[8byte] -e4b49671 : st1h z17.d, p5, [x19, z20.d, UXTW #1] : st1h %z17.d %p5 -> (%x19,%z20.d,uxtw #1)[8byte] -e4b696b3 : st1h z19.d, p5, [x21, z22.d, 
UXTW #1] : st1h %z19.d %p5 -> (%x21,%z22.d,uxtw #1)[8byte] -e4b89af5 : st1h z21.d, p6, [x23, z24.d, UXTW #1] : st1h %z21.d %p6 -> (%x23,%z24.d,uxtw #1)[8byte] -e4ba9b17 : st1h z23.d, p6, [x24, z26.d, UXTW #1] : st1h %z23.d %p6 -> (%x24,%z26.d,uxtw #1)[8byte] -e4bc9f59 : st1h z25.d, p7, [x26, z28.d, UXTW #1] : st1h %z25.d %p7 -> (%x26,%z28.d,uxtw #1)[8byte] -e4be9f9b : st1h z27.d, p7, [x28, z30.d, UXTW #1] : st1h %z27.d %p7 -> (%x28,%z30.d,uxtw #1)[8byte] -e4bf9fff : st1h z31.d, p7, [sp, z31.d, UXTW #1] : st1h %z31.d %p7 -> (%sp,%z31.d,uxtw #1)[8byte] -e4a0c000 : st1h z0.d, p0, [x0, z0.d, SXTW #1] : st1h %z0.d %p0 -> (%x0,%z0.d,sxtw #1)[8byte] -e4a5c482 : st1h z2.d, p1, [x4, z5.d, SXTW #1] : st1h %z2.d %p1 -> (%x4,%z5.d,sxtw #1)[8byte] -e4a7c8c4 : st1h z4.d, p2, [x6, z7.d, SXTW #1] : st1h %z4.d %p2 -> (%x6,%z7.d,sxtw #1)[8byte] -e4a9c906 : st1h z6.d, p2, [x8, z9.d, SXTW #1] : st1h %z6.d %p2 -> (%x8,%z9.d,sxtw #1)[8byte] -e4abcd48 : st1h z8.d, p3, [x10, z11.d, SXTW #1] : st1h %z8.d %p3 -> (%x10,%z11.d,sxtw #1)[8byte] -e4adcd6a : st1h z10.d, p3, [x11, z13.d, SXTW #1] : st1h %z10.d %p3 -> (%x11,%z13.d,sxtw #1)[8byte] -e4afd1ac : st1h z12.d, p4, [x13, z15.d, SXTW #1] : st1h %z12.d %p4 -> (%x13,%z15.d,sxtw #1)[8byte] -e4b1d1ee : st1h z14.d, p4, [x15, z17.d, SXTW #1] : st1h %z14.d %p4 -> (%x15,%z17.d,sxtw #1)[8byte] -e4b3d630 : st1h z16.d, p5, [x17, z19.d, SXTW #1] : st1h %z16.d %p5 -> (%x17,%z19.d,sxtw #1)[8byte] -e4b4d671 : st1h z17.d, p5, [x19, z20.d, SXTW #1] : st1h %z17.d %p5 -> (%x19,%z20.d,sxtw #1)[8byte] -e4b6d6b3 : st1h z19.d, p5, [x21, z22.d, SXTW #1] : st1h %z19.d %p5 -> (%x21,%z22.d,sxtw #1)[8byte] -e4b8daf5 : st1h z21.d, p6, [x23, z24.d, SXTW #1] : st1h %z21.d %p6 -> (%x23,%z24.d,sxtw #1)[8byte] -e4badb17 : st1h z23.d, p6, [x24, z26.d, SXTW #1] : st1h %z23.d %p6 -> (%x24,%z26.d,sxtw #1)[8byte] -e4bcdf59 : st1h z25.d, p7, [x26, z28.d, SXTW #1] : st1h %z25.d %p7 -> (%x26,%z28.d,sxtw #1)[8byte] -e4bedf9b : st1h z27.d, p7, [x28, z30.d, SXTW #1] : st1h %z27.d %p7 
-> (%x28,%z30.d,sxtw #1)[8byte] -e4bfdfff : st1h z31.d, p7, [sp, z31.d, SXTW #1] : st1h %z31.d %p7 -> (%sp,%z31.d,sxtw #1)[8byte] +e4a08000 : st1h z0.d, p0, [x0, z0.d, UXTW #1] : st1h %z0.d %p0 -> (%x0,%z0.d,uxtw #1)[2byte] +e4a58482 : st1h z2.d, p1, [x4, z5.d, UXTW #1] : st1h %z2.d %p1 -> (%x4,%z5.d,uxtw #1)[2byte] +e4a788c4 : st1h z4.d, p2, [x6, z7.d, UXTW #1] : st1h %z4.d %p2 -> (%x6,%z7.d,uxtw #1)[2byte] +e4a98906 : st1h z6.d, p2, [x8, z9.d, UXTW #1] : st1h %z6.d %p2 -> (%x8,%z9.d,uxtw #1)[2byte] +e4ab8d48 : st1h z8.d, p3, [x10, z11.d, UXTW #1] : st1h %z8.d %p3 -> (%x10,%z11.d,uxtw #1)[2byte] +e4ad8d6a : st1h z10.d, p3, [x11, z13.d, UXTW #1] : st1h %z10.d %p3 -> (%x11,%z13.d,uxtw #1)[2byte] +e4af91ac : st1h z12.d, p4, [x13, z15.d, UXTW #1] : st1h %z12.d %p4 -> (%x13,%z15.d,uxtw #1)[2byte] +e4b191ee : st1h z14.d, p4, [x15, z17.d, UXTW #1] : st1h %z14.d %p4 -> (%x15,%z17.d,uxtw #1)[2byte] +e4b39630 : st1h z16.d, p5, [x17, z19.d, UXTW #1] : st1h %z16.d %p5 -> (%x17,%z19.d,uxtw #1)[2byte] +e4b49671 : st1h z17.d, p5, [x19, z20.d, UXTW #1] : st1h %z17.d %p5 -> (%x19,%z20.d,uxtw #1)[2byte] +e4b696b3 : st1h z19.d, p5, [x21, z22.d, UXTW #1] : st1h %z19.d %p5 -> (%x21,%z22.d,uxtw #1)[2byte] +e4b89af5 : st1h z21.d, p6, [x23, z24.d, UXTW #1] : st1h %z21.d %p6 -> (%x23,%z24.d,uxtw #1)[2byte] +e4ba9b17 : st1h z23.d, p6, [x24, z26.d, UXTW #1] : st1h %z23.d %p6 -> (%x24,%z26.d,uxtw #1)[2byte] +e4bc9f59 : st1h z25.d, p7, [x26, z28.d, UXTW #1] : st1h %z25.d %p7 -> (%x26,%z28.d,uxtw #1)[2byte] +e4be9f9b : st1h z27.d, p7, [x28, z30.d, UXTW #1] : st1h %z27.d %p7 -> (%x28,%z30.d,uxtw #1)[2byte] +e4bf9fff : st1h z31.d, p7, [sp, z31.d, UXTW #1] : st1h %z31.d %p7 -> (%sp,%z31.d,uxtw #1)[2byte] +e4a0c000 : st1h z0.d, p0, [x0, z0.d, SXTW #1] : st1h %z0.d %p0 -> (%x0,%z0.d,sxtw #1)[2byte] +e4a5c482 : st1h z2.d, p1, [x4, z5.d, SXTW #1] : st1h %z2.d %p1 -> (%x4,%z5.d,sxtw #1)[2byte] +e4a7c8c4 : st1h z4.d, p2, [x6, z7.d, SXTW #1] : st1h %z4.d %p2 -> (%x6,%z7.d,sxtw #1)[2byte] +e4a9c906 : 
st1h z6.d, p2, [x8, z9.d, SXTW #1] : st1h %z6.d %p2 -> (%x8,%z9.d,sxtw #1)[2byte] +e4abcd48 : st1h z8.d, p3, [x10, z11.d, SXTW #1] : st1h %z8.d %p3 -> (%x10,%z11.d,sxtw #1)[2byte] +e4adcd6a : st1h z10.d, p3, [x11, z13.d, SXTW #1] : st1h %z10.d %p3 -> (%x11,%z13.d,sxtw #1)[2byte] +e4afd1ac : st1h z12.d, p4, [x13, z15.d, SXTW #1] : st1h %z12.d %p4 -> (%x13,%z15.d,sxtw #1)[2byte] +e4b1d1ee : st1h z14.d, p4, [x15, z17.d, SXTW #1] : st1h %z14.d %p4 -> (%x15,%z17.d,sxtw #1)[2byte] +e4b3d630 : st1h z16.d, p5, [x17, z19.d, SXTW #1] : st1h %z16.d %p5 -> (%x17,%z19.d,sxtw #1)[2byte] +e4b4d671 : st1h z17.d, p5, [x19, z20.d, SXTW #1] : st1h %z17.d %p5 -> (%x19,%z20.d,sxtw #1)[2byte] +e4b6d6b3 : st1h z19.d, p5, [x21, z22.d, SXTW #1] : st1h %z19.d %p5 -> (%x21,%z22.d,sxtw #1)[2byte] +e4b8daf5 : st1h z21.d, p6, [x23, z24.d, SXTW #1] : st1h %z21.d %p6 -> (%x23,%z24.d,sxtw #1)[2byte] +e4badb17 : st1h z23.d, p6, [x24, z26.d, SXTW #1] : st1h %z23.d %p6 -> (%x24,%z26.d,sxtw #1)[2byte] +e4bcdf59 : st1h z25.d, p7, [x26, z28.d, SXTW #1] : st1h %z25.d %p7 -> (%x26,%z28.d,sxtw #1)[2byte] +e4bedf9b : st1h z27.d, p7, [x28, z30.d, SXTW #1] : st1h %z27.d %p7 -> (%x28,%z30.d,sxtw #1)[2byte] +e4bfdfff : st1h z31.d, p7, [sp, z31.d, SXTW #1] : st1h %z31.d %p7 -> (%sp,%z31.d,sxtw #1)[2byte] # ST1H { .D }, , [, .D, LSL #1] (ST1H-Z.P.BZ-D.64.scaled) -e4a0a000 : st1h z0.d, p0, [x0, z0.d, LSL #1] : st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[8byte] -e4a5a482 : st1h z2.d, p1, [x4, z5.d, LSL #1] : st1h %z2.d %p1 -> (%x4,%z5.d,lsl #1)[8byte] -e4a7a8c4 : st1h z4.d, p2, [x6, z7.d, LSL #1] : st1h %z4.d %p2 -> (%x6,%z7.d,lsl #1)[8byte] -e4a9a906 : st1h z6.d, p2, [x8, z9.d, LSL #1] : st1h %z6.d %p2 -> (%x8,%z9.d,lsl #1)[8byte] -e4abad48 : st1h z8.d, p3, [x10, z11.d, LSL #1] : st1h %z8.d %p3 -> (%x10,%z11.d,lsl #1)[8byte] -e4adad6a : st1h z10.d, p3, [x11, z13.d, LSL #1] : st1h %z10.d %p3 -> (%x11,%z13.d,lsl #1)[8byte] -e4afb1ac : st1h z12.d, p4, [x13, z15.d, LSL #1] : st1h %z12.d %p4 -> (%x13,%z15.d,lsl #1)[8byte] 
-e4b1b1ee : st1h z14.d, p4, [x15, z17.d, LSL #1] : st1h %z14.d %p4 -> (%x15,%z17.d,lsl #1)[8byte] -e4b3b630 : st1h z16.d, p5, [x17, z19.d, LSL #1] : st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[8byte] -e4b4b671 : st1h z17.d, p5, [x19, z20.d, LSL #1] : st1h %z17.d %p5 -> (%x19,%z20.d,lsl #1)[8byte] -e4b6b6b3 : st1h z19.d, p5, [x21, z22.d, LSL #1] : st1h %z19.d %p5 -> (%x21,%z22.d,lsl #1)[8byte] -e4b8baf5 : st1h z21.d, p6, [x23, z24.d, LSL #1] : st1h %z21.d %p6 -> (%x23,%z24.d,lsl #1)[8byte] -e4babb17 : st1h z23.d, p6, [x24, z26.d, LSL #1] : st1h %z23.d %p6 -> (%x24,%z26.d,lsl #1)[8byte] -e4bcbf59 : st1h z25.d, p7, [x26, z28.d, LSL #1] : st1h %z25.d %p7 -> (%x26,%z28.d,lsl #1)[8byte] -e4bebf9b : st1h z27.d, p7, [x28, z30.d, LSL #1] : st1h %z27.d %p7 -> (%x28,%z30.d,lsl #1)[8byte] -e4bfbfff : st1h z31.d, p7, [sp, z31.d, LSL #1] : st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[8byte] +e4a0a000 : st1h z0.d, p0, [x0, z0.d, LSL #1] : st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[2byte] +e4a5a482 : st1h z2.d, p1, [x4, z5.d, LSL #1] : st1h %z2.d %p1 -> (%x4,%z5.d,lsl #1)[2byte] +e4a7a8c4 : st1h z4.d, p2, [x6, z7.d, LSL #1] : st1h %z4.d %p2 -> (%x6,%z7.d,lsl #1)[2byte] +e4a9a906 : st1h z6.d, p2, [x8, z9.d, LSL #1] : st1h %z6.d %p2 -> (%x8,%z9.d,lsl #1)[2byte] +e4abad48 : st1h z8.d, p3, [x10, z11.d, LSL #1] : st1h %z8.d %p3 -> (%x10,%z11.d,lsl #1)[2byte] +e4adad6a : st1h z10.d, p3, [x11, z13.d, LSL #1] : st1h %z10.d %p3 -> (%x11,%z13.d,lsl #1)[2byte] +e4afb1ac : st1h z12.d, p4, [x13, z15.d, LSL #1] : st1h %z12.d %p4 -> (%x13,%z15.d,lsl #1)[2byte] +e4b1b1ee : st1h z14.d, p4, [x15, z17.d, LSL #1] : st1h %z14.d %p4 -> (%x15,%z17.d,lsl #1)[2byte] +e4b3b630 : st1h z16.d, p5, [x17, z19.d, LSL #1] : st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[2byte] +e4b4b671 : st1h z17.d, p5, [x19, z20.d, LSL #1] : st1h %z17.d %p5 -> (%x19,%z20.d,lsl #1)[2byte] +e4b6b6b3 : st1h z19.d, p5, [x21, z22.d, LSL #1] : st1h %z19.d %p5 -> (%x21,%z22.d,lsl #1)[2byte] +e4b8baf5 : st1h z21.d, p6, [x23, z24.d, LSL #1] : st1h %z21.d %p6 
-> (%x23,%z24.d,lsl #1)[2byte] +e4babb17 : st1h z23.d, p6, [x24, z26.d, LSL #1] : st1h %z23.d %p6 -> (%x24,%z26.d,lsl #1)[2byte] +e4bcbf59 : st1h z25.d, p7, [x26, z28.d, LSL #1] : st1h %z25.d %p7 -> (%x26,%z28.d,lsl #1)[2byte] +e4bebf9b : st1h z27.d, p7, [x28, z30.d, LSL #1] : st1h %z27.d %p7 -> (%x28,%z30.d,lsl #1)[2byte] +e4bfbfff : st1h z31.d, p7, [sp, z31.d, LSL #1] : st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[2byte] # ST1H { . }, , [{, #, MUL VL}] (ST1H-Z.P.BI-_) -e4a8e000 : st1h z0.h, p0, [x0, #-8, MUL VL] : st1h %z0.h %p0 -> -0x0100(%x0)[32byte] -e4a9e482 : st1h z2.h, p1, [x4, #-7, MUL VL] : st1h %z2.h %p1 -> -0xe0(%x4)[32byte] -e4aae8c4 : st1h z4.h, p2, [x6, #-6, MUL VL] : st1h %z4.h %p2 -> -0xc0(%x6)[32byte] -e4abe906 : st1h z6.h, p2, [x8, #-5, MUL VL] : st1h %z6.h %p2 -> -0xa0(%x8)[32byte] -e4aced48 : st1h z8.h, p3, [x10, #-4, MUL VL] : st1h %z8.h %p3 -> -0x80(%x10)[32byte] -e4aded6a : st1h z10.h, p3, [x11, #-3, MUL VL] : st1h %z10.h %p3 -> -0x60(%x11)[32byte] -e4aef1ac : st1h z12.h, p4, [x13, #-2, MUL VL] : st1h %z12.h %p4 -> -0x40(%x13)[32byte] -e4aff1ee : st1h z14.h, p4, [x15, #-1, MUL VL] : st1h %z14.h %p4 -> -0x20(%x15)[32byte] -e4a0f630 : st1h z16.h, p5, [x17, #0, MUL VL] : st1h %z16.h %p5 -> (%x17)[32byte] -e4a0f671 : st1h z17.h, p5, [x19, #0, MUL VL] : st1h %z17.h %p5 -> (%x19)[32byte] -e4a1f6b3 : st1h z19.h, p5, [x21, #1, MUL VL] : st1h %z19.h %p5 -> +0x20(%x21)[32byte] -e4a2faf5 : st1h z21.h, p6, [x23, #2, MUL VL] : st1h %z21.h %p6 -> +0x40(%x23)[32byte] -e4a3fb17 : st1h z23.h, p6, [x24, #3, MUL VL] : st1h %z23.h %p6 -> +0x60(%x24)[32byte] -e4a4ff59 : st1h z25.h, p7, [x26, #4, MUL VL] : st1h %z25.h %p7 -> +0x80(%x26)[32byte] -e4a5ff9b : st1h z27.h, p7, [x28, #5, MUL VL] : st1h %z27.h %p7 -> +0xa0(%x28)[32byte] -e4a7ffff : st1h z31.h, p7, [sp, #7, MUL VL] : st1h %z31.h %p7 -> +0xe0(%sp)[32byte] -e4c8e000 : st1h z0.s, p0, [x0, #-8, MUL VL] : st1h %z0.s %p0 -> -0x80(%x0)[16byte] -e4c9e482 : st1h z2.s, p1, [x4, #-7, MUL VL] : st1h %z2.s %p1 -> 
-0x70(%x4)[16byte] -e4cae8c4 : st1h z4.s, p2, [x6, #-6, MUL VL] : st1h %z4.s %p2 -> -0x60(%x6)[16byte] -e4cbe906 : st1h z6.s, p2, [x8, #-5, MUL VL] : st1h %z6.s %p2 -> -0x50(%x8)[16byte] -e4cced48 : st1h z8.s, p3, [x10, #-4, MUL VL] : st1h %z8.s %p3 -> -0x40(%x10)[16byte] -e4cded6a : st1h z10.s, p3, [x11, #-3, MUL VL] : st1h %z10.s %p3 -> -0x30(%x11)[16byte] -e4cef1ac : st1h z12.s, p4, [x13, #-2, MUL VL] : st1h %z12.s %p4 -> -0x20(%x13)[16byte] -e4cff1ee : st1h z14.s, p4, [x15, #-1, MUL VL] : st1h %z14.s %p4 -> -0x10(%x15)[16byte] -e4c0f630 : st1h z16.s, p5, [x17, #0, MUL VL] : st1h %z16.s %p5 -> (%x17)[16byte] -e4c0f671 : st1h z17.s, p5, [x19, #0, MUL VL] : st1h %z17.s %p5 -> (%x19)[16byte] -e4c1f6b3 : st1h z19.s, p5, [x21, #1, MUL VL] : st1h %z19.s %p5 -> +0x10(%x21)[16byte] -e4c2faf5 : st1h z21.s, p6, [x23, #2, MUL VL] : st1h %z21.s %p6 -> +0x20(%x23)[16byte] -e4c3fb17 : st1h z23.s, p6, [x24, #3, MUL VL] : st1h %z23.s %p6 -> +0x30(%x24)[16byte] -e4c4ff59 : st1h z25.s, p7, [x26, #4, MUL VL] : st1h %z25.s %p7 -> +0x40(%x26)[16byte] -e4c5ff9b : st1h z27.s, p7, [x28, #5, MUL VL] : st1h %z27.s %p7 -> +0x50(%x28)[16byte] -e4c7ffff : st1h z31.s, p7, [sp, #7, MUL VL] : st1h %z31.s %p7 -> +0x70(%sp)[16byte] -e4e8e000 : st1h z0.d, p0, [x0, #-8, MUL VL] : st1h %z0.d %p0 -> -0x40(%x0)[8byte] -e4e9e482 : st1h z2.d, p1, [x4, #-7, MUL VL] : st1h %z2.d %p1 -> -0x38(%x4)[8byte] -e4eae8c4 : st1h z4.d, p2, [x6, #-6, MUL VL] : st1h %z4.d %p2 -> -0x30(%x6)[8byte] -e4ebe906 : st1h z6.d, p2, [x8, #-5, MUL VL] : st1h %z6.d %p2 -> -0x28(%x8)[8byte] -e4eced48 : st1h z8.d, p3, [x10, #-4, MUL VL] : st1h %z8.d %p3 -> -0x20(%x10)[8byte] -e4eded6a : st1h z10.d, p3, [x11, #-3, MUL VL] : st1h %z10.d %p3 -> -0x18(%x11)[8byte] -e4eef1ac : st1h z12.d, p4, [x13, #-2, MUL VL] : st1h %z12.d %p4 -> -0x10(%x13)[8byte] -e4eff1ee : st1h z14.d, p4, [x15, #-1, MUL VL] : st1h %z14.d %p4 -> -0x08(%x15)[8byte] -e4e0f630 : st1h z16.d, p5, [x17, #0, MUL VL] : st1h %z16.d %p5 -> (%x17)[8byte] -e4e0f671 : st1h 
z17.d, p5, [x19, #0, MUL VL] : st1h %z17.d %p5 -> (%x19)[8byte] -e4e1f6b3 : st1h z19.d, p5, [x21, #1, MUL VL] : st1h %z19.d %p5 -> +0x08(%x21)[8byte] -e4e2faf5 : st1h z21.d, p6, [x23, #2, MUL VL] : st1h %z21.d %p6 -> +0x10(%x23)[8byte] -e4e3fb17 : st1h z23.d, p6, [x24, #3, MUL VL] : st1h %z23.d %p6 -> +0x18(%x24)[8byte] -e4e4ff59 : st1h z25.d, p7, [x26, #4, MUL VL] : st1h %z25.d %p7 -> +0x20(%x26)[8byte] -e4e5ff9b : st1h z27.d, p7, [x28, #5, MUL VL] : st1h %z27.d %p7 -> +0x28(%x28)[8byte] -e4e7ffff : st1h z31.d, p7, [sp, #7, MUL VL] : st1h %z31.d %p7 -> +0x38(%sp)[8byte] +e4a8e000 : st1h z0.h, p0, [x0, #-8, MUL VL] : st1h %z0.h %p0 -> -0x0100(%x0)[2byte] +e4a9e482 : st1h z2.h, p1, [x4, #-7, MUL VL] : st1h %z2.h %p1 -> -0xe0(%x4)[2byte] +e4aae8c4 : st1h z4.h, p2, [x6, #-6, MUL VL] : st1h %z4.h %p2 -> -0xc0(%x6)[2byte] +e4abe906 : st1h z6.h, p2, [x8, #-5, MUL VL] : st1h %z6.h %p2 -> -0xa0(%x8)[2byte] +e4aced48 : st1h z8.h, p3, [x10, #-4, MUL VL] : st1h %z8.h %p3 -> -0x80(%x10)[2byte] +e4aded6a : st1h z10.h, p3, [x11, #-3, MUL VL] : st1h %z10.h %p3 -> -0x60(%x11)[2byte] +e4aef1ac : st1h z12.h, p4, [x13, #-2, MUL VL] : st1h %z12.h %p4 -> -0x40(%x13)[2byte] +e4aff1ee : st1h z14.h, p4, [x15, #-1, MUL VL] : st1h %z14.h %p4 -> -0x20(%x15)[2byte] +e4a0f630 : st1h z16.h, p5, [x17, #0, MUL VL] : st1h %z16.h %p5 -> (%x17)[2byte] +e4a0f671 : st1h z17.h, p5, [x19, #0, MUL VL] : st1h %z17.h %p5 -> (%x19)[2byte] +e4a1f6b3 : st1h z19.h, p5, [x21, #1, MUL VL] : st1h %z19.h %p5 -> +0x20(%x21)[2byte] +e4a2faf5 : st1h z21.h, p6, [x23, #2, MUL VL] : st1h %z21.h %p6 -> +0x40(%x23)[2byte] +e4a3fb17 : st1h z23.h, p6, [x24, #3, MUL VL] : st1h %z23.h %p6 -> +0x60(%x24)[2byte] +e4a4ff59 : st1h z25.h, p7, [x26, #4, MUL VL] : st1h %z25.h %p7 -> +0x80(%x26)[2byte] +e4a5ff9b : st1h z27.h, p7, [x28, #5, MUL VL] : st1h %z27.h %p7 -> +0xa0(%x28)[2byte] +e4a7ffff : st1h z31.h, p7, [sp, #7, MUL VL] : st1h %z31.h %p7 -> +0xe0(%sp)[2byte] +e4c8e000 : st1h z0.s, p0, [x0, #-8, MUL VL] : st1h %z0.s %p0 -> 
-0x80(%x0)[2byte] +e4c9e482 : st1h z2.s, p1, [x4, #-7, MUL VL] : st1h %z2.s %p1 -> -0x70(%x4)[2byte] +e4cae8c4 : st1h z4.s, p2, [x6, #-6, MUL VL] : st1h %z4.s %p2 -> -0x60(%x6)[2byte] +e4cbe906 : st1h z6.s, p2, [x8, #-5, MUL VL] : st1h %z6.s %p2 -> -0x50(%x8)[2byte] +e4cced48 : st1h z8.s, p3, [x10, #-4, MUL VL] : st1h %z8.s %p3 -> -0x40(%x10)[2byte] +e4cded6a : st1h z10.s, p3, [x11, #-3, MUL VL] : st1h %z10.s %p3 -> -0x30(%x11)[2byte] +e4cef1ac : st1h z12.s, p4, [x13, #-2, MUL VL] : st1h %z12.s %p4 -> -0x20(%x13)[2byte] +e4cff1ee : st1h z14.s, p4, [x15, #-1, MUL VL] : st1h %z14.s %p4 -> -0x10(%x15)[2byte] +e4c0f630 : st1h z16.s, p5, [x17, #0, MUL VL] : st1h %z16.s %p5 -> (%x17)[2byte] +e4c0f671 : st1h z17.s, p5, [x19, #0, MUL VL] : st1h %z17.s %p5 -> (%x19)[2byte] +e4c1f6b3 : st1h z19.s, p5, [x21, #1, MUL VL] : st1h %z19.s %p5 -> +0x10(%x21)[2byte] +e4c2faf5 : st1h z21.s, p6, [x23, #2, MUL VL] : st1h %z21.s %p6 -> +0x20(%x23)[2byte] +e4c3fb17 : st1h z23.s, p6, [x24, #3, MUL VL] : st1h %z23.s %p6 -> +0x30(%x24)[2byte] +e4c4ff59 : st1h z25.s, p7, [x26, #4, MUL VL] : st1h %z25.s %p7 -> +0x40(%x26)[2byte] +e4c5ff9b : st1h z27.s, p7, [x28, #5, MUL VL] : st1h %z27.s %p7 -> +0x50(%x28)[2byte] +e4c7ffff : st1h z31.s, p7, [sp, #7, MUL VL] : st1h %z31.s %p7 -> +0x70(%sp)[2byte] +e4e8e000 : st1h z0.d, p0, [x0, #-8, MUL VL] : st1h %z0.d %p0 -> -0x40(%x0)[2byte] +e4e9e482 : st1h z2.d, p1, [x4, #-7, MUL VL] : st1h %z2.d %p1 -> -0x38(%x4)[2byte] +e4eae8c4 : st1h z4.d, p2, [x6, #-6, MUL VL] : st1h %z4.d %p2 -> -0x30(%x6)[2byte] +e4ebe906 : st1h z6.d, p2, [x8, #-5, MUL VL] : st1h %z6.d %p2 -> -0x28(%x8)[2byte] +e4eced48 : st1h z8.d, p3, [x10, #-4, MUL VL] : st1h %z8.d %p3 -> -0x20(%x10)[2byte] +e4eded6a : st1h z10.d, p3, [x11, #-3, MUL VL] : st1h %z10.d %p3 -> -0x18(%x11)[2byte] +e4eef1ac : st1h z12.d, p4, [x13, #-2, MUL VL] : st1h %z12.d %p4 -> -0x10(%x13)[2byte] +e4eff1ee : st1h z14.d, p4, [x15, #-1, MUL VL] : st1h %z14.d %p4 -> -0x08(%x15)[2byte] +e4e0f630 : st1h z16.d, p5, 
[x17, #0, MUL VL] : st1h %z16.d %p5 -> (%x17)[2byte] +e4e0f671 : st1h z17.d, p5, [x19, #0, MUL VL] : st1h %z17.d %p5 -> (%x19)[2byte] +e4e1f6b3 : st1h z19.d, p5, [x21, #1, MUL VL] : st1h %z19.d %p5 -> +0x08(%x21)[2byte] +e4e2faf5 : st1h z21.d, p6, [x23, #2, MUL VL] : st1h %z21.d %p6 -> +0x10(%x23)[2byte] +e4e3fb17 : st1h z23.d, p6, [x24, #3, MUL VL] : st1h %z23.d %p6 -> +0x18(%x24)[2byte] +e4e4ff59 : st1h z25.d, p7, [x26, #4, MUL VL] : st1h %z25.d %p7 -> +0x20(%x26)[2byte] +e4e5ff9b : st1h z27.d, p7, [x28, #5, MUL VL] : st1h %z27.d %p7 -> +0x28(%x28)[2byte] +e4e7ffff : st1h z31.d, p7, [sp, #7, MUL VL] : st1h %z31.d %p7 -> +0x38(%sp)[2byte] # ST1H { .S }, , [, .S, ] (ST1H-Z.P.BZ-S.x32.unscaled) -e4c08000 : st1h z0.s, p0, [x0, z0.s, UXTW] : st1h %z0.s %p0 -> (%x0,%z0.s,uxtw)[16byte] -e4c58482 : st1h z2.s, p1, [x4, z5.s, UXTW] : st1h %z2.s %p1 -> (%x4,%z5.s,uxtw)[16byte] -e4c788c4 : st1h z4.s, p2, [x6, z7.s, UXTW] : st1h %z4.s %p2 -> (%x6,%z7.s,uxtw)[16byte] -e4c98906 : st1h z6.s, p2, [x8, z9.s, UXTW] : st1h %z6.s %p2 -> (%x8,%z9.s,uxtw)[16byte] -e4cb8d48 : st1h z8.s, p3, [x10, z11.s, UXTW] : st1h %z8.s %p3 -> (%x10,%z11.s,uxtw)[16byte] -e4cd8d6a : st1h z10.s, p3, [x11, z13.s, UXTW] : st1h %z10.s %p3 -> (%x11,%z13.s,uxtw)[16byte] -e4cf91ac : st1h z12.s, p4, [x13, z15.s, UXTW] : st1h %z12.s %p4 -> (%x13,%z15.s,uxtw)[16byte] -e4d191ee : st1h z14.s, p4, [x15, z17.s, UXTW] : st1h %z14.s %p4 -> (%x15,%z17.s,uxtw)[16byte] -e4d39630 : st1h z16.s, p5, [x17, z19.s, UXTW] : st1h %z16.s %p5 -> (%x17,%z19.s,uxtw)[16byte] -e4d49671 : st1h z17.s, p5, [x19, z20.s, UXTW] : st1h %z17.s %p5 -> (%x19,%z20.s,uxtw)[16byte] -e4d696b3 : st1h z19.s, p5, [x21, z22.s, UXTW] : st1h %z19.s %p5 -> (%x21,%z22.s,uxtw)[16byte] -e4d89af5 : st1h z21.s, p6, [x23, z24.s, UXTW] : st1h %z21.s %p6 -> (%x23,%z24.s,uxtw)[16byte] -e4da9b17 : st1h z23.s, p6, [x24, z26.s, UXTW] : st1h %z23.s %p6 -> (%x24,%z26.s,uxtw)[16byte] -e4dc9f59 : st1h z25.s, p7, [x26, z28.s, UXTW] : st1h %z25.s %p7 -> 
(%x26,%z28.s,uxtw)[16byte] -e4de9f9b : st1h z27.s, p7, [x28, z30.s, UXTW] : st1h %z27.s %p7 -> (%x28,%z30.s,uxtw)[16byte] -e4df9fff : st1h z31.s, p7, [sp, z31.s, UXTW] : st1h %z31.s %p7 -> (%sp,%z31.s,uxtw)[16byte] -e4c0c000 : st1h z0.s, p0, [x0, z0.s, SXTW] : st1h %z0.s %p0 -> (%x0,%z0.s,sxtw)[16byte] -e4c5c482 : st1h z2.s, p1, [x4, z5.s, SXTW] : st1h %z2.s %p1 -> (%x4,%z5.s,sxtw)[16byte] -e4c7c8c4 : st1h z4.s, p2, [x6, z7.s, SXTW] : st1h %z4.s %p2 -> (%x6,%z7.s,sxtw)[16byte] -e4c9c906 : st1h z6.s, p2, [x8, z9.s, SXTW] : st1h %z6.s %p2 -> (%x8,%z9.s,sxtw)[16byte] -e4cbcd48 : st1h z8.s, p3, [x10, z11.s, SXTW] : st1h %z8.s %p3 -> (%x10,%z11.s,sxtw)[16byte] -e4cdcd6a : st1h z10.s, p3, [x11, z13.s, SXTW] : st1h %z10.s %p3 -> (%x11,%z13.s,sxtw)[16byte] -e4cfd1ac : st1h z12.s, p4, [x13, z15.s, SXTW] : st1h %z12.s %p4 -> (%x13,%z15.s,sxtw)[16byte] -e4d1d1ee : st1h z14.s, p4, [x15, z17.s, SXTW] : st1h %z14.s %p4 -> (%x15,%z17.s,sxtw)[16byte] -e4d3d630 : st1h z16.s, p5, [x17, z19.s, SXTW] : st1h %z16.s %p5 -> (%x17,%z19.s,sxtw)[16byte] -e4d4d671 : st1h z17.s, p5, [x19, z20.s, SXTW] : st1h %z17.s %p5 -> (%x19,%z20.s,sxtw)[16byte] -e4d6d6b3 : st1h z19.s, p5, [x21, z22.s, SXTW] : st1h %z19.s %p5 -> (%x21,%z22.s,sxtw)[16byte] -e4d8daf5 : st1h z21.s, p6, [x23, z24.s, SXTW] : st1h %z21.s %p6 -> (%x23,%z24.s,sxtw)[16byte] -e4dadb17 : st1h z23.s, p6, [x24, z26.s, SXTW] : st1h %z23.s %p6 -> (%x24,%z26.s,sxtw)[16byte] -e4dcdf59 : st1h z25.s, p7, [x26, z28.s, SXTW] : st1h %z25.s %p7 -> (%x26,%z28.s,sxtw)[16byte] -e4dedf9b : st1h z27.s, p7, [x28, z30.s, SXTW] : st1h %z27.s %p7 -> (%x28,%z30.s,sxtw)[16byte] -e4dfdfff : st1h z31.s, p7, [sp, z31.s, SXTW] : st1h %z31.s %p7 -> (%sp,%z31.s,sxtw)[16byte] +e4c08000 : st1h z0.s, p0, [x0, z0.s, UXTW] : st1h %z0.s %p0 -> (%x0,%z0.s,uxtw)[2byte] +e4c58482 : st1h z2.s, p1, [x4, z5.s, UXTW] : st1h %z2.s %p1 -> (%x4,%z5.s,uxtw)[2byte] +e4c788c4 : st1h z4.s, p2, [x6, z7.s, UXTW] : st1h %z4.s %p2 -> (%x6,%z7.s,uxtw)[2byte] +e4c98906 : st1h z6.s, p2, 
[x8, z9.s, UXTW] : st1h %z6.s %p2 -> (%x8,%z9.s,uxtw)[2byte] +e4cb8d48 : st1h z8.s, p3, [x10, z11.s, UXTW] : st1h %z8.s %p3 -> (%x10,%z11.s,uxtw)[2byte] +e4cd8d6a : st1h z10.s, p3, [x11, z13.s, UXTW] : st1h %z10.s %p3 -> (%x11,%z13.s,uxtw)[2byte] +e4cf91ac : st1h z12.s, p4, [x13, z15.s, UXTW] : st1h %z12.s %p4 -> (%x13,%z15.s,uxtw)[2byte] +e4d191ee : st1h z14.s, p4, [x15, z17.s, UXTW] : st1h %z14.s %p4 -> (%x15,%z17.s,uxtw)[2byte] +e4d39630 : st1h z16.s, p5, [x17, z19.s, UXTW] : st1h %z16.s %p5 -> (%x17,%z19.s,uxtw)[2byte] +e4d49671 : st1h z17.s, p5, [x19, z20.s, UXTW] : st1h %z17.s %p5 -> (%x19,%z20.s,uxtw)[2byte] +e4d696b3 : st1h z19.s, p5, [x21, z22.s, UXTW] : st1h %z19.s %p5 -> (%x21,%z22.s,uxtw)[2byte] +e4d89af5 : st1h z21.s, p6, [x23, z24.s, UXTW] : st1h %z21.s %p6 -> (%x23,%z24.s,uxtw)[2byte] +e4da9b17 : st1h z23.s, p6, [x24, z26.s, UXTW] : st1h %z23.s %p6 -> (%x24,%z26.s,uxtw)[2byte] +e4dc9f59 : st1h z25.s, p7, [x26, z28.s, UXTW] : st1h %z25.s %p7 -> (%x26,%z28.s,uxtw)[2byte] +e4de9f9b : st1h z27.s, p7, [x28, z30.s, UXTW] : st1h %z27.s %p7 -> (%x28,%z30.s,uxtw)[2byte] +e4df9fff : st1h z31.s, p7, [sp, z31.s, UXTW] : st1h %z31.s %p7 -> (%sp,%z31.s,uxtw)[2byte] +e4c0c000 : st1h z0.s, p0, [x0, z0.s, SXTW] : st1h %z0.s %p0 -> (%x0,%z0.s,sxtw)[2byte] +e4c5c482 : st1h z2.s, p1, [x4, z5.s, SXTW] : st1h %z2.s %p1 -> (%x4,%z5.s,sxtw)[2byte] +e4c7c8c4 : st1h z4.s, p2, [x6, z7.s, SXTW] : st1h %z4.s %p2 -> (%x6,%z7.s,sxtw)[2byte] +e4c9c906 : st1h z6.s, p2, [x8, z9.s, SXTW] : st1h %z6.s %p2 -> (%x8,%z9.s,sxtw)[2byte] +e4cbcd48 : st1h z8.s, p3, [x10, z11.s, SXTW] : st1h %z8.s %p3 -> (%x10,%z11.s,sxtw)[2byte] +e4cdcd6a : st1h z10.s, p3, [x11, z13.s, SXTW] : st1h %z10.s %p3 -> (%x11,%z13.s,sxtw)[2byte] +e4cfd1ac : st1h z12.s, p4, [x13, z15.s, SXTW] : st1h %z12.s %p4 -> (%x13,%z15.s,sxtw)[2byte] +e4d1d1ee : st1h z14.s, p4, [x15, z17.s, SXTW] : st1h %z14.s %p4 -> (%x15,%z17.s,sxtw)[2byte] +e4d3d630 : st1h z16.s, p5, [x17, z19.s, SXTW] : st1h %z16.s %p5 -> 
(%x17,%z19.s,sxtw)[2byte] +e4d4d671 : st1h z17.s, p5, [x19, z20.s, SXTW] : st1h %z17.s %p5 -> (%x19,%z20.s,sxtw)[2byte] +e4d6d6b3 : st1h z19.s, p5, [x21, z22.s, SXTW] : st1h %z19.s %p5 -> (%x21,%z22.s,sxtw)[2byte] +e4d8daf5 : st1h z21.s, p6, [x23, z24.s, SXTW] : st1h %z21.s %p6 -> (%x23,%z24.s,sxtw)[2byte] +e4dadb17 : st1h z23.s, p6, [x24, z26.s, SXTW] : st1h %z23.s %p6 -> (%x24,%z26.s,sxtw)[2byte] +e4dcdf59 : st1h z25.s, p7, [x26, z28.s, SXTW] : st1h %z25.s %p7 -> (%x26,%z28.s,sxtw)[2byte] +e4dedf9b : st1h z27.s, p7, [x28, z30.s, SXTW] : st1h %z27.s %p7 -> (%x28,%z30.s,sxtw)[2byte] +e4dfdfff : st1h z31.s, p7, [sp, z31.s, SXTW] : st1h %z31.s %p7 -> (%sp,%z31.s,sxtw)[2byte] # ST1H { .D }, , [.D{, #}] (ST1H-Z.P.AI-D) -e4c0a000 : st1h z0.d, p0, [z0.d, #0] : st1h %z0.d %p0 -> (%z0.d)[8byte] -e4c2a482 : st1h z2.d, p1, [z4.d, #4] : st1h %z2.d %p1 -> +0x04(%z4.d)[8byte] -e4c4a8c4 : st1h z4.d, p2, [z6.d, #8] : st1h %z4.d %p2 -> +0x08(%z6.d)[8byte] -e4c6a906 : st1h z6.d, p2, [z8.d, #12] : st1h %z6.d %p2 -> +0x0c(%z8.d)[8byte] -e4c8ad48 : st1h z8.d, p3, [z10.d, #16] : st1h %z8.d %p3 -> +0x10(%z10.d)[8byte] -e4caad8a : st1h z10.d, p3, [z12.d, #20] : st1h %z10.d %p3 -> +0x14(%z12.d)[8byte] -e4ccb1cc : st1h z12.d, p4, [z14.d, #24] : st1h %z12.d %p4 -> +0x18(%z14.d)[8byte] -e4ceb20e : st1h z14.d, p4, [z16.d, #28] : st1h %z14.d %p4 -> +0x1c(%z16.d)[8byte] -e4d0b650 : st1h z16.d, p5, [z18.d, #32] : st1h %z16.d %p5 -> +0x20(%z18.d)[8byte] -e4d1b671 : st1h z17.d, p5, [z19.d, #34] : st1h %z17.d %p5 -> +0x22(%z19.d)[8byte] -e4d3b6b3 : st1h z19.d, p5, [z21.d, #38] : st1h %z19.d %p5 -> +0x26(%z21.d)[8byte] -e4d5baf5 : st1h z21.d, p6, [z23.d, #42] : st1h %z21.d %p6 -> +0x2a(%z23.d)[8byte] -e4d7bb37 : st1h z23.d, p6, [z25.d, #46] : st1h %z23.d %p6 -> +0x2e(%z25.d)[8byte] -e4d9bf79 : st1h z25.d, p7, [z27.d, #50] : st1h %z25.d %p7 -> +0x32(%z27.d)[8byte] -e4dbbfbb : st1h z27.d, p7, [z29.d, #54] : st1h %z27.d %p7 -> +0x36(%z29.d)[8byte] -e4dfbfff : st1h z31.d, p7, [z31.d, #62] : st1h %z31.d 
%p7 -> +0x3e(%z31.d)[8byte] +e4c0a000 : st1h z0.d, p0, [z0.d, #0] : st1h %z0.d %p0 -> (%z0.d)[2byte] +e4c2a482 : st1h z2.d, p1, [z4.d, #4] : st1h %z2.d %p1 -> +0x04(%z4.d)[2byte] +e4c4a8c4 : st1h z4.d, p2, [z6.d, #8] : st1h %z4.d %p2 -> +0x08(%z6.d)[2byte] +e4c6a906 : st1h z6.d, p2, [z8.d, #12] : st1h %z6.d %p2 -> +0x0c(%z8.d)[2byte] +e4c8ad48 : st1h z8.d, p3, [z10.d, #16] : st1h %z8.d %p3 -> +0x10(%z10.d)[2byte] +e4caad8a : st1h z10.d, p3, [z12.d, #20] : st1h %z10.d %p3 -> +0x14(%z12.d)[2byte] +e4ccb1cc : st1h z12.d, p4, [z14.d, #24] : st1h %z12.d %p4 -> +0x18(%z14.d)[2byte] +e4ceb20e : st1h z14.d, p4, [z16.d, #28] : st1h %z14.d %p4 -> +0x1c(%z16.d)[2byte] +e4d0b650 : st1h z16.d, p5, [z18.d, #32] : st1h %z16.d %p5 -> +0x20(%z18.d)[2byte] +e4d1b671 : st1h z17.d, p5, [z19.d, #34] : st1h %z17.d %p5 -> +0x22(%z19.d)[2byte] +e4d3b6b3 : st1h z19.d, p5, [z21.d, #38] : st1h %z19.d %p5 -> +0x26(%z21.d)[2byte] +e4d5baf5 : st1h z21.d, p6, [z23.d, #42] : st1h %z21.d %p6 -> +0x2a(%z23.d)[2byte] +e4d7bb37 : st1h z23.d, p6, [z25.d, #46] : st1h %z23.d %p6 -> +0x2e(%z25.d)[2byte] +e4d9bf79 : st1h z25.d, p7, [z27.d, #50] : st1h %z25.d %p7 -> +0x32(%z27.d)[2byte] +e4dbbfbb : st1h z27.d, p7, [z29.d, #54] : st1h %z27.d %p7 -> +0x36(%z29.d)[2byte] +e4dfbfff : st1h z31.d, p7, [z31.d, #62] : st1h %z31.d %p7 -> +0x3e(%z31.d)[2byte] # ST1H { .S }, , [, .S, #1] (ST1H-Z.P.BZ-S.x32.scaled) -e4e08000 : st1h z0.s, p0, [x0, z0.s, UXTW #1] : st1h %z0.s %p0 -> (%x0,%z0.s,uxtw #1)[16byte] -e4e58482 : st1h z2.s, p1, [x4, z5.s, UXTW #1] : st1h %z2.s %p1 -> (%x4,%z5.s,uxtw #1)[16byte] -e4e788c4 : st1h z4.s, p2, [x6, z7.s, UXTW #1] : st1h %z4.s %p2 -> (%x6,%z7.s,uxtw #1)[16byte] -e4e98906 : st1h z6.s, p2, [x8, z9.s, UXTW #1] : st1h %z6.s %p2 -> (%x8,%z9.s,uxtw #1)[16byte] -e4eb8d48 : st1h z8.s, p3, [x10, z11.s, UXTW #1] : st1h %z8.s %p3 -> (%x10,%z11.s,uxtw #1)[16byte] -e4ed8d6a : st1h z10.s, p3, [x11, z13.s, UXTW #1] : st1h %z10.s %p3 -> (%x11,%z13.s,uxtw #1)[16byte] -e4ef91ac : st1h z12.s, p4, [x13, 
z15.s, UXTW #1] : st1h %z12.s %p4 -> (%x13,%z15.s,uxtw #1)[16byte] -e4f191ee : st1h z14.s, p4, [x15, z17.s, UXTW #1] : st1h %z14.s %p4 -> (%x15,%z17.s,uxtw #1)[16byte] -e4f39630 : st1h z16.s, p5, [x17, z19.s, UXTW #1] : st1h %z16.s %p5 -> (%x17,%z19.s,uxtw #1)[16byte] -e4f49671 : st1h z17.s, p5, [x19, z20.s, UXTW #1] : st1h %z17.s %p5 -> (%x19,%z20.s,uxtw #1)[16byte] -e4f696b3 : st1h z19.s, p5, [x21, z22.s, UXTW #1] : st1h %z19.s %p5 -> (%x21,%z22.s,uxtw #1)[16byte] -e4f89af5 : st1h z21.s, p6, [x23, z24.s, UXTW #1] : st1h %z21.s %p6 -> (%x23,%z24.s,uxtw #1)[16byte] -e4fa9b17 : st1h z23.s, p6, [x24, z26.s, UXTW #1] : st1h %z23.s %p6 -> (%x24,%z26.s,uxtw #1)[16byte] -e4fc9f59 : st1h z25.s, p7, [x26, z28.s, UXTW #1] : st1h %z25.s %p7 -> (%x26,%z28.s,uxtw #1)[16byte] -e4fe9f9b : st1h z27.s, p7, [x28, z30.s, UXTW #1] : st1h %z27.s %p7 -> (%x28,%z30.s,uxtw #1)[16byte] -e4ff9fff : st1h z31.s, p7, [sp, z31.s, UXTW #1] : st1h %z31.s %p7 -> (%sp,%z31.s,uxtw #1)[16byte] -e4e0c000 : st1h z0.s, p0, [x0, z0.s, SXTW #1] : st1h %z0.s %p0 -> (%x0,%z0.s,sxtw #1)[16byte] -e4e5c482 : st1h z2.s, p1, [x4, z5.s, SXTW #1] : st1h %z2.s %p1 -> (%x4,%z5.s,sxtw #1)[16byte] -e4e7c8c4 : st1h z4.s, p2, [x6, z7.s, SXTW #1] : st1h %z4.s %p2 -> (%x6,%z7.s,sxtw #1)[16byte] -e4e9c906 : st1h z6.s, p2, [x8, z9.s, SXTW #1] : st1h %z6.s %p2 -> (%x8,%z9.s,sxtw #1)[16byte] -e4ebcd48 : st1h z8.s, p3, [x10, z11.s, SXTW #1] : st1h %z8.s %p3 -> (%x10,%z11.s,sxtw #1)[16byte] -e4edcd6a : st1h z10.s, p3, [x11, z13.s, SXTW #1] : st1h %z10.s %p3 -> (%x11,%z13.s,sxtw #1)[16byte] -e4efd1ac : st1h z12.s, p4, [x13, z15.s, SXTW #1] : st1h %z12.s %p4 -> (%x13,%z15.s,sxtw #1)[16byte] -e4f1d1ee : st1h z14.s, p4, [x15, z17.s, SXTW #1] : st1h %z14.s %p4 -> (%x15,%z17.s,sxtw #1)[16byte] -e4f3d630 : st1h z16.s, p5, [x17, z19.s, SXTW #1] : st1h %z16.s %p5 -> (%x17,%z19.s,sxtw #1)[16byte] -e4f4d671 : st1h z17.s, p5, [x19, z20.s, SXTW #1] : st1h %z17.s %p5 -> (%x19,%z20.s,sxtw #1)[16byte] -e4f6d6b3 : st1h z19.s, p5, [x21, z22.s, 
SXTW #1] : st1h %z19.s %p5 -> (%x21,%z22.s,sxtw #1)[16byte] -e4f8daf5 : st1h z21.s, p6, [x23, z24.s, SXTW #1] : st1h %z21.s %p6 -> (%x23,%z24.s,sxtw #1)[16byte] -e4fadb17 : st1h z23.s, p6, [x24, z26.s, SXTW #1] : st1h %z23.s %p6 -> (%x24,%z26.s,sxtw #1)[16byte] -e4fcdf59 : st1h z25.s, p7, [x26, z28.s, SXTW #1] : st1h %z25.s %p7 -> (%x26,%z28.s,sxtw #1)[16byte] -e4fedf9b : st1h z27.s, p7, [x28, z30.s, SXTW #1] : st1h %z27.s %p7 -> (%x28,%z30.s,sxtw #1)[16byte] -e4ffdfff : st1h z31.s, p7, [sp, z31.s, SXTW #1] : st1h %z31.s %p7 -> (%sp,%z31.s,sxtw #1)[16byte] +e4e08000 : st1h z0.s, p0, [x0, z0.s, UXTW #1] : st1h %z0.s %p0 -> (%x0,%z0.s,uxtw #1)[2byte] +e4e58482 : st1h z2.s, p1, [x4, z5.s, UXTW #1] : st1h %z2.s %p1 -> (%x4,%z5.s,uxtw #1)[2byte] +e4e788c4 : st1h z4.s, p2, [x6, z7.s, UXTW #1] : st1h %z4.s %p2 -> (%x6,%z7.s,uxtw #1)[2byte] +e4e98906 : st1h z6.s, p2, [x8, z9.s, UXTW #1] : st1h %z6.s %p2 -> (%x8,%z9.s,uxtw #1)[2byte] +e4eb8d48 : st1h z8.s, p3, [x10, z11.s, UXTW #1] : st1h %z8.s %p3 -> (%x10,%z11.s,uxtw #1)[2byte] +e4ed8d6a : st1h z10.s, p3, [x11, z13.s, UXTW #1] : st1h %z10.s %p3 -> (%x11,%z13.s,uxtw #1)[2byte] +e4ef91ac : st1h z12.s, p4, [x13, z15.s, UXTW #1] : st1h %z12.s %p4 -> (%x13,%z15.s,uxtw #1)[2byte] +e4f191ee : st1h z14.s, p4, [x15, z17.s, UXTW #1] : st1h %z14.s %p4 -> (%x15,%z17.s,uxtw #1)[2byte] +e4f39630 : st1h z16.s, p5, [x17, z19.s, UXTW #1] : st1h %z16.s %p5 -> (%x17,%z19.s,uxtw #1)[2byte] +e4f49671 : st1h z17.s, p5, [x19, z20.s, UXTW #1] : st1h %z17.s %p5 -> (%x19,%z20.s,uxtw #1)[2byte] +e4f696b3 : st1h z19.s, p5, [x21, z22.s, UXTW #1] : st1h %z19.s %p5 -> (%x21,%z22.s,uxtw #1)[2byte] +e4f89af5 : st1h z21.s, p6, [x23, z24.s, UXTW #1] : st1h %z21.s %p6 -> (%x23,%z24.s,uxtw #1)[2byte] +e4fa9b17 : st1h z23.s, p6, [x24, z26.s, UXTW #1] : st1h %z23.s %p6 -> (%x24,%z26.s,uxtw #1)[2byte] +e4fc9f59 : st1h z25.s, p7, [x26, z28.s, UXTW #1] : st1h %z25.s %p7 -> (%x26,%z28.s,uxtw #1)[2byte] +e4fe9f9b : st1h z27.s, p7, [x28, z30.s, UXTW #1] : st1h 
%z27.s %p7 -> (%x28,%z30.s,uxtw #1)[2byte] +e4ff9fff : st1h z31.s, p7, [sp, z31.s, UXTW #1] : st1h %z31.s %p7 -> (%sp,%z31.s,uxtw #1)[2byte] +e4e0c000 : st1h z0.s, p0, [x0, z0.s, SXTW #1] : st1h %z0.s %p0 -> (%x0,%z0.s,sxtw #1)[2byte] +e4e5c482 : st1h z2.s, p1, [x4, z5.s, SXTW #1] : st1h %z2.s %p1 -> (%x4,%z5.s,sxtw #1)[2byte] +e4e7c8c4 : st1h z4.s, p2, [x6, z7.s, SXTW #1] : st1h %z4.s %p2 -> (%x6,%z7.s,sxtw #1)[2byte] +e4e9c906 : st1h z6.s, p2, [x8, z9.s, SXTW #1] : st1h %z6.s %p2 -> (%x8,%z9.s,sxtw #1)[2byte] +e4ebcd48 : st1h z8.s, p3, [x10, z11.s, SXTW #1] : st1h %z8.s %p3 -> (%x10,%z11.s,sxtw #1)[2byte] +e4edcd6a : st1h z10.s, p3, [x11, z13.s, SXTW #1] : st1h %z10.s %p3 -> (%x11,%z13.s,sxtw #1)[2byte] +e4efd1ac : st1h z12.s, p4, [x13, z15.s, SXTW #1] : st1h %z12.s %p4 -> (%x13,%z15.s,sxtw #1)[2byte] +e4f1d1ee : st1h z14.s, p4, [x15, z17.s, SXTW #1] : st1h %z14.s %p4 -> (%x15,%z17.s,sxtw #1)[2byte] +e4f3d630 : st1h z16.s, p5, [x17, z19.s, SXTW #1] : st1h %z16.s %p5 -> (%x17,%z19.s,sxtw #1)[2byte] +e4f4d671 : st1h z17.s, p5, [x19, z20.s, SXTW #1] : st1h %z17.s %p5 -> (%x19,%z20.s,sxtw #1)[2byte] +e4f6d6b3 : st1h z19.s, p5, [x21, z22.s, SXTW #1] : st1h %z19.s %p5 -> (%x21,%z22.s,sxtw #1)[2byte] +e4f8daf5 : st1h z21.s, p6, [x23, z24.s, SXTW #1] : st1h %z21.s %p6 -> (%x23,%z24.s,sxtw #1)[2byte] +e4fadb17 : st1h z23.s, p6, [x24, z26.s, SXTW #1] : st1h %z23.s %p6 -> (%x24,%z26.s,sxtw #1)[2byte] +e4fcdf59 : st1h z25.s, p7, [x26, z28.s, SXTW #1] : st1h %z25.s %p7 -> (%x26,%z28.s,sxtw #1)[2byte] +e4fedf9b : st1h z27.s, p7, [x28, z30.s, SXTW #1] : st1h %z27.s %p7 -> (%x28,%z30.s,sxtw #1)[2byte] +e4ffdfff : st1h z31.s, p7, [sp, z31.s, SXTW #1] : st1h %z31.s %p7 -> (%sp,%z31.s,sxtw #1)[2byte] # ST1H { .S }, , [.S{, #}] (ST1H-Z.P.AI-S) -e4e0a000 : st1h z0.s, p0, [z0.s, #0] : st1h %z0.s %p0 -> (%z0.s)[16byte] -e4e2a482 : st1h z2.s, p1, [z4.s, #4] : st1h %z2.s %p1 -> +0x04(%z4.s)[16byte] -e4e4a8c4 : st1h z4.s, p2, [z6.s, #8] : st1h %z4.s %p2 -> +0x08(%z6.s)[16byte] -e4e6a906 : 
st1h z6.s, p2, [z8.s, #12] : st1h %z6.s %p2 -> +0x0c(%z8.s)[16byte] -e4e8ad48 : st1h z8.s, p3, [z10.s, #16] : st1h %z8.s %p3 -> +0x10(%z10.s)[16byte] -e4eaad8a : st1h z10.s, p3, [z12.s, #20] : st1h %z10.s %p3 -> +0x14(%z12.s)[16byte] -e4ecb1cc : st1h z12.s, p4, [z14.s, #24] : st1h %z12.s %p4 -> +0x18(%z14.s)[16byte] -e4eeb20e : st1h z14.s, p4, [z16.s, #28] : st1h %z14.s %p4 -> +0x1c(%z16.s)[16byte] -e4f0b650 : st1h z16.s, p5, [z18.s, #32] : st1h %z16.s %p5 -> +0x20(%z18.s)[16byte] -e4f1b671 : st1h z17.s, p5, [z19.s, #34] : st1h %z17.s %p5 -> +0x22(%z19.s)[16byte] -e4f3b6b3 : st1h z19.s, p5, [z21.s, #38] : st1h %z19.s %p5 -> +0x26(%z21.s)[16byte] -e4f5baf5 : st1h z21.s, p6, [z23.s, #42] : st1h %z21.s %p6 -> +0x2a(%z23.s)[16byte] -e4f7bb37 : st1h z23.s, p6, [z25.s, #46] : st1h %z23.s %p6 -> +0x2e(%z25.s)[16byte] -e4f9bf79 : st1h z25.s, p7, [z27.s, #50] : st1h %z25.s %p7 -> +0x32(%z27.s)[16byte] -e4fbbfbb : st1h z27.s, p7, [z29.s, #54] : st1h %z27.s %p7 -> +0x36(%z29.s)[16byte] -e4ffbfff : st1h z31.s, p7, [z31.s, #62] : st1h %z31.s %p7 -> +0x3e(%z31.s)[16byte] +e4e0a000 : st1h z0.s, p0, [z0.s, #0] : st1h %z0.s %p0 -> (%z0.s)[2byte] +e4e2a482 : st1h z2.s, p1, [z4.s, #4] : st1h %z2.s %p1 -> +0x04(%z4.s)[2byte] +e4e4a8c4 : st1h z4.s, p2, [z6.s, #8] : st1h %z4.s %p2 -> +0x08(%z6.s)[2byte] +e4e6a906 : st1h z6.s, p2, [z8.s, #12] : st1h %z6.s %p2 -> +0x0c(%z8.s)[2byte] +e4e8ad48 : st1h z8.s, p3, [z10.s, #16] : st1h %z8.s %p3 -> +0x10(%z10.s)[2byte] +e4eaad8a : st1h z10.s, p3, [z12.s, #20] : st1h %z10.s %p3 -> +0x14(%z12.s)[2byte] +e4ecb1cc : st1h z12.s, p4, [z14.s, #24] : st1h %z12.s %p4 -> +0x18(%z14.s)[2byte] +e4eeb20e : st1h z14.s, p4, [z16.s, #28] : st1h %z14.s %p4 -> +0x1c(%z16.s)[2byte] +e4f0b650 : st1h z16.s, p5, [z18.s, #32] : st1h %z16.s %p5 -> +0x20(%z18.s)[2byte] +e4f1b671 : st1h z17.s, p5, [z19.s, #34] : st1h %z17.s %p5 -> +0x22(%z19.s)[2byte] +e4f3b6b3 : st1h z19.s, p5, [z21.s, #38] : st1h %z19.s %p5 -> +0x26(%z21.s)[2byte] +e4f5baf5 : st1h z21.s, p6, [z23.s, 
#42] : st1h %z21.s %p6 -> +0x2a(%z23.s)[2byte] +e4f7bb37 : st1h z23.s, p6, [z25.s, #46] : st1h %z23.s %p6 -> +0x2e(%z25.s)[2byte] +e4f9bf79 : st1h z25.s, p7, [z27.s, #50] : st1h %z25.s %p7 -> +0x32(%z27.s)[2byte] +e4fbbfbb : st1h z27.s, p7, [z29.s, #54] : st1h %z27.s %p7 -> +0x36(%z29.s)[2byte] +e4ffbfff : st1h z31.s, p7, [z31.s, #62] : st1h %z31.s %p7 -> +0x3e(%z31.s)[2byte] # ST1W { .D }, , [, .D, ] (ST1W-Z.P.BZ-D.x32.unscaled) -e5008000 : st1w z0.d, p0, [x0, z0.d, UXTW] : st1w %z0.d %p0 -> (%x0,%z0.d,uxtw)[16byte] -e5058482 : st1w z2.d, p1, [x4, z5.d, UXTW] : st1w %z2.d %p1 -> (%x4,%z5.d,uxtw)[16byte] -e50788c4 : st1w z4.d, p2, [x6, z7.d, UXTW] : st1w %z4.d %p2 -> (%x6,%z7.d,uxtw)[16byte] -e5098906 : st1w z6.d, p2, [x8, z9.d, UXTW] : st1w %z6.d %p2 -> (%x8,%z9.d,uxtw)[16byte] -e50b8d48 : st1w z8.d, p3, [x10, z11.d, UXTW] : st1w %z8.d %p3 -> (%x10,%z11.d,uxtw)[16byte] -e50d8d6a : st1w z10.d, p3, [x11, z13.d, UXTW] : st1w %z10.d %p3 -> (%x11,%z13.d,uxtw)[16byte] -e50f91ac : st1w z12.d, p4, [x13, z15.d, UXTW] : st1w %z12.d %p4 -> (%x13,%z15.d,uxtw)[16byte] -e51191ee : st1w z14.d, p4, [x15, z17.d, UXTW] : st1w %z14.d %p4 -> (%x15,%z17.d,uxtw)[16byte] -e5139630 : st1w z16.d, p5, [x17, z19.d, UXTW] : st1w %z16.d %p5 -> (%x17,%z19.d,uxtw)[16byte] -e5149671 : st1w z17.d, p5, [x19, z20.d, UXTW] : st1w %z17.d %p5 -> (%x19,%z20.d,uxtw)[16byte] -e51696b3 : st1w z19.d, p5, [x21, z22.d, UXTW] : st1w %z19.d %p5 -> (%x21,%z22.d,uxtw)[16byte] -e5189af5 : st1w z21.d, p6, [x23, z24.d, UXTW] : st1w %z21.d %p6 -> (%x23,%z24.d,uxtw)[16byte] -e51a9b17 : st1w z23.d, p6, [x24, z26.d, UXTW] : st1w %z23.d %p6 -> (%x24,%z26.d,uxtw)[16byte] -e51c9f59 : st1w z25.d, p7, [x26, z28.d, UXTW] : st1w %z25.d %p7 -> (%x26,%z28.d,uxtw)[16byte] -e51e9f9b : st1w z27.d, p7, [x28, z30.d, UXTW] : st1w %z27.d %p7 -> (%x28,%z30.d,uxtw)[16byte] -e51f9fff : st1w z31.d, p7, [sp, z31.d, UXTW] : st1w %z31.d %p7 -> (%sp,%z31.d,uxtw)[16byte] -e500c000 : st1w z0.d, p0, [x0, z0.d, SXTW] : st1w %z0.d %p0 -> 
(%x0,%z0.d,sxtw)[16byte] -e505c482 : st1w z2.d, p1, [x4, z5.d, SXTW] : st1w %z2.d %p1 -> (%x4,%z5.d,sxtw)[16byte] -e507c8c4 : st1w z4.d, p2, [x6, z7.d, SXTW] : st1w %z4.d %p2 -> (%x6,%z7.d,sxtw)[16byte] -e509c906 : st1w z6.d, p2, [x8, z9.d, SXTW] : st1w %z6.d %p2 -> (%x8,%z9.d,sxtw)[16byte] -e50bcd48 : st1w z8.d, p3, [x10, z11.d, SXTW] : st1w %z8.d %p3 -> (%x10,%z11.d,sxtw)[16byte] -e50dcd6a : st1w z10.d, p3, [x11, z13.d, SXTW] : st1w %z10.d %p3 -> (%x11,%z13.d,sxtw)[16byte] -e50fd1ac : st1w z12.d, p4, [x13, z15.d, SXTW] : st1w %z12.d %p4 -> (%x13,%z15.d,sxtw)[16byte] -e511d1ee : st1w z14.d, p4, [x15, z17.d, SXTW] : st1w %z14.d %p4 -> (%x15,%z17.d,sxtw)[16byte] -e513d630 : st1w z16.d, p5, [x17, z19.d, SXTW] : st1w %z16.d %p5 -> (%x17,%z19.d,sxtw)[16byte] -e514d671 : st1w z17.d, p5, [x19, z20.d, SXTW] : st1w %z17.d %p5 -> (%x19,%z20.d,sxtw)[16byte] -e516d6b3 : st1w z19.d, p5, [x21, z22.d, SXTW] : st1w %z19.d %p5 -> (%x21,%z22.d,sxtw)[16byte] -e518daf5 : st1w z21.d, p6, [x23, z24.d, SXTW] : st1w %z21.d %p6 -> (%x23,%z24.d,sxtw)[16byte] -e51adb17 : st1w z23.d, p6, [x24, z26.d, SXTW] : st1w %z23.d %p6 -> (%x24,%z26.d,sxtw)[16byte] -e51cdf59 : st1w z25.d, p7, [x26, z28.d, SXTW] : st1w %z25.d %p7 -> (%x26,%z28.d,sxtw)[16byte] -e51edf9b : st1w z27.d, p7, [x28, z30.d, SXTW] : st1w %z27.d %p7 -> (%x28,%z30.d,sxtw)[16byte] -e51fdfff : st1w z31.d, p7, [sp, z31.d, SXTW] : st1w %z31.d %p7 -> (%sp,%z31.d,sxtw)[16byte] +e5008000 : st1w z0.d, p0, [x0, z0.d, UXTW] : st1w %z0.d %p0 -> (%x0,%z0.d,uxtw)[4byte] +e5058482 : st1w z2.d, p1, [x4, z5.d, UXTW] : st1w %z2.d %p1 -> (%x4,%z5.d,uxtw)[4byte] +e50788c4 : st1w z4.d, p2, [x6, z7.d, UXTW] : st1w %z4.d %p2 -> (%x6,%z7.d,uxtw)[4byte] +e5098906 : st1w z6.d, p2, [x8, z9.d, UXTW] : st1w %z6.d %p2 -> (%x8,%z9.d,uxtw)[4byte] +e50b8d48 : st1w z8.d, p3, [x10, z11.d, UXTW] : st1w %z8.d %p3 -> (%x10,%z11.d,uxtw)[4byte] +e50d8d6a : st1w z10.d, p3, [x11, z13.d, UXTW] : st1w %z10.d %p3 -> (%x11,%z13.d,uxtw)[4byte] +e50f91ac : st1w z12.d, p4, [x13, 
z15.d, UXTW] : st1w %z12.d %p4 -> (%x13,%z15.d,uxtw)[4byte] +e51191ee : st1w z14.d, p4, [x15, z17.d, UXTW] : st1w %z14.d %p4 -> (%x15,%z17.d,uxtw)[4byte] +e5139630 : st1w z16.d, p5, [x17, z19.d, UXTW] : st1w %z16.d %p5 -> (%x17,%z19.d,uxtw)[4byte] +e5149671 : st1w z17.d, p5, [x19, z20.d, UXTW] : st1w %z17.d %p5 -> (%x19,%z20.d,uxtw)[4byte] +e51696b3 : st1w z19.d, p5, [x21, z22.d, UXTW] : st1w %z19.d %p5 -> (%x21,%z22.d,uxtw)[4byte] +e5189af5 : st1w z21.d, p6, [x23, z24.d, UXTW] : st1w %z21.d %p6 -> (%x23,%z24.d,uxtw)[4byte] +e51a9b17 : st1w z23.d, p6, [x24, z26.d, UXTW] : st1w %z23.d %p6 -> (%x24,%z26.d,uxtw)[4byte] +e51c9f59 : st1w z25.d, p7, [x26, z28.d, UXTW] : st1w %z25.d %p7 -> (%x26,%z28.d,uxtw)[4byte] +e51e9f9b : st1w z27.d, p7, [x28, z30.d, UXTW] : st1w %z27.d %p7 -> (%x28,%z30.d,uxtw)[4byte] +e51f9fff : st1w z31.d, p7, [sp, z31.d, UXTW] : st1w %z31.d %p7 -> (%sp,%z31.d,uxtw)[4byte] +e500c000 : st1w z0.d, p0, [x0, z0.d, SXTW] : st1w %z0.d %p0 -> (%x0,%z0.d,sxtw)[4byte] +e505c482 : st1w z2.d, p1, [x4, z5.d, SXTW] : st1w %z2.d %p1 -> (%x4,%z5.d,sxtw)[4byte] +e507c8c4 : st1w z4.d, p2, [x6, z7.d, SXTW] : st1w %z4.d %p2 -> (%x6,%z7.d,sxtw)[4byte] +e509c906 : st1w z6.d, p2, [x8, z9.d, SXTW] : st1w %z6.d %p2 -> (%x8,%z9.d,sxtw)[4byte] +e50bcd48 : st1w z8.d, p3, [x10, z11.d, SXTW] : st1w %z8.d %p3 -> (%x10,%z11.d,sxtw)[4byte] +e50dcd6a : st1w z10.d, p3, [x11, z13.d, SXTW] : st1w %z10.d %p3 -> (%x11,%z13.d,sxtw)[4byte] +e50fd1ac : st1w z12.d, p4, [x13, z15.d, SXTW] : st1w %z12.d %p4 -> (%x13,%z15.d,sxtw)[4byte] +e511d1ee : st1w z14.d, p4, [x15, z17.d, SXTW] : st1w %z14.d %p4 -> (%x15,%z17.d,sxtw)[4byte] +e513d630 : st1w z16.d, p5, [x17, z19.d, SXTW] : st1w %z16.d %p5 -> (%x17,%z19.d,sxtw)[4byte] +e514d671 : st1w z17.d, p5, [x19, z20.d, SXTW] : st1w %z17.d %p5 -> (%x19,%z20.d,sxtw)[4byte] +e516d6b3 : st1w z19.d, p5, [x21, z22.d, SXTW] : st1w %z19.d %p5 -> (%x21,%z22.d,sxtw)[4byte] +e518daf5 : st1w z21.d, p6, [x23, z24.d, SXTW] : st1w %z21.d %p6 -> 
(%x23,%z24.d,sxtw)[4byte] +e51adb17 : st1w z23.d, p6, [x24, z26.d, SXTW] : st1w %z23.d %p6 -> (%x24,%z26.d,sxtw)[4byte] +e51cdf59 : st1w z25.d, p7, [x26, z28.d, SXTW] : st1w %z25.d %p7 -> (%x26,%z28.d,sxtw)[4byte] +e51edf9b : st1w z27.d, p7, [x28, z30.d, SXTW] : st1w %z27.d %p7 -> (%x28,%z30.d,sxtw)[4byte] +e51fdfff : st1w z31.d, p7, [sp, z31.d, SXTW] : st1w %z31.d %p7 -> (%sp,%z31.d,sxtw)[4byte] # ST1W { .D }, , [, .D] (ST1W-Z.P.BZ-D.64.unscaled) -e500a000 : st1w z0.d, p0, [x0, z0.d] : st1w %z0.d %p0 -> (%x0,%z0.d)[16byte] -e505a482 : st1w z2.d, p1, [x4, z5.d] : st1w %z2.d %p1 -> (%x4,%z5.d)[16byte] -e507a8c4 : st1w z4.d, p2, [x6, z7.d] : st1w %z4.d %p2 -> (%x6,%z7.d)[16byte] -e509a906 : st1w z6.d, p2, [x8, z9.d] : st1w %z6.d %p2 -> (%x8,%z9.d)[16byte] -e50bad48 : st1w z8.d, p3, [x10, z11.d] : st1w %z8.d %p3 -> (%x10,%z11.d)[16byte] -e50dad6a : st1w z10.d, p3, [x11, z13.d] : st1w %z10.d %p3 -> (%x11,%z13.d)[16byte] -e50fb1ac : st1w z12.d, p4, [x13, z15.d] : st1w %z12.d %p4 -> (%x13,%z15.d)[16byte] -e511b1ee : st1w z14.d, p4, [x15, z17.d] : st1w %z14.d %p4 -> (%x15,%z17.d)[16byte] -e513b630 : st1w z16.d, p5, [x17, z19.d] : st1w %z16.d %p5 -> (%x17,%z19.d)[16byte] -e514b671 : st1w z17.d, p5, [x19, z20.d] : st1w %z17.d %p5 -> (%x19,%z20.d)[16byte] -e516b6b3 : st1w z19.d, p5, [x21, z22.d] : st1w %z19.d %p5 -> (%x21,%z22.d)[16byte] -e518baf5 : st1w z21.d, p6, [x23, z24.d] : st1w %z21.d %p6 -> (%x23,%z24.d)[16byte] -e51abb17 : st1w z23.d, p6, [x24, z26.d] : st1w %z23.d %p6 -> (%x24,%z26.d)[16byte] -e51cbf59 : st1w z25.d, p7, [x26, z28.d] : st1w %z25.d %p7 -> (%x26,%z28.d)[16byte] -e51ebf9b : st1w z27.d, p7, [x28, z30.d] : st1w %z27.d %p7 -> (%x28,%z30.d)[16byte] -e51fbfff : st1w z31.d, p7, [sp, z31.d] : st1w %z31.d %p7 -> (%sp,%z31.d)[16byte] +e500a000 : st1w z0.d, p0, [x0, z0.d] : st1w %z0.d %p0 -> (%x0,%z0.d)[4byte] +e505a482 : st1w z2.d, p1, [x4, z5.d] : st1w %z2.d %p1 -> (%x4,%z5.d)[4byte] +e507a8c4 : st1w z4.d, p2, [x6, z7.d] : st1w %z4.d %p2 -> (%x6,%z7.d)[4byte] 
+e509a906 : st1w z6.d, p2, [x8, z9.d] : st1w %z6.d %p2 -> (%x8,%z9.d)[4byte] +e50bad48 : st1w z8.d, p3, [x10, z11.d] : st1w %z8.d %p3 -> (%x10,%z11.d)[4byte] +e50dad6a : st1w z10.d, p3, [x11, z13.d] : st1w %z10.d %p3 -> (%x11,%z13.d)[4byte] +e50fb1ac : st1w z12.d, p4, [x13, z15.d] : st1w %z12.d %p4 -> (%x13,%z15.d)[4byte] +e511b1ee : st1w z14.d, p4, [x15, z17.d] : st1w %z14.d %p4 -> (%x15,%z17.d)[4byte] +e513b630 : st1w z16.d, p5, [x17, z19.d] : st1w %z16.d %p5 -> (%x17,%z19.d)[4byte] +e514b671 : st1w z17.d, p5, [x19, z20.d] : st1w %z17.d %p5 -> (%x19,%z20.d)[4byte] +e516b6b3 : st1w z19.d, p5, [x21, z22.d] : st1w %z19.d %p5 -> (%x21,%z22.d)[4byte] +e518baf5 : st1w z21.d, p6, [x23, z24.d] : st1w %z21.d %p6 -> (%x23,%z24.d)[4byte] +e51abb17 : st1w z23.d, p6, [x24, z26.d] : st1w %z23.d %p6 -> (%x24,%z26.d)[4byte] +e51cbf59 : st1w z25.d, p7, [x26, z28.d] : st1w %z25.d %p7 -> (%x26,%z28.d)[4byte] +e51ebf9b : st1w z27.d, p7, [x28, z30.d] : st1w %z27.d %p7 -> (%x28,%z30.d)[4byte] +e51fbfff : st1w z31.d, p7, [sp, z31.d] : st1w %z31.d %p7 -> (%sp,%z31.d)[4byte] # ST1W { .D }, , [, .D, #2] (ST1W-Z.P.BZ-D.x32.scaled) -e5208000 : st1w z0.d, p0, [x0, z0.d, UXTW #2] : st1w %z0.d %p0 -> (%x0,%z0.d,uxtw #2)[16byte] -e5258482 : st1w z2.d, p1, [x4, z5.d, UXTW #2] : st1w %z2.d %p1 -> (%x4,%z5.d,uxtw #2)[16byte] -e52788c4 : st1w z4.d, p2, [x6, z7.d, UXTW #2] : st1w %z4.d %p2 -> (%x6,%z7.d,uxtw #2)[16byte] -e5298906 : st1w z6.d, p2, [x8, z9.d, UXTW #2] : st1w %z6.d %p2 -> (%x8,%z9.d,uxtw #2)[16byte] -e52b8d48 : st1w z8.d, p3, [x10, z11.d, UXTW #2] : st1w %z8.d %p3 -> (%x10,%z11.d,uxtw #2)[16byte] -e52d8d6a : st1w z10.d, p3, [x11, z13.d, UXTW #2] : st1w %z10.d %p3 -> (%x11,%z13.d,uxtw #2)[16byte] -e52f91ac : st1w z12.d, p4, [x13, z15.d, UXTW #2] : st1w %z12.d %p4 -> (%x13,%z15.d,uxtw #2)[16byte] -e53191ee : st1w z14.d, p4, [x15, z17.d, UXTW #2] : st1w %z14.d %p4 -> (%x15,%z17.d,uxtw #2)[16byte] -e5339630 : st1w z16.d, p5, [x17, z19.d, UXTW #2] : st1w %z16.d %p5 -> (%x17,%z19.d,uxtw 
#2)[16byte] -e5349671 : st1w z17.d, p5, [x19, z20.d, UXTW #2] : st1w %z17.d %p5 -> (%x19,%z20.d,uxtw #2)[16byte] -e53696b3 : st1w z19.d, p5, [x21, z22.d, UXTW #2] : st1w %z19.d %p5 -> (%x21,%z22.d,uxtw #2)[16byte] -e5389af5 : st1w z21.d, p6, [x23, z24.d, UXTW #2] : st1w %z21.d %p6 -> (%x23,%z24.d,uxtw #2)[16byte] -e53a9b17 : st1w z23.d, p6, [x24, z26.d, UXTW #2] : st1w %z23.d %p6 -> (%x24,%z26.d,uxtw #2)[16byte] -e53c9f59 : st1w z25.d, p7, [x26, z28.d, UXTW #2] : st1w %z25.d %p7 -> (%x26,%z28.d,uxtw #2)[16byte] -e53e9f9b : st1w z27.d, p7, [x28, z30.d, UXTW #2] : st1w %z27.d %p7 -> (%x28,%z30.d,uxtw #2)[16byte] -e53f9fff : st1w z31.d, p7, [sp, z31.d, UXTW #2] : st1w %z31.d %p7 -> (%sp,%z31.d,uxtw #2)[16byte] -e520c000 : st1w z0.d, p0, [x0, z0.d, SXTW #2] : st1w %z0.d %p0 -> (%x0,%z0.d,sxtw #2)[16byte] -e525c482 : st1w z2.d, p1, [x4, z5.d, SXTW #2] : st1w %z2.d %p1 -> (%x4,%z5.d,sxtw #2)[16byte] -e527c8c4 : st1w z4.d, p2, [x6, z7.d, SXTW #2] : st1w %z4.d %p2 -> (%x6,%z7.d,sxtw #2)[16byte] -e529c906 : st1w z6.d, p2, [x8, z9.d, SXTW #2] : st1w %z6.d %p2 -> (%x8,%z9.d,sxtw #2)[16byte] -e52bcd48 : st1w z8.d, p3, [x10, z11.d, SXTW #2] : st1w %z8.d %p3 -> (%x10,%z11.d,sxtw #2)[16byte] -e52dcd6a : st1w z10.d, p3, [x11, z13.d, SXTW #2] : st1w %z10.d %p3 -> (%x11,%z13.d,sxtw #2)[16byte] -e52fd1ac : st1w z12.d, p4, [x13, z15.d, SXTW #2] : st1w %z12.d %p4 -> (%x13,%z15.d,sxtw #2)[16byte] -e531d1ee : st1w z14.d, p4, [x15, z17.d, SXTW #2] : st1w %z14.d %p4 -> (%x15,%z17.d,sxtw #2)[16byte] -e533d630 : st1w z16.d, p5, [x17, z19.d, SXTW #2] : st1w %z16.d %p5 -> (%x17,%z19.d,sxtw #2)[16byte] -e534d671 : st1w z17.d, p5, [x19, z20.d, SXTW #2] : st1w %z17.d %p5 -> (%x19,%z20.d,sxtw #2)[16byte] -e536d6b3 : st1w z19.d, p5, [x21, z22.d, SXTW #2] : st1w %z19.d %p5 -> (%x21,%z22.d,sxtw #2)[16byte] -e538daf5 : st1w z21.d, p6, [x23, z24.d, SXTW #2] : st1w %z21.d %p6 -> (%x23,%z24.d,sxtw #2)[16byte] -e53adb17 : st1w z23.d, p6, [x24, z26.d, SXTW #2] : st1w %z23.d %p6 -> (%x24,%z26.d,sxtw 
#2)[16byte] -e53cdf59 : st1w z25.d, p7, [x26, z28.d, SXTW #2] : st1w %z25.d %p7 -> (%x26,%z28.d,sxtw #2)[16byte] -e53edf9b : st1w z27.d, p7, [x28, z30.d, SXTW #2] : st1w %z27.d %p7 -> (%x28,%z30.d,sxtw #2)[16byte] -e53fdfff : st1w z31.d, p7, [sp, z31.d, SXTW #2] : st1w %z31.d %p7 -> (%sp,%z31.d,sxtw #2)[16byte] +e5208000 : st1w z0.d, p0, [x0, z0.d, UXTW #2] : st1w %z0.d %p0 -> (%x0,%z0.d,uxtw #2)[4byte] +e5258482 : st1w z2.d, p1, [x4, z5.d, UXTW #2] : st1w %z2.d %p1 -> (%x4,%z5.d,uxtw #2)[4byte] +e52788c4 : st1w z4.d, p2, [x6, z7.d, UXTW #2] : st1w %z4.d %p2 -> (%x6,%z7.d,uxtw #2)[4byte] +e5298906 : st1w z6.d, p2, [x8, z9.d, UXTW #2] : st1w %z6.d %p2 -> (%x8,%z9.d,uxtw #2)[4byte] +e52b8d48 : st1w z8.d, p3, [x10, z11.d, UXTW #2] : st1w %z8.d %p3 -> (%x10,%z11.d,uxtw #2)[4byte] +e52d8d6a : st1w z10.d, p3, [x11, z13.d, UXTW #2] : st1w %z10.d %p3 -> (%x11,%z13.d,uxtw #2)[4byte] +e52f91ac : st1w z12.d, p4, [x13, z15.d, UXTW #2] : st1w %z12.d %p4 -> (%x13,%z15.d,uxtw #2)[4byte] +e53191ee : st1w z14.d, p4, [x15, z17.d, UXTW #2] : st1w %z14.d %p4 -> (%x15,%z17.d,uxtw #2)[4byte] +e5339630 : st1w z16.d, p5, [x17, z19.d, UXTW #2] : st1w %z16.d %p5 -> (%x17,%z19.d,uxtw #2)[4byte] +e5349671 : st1w z17.d, p5, [x19, z20.d, UXTW #2] : st1w %z17.d %p5 -> (%x19,%z20.d,uxtw #2)[4byte] +e53696b3 : st1w z19.d, p5, [x21, z22.d, UXTW #2] : st1w %z19.d %p5 -> (%x21,%z22.d,uxtw #2)[4byte] +e5389af5 : st1w z21.d, p6, [x23, z24.d, UXTW #2] : st1w %z21.d %p6 -> (%x23,%z24.d,uxtw #2)[4byte] +e53a9b17 : st1w z23.d, p6, [x24, z26.d, UXTW #2] : st1w %z23.d %p6 -> (%x24,%z26.d,uxtw #2)[4byte] +e53c9f59 : st1w z25.d, p7, [x26, z28.d, UXTW #2] : st1w %z25.d %p7 -> (%x26,%z28.d,uxtw #2)[4byte] +e53e9f9b : st1w z27.d, p7, [x28, z30.d, UXTW #2] : st1w %z27.d %p7 -> (%x28,%z30.d,uxtw #2)[4byte] +e53f9fff : st1w z31.d, p7, [sp, z31.d, UXTW #2] : st1w %z31.d %p7 -> (%sp,%z31.d,uxtw #2)[4byte] +e520c000 : st1w z0.d, p0, [x0, z0.d, SXTW #2] : st1w %z0.d %p0 -> (%x0,%z0.d,sxtw #2)[4byte] +e525c482 : st1w 
z2.d, p1, [x4, z5.d, SXTW #2] : st1w %z2.d %p1 -> (%x4,%z5.d,sxtw #2)[4byte] +e527c8c4 : st1w z4.d, p2, [x6, z7.d, SXTW #2] : st1w %z4.d %p2 -> (%x6,%z7.d,sxtw #2)[4byte] +e529c906 : st1w z6.d, p2, [x8, z9.d, SXTW #2] : st1w %z6.d %p2 -> (%x8,%z9.d,sxtw #2)[4byte] +e52bcd48 : st1w z8.d, p3, [x10, z11.d, SXTW #2] : st1w %z8.d %p3 -> (%x10,%z11.d,sxtw #2)[4byte] +e52dcd6a : st1w z10.d, p3, [x11, z13.d, SXTW #2] : st1w %z10.d %p3 -> (%x11,%z13.d,sxtw #2)[4byte] +e52fd1ac : st1w z12.d, p4, [x13, z15.d, SXTW #2] : st1w %z12.d %p4 -> (%x13,%z15.d,sxtw #2)[4byte] +e531d1ee : st1w z14.d, p4, [x15, z17.d, SXTW #2] : st1w %z14.d %p4 -> (%x15,%z17.d,sxtw #2)[4byte] +e533d630 : st1w z16.d, p5, [x17, z19.d, SXTW #2] : st1w %z16.d %p5 -> (%x17,%z19.d,sxtw #2)[4byte] +e534d671 : st1w z17.d, p5, [x19, z20.d, SXTW #2] : st1w %z17.d %p5 -> (%x19,%z20.d,sxtw #2)[4byte] +e536d6b3 : st1w z19.d, p5, [x21, z22.d, SXTW #2] : st1w %z19.d %p5 -> (%x21,%z22.d,sxtw #2)[4byte] +e538daf5 : st1w z21.d, p6, [x23, z24.d, SXTW #2] : st1w %z21.d %p6 -> (%x23,%z24.d,sxtw #2)[4byte] +e53adb17 : st1w z23.d, p6, [x24, z26.d, SXTW #2] : st1w %z23.d %p6 -> (%x24,%z26.d,sxtw #2)[4byte] +e53cdf59 : st1w z25.d, p7, [x26, z28.d, SXTW #2] : st1w %z25.d %p7 -> (%x26,%z28.d,sxtw #2)[4byte] +e53edf9b : st1w z27.d, p7, [x28, z30.d, SXTW #2] : st1w %z27.d %p7 -> (%x28,%z30.d,sxtw #2)[4byte] +e53fdfff : st1w z31.d, p7, [sp, z31.d, SXTW #2] : st1w %z31.d %p7 -> (%sp,%z31.d,sxtw #2)[4byte] # ST1W { .D }, , [, .D, LSL #2] (ST1W-Z.P.BZ-D.64.scaled) -e520a000 : st1w z0.d, p0, [x0, z0.d, LSL #2] : st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[16byte] -e525a482 : st1w z2.d, p1, [x4, z5.d, LSL #2] : st1w %z2.d %p1 -> (%x4,%z5.d,lsl #2)[16byte] -e527a8c4 : st1w z4.d, p2, [x6, z7.d, LSL #2] : st1w %z4.d %p2 -> (%x6,%z7.d,lsl #2)[16byte] -e529a906 : st1w z6.d, p2, [x8, z9.d, LSL #2] : st1w %z6.d %p2 -> (%x8,%z9.d,lsl #2)[16byte] -e52bad48 : st1w z8.d, p3, [x10, z11.d, LSL #2] : st1w %z8.d %p3 -> (%x10,%z11.d,lsl #2)[16byte] -e52dad6a : 
st1w z10.d, p3, [x11, z13.d, LSL #2] : st1w %z10.d %p3 -> (%x11,%z13.d,lsl #2)[16byte] -e52fb1ac : st1w z12.d, p4, [x13, z15.d, LSL #2] : st1w %z12.d %p4 -> (%x13,%z15.d,lsl #2)[16byte] -e531b1ee : st1w z14.d, p4, [x15, z17.d, LSL #2] : st1w %z14.d %p4 -> (%x15,%z17.d,lsl #2)[16byte] -e533b630 : st1w z16.d, p5, [x17, z19.d, LSL #2] : st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[16byte] -e534b671 : st1w z17.d, p5, [x19, z20.d, LSL #2] : st1w %z17.d %p5 -> (%x19,%z20.d,lsl #2)[16byte] -e536b6b3 : st1w z19.d, p5, [x21, z22.d, LSL #2] : st1w %z19.d %p5 -> (%x21,%z22.d,lsl #2)[16byte] -e538baf5 : st1w z21.d, p6, [x23, z24.d, LSL #2] : st1w %z21.d %p6 -> (%x23,%z24.d,lsl #2)[16byte] -e53abb17 : st1w z23.d, p6, [x24, z26.d, LSL #2] : st1w %z23.d %p6 -> (%x24,%z26.d,lsl #2)[16byte] -e53cbf59 : st1w z25.d, p7, [x26, z28.d, LSL #2] : st1w %z25.d %p7 -> (%x26,%z28.d,lsl #2)[16byte] -e53ebf9b : st1w z27.d, p7, [x28, z30.d, LSL #2] : st1w %z27.d %p7 -> (%x28,%z30.d,lsl #2)[16byte] -e53fbfff : st1w z31.d, p7, [sp, z31.d, LSL #2] : st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[16byte] +e520a000 : st1w z0.d, p0, [x0, z0.d, LSL #2] : st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[4byte] +e525a482 : st1w z2.d, p1, [x4, z5.d, LSL #2] : st1w %z2.d %p1 -> (%x4,%z5.d,lsl #2)[4byte] +e527a8c4 : st1w z4.d, p2, [x6, z7.d, LSL #2] : st1w %z4.d %p2 -> (%x6,%z7.d,lsl #2)[4byte] +e529a906 : st1w z6.d, p2, [x8, z9.d, LSL #2] : st1w %z6.d %p2 -> (%x8,%z9.d,lsl #2)[4byte] +e52bad48 : st1w z8.d, p3, [x10, z11.d, LSL #2] : st1w %z8.d %p3 -> (%x10,%z11.d,lsl #2)[4byte] +e52dad6a : st1w z10.d, p3, [x11, z13.d, LSL #2] : st1w %z10.d %p3 -> (%x11,%z13.d,lsl #2)[4byte] +e52fb1ac : st1w z12.d, p4, [x13, z15.d, LSL #2] : st1w %z12.d %p4 -> (%x13,%z15.d,lsl #2)[4byte] +e531b1ee : st1w z14.d, p4, [x15, z17.d, LSL #2] : st1w %z14.d %p4 -> (%x15,%z17.d,lsl #2)[4byte] +e533b630 : st1w z16.d, p5, [x17, z19.d, LSL #2] : st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[4byte] +e534b671 : st1w z17.d, p5, [x19, z20.d, LSL #2] : st1w %z17.d %p5 
-> (%x19,%z20.d,lsl #2)[4byte] +e536b6b3 : st1w z19.d, p5, [x21, z22.d, LSL #2] : st1w %z19.d %p5 -> (%x21,%z22.d,lsl #2)[4byte] +e538baf5 : st1w z21.d, p6, [x23, z24.d, LSL #2] : st1w %z21.d %p6 -> (%x23,%z24.d,lsl #2)[4byte] +e53abb17 : st1w z23.d, p6, [x24, z26.d, LSL #2] : st1w %z23.d %p6 -> (%x24,%z26.d,lsl #2)[4byte] +e53cbf59 : st1w z25.d, p7, [x26, z28.d, LSL #2] : st1w %z25.d %p7 -> (%x26,%z28.d,lsl #2)[4byte] +e53ebf9b : st1w z27.d, p7, [x28, z30.d, LSL #2] : st1w %z27.d %p7 -> (%x28,%z30.d,lsl #2)[4byte] +e53fbfff : st1w z31.d, p7, [sp, z31.d, LSL #2] : st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[4byte] # ST1W { . }, , [, , LSL #2] (ST1W-Z.P.BR-_) -e5404000 : st1w z0.s, p0, [x0, x0, LSL #2] : st1w %z0.s %p0 -> (%x0,%x0,lsl #2)[32byte] -e5454482 : st1w z2.s, p1, [x4, x5, LSL #2] : st1w %z2.s %p1 -> (%x4,%x5,lsl #2)[32byte] -e54748c4 : st1w z4.s, p2, [x6, x7, LSL #2] : st1w %z4.s %p2 -> (%x6,%x7,lsl #2)[32byte] -e5494906 : st1w z6.s, p2, [x8, x9, LSL #2] : st1w %z6.s %p2 -> (%x8,%x9,lsl #2)[32byte] -e54b4d48 : st1w z8.s, p3, [x10, x11, LSL #2] : st1w %z8.s %p3 -> (%x10,%x11,lsl #2)[32byte] -e54c4d6a : st1w z10.s, p3, [x11, x12, LSL #2] : st1w %z10.s %p3 -> (%x11,%x12,lsl #2)[32byte] -e54e51ac : st1w z12.s, p4, [x13, x14, LSL #2] : st1w %z12.s %p4 -> (%x13,%x14,lsl #2)[32byte] -e55051ee : st1w z14.s, p4, [x15, x16, LSL #2] : st1w %z14.s %p4 -> (%x15,%x16,lsl #2)[32byte] -e5525630 : st1w z16.s, p5, [x17, x18, LSL #2] : st1w %z16.s %p5 -> (%x17,%x18,lsl #2)[32byte] -e5545671 : st1w z17.s, p5, [x19, x20, LSL #2] : st1w %z17.s %p5 -> (%x19,%x20,lsl #2)[32byte] -e55656b3 : st1w z19.s, p5, [x21, x22, LSL #2] : st1w %z19.s %p5 -> (%x21,%x22,lsl #2)[32byte] -e5585af5 : st1w z21.s, p6, [x23, x24, LSL #2] : st1w %z21.s %p6 -> (%x23,%x24,lsl #2)[32byte] -e5595b17 : st1w z23.s, p6, [x24, x25, LSL #2] : st1w %z23.s %p6 -> (%x24,%x25,lsl #2)[32byte] -e55b5f59 : st1w z25.s, p7, [x26, x27, LSL #2] : st1w %z25.s %p7 -> (%x26,%x27,lsl #2)[32byte] -e55d5f9b : st1w z27.s, p7, [x28, 
x29, LSL #2] : st1w %z27.s %p7 -> (%x28,%x29,lsl #2)[32byte] -e55e5fff : st1w z31.s, p7, [sp, x30, LSL #2] : st1w %z31.s %p7 -> (%sp,%x30,lsl #2)[32byte] -e5604000 : st1w z0.d, p0, [x0, x0, LSL #2] : st1w %z0.d %p0 -> (%x0,%x0,lsl #2)[16byte] -e5654482 : st1w z2.d, p1, [x4, x5, LSL #2] : st1w %z2.d %p1 -> (%x4,%x5,lsl #2)[16byte] -e56748c4 : st1w z4.d, p2, [x6, x7, LSL #2] : st1w %z4.d %p2 -> (%x6,%x7,lsl #2)[16byte] -e5694906 : st1w z6.d, p2, [x8, x9, LSL #2] : st1w %z6.d %p2 -> (%x8,%x9,lsl #2)[16byte] -e56b4d48 : st1w z8.d, p3, [x10, x11, LSL #2] : st1w %z8.d %p3 -> (%x10,%x11,lsl #2)[16byte] -e56c4d6a : st1w z10.d, p3, [x11, x12, LSL #2] : st1w %z10.d %p3 -> (%x11,%x12,lsl #2)[16byte] -e56e51ac : st1w z12.d, p4, [x13, x14, LSL #2] : st1w %z12.d %p4 -> (%x13,%x14,lsl #2)[16byte] -e57051ee : st1w z14.d, p4, [x15, x16, LSL #2] : st1w %z14.d %p4 -> (%x15,%x16,lsl #2)[16byte] -e5725630 : st1w z16.d, p5, [x17, x18, LSL #2] : st1w %z16.d %p5 -> (%x17,%x18,lsl #2)[16byte] -e5745671 : st1w z17.d, p5, [x19, x20, LSL #2] : st1w %z17.d %p5 -> (%x19,%x20,lsl #2)[16byte] -e57656b3 : st1w z19.d, p5, [x21, x22, LSL #2] : st1w %z19.d %p5 -> (%x21,%x22,lsl #2)[16byte] -e5785af5 : st1w z21.d, p6, [x23, x24, LSL #2] : st1w %z21.d %p6 -> (%x23,%x24,lsl #2)[16byte] -e5795b17 : st1w z23.d, p6, [x24, x25, LSL #2] : st1w %z23.d %p6 -> (%x24,%x25,lsl #2)[16byte] -e57b5f59 : st1w z25.d, p7, [x26, x27, LSL #2] : st1w %z25.d %p7 -> (%x26,%x27,lsl #2)[16byte] -e57d5f9b : st1w z27.d, p7, [x28, x29, LSL #2] : st1w %z27.d %p7 -> (%x28,%x29,lsl #2)[16byte] -e57e5fff : st1w z31.d, p7, [sp, x30, LSL #2] : st1w %z31.d %p7 -> (%sp,%x30,lsl #2)[16byte] +e5404000 : st1w z0.s, p0, [x0, x0, LSL #2] : st1w %z0.s %p0 -> (%x0,%x0,lsl #2)[4byte] +e5454482 : st1w z2.s, p1, [x4, x5, LSL #2] : st1w %z2.s %p1 -> (%x4,%x5,lsl #2)[4byte] +e54748c4 : st1w z4.s, p2, [x6, x7, LSL #2] : st1w %z4.s %p2 -> (%x6,%x7,lsl #2)[4byte] +e5494906 : st1w z6.s, p2, [x8, x9, LSL #2] : st1w %z6.s %p2 -> (%x8,%x9,lsl #2)[4byte] 
+e54b4d48 : st1w z8.s, p3, [x10, x11, LSL #2] : st1w %z8.s %p3 -> (%x10,%x11,lsl #2)[4byte] +e54c4d6a : st1w z10.s, p3, [x11, x12, LSL #2] : st1w %z10.s %p3 -> (%x11,%x12,lsl #2)[4byte] +e54e51ac : st1w z12.s, p4, [x13, x14, LSL #2] : st1w %z12.s %p4 -> (%x13,%x14,lsl #2)[4byte] +e55051ee : st1w z14.s, p4, [x15, x16, LSL #2] : st1w %z14.s %p4 -> (%x15,%x16,lsl #2)[4byte] +e5525630 : st1w z16.s, p5, [x17, x18, LSL #2] : st1w %z16.s %p5 -> (%x17,%x18,lsl #2)[4byte] +e5545671 : st1w z17.s, p5, [x19, x20, LSL #2] : st1w %z17.s %p5 -> (%x19,%x20,lsl #2)[4byte] +e55656b3 : st1w z19.s, p5, [x21, x22, LSL #2] : st1w %z19.s %p5 -> (%x21,%x22,lsl #2)[4byte] +e5585af5 : st1w z21.s, p6, [x23, x24, LSL #2] : st1w %z21.s %p6 -> (%x23,%x24,lsl #2)[4byte] +e5595b17 : st1w z23.s, p6, [x24, x25, LSL #2] : st1w %z23.s %p6 -> (%x24,%x25,lsl #2)[4byte] +e55b5f59 : st1w z25.s, p7, [x26, x27, LSL #2] : st1w %z25.s %p7 -> (%x26,%x27,lsl #2)[4byte] +e55d5f9b : st1w z27.s, p7, [x28, x29, LSL #2] : st1w %z27.s %p7 -> (%x28,%x29,lsl #2)[4byte] +e55e5fff : st1w z31.s, p7, [sp, x30, LSL #2] : st1w %z31.s %p7 -> (%sp,%x30,lsl #2)[4byte] +e5604000 : st1w z0.d, p0, [x0, x0, LSL #2] : st1w %z0.d %p0 -> (%x0,%x0,lsl #2)[4byte] +e5654482 : st1w z2.d, p1, [x4, x5, LSL #2] : st1w %z2.d %p1 -> (%x4,%x5,lsl #2)[4byte] +e56748c4 : st1w z4.d, p2, [x6, x7, LSL #2] : st1w %z4.d %p2 -> (%x6,%x7,lsl #2)[4byte] +e5694906 : st1w z6.d, p2, [x8, x9, LSL #2] : st1w %z6.d %p2 -> (%x8,%x9,lsl #2)[4byte] +e56b4d48 : st1w z8.d, p3, [x10, x11, LSL #2] : st1w %z8.d %p3 -> (%x10,%x11,lsl #2)[4byte] +e56c4d6a : st1w z10.d, p3, [x11, x12, LSL #2] : st1w %z10.d %p3 -> (%x11,%x12,lsl #2)[4byte] +e56e51ac : st1w z12.d, p4, [x13, x14, LSL #2] : st1w %z12.d %p4 -> (%x13,%x14,lsl #2)[4byte] +e57051ee : st1w z14.d, p4, [x15, x16, LSL #2] : st1w %z14.d %p4 -> (%x15,%x16,lsl #2)[4byte] +e5725630 : st1w z16.d, p5, [x17, x18, LSL #2] : st1w %z16.d %p5 -> (%x17,%x18,lsl #2)[4byte] +e5745671 : st1w z17.d, p5, [x19, x20, LSL #2] : st1w 
%z17.d %p5 -> (%x19,%x20,lsl #2)[4byte] +e57656b3 : st1w z19.d, p5, [x21, x22, LSL #2] : st1w %z19.d %p5 -> (%x21,%x22,lsl #2)[4byte] +e5785af5 : st1w z21.d, p6, [x23, x24, LSL #2] : st1w %z21.d %p6 -> (%x23,%x24,lsl #2)[4byte] +e5795b17 : st1w z23.d, p6, [x24, x25, LSL #2] : st1w %z23.d %p6 -> (%x24,%x25,lsl #2)[4byte] +e57b5f59 : st1w z25.d, p7, [x26, x27, LSL #2] : st1w %z25.d %p7 -> (%x26,%x27,lsl #2)[4byte] +e57d5f9b : st1w z27.d, p7, [x28, x29, LSL #2] : st1w %z27.d %p7 -> (%x28,%x29,lsl #2)[4byte] +e57e5fff : st1w z31.d, p7, [sp, x30, LSL #2] : st1w %z31.d %p7 -> (%sp,%x30,lsl #2)[4byte] # ST1W { .S }, , [, .S, ] (ST1W-Z.P.BZ-S.x32.unscaled) -e5408000 : st1w z0.s, p0, [x0, z0.s, UXTW] : st1w %z0.s %p0 -> (%x0,%z0.s,uxtw)[32byte] -e5458482 : st1w z2.s, p1, [x4, z5.s, UXTW] : st1w %z2.s %p1 -> (%x4,%z5.s,uxtw)[32byte] -e54788c4 : st1w z4.s, p2, [x6, z7.s, UXTW] : st1w %z4.s %p2 -> (%x6,%z7.s,uxtw)[32byte] -e5498906 : st1w z6.s, p2, [x8, z9.s, UXTW] : st1w %z6.s %p2 -> (%x8,%z9.s,uxtw)[32byte] -e54b8d48 : st1w z8.s, p3, [x10, z11.s, UXTW] : st1w %z8.s %p3 -> (%x10,%z11.s,uxtw)[32byte] -e54d8d6a : st1w z10.s, p3, [x11, z13.s, UXTW] : st1w %z10.s %p3 -> (%x11,%z13.s,uxtw)[32byte] -e54f91ac : st1w z12.s, p4, [x13, z15.s, UXTW] : st1w %z12.s %p4 -> (%x13,%z15.s,uxtw)[32byte] -e55191ee : st1w z14.s, p4, [x15, z17.s, UXTW] : st1w %z14.s %p4 -> (%x15,%z17.s,uxtw)[32byte] -e5539630 : st1w z16.s, p5, [x17, z19.s, UXTW] : st1w %z16.s %p5 -> (%x17,%z19.s,uxtw)[32byte] -e5549671 : st1w z17.s, p5, [x19, z20.s, UXTW] : st1w %z17.s %p5 -> (%x19,%z20.s,uxtw)[32byte] -e55696b3 : st1w z19.s, p5, [x21, z22.s, UXTW] : st1w %z19.s %p5 -> (%x21,%z22.s,uxtw)[32byte] -e5589af5 : st1w z21.s, p6, [x23, z24.s, UXTW] : st1w %z21.s %p6 -> (%x23,%z24.s,uxtw)[32byte] -e55a9b17 : st1w z23.s, p6, [x24, z26.s, UXTW] : st1w %z23.s %p6 -> (%x24,%z26.s,uxtw)[32byte] -e55c9f59 : st1w z25.s, p7, [x26, z28.s, UXTW] : st1w %z25.s %p7 -> (%x26,%z28.s,uxtw)[32byte] -e55e9f9b : st1w z27.s, p7, [x28, 
z30.s, UXTW] : st1w %z27.s %p7 -> (%x28,%z30.s,uxtw)[32byte] -e55f9fff : st1w z31.s, p7, [sp, z31.s, UXTW] : st1w %z31.s %p7 -> (%sp,%z31.s,uxtw)[32byte] -e540c000 : st1w z0.s, p0, [x0, z0.s, SXTW] : st1w %z0.s %p0 -> (%x0,%z0.s,sxtw)[32byte] -e545c482 : st1w z2.s, p1, [x4, z5.s, SXTW] : st1w %z2.s %p1 -> (%x4,%z5.s,sxtw)[32byte] -e547c8c4 : st1w z4.s, p2, [x6, z7.s, SXTW] : st1w %z4.s %p2 -> (%x6,%z7.s,sxtw)[32byte] -e549c906 : st1w z6.s, p2, [x8, z9.s, SXTW] : st1w %z6.s %p2 -> (%x8,%z9.s,sxtw)[32byte] -e54bcd48 : st1w z8.s, p3, [x10, z11.s, SXTW] : st1w %z8.s %p3 -> (%x10,%z11.s,sxtw)[32byte] -e54dcd6a : st1w z10.s, p3, [x11, z13.s, SXTW] : st1w %z10.s %p3 -> (%x11,%z13.s,sxtw)[32byte] -e54fd1ac : st1w z12.s, p4, [x13, z15.s, SXTW] : st1w %z12.s %p4 -> (%x13,%z15.s,sxtw)[32byte] -e551d1ee : st1w z14.s, p4, [x15, z17.s, SXTW] : st1w %z14.s %p4 -> (%x15,%z17.s,sxtw)[32byte] -e553d630 : st1w z16.s, p5, [x17, z19.s, SXTW] : st1w %z16.s %p5 -> (%x17,%z19.s,sxtw)[32byte] -e554d671 : st1w z17.s, p5, [x19, z20.s, SXTW] : st1w %z17.s %p5 -> (%x19,%z20.s,sxtw)[32byte] -e556d6b3 : st1w z19.s, p5, [x21, z22.s, SXTW] : st1w %z19.s %p5 -> (%x21,%z22.s,sxtw)[32byte] -e558daf5 : st1w z21.s, p6, [x23, z24.s, SXTW] : st1w %z21.s %p6 -> (%x23,%z24.s,sxtw)[32byte] -e55adb17 : st1w z23.s, p6, [x24, z26.s, SXTW] : st1w %z23.s %p6 -> (%x24,%z26.s,sxtw)[32byte] -e55cdf59 : st1w z25.s, p7, [x26, z28.s, SXTW] : st1w %z25.s %p7 -> (%x26,%z28.s,sxtw)[32byte] -e55edf9b : st1w z27.s, p7, [x28, z30.s, SXTW] : st1w %z27.s %p7 -> (%x28,%z30.s,sxtw)[32byte] -e55fdfff : st1w z31.s, p7, [sp, z31.s, SXTW] : st1w %z31.s %p7 -> (%sp,%z31.s,sxtw)[32byte] +e5408000 : st1w z0.s, p0, [x0, z0.s, UXTW] : st1w %z0.s %p0 -> (%x0,%z0.s,uxtw)[4byte] +e5458482 : st1w z2.s, p1, [x4, z5.s, UXTW] : st1w %z2.s %p1 -> (%x4,%z5.s,uxtw)[4byte] +e54788c4 : st1w z4.s, p2, [x6, z7.s, UXTW] : st1w %z4.s %p2 -> (%x6,%z7.s,uxtw)[4byte] +e5498906 : st1w z6.s, p2, [x8, z9.s, UXTW] : st1w %z6.s %p2 -> (%x8,%z9.s,uxtw)[4byte] 
+e54b8d48 : st1w z8.s, p3, [x10, z11.s, UXTW] : st1w %z8.s %p3 -> (%x10,%z11.s,uxtw)[4byte] +e54d8d6a : st1w z10.s, p3, [x11, z13.s, UXTW] : st1w %z10.s %p3 -> (%x11,%z13.s,uxtw)[4byte] +e54f91ac : st1w z12.s, p4, [x13, z15.s, UXTW] : st1w %z12.s %p4 -> (%x13,%z15.s,uxtw)[4byte] +e55191ee : st1w z14.s, p4, [x15, z17.s, UXTW] : st1w %z14.s %p4 -> (%x15,%z17.s,uxtw)[4byte] +e5539630 : st1w z16.s, p5, [x17, z19.s, UXTW] : st1w %z16.s %p5 -> (%x17,%z19.s,uxtw)[4byte] +e5549671 : st1w z17.s, p5, [x19, z20.s, UXTW] : st1w %z17.s %p5 -> (%x19,%z20.s,uxtw)[4byte] +e55696b3 : st1w z19.s, p5, [x21, z22.s, UXTW] : st1w %z19.s %p5 -> (%x21,%z22.s,uxtw)[4byte] +e5589af5 : st1w z21.s, p6, [x23, z24.s, UXTW] : st1w %z21.s %p6 -> (%x23,%z24.s,uxtw)[4byte] +e55a9b17 : st1w z23.s, p6, [x24, z26.s, UXTW] : st1w %z23.s %p6 -> (%x24,%z26.s,uxtw)[4byte] +e55c9f59 : st1w z25.s, p7, [x26, z28.s, UXTW] : st1w %z25.s %p7 -> (%x26,%z28.s,uxtw)[4byte] +e55e9f9b : st1w z27.s, p7, [x28, z30.s, UXTW] : st1w %z27.s %p7 -> (%x28,%z30.s,uxtw)[4byte] +e55f9fff : st1w z31.s, p7, [sp, z31.s, UXTW] : st1w %z31.s %p7 -> (%sp,%z31.s,uxtw)[4byte] +e540c000 : st1w z0.s, p0, [x0, z0.s, SXTW] : st1w %z0.s %p0 -> (%x0,%z0.s,sxtw)[4byte] +e545c482 : st1w z2.s, p1, [x4, z5.s, SXTW] : st1w %z2.s %p1 -> (%x4,%z5.s,sxtw)[4byte] +e547c8c4 : st1w z4.s, p2, [x6, z7.s, SXTW] : st1w %z4.s %p2 -> (%x6,%z7.s,sxtw)[4byte] +e549c906 : st1w z6.s, p2, [x8, z9.s, SXTW] : st1w %z6.s %p2 -> (%x8,%z9.s,sxtw)[4byte] +e54bcd48 : st1w z8.s, p3, [x10, z11.s, SXTW] : st1w %z8.s %p3 -> (%x10,%z11.s,sxtw)[4byte] +e54dcd6a : st1w z10.s, p3, [x11, z13.s, SXTW] : st1w %z10.s %p3 -> (%x11,%z13.s,sxtw)[4byte] +e54fd1ac : st1w z12.s, p4, [x13, z15.s, SXTW] : st1w %z12.s %p4 -> (%x13,%z15.s,sxtw)[4byte] +e551d1ee : st1w z14.s, p4, [x15, z17.s, SXTW] : st1w %z14.s %p4 -> (%x15,%z17.s,sxtw)[4byte] +e553d630 : st1w z16.s, p5, [x17, z19.s, SXTW] : st1w %z16.s %p5 -> (%x17,%z19.s,sxtw)[4byte] +e554d671 : st1w z17.s, p5, [x19, z20.s, SXTW] : st1w 
%z17.s %p5 -> (%x19,%z20.s,sxtw)[4byte] +e556d6b3 : st1w z19.s, p5, [x21, z22.s, SXTW] : st1w %z19.s %p5 -> (%x21,%z22.s,sxtw)[4byte] +e558daf5 : st1w z21.s, p6, [x23, z24.s, SXTW] : st1w %z21.s %p6 -> (%x23,%z24.s,sxtw)[4byte] +e55adb17 : st1w z23.s, p6, [x24, z26.s, SXTW] : st1w %z23.s %p6 -> (%x24,%z26.s,sxtw)[4byte] +e55cdf59 : st1w z25.s, p7, [x26, z28.s, SXTW] : st1w %z25.s %p7 -> (%x26,%z28.s,sxtw)[4byte] +e55edf9b : st1w z27.s, p7, [x28, z30.s, SXTW] : st1w %z27.s %p7 -> (%x28,%z30.s,sxtw)[4byte] +e55fdfff : st1w z31.s, p7, [sp, z31.s, SXTW] : st1w %z31.s %p7 -> (%sp,%z31.s,sxtw)[4byte] # ST1W { .D }, , [.D{, #}] (ST1W-Z.P.AI-D) -e540a000 : st1w z0.d, p0, [z0.d, #0] : st1w %z0.d %p0 -> (%z0.d)[16byte] -e542a482 : st1w z2.d, p1, [z4.d, #8] : st1w %z2.d %p1 -> +0x08(%z4.d)[16byte] -e544a8c4 : st1w z4.d, p2, [z6.d, #16] : st1w %z4.d %p2 -> +0x10(%z6.d)[16byte] -e546a906 : st1w z6.d, p2, [z8.d, #24] : st1w %z6.d %p2 -> +0x18(%z8.d)[16byte] -e548ad48 : st1w z8.d, p3, [z10.d, #32] : st1w %z8.d %p3 -> +0x20(%z10.d)[16byte] -e54aad8a : st1w z10.d, p3, [z12.d, #40] : st1w %z10.d %p3 -> +0x28(%z12.d)[16byte] -e54cb1cc : st1w z12.d, p4, [z14.d, #48] : st1w %z12.d %p4 -> +0x30(%z14.d)[16byte] -e54eb20e : st1w z14.d, p4, [z16.d, #56] : st1w %z14.d %p4 -> +0x38(%z16.d)[16byte] -e550b650 : st1w z16.d, p5, [z18.d, #64] : st1w %z16.d %p5 -> +0x40(%z18.d)[16byte] -e551b671 : st1w z17.d, p5, [z19.d, #68] : st1w %z17.d %p5 -> +0x44(%z19.d)[16byte] -e553b6b3 : st1w z19.d, p5, [z21.d, #76] : st1w %z19.d %p5 -> +0x4c(%z21.d)[16byte] -e555baf5 : st1w z21.d, p6, [z23.d, #84] : st1w %z21.d %p6 -> +0x54(%z23.d)[16byte] -e557bb37 : st1w z23.d, p6, [z25.d, #92] : st1w %z23.d %p6 -> +0x5c(%z25.d)[16byte] -e559bf79 : st1w z25.d, p7, [z27.d, #100] : st1w %z25.d %p7 -> +0x64(%z27.d)[16byte] -e55bbfbb : st1w z27.d, p7, [z29.d, #108] : st1w %z27.d %p7 -> +0x6c(%z29.d)[16byte] -e55fbfff : st1w z31.d, p7, [z31.d, #124] : st1w %z31.d %p7 -> +0x7c(%z31.d)[16byte] +e540a000 : st1w z0.d, p0, 
[z0.d, #0] : st1w %z0.d %p0 -> (%z0.d)[4byte] +e542a482 : st1w z2.d, p1, [z4.d, #8] : st1w %z2.d %p1 -> +0x08(%z4.d)[4byte] +e544a8c4 : st1w z4.d, p2, [z6.d, #16] : st1w %z4.d %p2 -> +0x10(%z6.d)[4byte] +e546a906 : st1w z6.d, p2, [z8.d, #24] : st1w %z6.d %p2 -> +0x18(%z8.d)[4byte] +e548ad48 : st1w z8.d, p3, [z10.d, #32] : st1w %z8.d %p3 -> +0x20(%z10.d)[4byte] +e54aad8a : st1w z10.d, p3, [z12.d, #40] : st1w %z10.d %p3 -> +0x28(%z12.d)[4byte] +e54cb1cc : st1w z12.d, p4, [z14.d, #48] : st1w %z12.d %p4 -> +0x30(%z14.d)[4byte] +e54eb20e : st1w z14.d, p4, [z16.d, #56] : st1w %z14.d %p4 -> +0x38(%z16.d)[4byte] +e550b650 : st1w z16.d, p5, [z18.d, #64] : st1w %z16.d %p5 -> +0x40(%z18.d)[4byte] +e551b671 : st1w z17.d, p5, [z19.d, #68] : st1w %z17.d %p5 -> +0x44(%z19.d)[4byte] +e553b6b3 : st1w z19.d, p5, [z21.d, #76] : st1w %z19.d %p5 -> +0x4c(%z21.d)[4byte] +e555baf5 : st1w z21.d, p6, [z23.d, #84] : st1w %z21.d %p6 -> +0x54(%z23.d)[4byte] +e557bb37 : st1w z23.d, p6, [z25.d, #92] : st1w %z23.d %p6 -> +0x5c(%z25.d)[4byte] +e559bf79 : st1w z25.d, p7, [z27.d, #100] : st1w %z25.d %p7 -> +0x64(%z27.d)[4byte] +e55bbfbb : st1w z27.d, p7, [z29.d, #108] : st1w %z27.d %p7 -> +0x6c(%z29.d)[4byte] +e55fbfff : st1w z31.d, p7, [z31.d, #124] : st1w %z31.d %p7 -> +0x7c(%z31.d)[4byte] # ST1W { . 
}, , [{, #, MUL VL}] (ST1W-Z.P.BI-_) -e548e000 : st1w z0.s, p0, [x0, #-8, MUL VL] : st1w %z0.s %p0 -> -0x0100(%x0)[32byte] -e549e482 : st1w z2.s, p1, [x4, #-7, MUL VL] : st1w %z2.s %p1 -> -0xe0(%x4)[32byte] -e54ae8c4 : st1w z4.s, p2, [x6, #-6, MUL VL] : st1w %z4.s %p2 -> -0xc0(%x6)[32byte] -e54be906 : st1w z6.s, p2, [x8, #-5, MUL VL] : st1w %z6.s %p2 -> -0xa0(%x8)[32byte] -e54ced48 : st1w z8.s, p3, [x10, #-4, MUL VL] : st1w %z8.s %p3 -> -0x80(%x10)[32byte] -e54ded6a : st1w z10.s, p3, [x11, #-3, MUL VL] : st1w %z10.s %p3 -> -0x60(%x11)[32byte] -e54ef1ac : st1w z12.s, p4, [x13, #-2, MUL VL] : st1w %z12.s %p4 -> -0x40(%x13)[32byte] -e54ff1ee : st1w z14.s, p4, [x15, #-1, MUL VL] : st1w %z14.s %p4 -> -0x20(%x15)[32byte] -e540f630 : st1w z16.s, p5, [x17, #0, MUL VL] : st1w %z16.s %p5 -> (%x17)[32byte] -e540f671 : st1w z17.s, p5, [x19, #0, MUL VL] : st1w %z17.s %p5 -> (%x19)[32byte] -e541f6b3 : st1w z19.s, p5, [x21, #1, MUL VL] : st1w %z19.s %p5 -> +0x20(%x21)[32byte] -e542faf5 : st1w z21.s, p6, [x23, #2, MUL VL] : st1w %z21.s %p6 -> +0x40(%x23)[32byte] -e543fb17 : st1w z23.s, p6, [x24, #3, MUL VL] : st1w %z23.s %p6 -> +0x60(%x24)[32byte] -e544ff59 : st1w z25.s, p7, [x26, #4, MUL VL] : st1w %z25.s %p7 -> +0x80(%x26)[32byte] -e545ff9b : st1w z27.s, p7, [x28, #5, MUL VL] : st1w %z27.s %p7 -> +0xa0(%x28)[32byte] -e547ffff : st1w z31.s, p7, [sp, #7, MUL VL] : st1w %z31.s %p7 -> +0xe0(%sp)[32byte] -e568e000 : st1w z0.d, p0, [x0, #-8, MUL VL] : st1w %z0.d %p0 -> -0x80(%x0)[16byte] -e569e482 : st1w z2.d, p1, [x4, #-7, MUL VL] : st1w %z2.d %p1 -> -0x70(%x4)[16byte] -e56ae8c4 : st1w z4.d, p2, [x6, #-6, MUL VL] : st1w %z4.d %p2 -> -0x60(%x6)[16byte] -e56be906 : st1w z6.d, p2, [x8, #-5, MUL VL] : st1w %z6.d %p2 -> -0x50(%x8)[16byte] -e56ced48 : st1w z8.d, p3, [x10, #-4, MUL VL] : st1w %z8.d %p3 -> -0x40(%x10)[16byte] -e56ded6a : st1w z10.d, p3, [x11, #-3, MUL VL] : st1w %z10.d %p3 -> -0x30(%x11)[16byte] -e56ef1ac : st1w z12.d, p4, [x13, #-2, MUL VL] : st1w %z12.d %p4 -> 
-0x20(%x13)[16byte] -e56ff1ee : st1w z14.d, p4, [x15, #-1, MUL VL] : st1w %z14.d %p4 -> -0x10(%x15)[16byte] -e560f630 : st1w z16.d, p5, [x17, #0, MUL VL] : st1w %z16.d %p5 -> (%x17)[16byte] -e560f671 : st1w z17.d, p5, [x19, #0, MUL VL] : st1w %z17.d %p5 -> (%x19)[16byte] -e561f6b3 : st1w z19.d, p5, [x21, #1, MUL VL] : st1w %z19.d %p5 -> +0x10(%x21)[16byte] -e562faf5 : st1w z21.d, p6, [x23, #2, MUL VL] : st1w %z21.d %p6 -> +0x20(%x23)[16byte] -e563fb17 : st1w z23.d, p6, [x24, #3, MUL VL] : st1w %z23.d %p6 -> +0x30(%x24)[16byte] -e564ff59 : st1w z25.d, p7, [x26, #4, MUL VL] : st1w %z25.d %p7 -> +0x40(%x26)[16byte] -e565ff9b : st1w z27.d, p7, [x28, #5, MUL VL] : st1w %z27.d %p7 -> +0x50(%x28)[16byte] -e567ffff : st1w z31.d, p7, [sp, #7, MUL VL] : st1w %z31.d %p7 -> +0x70(%sp)[16byte] +e548e000 : st1w z0.s, p0, [x0, #-8, MUL VL] : st1w %z0.s %p0 -> -0x0100(%x0)[4byte] +e549e482 : st1w z2.s, p1, [x4, #-7, MUL VL] : st1w %z2.s %p1 -> -0xe0(%x4)[4byte] +e54ae8c4 : st1w z4.s, p2, [x6, #-6, MUL VL] : st1w %z4.s %p2 -> -0xc0(%x6)[4byte] +e54be906 : st1w z6.s, p2, [x8, #-5, MUL VL] : st1w %z6.s %p2 -> -0xa0(%x8)[4byte] +e54ced48 : st1w z8.s, p3, [x10, #-4, MUL VL] : st1w %z8.s %p3 -> -0x80(%x10)[4byte] +e54ded6a : st1w z10.s, p3, [x11, #-3, MUL VL] : st1w %z10.s %p3 -> -0x60(%x11)[4byte] +e54ef1ac : st1w z12.s, p4, [x13, #-2, MUL VL] : st1w %z12.s %p4 -> -0x40(%x13)[4byte] +e54ff1ee : st1w z14.s, p4, [x15, #-1, MUL VL] : st1w %z14.s %p4 -> -0x20(%x15)[4byte] +e540f630 : st1w z16.s, p5, [x17, #0, MUL VL] : st1w %z16.s %p5 -> (%x17)[4byte] +e540f671 : st1w z17.s, p5, [x19, #0, MUL VL] : st1w %z17.s %p5 -> (%x19)[4byte] +e541f6b3 : st1w z19.s, p5, [x21, #1, MUL VL] : st1w %z19.s %p5 -> +0x20(%x21)[4byte] +e542faf5 : st1w z21.s, p6, [x23, #2, MUL VL] : st1w %z21.s %p6 -> +0x40(%x23)[4byte] +e543fb17 : st1w z23.s, p6, [x24, #3, MUL VL] : st1w %z23.s %p6 -> +0x60(%x24)[4byte] +e544ff59 : st1w z25.s, p7, [x26, #4, MUL VL] : st1w %z25.s %p7 -> +0x80(%x26)[4byte] +e545ff9b : st1w 
z27.s, p7, [x28, #5, MUL VL] : st1w %z27.s %p7 -> +0xa0(%x28)[4byte] +e547ffff : st1w z31.s, p7, [sp, #7, MUL VL] : st1w %z31.s %p7 -> +0xe0(%sp)[4byte] +e568e000 : st1w z0.d, p0, [x0, #-8, MUL VL] : st1w %z0.d %p0 -> -0x80(%x0)[4byte] +e569e482 : st1w z2.d, p1, [x4, #-7, MUL VL] : st1w %z2.d %p1 -> -0x70(%x4)[4byte] +e56ae8c4 : st1w z4.d, p2, [x6, #-6, MUL VL] : st1w %z4.d %p2 -> -0x60(%x6)[4byte] +e56be906 : st1w z6.d, p2, [x8, #-5, MUL VL] : st1w %z6.d %p2 -> -0x50(%x8)[4byte] +e56ced48 : st1w z8.d, p3, [x10, #-4, MUL VL] : st1w %z8.d %p3 -> -0x40(%x10)[4byte] +e56ded6a : st1w z10.d, p3, [x11, #-3, MUL VL] : st1w %z10.d %p3 -> -0x30(%x11)[4byte] +e56ef1ac : st1w z12.d, p4, [x13, #-2, MUL VL] : st1w %z12.d %p4 -> -0x20(%x13)[4byte] +e56ff1ee : st1w z14.d, p4, [x15, #-1, MUL VL] : st1w %z14.d %p4 -> -0x10(%x15)[4byte] +e560f630 : st1w z16.d, p5, [x17, #0, MUL VL] : st1w %z16.d %p5 -> (%x17)[4byte] +e560f671 : st1w z17.d, p5, [x19, #0, MUL VL] : st1w %z17.d %p5 -> (%x19)[4byte] +e561f6b3 : st1w z19.d, p5, [x21, #1, MUL VL] : st1w %z19.d %p5 -> +0x10(%x21)[4byte] +e562faf5 : st1w z21.d, p6, [x23, #2, MUL VL] : st1w %z21.d %p6 -> +0x20(%x23)[4byte] +e563fb17 : st1w z23.d, p6, [x24, #3, MUL VL] : st1w %z23.d %p6 -> +0x30(%x24)[4byte] +e564ff59 : st1w z25.d, p7, [x26, #4, MUL VL] : st1w %z25.d %p7 -> +0x40(%x26)[4byte] +e565ff9b : st1w z27.d, p7, [x28, #5, MUL VL] : st1w %z27.d %p7 -> +0x50(%x28)[4byte] +e567ffff : st1w z31.d, p7, [sp, #7, MUL VL] : st1w %z31.d %p7 -> +0x70(%sp)[4byte] # ST1W { .S }, , [, .S, #2] (ST1W-Z.P.BZ-S.x32.scaled) -e5608000 : st1w z0.s, p0, [x0, z0.s, UXTW #2] : st1w %z0.s %p0 -> (%x0,%z0.s,uxtw #2)[32byte] -e5658482 : st1w z2.s, p1, [x4, z5.s, UXTW #2] : st1w %z2.s %p1 -> (%x4,%z5.s,uxtw #2)[32byte] -e56788c4 : st1w z4.s, p2, [x6, z7.s, UXTW #2] : st1w %z4.s %p2 -> (%x6,%z7.s,uxtw #2)[32byte] -e5698906 : st1w z6.s, p2, [x8, z9.s, UXTW #2] : st1w %z6.s %p2 -> (%x8,%z9.s,uxtw #2)[32byte] -e56b8d48 : st1w z8.s, p3, [x10, z11.s, UXTW #2] : st1w 
%z8.s %p3 -> (%x10,%z11.s,uxtw #2)[32byte] -e56d8d6a : st1w z10.s, p3, [x11, z13.s, UXTW #2] : st1w %z10.s %p3 -> (%x11,%z13.s,uxtw #2)[32byte] -e56f91ac : st1w z12.s, p4, [x13, z15.s, UXTW #2] : st1w %z12.s %p4 -> (%x13,%z15.s,uxtw #2)[32byte] -e57191ee : st1w z14.s, p4, [x15, z17.s, UXTW #2] : st1w %z14.s %p4 -> (%x15,%z17.s,uxtw #2)[32byte] -e5739630 : st1w z16.s, p5, [x17, z19.s, UXTW #2] : st1w %z16.s %p5 -> (%x17,%z19.s,uxtw #2)[32byte] -e5749671 : st1w z17.s, p5, [x19, z20.s, UXTW #2] : st1w %z17.s %p5 -> (%x19,%z20.s,uxtw #2)[32byte] -e57696b3 : st1w z19.s, p5, [x21, z22.s, UXTW #2] : st1w %z19.s %p5 -> (%x21,%z22.s,uxtw #2)[32byte] -e5789af5 : st1w z21.s, p6, [x23, z24.s, UXTW #2] : st1w %z21.s %p6 -> (%x23,%z24.s,uxtw #2)[32byte] -e57a9b17 : st1w z23.s, p6, [x24, z26.s, UXTW #2] : st1w %z23.s %p6 -> (%x24,%z26.s,uxtw #2)[32byte] -e57c9f59 : st1w z25.s, p7, [x26, z28.s, UXTW #2] : st1w %z25.s %p7 -> (%x26,%z28.s,uxtw #2)[32byte] -e57e9f9b : st1w z27.s, p7, [x28, z30.s, UXTW #2] : st1w %z27.s %p7 -> (%x28,%z30.s,uxtw #2)[32byte] -e57f9fff : st1w z31.s, p7, [sp, z31.s, UXTW #2] : st1w %z31.s %p7 -> (%sp,%z31.s,uxtw #2)[32byte] -e560c000 : st1w z0.s, p0, [x0, z0.s, SXTW #2] : st1w %z0.s %p0 -> (%x0,%z0.s,sxtw #2)[32byte] -e565c482 : st1w z2.s, p1, [x4, z5.s, SXTW #2] : st1w %z2.s %p1 -> (%x4,%z5.s,sxtw #2)[32byte] -e567c8c4 : st1w z4.s, p2, [x6, z7.s, SXTW #2] : st1w %z4.s %p2 -> (%x6,%z7.s,sxtw #2)[32byte] -e569c906 : st1w z6.s, p2, [x8, z9.s, SXTW #2] : st1w %z6.s %p2 -> (%x8,%z9.s,sxtw #2)[32byte] -e56bcd48 : st1w z8.s, p3, [x10, z11.s, SXTW #2] : st1w %z8.s %p3 -> (%x10,%z11.s,sxtw #2)[32byte] -e56dcd6a : st1w z10.s, p3, [x11, z13.s, SXTW #2] : st1w %z10.s %p3 -> (%x11,%z13.s,sxtw #2)[32byte] -e56fd1ac : st1w z12.s, p4, [x13, z15.s, SXTW #2] : st1w %z12.s %p4 -> (%x13,%z15.s,sxtw #2)[32byte] -e571d1ee : st1w z14.s, p4, [x15, z17.s, SXTW #2] : st1w %z14.s %p4 -> (%x15,%z17.s,sxtw #2)[32byte] -e573d630 : st1w z16.s, p5, [x17, z19.s, SXTW #2] : st1w %z16.s 
%p5 -> (%x17,%z19.s,sxtw #2)[32byte] -e574d671 : st1w z17.s, p5, [x19, z20.s, SXTW #2] : st1w %z17.s %p5 -> (%x19,%z20.s,sxtw #2)[32byte] -e576d6b3 : st1w z19.s, p5, [x21, z22.s, SXTW #2] : st1w %z19.s %p5 -> (%x21,%z22.s,sxtw #2)[32byte] -e578daf5 : st1w z21.s, p6, [x23, z24.s, SXTW #2] : st1w %z21.s %p6 -> (%x23,%z24.s,sxtw #2)[32byte] -e57adb17 : st1w z23.s, p6, [x24, z26.s, SXTW #2] : st1w %z23.s %p6 -> (%x24,%z26.s,sxtw #2)[32byte] -e57cdf59 : st1w z25.s, p7, [x26, z28.s, SXTW #2] : st1w %z25.s %p7 -> (%x26,%z28.s,sxtw #2)[32byte] -e57edf9b : st1w z27.s, p7, [x28, z30.s, SXTW #2] : st1w %z27.s %p7 -> (%x28,%z30.s,sxtw #2)[32byte] -e57fdfff : st1w z31.s, p7, [sp, z31.s, SXTW #2] : st1w %z31.s %p7 -> (%sp,%z31.s,sxtw #2)[32byte] +e5608000 : st1w z0.s, p0, [x0, z0.s, UXTW #2] : st1w %z0.s %p0 -> (%x0,%z0.s,uxtw #2)[4byte] +e5658482 : st1w z2.s, p1, [x4, z5.s, UXTW #2] : st1w %z2.s %p1 -> (%x4,%z5.s,uxtw #2)[4byte] +e56788c4 : st1w z4.s, p2, [x6, z7.s, UXTW #2] : st1w %z4.s %p2 -> (%x6,%z7.s,uxtw #2)[4byte] +e5698906 : st1w z6.s, p2, [x8, z9.s, UXTW #2] : st1w %z6.s %p2 -> (%x8,%z9.s,uxtw #2)[4byte] +e56b8d48 : st1w z8.s, p3, [x10, z11.s, UXTW #2] : st1w %z8.s %p3 -> (%x10,%z11.s,uxtw #2)[4byte] +e56d8d6a : st1w z10.s, p3, [x11, z13.s, UXTW #2] : st1w %z10.s %p3 -> (%x11,%z13.s,uxtw #2)[4byte] +e56f91ac : st1w z12.s, p4, [x13, z15.s, UXTW #2] : st1w %z12.s %p4 -> (%x13,%z15.s,uxtw #2)[4byte] +e57191ee : st1w z14.s, p4, [x15, z17.s, UXTW #2] : st1w %z14.s %p4 -> (%x15,%z17.s,uxtw #2)[4byte] +e5739630 : st1w z16.s, p5, [x17, z19.s, UXTW #2] : st1w %z16.s %p5 -> (%x17,%z19.s,uxtw #2)[4byte] +e5749671 : st1w z17.s, p5, [x19, z20.s, UXTW #2] : st1w %z17.s %p5 -> (%x19,%z20.s,uxtw #2)[4byte] +e57696b3 : st1w z19.s, p5, [x21, z22.s, UXTW #2] : st1w %z19.s %p5 -> (%x21,%z22.s,uxtw #2)[4byte] +e5789af5 : st1w z21.s, p6, [x23, z24.s, UXTW #2] : st1w %z21.s %p6 -> (%x23,%z24.s,uxtw #2)[4byte] +e57a9b17 : st1w z23.s, p6, [x24, z26.s, UXTW #2] : st1w %z23.s %p6 -> 
(%x24,%z26.s,uxtw #2)[4byte] +e57c9f59 : st1w z25.s, p7, [x26, z28.s, UXTW #2] : st1w %z25.s %p7 -> (%x26,%z28.s,uxtw #2)[4byte] +e57e9f9b : st1w z27.s, p7, [x28, z30.s, UXTW #2] : st1w %z27.s %p7 -> (%x28,%z30.s,uxtw #2)[4byte] +e57f9fff : st1w z31.s, p7, [sp, z31.s, UXTW #2] : st1w %z31.s %p7 -> (%sp,%z31.s,uxtw #2)[4byte] +e560c000 : st1w z0.s, p0, [x0, z0.s, SXTW #2] : st1w %z0.s %p0 -> (%x0,%z0.s,sxtw #2)[4byte] +e565c482 : st1w z2.s, p1, [x4, z5.s, SXTW #2] : st1w %z2.s %p1 -> (%x4,%z5.s,sxtw #2)[4byte] +e567c8c4 : st1w z4.s, p2, [x6, z7.s, SXTW #2] : st1w %z4.s %p2 -> (%x6,%z7.s,sxtw #2)[4byte] +e569c906 : st1w z6.s, p2, [x8, z9.s, SXTW #2] : st1w %z6.s %p2 -> (%x8,%z9.s,sxtw #2)[4byte] +e56bcd48 : st1w z8.s, p3, [x10, z11.s, SXTW #2] : st1w %z8.s %p3 -> (%x10,%z11.s,sxtw #2)[4byte] +e56dcd6a : st1w z10.s, p3, [x11, z13.s, SXTW #2] : st1w %z10.s %p3 -> (%x11,%z13.s,sxtw #2)[4byte] +e56fd1ac : st1w z12.s, p4, [x13, z15.s, SXTW #2] : st1w %z12.s %p4 -> (%x13,%z15.s,sxtw #2)[4byte] +e571d1ee : st1w z14.s, p4, [x15, z17.s, SXTW #2] : st1w %z14.s %p4 -> (%x15,%z17.s,sxtw #2)[4byte] +e573d630 : st1w z16.s, p5, [x17, z19.s, SXTW #2] : st1w %z16.s %p5 -> (%x17,%z19.s,sxtw #2)[4byte] +e574d671 : st1w z17.s, p5, [x19, z20.s, SXTW #2] : st1w %z17.s %p5 -> (%x19,%z20.s,sxtw #2)[4byte] +e576d6b3 : st1w z19.s, p5, [x21, z22.s, SXTW #2] : st1w %z19.s %p5 -> (%x21,%z22.s,sxtw #2)[4byte] +e578daf5 : st1w z21.s, p6, [x23, z24.s, SXTW #2] : st1w %z21.s %p6 -> (%x23,%z24.s,sxtw #2)[4byte] +e57adb17 : st1w z23.s, p6, [x24, z26.s, SXTW #2] : st1w %z23.s %p6 -> (%x24,%z26.s,sxtw #2)[4byte] +e57cdf59 : st1w z25.s, p7, [x26, z28.s, SXTW #2] : st1w %z25.s %p7 -> (%x26,%z28.s,sxtw #2)[4byte] +e57edf9b : st1w z27.s, p7, [x28, z30.s, SXTW #2] : st1w %z27.s %p7 -> (%x28,%z30.s,sxtw #2)[4byte] +e57fdfff : st1w z31.s, p7, [sp, z31.s, SXTW #2] : st1w %z31.s %p7 -> (%sp,%z31.s,sxtw #2)[4byte] # ST1W { .S }, , [.S{, #}] (ST1W-Z.P.AI-S) -e560a000 : st1w z0.s, p0, [z0.s, #0] : st1w %z0.s %p0 -> 
(%z0.s)[32byte] -e562a482 : st1w z2.s, p1, [z4.s, #8] : st1w %z2.s %p1 -> +0x08(%z4.s)[32byte] -e564a8c4 : st1w z4.s, p2, [z6.s, #16] : st1w %z4.s %p2 -> +0x10(%z6.s)[32byte] -e566a906 : st1w z6.s, p2, [z8.s, #24] : st1w %z6.s %p2 -> +0x18(%z8.s)[32byte] -e568ad48 : st1w z8.s, p3, [z10.s, #32] : st1w %z8.s %p3 -> +0x20(%z10.s)[32byte] -e56aad8a : st1w z10.s, p3, [z12.s, #40] : st1w %z10.s %p3 -> +0x28(%z12.s)[32byte] -e56cb1cc : st1w z12.s, p4, [z14.s, #48] : st1w %z12.s %p4 -> +0x30(%z14.s)[32byte] -e56eb20e : st1w z14.s, p4, [z16.s, #56] : st1w %z14.s %p4 -> +0x38(%z16.s)[32byte] -e570b650 : st1w z16.s, p5, [z18.s, #64] : st1w %z16.s %p5 -> +0x40(%z18.s)[32byte] -e571b671 : st1w z17.s, p5, [z19.s, #68] : st1w %z17.s %p5 -> +0x44(%z19.s)[32byte] -e573b6b3 : st1w z19.s, p5, [z21.s, #76] : st1w %z19.s %p5 -> +0x4c(%z21.s)[32byte] -e575baf5 : st1w z21.s, p6, [z23.s, #84] : st1w %z21.s %p6 -> +0x54(%z23.s)[32byte] -e577bb37 : st1w z23.s, p6, [z25.s, #92] : st1w %z23.s %p6 -> +0x5c(%z25.s)[32byte] -e579bf79 : st1w z25.s, p7, [z27.s, #100] : st1w %z25.s %p7 -> +0x64(%z27.s)[32byte] -e57bbfbb : st1w z27.s, p7, [z29.s, #108] : st1w %z27.s %p7 -> +0x6c(%z29.s)[32byte] -e57fbfff : st1w z31.s, p7, [z31.s, #124] : st1w %z31.s %p7 -> +0x7c(%z31.s)[32byte] +e560a000 : st1w z0.s, p0, [z0.s, #0] : st1w %z0.s %p0 -> (%z0.s)[4byte] +e562a482 : st1w z2.s, p1, [z4.s, #8] : st1w %z2.s %p1 -> +0x08(%z4.s)[4byte] +e564a8c4 : st1w z4.s, p2, [z6.s, #16] : st1w %z4.s %p2 -> +0x10(%z6.s)[4byte] +e566a906 : st1w z6.s, p2, [z8.s, #24] : st1w %z6.s %p2 -> +0x18(%z8.s)[4byte] +e568ad48 : st1w z8.s, p3, [z10.s, #32] : st1w %z8.s %p3 -> +0x20(%z10.s)[4byte] +e56aad8a : st1w z10.s, p3, [z12.s, #40] : st1w %z10.s %p3 -> +0x28(%z12.s)[4byte] +e56cb1cc : st1w z12.s, p4, [z14.s, #48] : st1w %z12.s %p4 -> +0x30(%z14.s)[4byte] +e56eb20e : st1w z14.s, p4, [z16.s, #56] : st1w %z14.s %p4 -> +0x38(%z16.s)[4byte] +e570b650 : st1w z16.s, p5, [z18.s, #64] : st1w %z16.s %p5 -> +0x40(%z18.s)[4byte] +e571b671 : 
st1w z17.s, p5, [z19.s, #68] : st1w %z17.s %p5 -> +0x44(%z19.s)[4byte] +e573b6b3 : st1w z19.s, p5, [z21.s, #76] : st1w %z19.s %p5 -> +0x4c(%z21.s)[4byte] +e575baf5 : st1w z21.s, p6, [z23.s, #84] : st1w %z21.s %p6 -> +0x54(%z23.s)[4byte] +e577bb37 : st1w z23.s, p6, [z25.s, #92] : st1w %z23.s %p6 -> +0x5c(%z25.s)[4byte] +e579bf79 : st1w z25.s, p7, [z27.s, #100] : st1w %z25.s %p7 -> +0x64(%z27.s)[4byte] +e57bbfbb : st1w z27.s, p7, [z29.s, #108] : st1w %z27.s %p7 -> +0x6c(%z29.s)[4byte] +e57fbfff : st1w z31.s, p7, [z31.s, #124] : st1w %z31.s %p7 -> +0x7c(%z31.s)[4byte] # ST2B { .B, .B }, , [, ] (ST2B-Z.P.BR-Contiguous) -e4206000 : st2b {z0.b, z1.b}, p0, [x0, x0] : st2b %z0.b %z1.b %p0 -> (%x0,%x0)[64byte] -e4256482 : st2b {z2.b, z3.b}, p1, [x4, x5] : st2b %z2.b %z3.b %p1 -> (%x4,%x5)[64byte] -e42768c4 : st2b {z4.b, z5.b}, p2, [x6, x7] : st2b %z4.b %z5.b %p2 -> (%x6,%x7)[64byte] -e4296906 : st2b {z6.b, z7.b}, p2, [x8, x9] : st2b %z6.b %z7.b %p2 -> (%x8,%x9)[64byte] -e42b6d48 : st2b {z8.b, z9.b}, p3, [x10, x11] : st2b %z8.b %z9.b %p3 -> (%x10,%x11)[64byte] -e42c6d6a : st2b {z10.b, z11.b}, p3, [x11, x12] : st2b %z10.b %z11.b %p3 -> (%x11,%x12)[64byte] -e42e71ac : st2b {z12.b, z13.b}, p4, [x13, x14] : st2b %z12.b %z13.b %p4 -> (%x13,%x14)[64byte] -e43071ee : st2b {z14.b, z15.b}, p4, [x15, x16] : st2b %z14.b %z15.b %p4 -> (%x15,%x16)[64byte] -e4327630 : st2b {z16.b, z17.b}, p5, [x17, x18] : st2b %z16.b %z17.b %p5 -> (%x17,%x18)[64byte] -e4347671 : st2b {z17.b, z18.b}, p5, [x19, x20] : st2b %z17.b %z18.b %p5 -> (%x19,%x20)[64byte] -e43676b3 : st2b {z19.b, z20.b}, p5, [x21, x22] : st2b %z19.b %z20.b %p5 -> (%x21,%x22)[64byte] -e4387af5 : st2b {z21.b, z22.b}, p6, [x23, x24] : st2b %z21.b %z22.b %p6 -> (%x23,%x24)[64byte] -e4397b17 : st2b {z23.b, z24.b}, p6, [x24, x25] : st2b %z23.b %z24.b %p6 -> (%x24,%x25)[64byte] -e43b7f59 : st2b {z25.b, z26.b}, p7, [x26, x27] : st2b %z25.b %z26.b %p7 -> (%x26,%x27)[64byte] -e43d7f9b : st2b {z27.b, z28.b}, p7, [x28, x29] : st2b %z27.b %z28.b 
%p7 -> (%x28,%x29)[64byte] -e43e7fff : st2b {z31.b, z0.b}, p7, [sp, x30] : st2b %z31.b %z0.b %p7 -> (%sp,%x30)[64byte] +e4206000 : st2b {z0.b, z1.b}, p0, [x0, x0] : st2b %z0.b %z1.b %p0 -> (%x0,%x0)[1byte] +e4256482 : st2b {z2.b, z3.b}, p1, [x4, x5] : st2b %z2.b %z3.b %p1 -> (%x4,%x5)[1byte] +e42768c4 : st2b {z4.b, z5.b}, p2, [x6, x7] : st2b %z4.b %z5.b %p2 -> (%x6,%x7)[1byte] +e4296906 : st2b {z6.b, z7.b}, p2, [x8, x9] : st2b %z6.b %z7.b %p2 -> (%x8,%x9)[1byte] +e42b6d48 : st2b {z8.b, z9.b}, p3, [x10, x11] : st2b %z8.b %z9.b %p3 -> (%x10,%x11)[1byte] +e42c6d6a : st2b {z10.b, z11.b}, p3, [x11, x12] : st2b %z10.b %z11.b %p3 -> (%x11,%x12)[1byte] +e42e71ac : st2b {z12.b, z13.b}, p4, [x13, x14] : st2b %z12.b %z13.b %p4 -> (%x13,%x14)[1byte] +e43071ee : st2b {z14.b, z15.b}, p4, [x15, x16] : st2b %z14.b %z15.b %p4 -> (%x15,%x16)[1byte] +e4327630 : st2b {z16.b, z17.b}, p5, [x17, x18] : st2b %z16.b %z17.b %p5 -> (%x17,%x18)[1byte] +e4347671 : st2b {z17.b, z18.b}, p5, [x19, x20] : st2b %z17.b %z18.b %p5 -> (%x19,%x20)[1byte] +e43676b3 : st2b {z19.b, z20.b}, p5, [x21, x22] : st2b %z19.b %z20.b %p5 -> (%x21,%x22)[1byte] +e4387af5 : st2b {z21.b, z22.b}, p6, [x23, x24] : st2b %z21.b %z22.b %p6 -> (%x23,%x24)[1byte] +e4397b17 : st2b {z23.b, z24.b}, p6, [x24, x25] : st2b %z23.b %z24.b %p6 -> (%x24,%x25)[1byte] +e43b7f59 : st2b {z25.b, z26.b}, p7, [x26, x27] : st2b %z25.b %z26.b %p7 -> (%x26,%x27)[1byte] +e43d7f9b : st2b {z27.b, z28.b}, p7, [x28, x29] : st2b %z27.b %z28.b %p7 -> (%x28,%x29)[1byte] +e43e7fff : st2b {z31.b, z0.b}, p7, [sp, x30] : st2b %z31.b %z0.b %p7 -> (%sp,%x30)[1byte] # ST2B { .B, .B }, , [{, #, MUL VL}] (ST2B-Z.P.BI-Contiguous) -e438e000 : st2b {z0.b, z1.b}, p0, [x0, #-16, MUL VL] : st2b %z0.b %z1.b %p0 -> -0x0200(%x0)[64byte] -e439e482 : st2b {z2.b, z3.b}, p1, [x4, #-14, MUL VL] : st2b %z2.b %z3.b %p1 -> -0x01c0(%x4)[64byte] -e43ae8c4 : st2b {z4.b, z5.b}, p2, [x6, #-12, MUL VL] : st2b %z4.b %z5.b %p2 -> -0x0180(%x6)[64byte] -e43be906 : st2b {z6.b, z7.b}, p2, 
[x8, #-10, MUL VL] : st2b %z6.b %z7.b %p2 -> -0x0140(%x8)[64byte] -e43ced48 : st2b {z8.b, z9.b}, p3, [x10, #-8, MUL VL] : st2b %z8.b %z9.b %p3 -> -0x0100(%x10)[64byte] -e43ded6a : st2b {z10.b, z11.b}, p3, [x11, #-6, MUL VL] : st2b %z10.b %z11.b %p3 -> -0xc0(%x11)[64byte] -e43ef1ac : st2b {z12.b, z13.b}, p4, [x13, #-4, MUL VL] : st2b %z12.b %z13.b %p4 -> -0x80(%x13)[64byte] -e43ff1ee : st2b {z14.b, z15.b}, p4, [x15, #-2, MUL VL] : st2b %z14.b %z15.b %p4 -> -0x40(%x15)[64byte] -e430f630 : st2b {z16.b, z17.b}, p5, [x17, #0, MUL VL] : st2b %z16.b %z17.b %p5 -> (%x17)[64byte] -e430f671 : st2b {z17.b, z18.b}, p5, [x19, #0, MUL VL] : st2b %z17.b %z18.b %p5 -> (%x19)[64byte] -e431f6b3 : st2b {z19.b, z20.b}, p5, [x21, #2, MUL VL] : st2b %z19.b %z20.b %p5 -> +0x40(%x21)[64byte] -e432faf5 : st2b {z21.b, z22.b}, p6, [x23, #4, MUL VL] : st2b %z21.b %z22.b %p6 -> +0x80(%x23)[64byte] -e433fb17 : st2b {z23.b, z24.b}, p6, [x24, #6, MUL VL] : st2b %z23.b %z24.b %p6 -> +0xc0(%x24)[64byte] -e434ff59 : st2b {z25.b, z26.b}, p7, [x26, #8, MUL VL] : st2b %z25.b %z26.b %p7 -> +0x0100(%x26)[64byte] -e435ff9b : st2b {z27.b, z28.b}, p7, [x28, #10, MUL VL] : st2b %z27.b %z28.b %p7 -> +0x0140(%x28)[64byte] -e437ffff : st2b {z31.b, z0.b}, p7, [sp, #14, MUL VL] : st2b %z31.b %z0.b %p7 -> +0x01c0(%sp)[64byte] +e438e000 : st2b {z0.b, z1.b}, p0, [x0, #-16, MUL VL] : st2b %z0.b %z1.b %p0 -> -0x0200(%x0)[1byte] +e439e482 : st2b {z2.b, z3.b}, p1, [x4, #-14, MUL VL] : st2b %z2.b %z3.b %p1 -> -0x01c0(%x4)[1byte] +e43ae8c4 : st2b {z4.b, z5.b}, p2, [x6, #-12, MUL VL] : st2b %z4.b %z5.b %p2 -> -0x0180(%x6)[1byte] +e43be906 : st2b {z6.b, z7.b}, p2, [x8, #-10, MUL VL] : st2b %z6.b %z7.b %p2 -> -0x0140(%x8)[1byte] +e43ced48 : st2b {z8.b, z9.b}, p3, [x10, #-8, MUL VL] : st2b %z8.b %z9.b %p3 -> -0x0100(%x10)[1byte] +e43ded6a : st2b {z10.b, z11.b}, p3, [x11, #-6, MUL VL] : st2b %z10.b %z11.b %p3 -> -0xc0(%x11)[1byte] +e43ef1ac : st2b {z12.b, z13.b}, p4, [x13, #-4, MUL VL] : st2b %z12.b %z13.b %p4 -> 
-0x80(%x13)[1byte] +e43ff1ee : st2b {z14.b, z15.b}, p4, [x15, #-2, MUL VL] : st2b %z14.b %z15.b %p4 -> -0x40(%x15)[1byte] +e430f630 : st2b {z16.b, z17.b}, p5, [x17, #0, MUL VL] : st2b %z16.b %z17.b %p5 -> (%x17)[1byte] +e430f671 : st2b {z17.b, z18.b}, p5, [x19, #0, MUL VL] : st2b %z17.b %z18.b %p5 -> (%x19)[1byte] +e431f6b3 : st2b {z19.b, z20.b}, p5, [x21, #2, MUL VL] : st2b %z19.b %z20.b %p5 -> +0x40(%x21)[1byte] +e432faf5 : st2b {z21.b, z22.b}, p6, [x23, #4, MUL VL] : st2b %z21.b %z22.b %p6 -> +0x80(%x23)[1byte] +e433fb17 : st2b {z23.b, z24.b}, p6, [x24, #6, MUL VL] : st2b %z23.b %z24.b %p6 -> +0xc0(%x24)[1byte] +e434ff59 : st2b {z25.b, z26.b}, p7, [x26, #8, MUL VL] : st2b %z25.b %z26.b %p7 -> +0x0100(%x26)[1byte] +e435ff9b : st2b {z27.b, z28.b}, p7, [x28, #10, MUL VL] : st2b %z27.b %z28.b %p7 -> +0x0140(%x28)[1byte] +e437ffff : st2b {z31.b, z0.b}, p7, [sp, #14, MUL VL] : st2b %z31.b %z0.b %p7 -> +0x01c0(%sp)[1byte] # ST2D { .D, .D }, , [, , LSL #3] (ST2D-Z.P.BR-Contiguous) -e5a06000 : st2d {z0.d, z1.d}, p0, [x0, x0, LSL #3] : st2d %z0.d %z1.d %p0 -> (%x0,%x0,lsl #3)[64byte] -e5a56482 : st2d {z2.d, z3.d}, p1, [x4, x5, LSL #3] : st2d %z2.d %z3.d %p1 -> (%x4,%x5,lsl #3)[64byte] -e5a768c4 : st2d {z4.d, z5.d}, p2, [x6, x7, LSL #3] : st2d %z4.d %z5.d %p2 -> (%x6,%x7,lsl #3)[64byte] -e5a96906 : st2d {z6.d, z7.d}, p2, [x8, x9, LSL #3] : st2d %z6.d %z7.d %p2 -> (%x8,%x9,lsl #3)[64byte] -e5ab6d48 : st2d {z8.d, z9.d}, p3, [x10, x11, LSL #3] : st2d %z8.d %z9.d %p3 -> (%x10,%x11,lsl #3)[64byte] -e5ac6d6a : st2d {z10.d, z11.d}, p3, [x11, x12, LSL #3] : st2d %z10.d %z11.d %p3 -> (%x11,%x12,lsl #3)[64byte] -e5ae71ac : st2d {z12.d, z13.d}, p4, [x13, x14, LSL #3] : st2d %z12.d %z13.d %p4 -> (%x13,%x14,lsl #3)[64byte] -e5b071ee : st2d {z14.d, z15.d}, p4, [x15, x16, LSL #3] : st2d %z14.d %z15.d %p4 -> (%x15,%x16,lsl #3)[64byte] -e5b27630 : st2d {z16.d, z17.d}, p5, [x17, x18, LSL #3] : st2d %z16.d %z17.d %p5 -> (%x17,%x18,lsl #3)[64byte] -e5b47671 : st2d {z17.d, z18.d}, p5, [x19, 
x20, LSL #3] : st2d %z17.d %z18.d %p5 -> (%x19,%x20,lsl #3)[64byte] -e5b676b3 : st2d {z19.d, z20.d}, p5, [x21, x22, LSL #3] : st2d %z19.d %z20.d %p5 -> (%x21,%x22,lsl #3)[64byte] -e5b87af5 : st2d {z21.d, z22.d}, p6, [x23, x24, LSL #3] : st2d %z21.d %z22.d %p6 -> (%x23,%x24,lsl #3)[64byte] -e5b97b17 : st2d {z23.d, z24.d}, p6, [x24, x25, LSL #3] : st2d %z23.d %z24.d %p6 -> (%x24,%x25,lsl #3)[64byte] -e5bb7f59 : st2d {z25.d, z26.d}, p7, [x26, x27, LSL #3] : st2d %z25.d %z26.d %p7 -> (%x26,%x27,lsl #3)[64byte] -e5bd7f9b : st2d {z27.d, z28.d}, p7, [x28, x29, LSL #3] : st2d %z27.d %z28.d %p7 -> (%x28,%x29,lsl #3)[64byte] -e5be7fff : st2d {z31.d, z0.d}, p7, [sp, x30, LSL #3] : st2d %z31.d %z0.d %p7 -> (%sp,%x30,lsl #3)[64byte] +e5a06000 : st2d {z0.d, z1.d}, p0, [x0, x0, LSL #3] : st2d %z0.d %z1.d %p0 -> (%x0,%x0,lsl #3)[8byte] +e5a56482 : st2d {z2.d, z3.d}, p1, [x4, x5, LSL #3] : st2d %z2.d %z3.d %p1 -> (%x4,%x5,lsl #3)[8byte] +e5a768c4 : st2d {z4.d, z5.d}, p2, [x6, x7, LSL #3] : st2d %z4.d %z5.d %p2 -> (%x6,%x7,lsl #3)[8byte] +e5a96906 : st2d {z6.d, z7.d}, p2, [x8, x9, LSL #3] : st2d %z6.d %z7.d %p2 -> (%x8,%x9,lsl #3)[8byte] +e5ab6d48 : st2d {z8.d, z9.d}, p3, [x10, x11, LSL #3] : st2d %z8.d %z9.d %p3 -> (%x10,%x11,lsl #3)[8byte] +e5ac6d6a : st2d {z10.d, z11.d}, p3, [x11, x12, LSL #3] : st2d %z10.d %z11.d %p3 -> (%x11,%x12,lsl #3)[8byte] +e5ae71ac : st2d {z12.d, z13.d}, p4, [x13, x14, LSL #3] : st2d %z12.d %z13.d %p4 -> (%x13,%x14,lsl #3)[8byte] +e5b071ee : st2d {z14.d, z15.d}, p4, [x15, x16, LSL #3] : st2d %z14.d %z15.d %p4 -> (%x15,%x16,lsl #3)[8byte] +e5b27630 : st2d {z16.d, z17.d}, p5, [x17, x18, LSL #3] : st2d %z16.d %z17.d %p5 -> (%x17,%x18,lsl #3)[8byte] +e5b47671 : st2d {z17.d, z18.d}, p5, [x19, x20, LSL #3] : st2d %z17.d %z18.d %p5 -> (%x19,%x20,lsl #3)[8byte] +e5b676b3 : st2d {z19.d, z20.d}, p5, [x21, x22, LSL #3] : st2d %z19.d %z20.d %p5 -> (%x21,%x22,lsl #3)[8byte] +e5b87af5 : st2d {z21.d, z22.d}, p6, [x23, x24, LSL #3] : st2d %z21.d %z22.d %p6 -> 
(%x23,%x24,lsl #3)[8byte] +e5b97b17 : st2d {z23.d, z24.d}, p6, [x24, x25, LSL #3] : st2d %z23.d %z24.d %p6 -> (%x24,%x25,lsl #3)[8byte] +e5bb7f59 : st2d {z25.d, z26.d}, p7, [x26, x27, LSL #3] : st2d %z25.d %z26.d %p7 -> (%x26,%x27,lsl #3)[8byte] +e5bd7f9b : st2d {z27.d, z28.d}, p7, [x28, x29, LSL #3] : st2d %z27.d %z28.d %p7 -> (%x28,%x29,lsl #3)[8byte] +e5be7fff : st2d {z31.d, z0.d}, p7, [sp, x30, LSL #3] : st2d %z31.d %z0.d %p7 -> (%sp,%x30,lsl #3)[8byte] # ST2D { .D, .D }, , [{, #, MUL VL}] (ST2D-Z.P.BI-Contiguous) -e5b8e000 : st2d {z0.d, z1.d}, p0, [x0, #-16, MUL VL] : st2d %z0.d %z1.d %p0 -> -0x0200(%x0)[64byte] -e5b9e482 : st2d {z2.d, z3.d}, p1, [x4, #-14, MUL VL] : st2d %z2.d %z3.d %p1 -> -0x01c0(%x4)[64byte] -e5bae8c4 : st2d {z4.d, z5.d}, p2, [x6, #-12, MUL VL] : st2d %z4.d %z5.d %p2 -> -0x0180(%x6)[64byte] -e5bbe906 : st2d {z6.d, z7.d}, p2, [x8, #-10, MUL VL] : st2d %z6.d %z7.d %p2 -> -0x0140(%x8)[64byte] -e5bced48 : st2d {z8.d, z9.d}, p3, [x10, #-8, MUL VL] : st2d %z8.d %z9.d %p3 -> -0x0100(%x10)[64byte] -e5bded6a : st2d {z10.d, z11.d}, p3, [x11, #-6, MUL VL] : st2d %z10.d %z11.d %p3 -> -0xc0(%x11)[64byte] -e5bef1ac : st2d {z12.d, z13.d}, p4, [x13, #-4, MUL VL] : st2d %z12.d %z13.d %p4 -> -0x80(%x13)[64byte] -e5bff1ee : st2d {z14.d, z15.d}, p4, [x15, #-2, MUL VL] : st2d %z14.d %z15.d %p4 -> -0x40(%x15)[64byte] -e5b0f630 : st2d {z16.d, z17.d}, p5, [x17, #0, MUL VL] : st2d %z16.d %z17.d %p5 -> (%x17)[64byte] -e5b0f671 : st2d {z17.d, z18.d}, p5, [x19, #0, MUL VL] : st2d %z17.d %z18.d %p5 -> (%x19)[64byte] -e5b1f6b3 : st2d {z19.d, z20.d}, p5, [x21, #2, MUL VL] : st2d %z19.d %z20.d %p5 -> +0x40(%x21)[64byte] -e5b2faf5 : st2d {z21.d, z22.d}, p6, [x23, #4, MUL VL] : st2d %z21.d %z22.d %p6 -> +0x80(%x23)[64byte] -e5b3fb17 : st2d {z23.d, z24.d}, p6, [x24, #6, MUL VL] : st2d %z23.d %z24.d %p6 -> +0xc0(%x24)[64byte] -e5b4ff59 : st2d {z25.d, z26.d}, p7, [x26, #8, MUL VL] : st2d %z25.d %z26.d %p7 -> +0x0100(%x26)[64byte] -e5b5ff9b : st2d {z27.d, z28.d}, p7, [x28, #10, 
MUL VL] : st2d %z27.d %z28.d %p7 -> +0x0140(%x28)[64byte] -e5b7ffff : st2d {z31.d, z0.d}, p7, [sp, #14, MUL VL] : st2d %z31.d %z0.d %p7 -> +0x01c0(%sp)[64byte] +e5b8e000 : st2d {z0.d, z1.d}, p0, [x0, #-16, MUL VL] : st2d %z0.d %z1.d %p0 -> -0x0200(%x0)[8byte] +e5b9e482 : st2d {z2.d, z3.d}, p1, [x4, #-14, MUL VL] : st2d %z2.d %z3.d %p1 -> -0x01c0(%x4)[8byte] +e5bae8c4 : st2d {z4.d, z5.d}, p2, [x6, #-12, MUL VL] : st2d %z4.d %z5.d %p2 -> -0x0180(%x6)[8byte] +e5bbe906 : st2d {z6.d, z7.d}, p2, [x8, #-10, MUL VL] : st2d %z6.d %z7.d %p2 -> -0x0140(%x8)[8byte] +e5bced48 : st2d {z8.d, z9.d}, p3, [x10, #-8, MUL VL] : st2d %z8.d %z9.d %p3 -> -0x0100(%x10)[8byte] +e5bded6a : st2d {z10.d, z11.d}, p3, [x11, #-6, MUL VL] : st2d %z10.d %z11.d %p3 -> -0xc0(%x11)[8byte] +e5bef1ac : st2d {z12.d, z13.d}, p4, [x13, #-4, MUL VL] : st2d %z12.d %z13.d %p4 -> -0x80(%x13)[8byte] +e5bff1ee : st2d {z14.d, z15.d}, p4, [x15, #-2, MUL VL] : st2d %z14.d %z15.d %p4 -> -0x40(%x15)[8byte] +e5b0f630 : st2d {z16.d, z17.d}, p5, [x17, #0, MUL VL] : st2d %z16.d %z17.d %p5 -> (%x17)[8byte] +e5b0f671 : st2d {z17.d, z18.d}, p5, [x19, #0, MUL VL] : st2d %z17.d %z18.d %p5 -> (%x19)[8byte] +e5b1f6b3 : st2d {z19.d, z20.d}, p5, [x21, #2, MUL VL] : st2d %z19.d %z20.d %p5 -> +0x40(%x21)[8byte] +e5b2faf5 : st2d {z21.d, z22.d}, p6, [x23, #4, MUL VL] : st2d %z21.d %z22.d %p6 -> +0x80(%x23)[8byte] +e5b3fb17 : st2d {z23.d, z24.d}, p6, [x24, #6, MUL VL] : st2d %z23.d %z24.d %p6 -> +0xc0(%x24)[8byte] +e5b4ff59 : st2d {z25.d, z26.d}, p7, [x26, #8, MUL VL] : st2d %z25.d %z26.d %p7 -> +0x0100(%x26)[8byte] +e5b5ff9b : st2d {z27.d, z28.d}, p7, [x28, #10, MUL VL] : st2d %z27.d %z28.d %p7 -> +0x0140(%x28)[8byte] +e5b7ffff : st2d {z31.d, z0.d}, p7, [sp, #14, MUL VL] : st2d %z31.d %z0.d %p7 -> +0x01c0(%sp)[8byte] # ST2H { .H, .H }, , [, , LSL #1] (ST2H-Z.P.BR-Contiguous) -e4a06000 : st2h {z0.h, z1.h}, p0, [x0, x0, LSL #1] : st2h %z0.h %z1.h %p0 -> (%x0,%x0,lsl #1)[64byte] -e4a56482 : st2h {z2.h, z3.h}, p1, [x4, x5, LSL #1] : 
st2h %z2.h %z3.h %p1 -> (%x4,%x5,lsl #1)[64byte] -e4a768c4 : st2h {z4.h, z5.h}, p2, [x6, x7, LSL #1] : st2h %z4.h %z5.h %p2 -> (%x6,%x7,lsl #1)[64byte] -e4a96906 : st2h {z6.h, z7.h}, p2, [x8, x9, LSL #1] : st2h %z6.h %z7.h %p2 -> (%x8,%x9,lsl #1)[64byte] -e4ab6d48 : st2h {z8.h, z9.h}, p3, [x10, x11, LSL #1] : st2h %z8.h %z9.h %p3 -> (%x10,%x11,lsl #1)[64byte] -e4ac6d6a : st2h {z10.h, z11.h}, p3, [x11, x12, LSL #1] : st2h %z10.h %z11.h %p3 -> (%x11,%x12,lsl #1)[64byte] -e4ae71ac : st2h {z12.h, z13.h}, p4, [x13, x14, LSL #1] : st2h %z12.h %z13.h %p4 -> (%x13,%x14,lsl #1)[64byte] -e4b071ee : st2h {z14.h, z15.h}, p4, [x15, x16, LSL #1] : st2h %z14.h %z15.h %p4 -> (%x15,%x16,lsl #1)[64byte] -e4b27630 : st2h {z16.h, z17.h}, p5, [x17, x18, LSL #1] : st2h %z16.h %z17.h %p5 -> (%x17,%x18,lsl #1)[64byte] -e4b47671 : st2h {z17.h, z18.h}, p5, [x19, x20, LSL #1] : st2h %z17.h %z18.h %p5 -> (%x19,%x20,lsl #1)[64byte] -e4b676b3 : st2h {z19.h, z20.h}, p5, [x21, x22, LSL #1] : st2h %z19.h %z20.h %p5 -> (%x21,%x22,lsl #1)[64byte] -e4b87af5 : st2h {z21.h, z22.h}, p6, [x23, x24, LSL #1] : st2h %z21.h %z22.h %p6 -> (%x23,%x24,lsl #1)[64byte] -e4b97b17 : st2h {z23.h, z24.h}, p6, [x24, x25, LSL #1] : st2h %z23.h %z24.h %p6 -> (%x24,%x25,lsl #1)[64byte] -e4bb7f59 : st2h {z25.h, z26.h}, p7, [x26, x27, LSL #1] : st2h %z25.h %z26.h %p7 -> (%x26,%x27,lsl #1)[64byte] -e4bd7f9b : st2h {z27.h, z28.h}, p7, [x28, x29, LSL #1] : st2h %z27.h %z28.h %p7 -> (%x28,%x29,lsl #1)[64byte] -e4be7fff : st2h {z31.h, z0.h}, p7, [sp, x30, LSL #1] : st2h %z31.h %z0.h %p7 -> (%sp,%x30,lsl #1)[64byte] +e4a06000 : st2h {z0.h, z1.h}, p0, [x0, x0, LSL #1] : st2h %z0.h %z1.h %p0 -> (%x0,%x0,lsl #1)[2byte] +e4a56482 : st2h {z2.h, z3.h}, p1, [x4, x5, LSL #1] : st2h %z2.h %z3.h %p1 -> (%x4,%x5,lsl #1)[2byte] +e4a768c4 : st2h {z4.h, z5.h}, p2, [x6, x7, LSL #1] : st2h %z4.h %z5.h %p2 -> (%x6,%x7,lsl #1)[2byte] +e4a96906 : st2h {z6.h, z7.h}, p2, [x8, x9, LSL #1] : st2h %z6.h %z7.h %p2 -> (%x8,%x9,lsl #1)[2byte] +e4ab6d48 : 
st2h {z8.h, z9.h}, p3, [x10, x11, LSL #1] : st2h %z8.h %z9.h %p3 -> (%x10,%x11,lsl #1)[2byte] +e4ac6d6a : st2h {z10.h, z11.h}, p3, [x11, x12, LSL #1] : st2h %z10.h %z11.h %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ae71ac : st2h {z12.h, z13.h}, p4, [x13, x14, LSL #1] : st2h %z12.h %z13.h %p4 -> (%x13,%x14,lsl #1)[2byte] +e4b071ee : st2h {z14.h, z15.h}, p4, [x15, x16, LSL #1] : st2h %z14.h %z15.h %p4 -> (%x15,%x16,lsl #1)[2byte] +e4b27630 : st2h {z16.h, z17.h}, p5, [x17, x18, LSL #1] : st2h %z16.h %z17.h %p5 -> (%x17,%x18,lsl #1)[2byte] +e4b47671 : st2h {z17.h, z18.h}, p5, [x19, x20, LSL #1] : st2h %z17.h %z18.h %p5 -> (%x19,%x20,lsl #1)[2byte] +e4b676b3 : st2h {z19.h, z20.h}, p5, [x21, x22, LSL #1] : st2h %z19.h %z20.h %p5 -> (%x21,%x22,lsl #1)[2byte] +e4b87af5 : st2h {z21.h, z22.h}, p6, [x23, x24, LSL #1] : st2h %z21.h %z22.h %p6 -> (%x23,%x24,lsl #1)[2byte] +e4b97b17 : st2h {z23.h, z24.h}, p6, [x24, x25, LSL #1] : st2h %z23.h %z24.h %p6 -> (%x24,%x25,lsl #1)[2byte] +e4bb7f59 : st2h {z25.h, z26.h}, p7, [x26, x27, LSL #1] : st2h %z25.h %z26.h %p7 -> (%x26,%x27,lsl #1)[2byte] +e4bd7f9b : st2h {z27.h, z28.h}, p7, [x28, x29, LSL #1] : st2h %z27.h %z28.h %p7 -> (%x28,%x29,lsl #1)[2byte] +e4be7fff : st2h {z31.h, z0.h}, p7, [sp, x30, LSL #1] : st2h %z31.h %z0.h %p7 -> (%sp,%x30,lsl #1)[2byte] # ST2H { .H, .H }, , [{, #, MUL VL}] (ST2H-Z.P.BI-Contiguous) -e4b8e000 : st2h {z0.h, z1.h}, p0, [x0, #-16, MUL VL] : st2h %z0.h %z1.h %p0 -> -0x0200(%x0)[64byte] -e4b9e482 : st2h {z2.h, z3.h}, p1, [x4, #-14, MUL VL] : st2h %z2.h %z3.h %p1 -> -0x01c0(%x4)[64byte] -e4bae8c4 : st2h {z4.h, z5.h}, p2, [x6, #-12, MUL VL] : st2h %z4.h %z5.h %p2 -> -0x0180(%x6)[64byte] -e4bbe906 : st2h {z6.h, z7.h}, p2, [x8, #-10, MUL VL] : st2h %z6.h %z7.h %p2 -> -0x0140(%x8)[64byte] -e4bced48 : st2h {z8.h, z9.h}, p3, [x10, #-8, MUL VL] : st2h %z8.h %z9.h %p3 -> -0x0100(%x10)[64byte] -e4bded6a : st2h {z10.h, z11.h}, p3, [x11, #-6, MUL VL] : st2h %z10.h %z11.h %p3 -> -0xc0(%x11)[64byte] -e4bef1ac : st2h {z12.h, 
z13.h}, p4, [x13, #-4, MUL VL] : st2h %z12.h %z13.h %p4 -> -0x80(%x13)[64byte] -e4bff1ee : st2h {z14.h, z15.h}, p4, [x15, #-2, MUL VL] : st2h %z14.h %z15.h %p4 -> -0x40(%x15)[64byte] -e4b0f630 : st2h {z16.h, z17.h}, p5, [x17, #0, MUL VL] : st2h %z16.h %z17.h %p5 -> (%x17)[64byte] -e4b0f671 : st2h {z17.h, z18.h}, p5, [x19, #0, MUL VL] : st2h %z17.h %z18.h %p5 -> (%x19)[64byte] -e4b1f6b3 : st2h {z19.h, z20.h}, p5, [x21, #2, MUL VL] : st2h %z19.h %z20.h %p5 -> +0x40(%x21)[64byte] -e4b2faf5 : st2h {z21.h, z22.h}, p6, [x23, #4, MUL VL] : st2h %z21.h %z22.h %p6 -> +0x80(%x23)[64byte] -e4b3fb17 : st2h {z23.h, z24.h}, p6, [x24, #6, MUL VL] : st2h %z23.h %z24.h %p6 -> +0xc0(%x24)[64byte] -e4b4ff59 : st2h {z25.h, z26.h}, p7, [x26, #8, MUL VL] : st2h %z25.h %z26.h %p7 -> +0x0100(%x26)[64byte] -e4b5ff9b : st2h {z27.h, z28.h}, p7, [x28, #10, MUL VL] : st2h %z27.h %z28.h %p7 -> +0x0140(%x28)[64byte] -e4b7ffff : st2h {z31.h, z0.h}, p7, [sp, #14, MUL VL] : st2h %z31.h %z0.h %p7 -> +0x01c0(%sp)[64byte] +e4b8e000 : st2h {z0.h, z1.h}, p0, [x0, #-16, MUL VL] : st2h %z0.h %z1.h %p0 -> -0x0200(%x0)[2byte] +e4b9e482 : st2h {z2.h, z3.h}, p1, [x4, #-14, MUL VL] : st2h %z2.h %z3.h %p1 -> -0x01c0(%x4)[2byte] +e4bae8c4 : st2h {z4.h, z5.h}, p2, [x6, #-12, MUL VL] : st2h %z4.h %z5.h %p2 -> -0x0180(%x6)[2byte] +e4bbe906 : st2h {z6.h, z7.h}, p2, [x8, #-10, MUL VL] : st2h %z6.h %z7.h %p2 -> -0x0140(%x8)[2byte] +e4bced48 : st2h {z8.h, z9.h}, p3, [x10, #-8, MUL VL] : st2h %z8.h %z9.h %p3 -> -0x0100(%x10)[2byte] +e4bded6a : st2h {z10.h, z11.h}, p3, [x11, #-6, MUL VL] : st2h %z10.h %z11.h %p3 -> -0xc0(%x11)[2byte] +e4bef1ac : st2h {z12.h, z13.h}, p4, [x13, #-4, MUL VL] : st2h %z12.h %z13.h %p4 -> -0x80(%x13)[2byte] +e4bff1ee : st2h {z14.h, z15.h}, p4, [x15, #-2, MUL VL] : st2h %z14.h %z15.h %p4 -> -0x40(%x15)[2byte] +e4b0f630 : st2h {z16.h, z17.h}, p5, [x17, #0, MUL VL] : st2h %z16.h %z17.h %p5 -> (%x17)[2byte] +e4b0f671 : st2h {z17.h, z18.h}, p5, [x19, #0, MUL VL] : st2h %z17.h %z18.h %p5 -> 
(%x19)[2byte] +e4b1f6b3 : st2h {z19.h, z20.h}, p5, [x21, #2, MUL VL] : st2h %z19.h %z20.h %p5 -> +0x40(%x21)[2byte] +e4b2faf5 : st2h {z21.h, z22.h}, p6, [x23, #4, MUL VL] : st2h %z21.h %z22.h %p6 -> +0x80(%x23)[2byte] +e4b3fb17 : st2h {z23.h, z24.h}, p6, [x24, #6, MUL VL] : st2h %z23.h %z24.h %p6 -> +0xc0(%x24)[2byte] +e4b4ff59 : st2h {z25.h, z26.h}, p7, [x26, #8, MUL VL] : st2h %z25.h %z26.h %p7 -> +0x0100(%x26)[2byte] +e4b5ff9b : st2h {z27.h, z28.h}, p7, [x28, #10, MUL VL] : st2h %z27.h %z28.h %p7 -> +0x0140(%x28)[2byte] +e4b7ffff : st2h {z31.h, z0.h}, p7, [sp, #14, MUL VL] : st2h %z31.h %z0.h %p7 -> +0x01c0(%sp)[2byte] # ST2W { .S, .S }, , [, , LSL #2] (ST2W-Z.P.BR-Contiguous) -e5206000 : st2w {z0.s, z1.s}, p0, [x0, x0, LSL #2] : st2w %z0.s %z1.s %p0 -> (%x0,%x0,lsl #2)[64byte] -e5256482 : st2w {z2.s, z3.s}, p1, [x4, x5, LSL #2] : st2w %z2.s %z3.s %p1 -> (%x4,%x5,lsl #2)[64byte] -e52768c4 : st2w {z4.s, z5.s}, p2, [x6, x7, LSL #2] : st2w %z4.s %z5.s %p2 -> (%x6,%x7,lsl #2)[64byte] -e5296906 : st2w {z6.s, z7.s}, p2, [x8, x9, LSL #2] : st2w %z6.s %z7.s %p2 -> (%x8,%x9,lsl #2)[64byte] -e52b6d48 : st2w {z8.s, z9.s}, p3, [x10, x11, LSL #2] : st2w %z8.s %z9.s %p3 -> (%x10,%x11,lsl #2)[64byte] -e52c6d6a : st2w {z10.s, z11.s}, p3, [x11, x12, LSL #2] : st2w %z10.s %z11.s %p3 -> (%x11,%x12,lsl #2)[64byte] -e52e71ac : st2w {z12.s, z13.s}, p4, [x13, x14, LSL #2] : st2w %z12.s %z13.s %p4 -> (%x13,%x14,lsl #2)[64byte] -e53071ee : st2w {z14.s, z15.s}, p4, [x15, x16, LSL #2] : st2w %z14.s %z15.s %p4 -> (%x15,%x16,lsl #2)[64byte] -e5327630 : st2w {z16.s, z17.s}, p5, [x17, x18, LSL #2] : st2w %z16.s %z17.s %p5 -> (%x17,%x18,lsl #2)[64byte] -e5347671 : st2w {z17.s, z18.s}, p5, [x19, x20, LSL #2] : st2w %z17.s %z18.s %p5 -> (%x19,%x20,lsl #2)[64byte] -e53676b3 : st2w {z19.s, z20.s}, p5, [x21, x22, LSL #2] : st2w %z19.s %z20.s %p5 -> (%x21,%x22,lsl #2)[64byte] -e5387af5 : st2w {z21.s, z22.s}, p6, [x23, x24, LSL #2] : st2w %z21.s %z22.s %p6 -> (%x23,%x24,lsl #2)[64byte] -e5397b17 : 
st2w {z23.s, z24.s}, p6, [x24, x25, LSL #2] : st2w %z23.s %z24.s %p6 -> (%x24,%x25,lsl #2)[64byte] -e53b7f59 : st2w {z25.s, z26.s}, p7, [x26, x27, LSL #2] : st2w %z25.s %z26.s %p7 -> (%x26,%x27,lsl #2)[64byte] -e53d7f9b : st2w {z27.s, z28.s}, p7, [x28, x29, LSL #2] : st2w %z27.s %z28.s %p7 -> (%x28,%x29,lsl #2)[64byte] -e53e7fff : st2w {z31.s, z0.s}, p7, [sp, x30, LSL #2] : st2w %z31.s %z0.s %p7 -> (%sp,%x30,lsl #2)[64byte] +e5206000 : st2w {z0.s, z1.s}, p0, [x0, x0, LSL #2] : st2w %z0.s %z1.s %p0 -> (%x0,%x0,lsl #2)[4byte] +e5256482 : st2w {z2.s, z3.s}, p1, [x4, x5, LSL #2] : st2w %z2.s %z3.s %p1 -> (%x4,%x5,lsl #2)[4byte] +e52768c4 : st2w {z4.s, z5.s}, p2, [x6, x7, LSL #2] : st2w %z4.s %z5.s %p2 -> (%x6,%x7,lsl #2)[4byte] +e5296906 : st2w {z6.s, z7.s}, p2, [x8, x9, LSL #2] : st2w %z6.s %z7.s %p2 -> (%x8,%x9,lsl #2)[4byte] +e52b6d48 : st2w {z8.s, z9.s}, p3, [x10, x11, LSL #2] : st2w %z8.s %z9.s %p3 -> (%x10,%x11,lsl #2)[4byte] +e52c6d6a : st2w {z10.s, z11.s}, p3, [x11, x12, LSL #2] : st2w %z10.s %z11.s %p3 -> (%x11,%x12,lsl #2)[4byte] +e52e71ac : st2w {z12.s, z13.s}, p4, [x13, x14, LSL #2] : st2w %z12.s %z13.s %p4 -> (%x13,%x14,lsl #2)[4byte] +e53071ee : st2w {z14.s, z15.s}, p4, [x15, x16, LSL #2] : st2w %z14.s %z15.s %p4 -> (%x15,%x16,lsl #2)[4byte] +e5327630 : st2w {z16.s, z17.s}, p5, [x17, x18, LSL #2] : st2w %z16.s %z17.s %p5 -> (%x17,%x18,lsl #2)[4byte] +e5347671 : st2w {z17.s, z18.s}, p5, [x19, x20, LSL #2] : st2w %z17.s %z18.s %p5 -> (%x19,%x20,lsl #2)[4byte] +e53676b3 : st2w {z19.s, z20.s}, p5, [x21, x22, LSL #2] : st2w %z19.s %z20.s %p5 -> (%x21,%x22,lsl #2)[4byte] +e5387af5 : st2w {z21.s, z22.s}, p6, [x23, x24, LSL #2] : st2w %z21.s %z22.s %p6 -> (%x23,%x24,lsl #2)[4byte] +e5397b17 : st2w {z23.s, z24.s}, p6, [x24, x25, LSL #2] : st2w %z23.s %z24.s %p6 -> (%x24,%x25,lsl #2)[4byte] +e53b7f59 : st2w {z25.s, z26.s}, p7, [x26, x27, LSL #2] : st2w %z25.s %z26.s %p7 -> (%x26,%x27,lsl #2)[4byte] +e53d7f9b : st2w {z27.s, z28.s}, p7, [x28, x29, LSL #2] : st2w 
%z27.s %z28.s %p7 -> (%x28,%x29,lsl #2)[4byte] +e53e7fff : st2w {z31.s, z0.s}, p7, [sp, x30, LSL #2] : st2w %z31.s %z0.s %p7 -> (%sp,%x30,lsl #2)[4byte] # ST2W { .S, .S }, , [{, #, MUL VL}] (ST2W-Z.P.BI-Contiguous) -e538e000 : st2w {z0.s, z1.s}, p0, [x0, #-16, MUL VL] : st2w %z0.s %z1.s %p0 -> -0x0200(%x0)[64byte] -e539e482 : st2w {z2.s, z3.s}, p1, [x4, #-14, MUL VL] : st2w %z2.s %z3.s %p1 -> -0x01c0(%x4)[64byte] -e53ae8c4 : st2w {z4.s, z5.s}, p2, [x6, #-12, MUL VL] : st2w %z4.s %z5.s %p2 -> -0x0180(%x6)[64byte] -e53be906 : st2w {z6.s, z7.s}, p2, [x8, #-10, MUL VL] : st2w %z6.s %z7.s %p2 -> -0x0140(%x8)[64byte] -e53ced48 : st2w {z8.s, z9.s}, p3, [x10, #-8, MUL VL] : st2w %z8.s %z9.s %p3 -> -0x0100(%x10)[64byte] -e53ded6a : st2w {z10.s, z11.s}, p3, [x11, #-6, MUL VL] : st2w %z10.s %z11.s %p3 -> -0xc0(%x11)[64byte] -e53ef1ac : st2w {z12.s, z13.s}, p4, [x13, #-4, MUL VL] : st2w %z12.s %z13.s %p4 -> -0x80(%x13)[64byte] -e53ff1ee : st2w {z14.s, z15.s}, p4, [x15, #-2, MUL VL] : st2w %z14.s %z15.s %p4 -> -0x40(%x15)[64byte] -e530f630 : st2w {z16.s, z17.s}, p5, [x17, #0, MUL VL] : st2w %z16.s %z17.s %p5 -> (%x17)[64byte] -e530f671 : st2w {z17.s, z18.s}, p5, [x19, #0, MUL VL] : st2w %z17.s %z18.s %p5 -> (%x19)[64byte] -e531f6b3 : st2w {z19.s, z20.s}, p5, [x21, #2, MUL VL] : st2w %z19.s %z20.s %p5 -> +0x40(%x21)[64byte] -e532faf5 : st2w {z21.s, z22.s}, p6, [x23, #4, MUL VL] : st2w %z21.s %z22.s %p6 -> +0x80(%x23)[64byte] -e533fb17 : st2w {z23.s, z24.s}, p6, [x24, #6, MUL VL] : st2w %z23.s %z24.s %p6 -> +0xc0(%x24)[64byte] -e534ff59 : st2w {z25.s, z26.s}, p7, [x26, #8, MUL VL] : st2w %z25.s %z26.s %p7 -> +0x0100(%x26)[64byte] -e535ff9b : st2w {z27.s, z28.s}, p7, [x28, #10, MUL VL] : st2w %z27.s %z28.s %p7 -> +0x0140(%x28)[64byte] -e537ffff : st2w {z31.s, z0.s}, p7, [sp, #14, MUL VL] : st2w %z31.s %z0.s %p7 -> +0x01c0(%sp)[64byte] +e538e000 : st2w {z0.s, z1.s}, p0, [x0, #-16, MUL VL] : st2w %z0.s %z1.s %p0 -> -0x0200(%x0)[4byte] +e539e482 : st2w {z2.s, z3.s}, p1, [x4, #-14, 
MUL VL] : st2w %z2.s %z3.s %p1 -> -0x01c0(%x4)[4byte] +e53ae8c4 : st2w {z4.s, z5.s}, p2, [x6, #-12, MUL VL] : st2w %z4.s %z5.s %p2 -> -0x0180(%x6)[4byte] +e53be906 : st2w {z6.s, z7.s}, p2, [x8, #-10, MUL VL] : st2w %z6.s %z7.s %p2 -> -0x0140(%x8)[4byte] +e53ced48 : st2w {z8.s, z9.s}, p3, [x10, #-8, MUL VL] : st2w %z8.s %z9.s %p3 -> -0x0100(%x10)[4byte] +e53ded6a : st2w {z10.s, z11.s}, p3, [x11, #-6, MUL VL] : st2w %z10.s %z11.s %p3 -> -0xc0(%x11)[4byte] +e53ef1ac : st2w {z12.s, z13.s}, p4, [x13, #-4, MUL VL] : st2w %z12.s %z13.s %p4 -> -0x80(%x13)[4byte] +e53ff1ee : st2w {z14.s, z15.s}, p4, [x15, #-2, MUL VL] : st2w %z14.s %z15.s %p4 -> -0x40(%x15)[4byte] +e530f630 : st2w {z16.s, z17.s}, p5, [x17, #0, MUL VL] : st2w %z16.s %z17.s %p5 -> (%x17)[4byte] +e530f671 : st2w {z17.s, z18.s}, p5, [x19, #0, MUL VL] : st2w %z17.s %z18.s %p5 -> (%x19)[4byte] +e531f6b3 : st2w {z19.s, z20.s}, p5, [x21, #2, MUL VL] : st2w %z19.s %z20.s %p5 -> +0x40(%x21)[4byte] +e532faf5 : st2w {z21.s, z22.s}, p6, [x23, #4, MUL VL] : st2w %z21.s %z22.s %p6 -> +0x80(%x23)[4byte] +e533fb17 : st2w {z23.s, z24.s}, p6, [x24, #6, MUL VL] : st2w %z23.s %z24.s %p6 -> +0xc0(%x24)[4byte] +e534ff59 : st2w {z25.s, z26.s}, p7, [x26, #8, MUL VL] : st2w %z25.s %z26.s %p7 -> +0x0100(%x26)[4byte] +e535ff9b : st2w {z27.s, z28.s}, p7, [x28, #10, MUL VL] : st2w %z27.s %z28.s %p7 -> +0x0140(%x28)[4byte] +e537ffff : st2w {z31.s, z0.s}, p7, [sp, #14, MUL VL] : st2w %z31.s %z0.s %p7 -> +0x01c0(%sp)[4byte] # ST3B { .B, .B, .B }, , [, ] (ST3B-Z.P.BR-Contiguous) -e4406000 : st3b {z0.b, z1.b, z2.b}, p0, [x0, x0] : st3b %z0.b %z1.b %z2.b %p0 -> (%x0,%x0)[96byte] -e4456482 : st3b {z2.b, z3.b, z4.b}, p1, [x4, x5] : st3b %z2.b %z3.b %z4.b %p1 -> (%x4,%x5)[96byte] -e44768c4 : st3b {z4.b, z5.b, z6.b}, p2, [x6, x7] : st3b %z4.b %z5.b %z6.b %p2 -> (%x6,%x7)[96byte] -e4496906 : st3b {z6.b, z7.b, z8.b}, p2, [x8, x9] : st3b %z6.b %z7.b %z8.b %p2 -> (%x8,%x9)[96byte] -e44b6d48 : st3b {z8.b, z9.b, z10.b}, p3, [x10, x11] : st3b %z8.b 
%z9.b %z10.b %p3 -> (%x10,%x11)[96byte] -e44c6d6a : st3b {z10.b, z11.b, z12.b}, p3, [x11, x12] : st3b %z10.b %z11.b %z12.b %p3 -> (%x11,%x12)[96byte] -e44e71ac : st3b {z12.b, z13.b, z14.b}, p4, [x13, x14] : st3b %z12.b %z13.b %z14.b %p4 -> (%x13,%x14)[96byte] -e45071ee : st3b {z14.b, z15.b, z16.b}, p4, [x15, x16] : st3b %z14.b %z15.b %z16.b %p4 -> (%x15,%x16)[96byte] -e4527630 : st3b {z16.b, z17.b, z18.b}, p5, [x17, x18] : st3b %z16.b %z17.b %z18.b %p5 -> (%x17,%x18)[96byte] -e4547671 : st3b {z17.b, z18.b, z19.b}, p5, [x19, x20] : st3b %z17.b %z18.b %z19.b %p5 -> (%x19,%x20)[96byte] -e45676b3 : st3b {z19.b, z20.b, z21.b}, p5, [x21, x22] : st3b %z19.b %z20.b %z21.b %p5 -> (%x21,%x22)[96byte] -e4587af5 : st3b {z21.b, z22.b, z23.b}, p6, [x23, x24] : st3b %z21.b %z22.b %z23.b %p6 -> (%x23,%x24)[96byte] -e4597b17 : st3b {z23.b, z24.b, z25.b}, p6, [x24, x25] : st3b %z23.b %z24.b %z25.b %p6 -> (%x24,%x25)[96byte] -e45b7f59 : st3b {z25.b, z26.b, z27.b}, p7, [x26, x27] : st3b %z25.b %z26.b %z27.b %p7 -> (%x26,%x27)[96byte] -e45d7f9b : st3b {z27.b, z28.b, z29.b}, p7, [x28, x29] : st3b %z27.b %z28.b %z29.b %p7 -> (%x28,%x29)[96byte] -e45e7fff : st3b {z31.b, z0.b, z1.b}, p7, [sp, x30] : st3b %z31.b %z0.b %z1.b %p7 -> (%sp,%x30)[96byte] +e4406000 : st3b {z0.b, z1.b, z2.b}, p0, [x0, x0] : st3b %z0.b %z1.b %z2.b %p0 -> (%x0,%x0)[1byte] +e4456482 : st3b {z2.b, z3.b, z4.b}, p1, [x4, x5] : st3b %z2.b %z3.b %z4.b %p1 -> (%x4,%x5)[1byte] +e44768c4 : st3b {z4.b, z5.b, z6.b}, p2, [x6, x7] : st3b %z4.b %z5.b %z6.b %p2 -> (%x6,%x7)[1byte] +e4496906 : st3b {z6.b, z7.b, z8.b}, p2, [x8, x9] : st3b %z6.b %z7.b %z8.b %p2 -> (%x8,%x9)[1byte] +e44b6d48 : st3b {z8.b, z9.b, z10.b}, p3, [x10, x11] : st3b %z8.b %z9.b %z10.b %p3 -> (%x10,%x11)[1byte] +e44c6d6a : st3b {z10.b, z11.b, z12.b}, p3, [x11, x12] : st3b %z10.b %z11.b %z12.b %p3 -> (%x11,%x12)[1byte] +e44e71ac : st3b {z12.b, z13.b, z14.b}, p4, [x13, x14] : st3b %z12.b %z13.b %z14.b %p4 -> (%x13,%x14)[1byte] +e45071ee : st3b {z14.b, z15.b, 
z16.b}, p4, [x15, x16] : st3b %z14.b %z15.b %z16.b %p4 -> (%x15,%x16)[1byte] +e4527630 : st3b {z16.b, z17.b, z18.b}, p5, [x17, x18] : st3b %z16.b %z17.b %z18.b %p5 -> (%x17,%x18)[1byte] +e4547671 : st3b {z17.b, z18.b, z19.b}, p5, [x19, x20] : st3b %z17.b %z18.b %z19.b %p5 -> (%x19,%x20)[1byte] +e45676b3 : st3b {z19.b, z20.b, z21.b}, p5, [x21, x22] : st3b %z19.b %z20.b %z21.b %p5 -> (%x21,%x22)[1byte] +e4587af5 : st3b {z21.b, z22.b, z23.b}, p6, [x23, x24] : st3b %z21.b %z22.b %z23.b %p6 -> (%x23,%x24)[1byte] +e4597b17 : st3b {z23.b, z24.b, z25.b}, p6, [x24, x25] : st3b %z23.b %z24.b %z25.b %p6 -> (%x24,%x25)[1byte] +e45b7f59 : st3b {z25.b, z26.b, z27.b}, p7, [x26, x27] : st3b %z25.b %z26.b %z27.b %p7 -> (%x26,%x27)[1byte] +e45d7f9b : st3b {z27.b, z28.b, z29.b}, p7, [x28, x29] : st3b %z27.b %z28.b %z29.b %p7 -> (%x28,%x29)[1byte] +e45e7fff : st3b {z31.b, z0.b, z1.b}, p7, [sp, x30] : st3b %z31.b %z0.b %z1.b %p7 -> (%sp,%x30)[1byte] # ST3B { .B, .B, .B }, , [{, #, MUL VL}] (ST3B-Z.P.BI-Contiguous) -e458e000 : st3b {z0.b, z1.b, z2.b}, p0, [x0, #-24, MUL VL] : st3b %z0.b %z1.b %z2.b %p0 -> -0x0300(%x0)[96byte] -e459e482 : st3b {z2.b, z3.b, z4.b}, p1, [x4, #-21, MUL VL] : st3b %z2.b %z3.b %z4.b %p1 -> -0x02a0(%x4)[96byte] -e45ae8c4 : st3b {z4.b, z5.b, z6.b}, p2, [x6, #-18, MUL VL] : st3b %z4.b %z5.b %z6.b %p2 -> -0x0240(%x6)[96byte] -e45be906 : st3b {z6.b, z7.b, z8.b}, p2, [x8, #-15, MUL VL] : st3b %z6.b %z7.b %z8.b %p2 -> -0x01e0(%x8)[96byte] -e45ced48 : st3b {z8.b, z9.b, z10.b}, p3, [x10, #-12, MUL VL] : st3b %z8.b %z9.b %z10.b %p3 -> -0x0180(%x10)[96byte] -e45ded6a : st3b {z10.b, z11.b, z12.b}, p3, [x11, #-9, MUL VL] : st3b %z10.b %z11.b %z12.b %p3 -> -0x0120(%x11)[96byte] -e45ef1ac : st3b {z12.b, z13.b, z14.b}, p4, [x13, #-6, MUL VL] : st3b %z12.b %z13.b %z14.b %p4 -> -0xc0(%x13)[96byte] -e45ff1ee : st3b {z14.b, z15.b, z16.b}, p4, [x15, #-3, MUL VL] : st3b %z14.b %z15.b %z16.b %p4 -> -0x60(%x15)[96byte] -e450f630 : st3b {z16.b, z17.b, z18.b}, p5, [x17, #0, MUL VL] : 
st3b %z16.b %z17.b %z18.b %p5 -> (%x17)[96byte] -e450f671 : st3b {z17.b, z18.b, z19.b}, p5, [x19, #0, MUL VL] : st3b %z17.b %z18.b %z19.b %p5 -> (%x19)[96byte] -e451f6b3 : st3b {z19.b, z20.b, z21.b}, p5, [x21, #3, MUL VL] : st3b %z19.b %z20.b %z21.b %p5 -> +0x60(%x21)[96byte] -e452faf5 : st3b {z21.b, z22.b, z23.b}, p6, [x23, #6, MUL VL] : st3b %z21.b %z22.b %z23.b %p6 -> +0xc0(%x23)[96byte] -e453fb17 : st3b {z23.b, z24.b, z25.b}, p6, [x24, #9, MUL VL] : st3b %z23.b %z24.b %z25.b %p6 -> +0x0120(%x24)[96byte] -e454ff59 : st3b {z25.b, z26.b, z27.b}, p7, [x26, #12, MUL VL] : st3b %z25.b %z26.b %z27.b %p7 -> +0x0180(%x26)[96byte] -e455ff9b : st3b {z27.b, z28.b, z29.b}, p7, [x28, #15, MUL VL] : st3b %z27.b %z28.b %z29.b %p7 -> +0x01e0(%x28)[96byte] -e457ffff : st3b {z31.b, z0.b, z1.b}, p7, [sp, #21, MUL VL] : st3b %z31.b %z0.b %z1.b %p7 -> +0x02a0(%sp)[96byte] +e458e000 : st3b {z0.b, z1.b, z2.b}, p0, [x0, #-24, MUL VL] : st3b %z0.b %z1.b %z2.b %p0 -> -0x0300(%x0)[1byte] +e459e482 : st3b {z2.b, z3.b, z4.b}, p1, [x4, #-21, MUL VL] : st3b %z2.b %z3.b %z4.b %p1 -> -0x02a0(%x4)[1byte] +e45ae8c4 : st3b {z4.b, z5.b, z6.b}, p2, [x6, #-18, MUL VL] : st3b %z4.b %z5.b %z6.b %p2 -> -0x0240(%x6)[1byte] +e45be906 : st3b {z6.b, z7.b, z8.b}, p2, [x8, #-15, MUL VL] : st3b %z6.b %z7.b %z8.b %p2 -> -0x01e0(%x8)[1byte] +e45ced48 : st3b {z8.b, z9.b, z10.b}, p3, [x10, #-12, MUL VL] : st3b %z8.b %z9.b %z10.b %p3 -> -0x0180(%x10)[1byte] +e45ded6a : st3b {z10.b, z11.b, z12.b}, p3, [x11, #-9, MUL VL] : st3b %z10.b %z11.b %z12.b %p3 -> -0x0120(%x11)[1byte] +e45ef1ac : st3b {z12.b, z13.b, z14.b}, p4, [x13, #-6, MUL VL] : st3b %z12.b %z13.b %z14.b %p4 -> -0xc0(%x13)[1byte] +e45ff1ee : st3b {z14.b, z15.b, z16.b}, p4, [x15, #-3, MUL VL] : st3b %z14.b %z15.b %z16.b %p4 -> -0x60(%x15)[1byte] +e450f630 : st3b {z16.b, z17.b, z18.b}, p5, [x17, #0, MUL VL] : st3b %z16.b %z17.b %z18.b %p5 -> (%x17)[1byte] +e450f671 : st3b {z17.b, z18.b, z19.b}, p5, [x19, #0, MUL VL] : st3b %z17.b %z18.b %z19.b %p5 -> 
(%x19)[1byte] +e451f6b3 : st3b {z19.b, z20.b, z21.b}, p5, [x21, #3, MUL VL] : st3b %z19.b %z20.b %z21.b %p5 -> +0x60(%x21)[1byte] +e452faf5 : st3b {z21.b, z22.b, z23.b}, p6, [x23, #6, MUL VL] : st3b %z21.b %z22.b %z23.b %p6 -> +0xc0(%x23)[1byte] +e453fb17 : st3b {z23.b, z24.b, z25.b}, p6, [x24, #9, MUL VL] : st3b %z23.b %z24.b %z25.b %p6 -> +0x0120(%x24)[1byte] +e454ff59 : st3b {z25.b, z26.b, z27.b}, p7, [x26, #12, MUL VL] : st3b %z25.b %z26.b %z27.b %p7 -> +0x0180(%x26)[1byte] +e455ff9b : st3b {z27.b, z28.b, z29.b}, p7, [x28, #15, MUL VL] : st3b %z27.b %z28.b %z29.b %p7 -> +0x01e0(%x28)[1byte] +e457ffff : st3b {z31.b, z0.b, z1.b}, p7, [sp, #21, MUL VL] : st3b %z31.b %z0.b %z1.b %p7 -> +0x02a0(%sp)[1byte] # ST3D { .D, .D, .D }, , [, , LSL #3] (ST3D-Z.P.BR-Contiguous) -e5c06000 : st3d {z0.d, z1.d, z2.d}, p0, [x0, x0, LSL #3] : st3d %z0.d %z1.d %z2.d %p0 -> (%x0,%x0,lsl #3)[96byte] -e5c56482 : st3d {z2.d, z3.d, z4.d}, p1, [x4, x5, LSL #3] : st3d %z2.d %z3.d %z4.d %p1 -> (%x4,%x5,lsl #3)[96byte] -e5c768c4 : st3d {z4.d, z5.d, z6.d}, p2, [x6, x7, LSL #3] : st3d %z4.d %z5.d %z6.d %p2 -> (%x6,%x7,lsl #3)[96byte] -e5c96906 : st3d {z6.d, z7.d, z8.d}, p2, [x8, x9, LSL #3] : st3d %z6.d %z7.d %z8.d %p2 -> (%x8,%x9,lsl #3)[96byte] -e5cb6d48 : st3d {z8.d, z9.d, z10.d}, p3, [x10, x11, LSL #3] : st3d %z8.d %z9.d %z10.d %p3 -> (%x10,%x11,lsl #3)[96byte] -e5cc6d6a : st3d {z10.d, z11.d, z12.d}, p3, [x11, x12, LSL #3] : st3d %z10.d %z11.d %z12.d %p3 -> (%x11,%x12,lsl #3)[96byte] -e5ce71ac : st3d {z12.d, z13.d, z14.d}, p4, [x13, x14, LSL #3] : st3d %z12.d %z13.d %z14.d %p4 -> (%x13,%x14,lsl #3)[96byte] -e5d071ee : st3d {z14.d, z15.d, z16.d}, p4, [x15, x16, LSL #3] : st3d %z14.d %z15.d %z16.d %p4 -> (%x15,%x16,lsl #3)[96byte] -e5d27630 : st3d {z16.d, z17.d, z18.d}, p5, [x17, x18, LSL #3] : st3d %z16.d %z17.d %z18.d %p5 -> (%x17,%x18,lsl #3)[96byte] -e5d47671 : st3d {z17.d, z18.d, z19.d}, p5, [x19, x20, LSL #3] : st3d %z17.d %z18.d %z19.d %p5 -> (%x19,%x20,lsl #3)[96byte] -e5d676b3 : 
st3d {z19.d, z20.d, z21.d}, p5, [x21, x22, LSL #3] : st3d %z19.d %z20.d %z21.d %p5 -> (%x21,%x22,lsl #3)[96byte] -e5d87af5 : st3d {z21.d, z22.d, z23.d}, p6, [x23, x24, LSL #3] : st3d %z21.d %z22.d %z23.d %p6 -> (%x23,%x24,lsl #3)[96byte] -e5d97b17 : st3d {z23.d, z24.d, z25.d}, p6, [x24, x25, LSL #3] : st3d %z23.d %z24.d %z25.d %p6 -> (%x24,%x25,lsl #3)[96byte] -e5db7f59 : st3d {z25.d, z26.d, z27.d}, p7, [x26, x27, LSL #3] : st3d %z25.d %z26.d %z27.d %p7 -> (%x26,%x27,lsl #3)[96byte] -e5dd7f9b : st3d {z27.d, z28.d, z29.d}, p7, [x28, x29, LSL #3] : st3d %z27.d %z28.d %z29.d %p7 -> (%x28,%x29,lsl #3)[96byte] -e5de7fff : st3d {z31.d, z0.d, z1.d}, p7, [sp, x30, LSL #3] : st3d %z31.d %z0.d %z1.d %p7 -> (%sp,%x30,lsl #3)[96byte] +e5c06000 : st3d {z0.d, z1.d, z2.d}, p0, [x0, x0, LSL #3] : st3d %z0.d %z1.d %z2.d %p0 -> (%x0,%x0,lsl #3)[8byte] +e5c56482 : st3d {z2.d, z3.d, z4.d}, p1, [x4, x5, LSL #3] : st3d %z2.d %z3.d %z4.d %p1 -> (%x4,%x5,lsl #3)[8byte] +e5c768c4 : st3d {z4.d, z5.d, z6.d}, p2, [x6, x7, LSL #3] : st3d %z4.d %z5.d %z6.d %p2 -> (%x6,%x7,lsl #3)[8byte] +e5c96906 : st3d {z6.d, z7.d, z8.d}, p2, [x8, x9, LSL #3] : st3d %z6.d %z7.d %z8.d %p2 -> (%x8,%x9,lsl #3)[8byte] +e5cb6d48 : st3d {z8.d, z9.d, z10.d}, p3, [x10, x11, LSL #3] : st3d %z8.d %z9.d %z10.d %p3 -> (%x10,%x11,lsl #3)[8byte] +e5cc6d6a : st3d {z10.d, z11.d, z12.d}, p3, [x11, x12, LSL #3] : st3d %z10.d %z11.d %z12.d %p3 -> (%x11,%x12,lsl #3)[8byte] +e5ce71ac : st3d {z12.d, z13.d, z14.d}, p4, [x13, x14, LSL #3] : st3d %z12.d %z13.d %z14.d %p4 -> (%x13,%x14,lsl #3)[8byte] +e5d071ee : st3d {z14.d, z15.d, z16.d}, p4, [x15, x16, LSL #3] : st3d %z14.d %z15.d %z16.d %p4 -> (%x15,%x16,lsl #3)[8byte] +e5d27630 : st3d {z16.d, z17.d, z18.d}, p5, [x17, x18, LSL #3] : st3d %z16.d %z17.d %z18.d %p5 -> (%x17,%x18,lsl #3)[8byte] +e5d47671 : st3d {z17.d, z18.d, z19.d}, p5, [x19, x20, LSL #3] : st3d %z17.d %z18.d %z19.d %p5 -> (%x19,%x20,lsl #3)[8byte] +e5d676b3 : st3d {z19.d, z20.d, z21.d}, p5, [x21, x22, LSL #3] : st3d 
%z19.d %z20.d %z21.d %p5 -> (%x21,%x22,lsl #3)[8byte] +e5d87af5 : st3d {z21.d, z22.d, z23.d}, p6, [x23, x24, LSL #3] : st3d %z21.d %z22.d %z23.d %p6 -> (%x23,%x24,lsl #3)[8byte] +e5d97b17 : st3d {z23.d, z24.d, z25.d}, p6, [x24, x25, LSL #3] : st3d %z23.d %z24.d %z25.d %p6 -> (%x24,%x25,lsl #3)[8byte] +e5db7f59 : st3d {z25.d, z26.d, z27.d}, p7, [x26, x27, LSL #3] : st3d %z25.d %z26.d %z27.d %p7 -> (%x26,%x27,lsl #3)[8byte] +e5dd7f9b : st3d {z27.d, z28.d, z29.d}, p7, [x28, x29, LSL #3] : st3d %z27.d %z28.d %z29.d %p7 -> (%x28,%x29,lsl #3)[8byte] +e5de7fff : st3d {z31.d, z0.d, z1.d}, p7, [sp, x30, LSL #3] : st3d %z31.d %z0.d %z1.d %p7 -> (%sp,%x30,lsl #3)[8byte] # ST3D { .D, .D, .D }, , [{, #, MUL VL}] (ST3D-Z.P.BI-Contiguous) -e5d8e000 : st3d {z0.d, z1.d, z2.d}, p0, [x0, #-24, MUL VL] : st3d %z0.d %z1.d %z2.d %p0 -> -0x0300(%x0)[96byte] -e5d9e482 : st3d {z2.d, z3.d, z4.d}, p1, [x4, #-21, MUL VL] : st3d %z2.d %z3.d %z4.d %p1 -> -0x02a0(%x4)[96byte] -e5dae8c4 : st3d {z4.d, z5.d, z6.d}, p2, [x6, #-18, MUL VL] : st3d %z4.d %z5.d %z6.d %p2 -> -0x0240(%x6)[96byte] -e5dbe906 : st3d {z6.d, z7.d, z8.d}, p2, [x8, #-15, MUL VL] : st3d %z6.d %z7.d %z8.d %p2 -> -0x01e0(%x8)[96byte] -e5dced48 : st3d {z8.d, z9.d, z10.d}, p3, [x10, #-12, MUL VL] : st3d %z8.d %z9.d %z10.d %p3 -> -0x0180(%x10)[96byte] -e5dded6a : st3d {z10.d, z11.d, z12.d}, p3, [x11, #-9, MUL VL] : st3d %z10.d %z11.d %z12.d %p3 -> -0x0120(%x11)[96byte] -e5def1ac : st3d {z12.d, z13.d, z14.d}, p4, [x13, #-6, MUL VL] : st3d %z12.d %z13.d %z14.d %p4 -> -0xc0(%x13)[96byte] -e5dff1ee : st3d {z14.d, z15.d, z16.d}, p4, [x15, #-3, MUL VL] : st3d %z14.d %z15.d %z16.d %p4 -> -0x60(%x15)[96byte] -e5d0f630 : st3d {z16.d, z17.d, z18.d}, p5, [x17, #0, MUL VL] : st3d %z16.d %z17.d %z18.d %p5 -> (%x17)[96byte] -e5d0f671 : st3d {z17.d, z18.d, z19.d}, p5, [x19, #0, MUL VL] : st3d %z17.d %z18.d %z19.d %p5 -> (%x19)[96byte] -e5d1f6b3 : st3d {z19.d, z20.d, z21.d}, p5, [x21, #3, MUL VL] : st3d %z19.d %z20.d %z21.d %p5 -> +0x60(%x21)[96byte] 
-e5d2faf5 : st3d {z21.d, z22.d, z23.d}, p6, [x23, #6, MUL VL] : st3d %z21.d %z22.d %z23.d %p6 -> +0xc0(%x23)[96byte] -e5d3fb17 : st3d {z23.d, z24.d, z25.d}, p6, [x24, #9, MUL VL] : st3d %z23.d %z24.d %z25.d %p6 -> +0x0120(%x24)[96byte] -e5d4ff59 : st3d {z25.d, z26.d, z27.d}, p7, [x26, #12, MUL VL] : st3d %z25.d %z26.d %z27.d %p7 -> +0x0180(%x26)[96byte] -e5d5ff9b : st3d {z27.d, z28.d, z29.d}, p7, [x28, #15, MUL VL] : st3d %z27.d %z28.d %z29.d %p7 -> +0x01e0(%x28)[96byte] -e5d7ffff : st3d {z31.d, z0.d, z1.d}, p7, [sp, #21, MUL VL] : st3d %z31.d %z0.d %z1.d %p7 -> +0x02a0(%sp)[96byte] +e5d8e000 : st3d {z0.d, z1.d, z2.d}, p0, [x0, #-24, MUL VL] : st3d %z0.d %z1.d %z2.d %p0 -> -0x0300(%x0)[8byte] +e5d9e482 : st3d {z2.d, z3.d, z4.d}, p1, [x4, #-21, MUL VL] : st3d %z2.d %z3.d %z4.d %p1 -> -0x02a0(%x4)[8byte] +e5dae8c4 : st3d {z4.d, z5.d, z6.d}, p2, [x6, #-18, MUL VL] : st3d %z4.d %z5.d %z6.d %p2 -> -0x0240(%x6)[8byte] +e5dbe906 : st3d {z6.d, z7.d, z8.d}, p2, [x8, #-15, MUL VL] : st3d %z6.d %z7.d %z8.d %p2 -> -0x01e0(%x8)[8byte] +e5dced48 : st3d {z8.d, z9.d, z10.d}, p3, [x10, #-12, MUL VL] : st3d %z8.d %z9.d %z10.d %p3 -> -0x0180(%x10)[8byte] +e5dded6a : st3d {z10.d, z11.d, z12.d}, p3, [x11, #-9, MUL VL] : st3d %z10.d %z11.d %z12.d %p3 -> -0x0120(%x11)[8byte] +e5def1ac : st3d {z12.d, z13.d, z14.d}, p4, [x13, #-6, MUL VL] : st3d %z12.d %z13.d %z14.d %p4 -> -0xc0(%x13)[8byte] +e5dff1ee : st3d {z14.d, z15.d, z16.d}, p4, [x15, #-3, MUL VL] : st3d %z14.d %z15.d %z16.d %p4 -> -0x60(%x15)[8byte] +e5d0f630 : st3d {z16.d, z17.d, z18.d}, p5, [x17, #0, MUL VL] : st3d %z16.d %z17.d %z18.d %p5 -> (%x17)[8byte] +e5d0f671 : st3d {z17.d, z18.d, z19.d}, p5, [x19, #0, MUL VL] : st3d %z17.d %z18.d %z19.d %p5 -> (%x19)[8byte] +e5d1f6b3 : st3d {z19.d, z20.d, z21.d}, p5, [x21, #3, MUL VL] : st3d %z19.d %z20.d %z21.d %p5 -> +0x60(%x21)[8byte] +e5d2faf5 : st3d {z21.d, z22.d, z23.d}, p6, [x23, #6, MUL VL] : st3d %z21.d %z22.d %z23.d %p6 -> +0xc0(%x23)[8byte] +e5d3fb17 : st3d {z23.d, z24.d, 
z25.d}, p6, [x24, #9, MUL VL] : st3d %z23.d %z24.d %z25.d %p6 -> +0x0120(%x24)[8byte] +e5d4ff59 : st3d {z25.d, z26.d, z27.d}, p7, [x26, #12, MUL VL] : st3d %z25.d %z26.d %z27.d %p7 -> +0x0180(%x26)[8byte] +e5d5ff9b : st3d {z27.d, z28.d, z29.d}, p7, [x28, #15, MUL VL] : st3d %z27.d %z28.d %z29.d %p7 -> +0x01e0(%x28)[8byte] +e5d7ffff : st3d {z31.d, z0.d, z1.d}, p7, [sp, #21, MUL VL] : st3d %z31.d %z0.d %z1.d %p7 -> +0x02a0(%sp)[8byte] # ST3H { .H, .H, .H }, , [, , LSL #1] (ST3H-Z.P.BR-Contiguous) -e4c06000 : st3h {z0.h, z1.h, z2.h}, p0, [x0, x0, LSL #1] : st3h %z0.h %z1.h %z2.h %p0 -> (%x0,%x0,lsl #1)[96byte] -e4c56482 : st3h {z2.h, z3.h, z4.h}, p1, [x4, x5, LSL #1] : st3h %z2.h %z3.h %z4.h %p1 -> (%x4,%x5,lsl #1)[96byte] -e4c768c4 : st3h {z4.h, z5.h, z6.h}, p2, [x6, x7, LSL #1] : st3h %z4.h %z5.h %z6.h %p2 -> (%x6,%x7,lsl #1)[96byte] -e4c96906 : st3h {z6.h, z7.h, z8.h}, p2, [x8, x9, LSL #1] : st3h %z6.h %z7.h %z8.h %p2 -> (%x8,%x9,lsl #1)[96byte] -e4cb6d48 : st3h {z8.h, z9.h, z10.h}, p3, [x10, x11, LSL #1] : st3h %z8.h %z9.h %z10.h %p3 -> (%x10,%x11,lsl #1)[96byte] -e4cc6d6a : st3h {z10.h, z11.h, z12.h}, p3, [x11, x12, LSL #1] : st3h %z10.h %z11.h %z12.h %p3 -> (%x11,%x12,lsl #1)[96byte] -e4ce71ac : st3h {z12.h, z13.h, z14.h}, p4, [x13, x14, LSL #1] : st3h %z12.h %z13.h %z14.h %p4 -> (%x13,%x14,lsl #1)[96byte] -e4d071ee : st3h {z14.h, z15.h, z16.h}, p4, [x15, x16, LSL #1] : st3h %z14.h %z15.h %z16.h %p4 -> (%x15,%x16,lsl #1)[96byte] -e4d27630 : st3h {z16.h, z17.h, z18.h}, p5, [x17, x18, LSL #1] : st3h %z16.h %z17.h %z18.h %p5 -> (%x17,%x18,lsl #1)[96byte] -e4d47671 : st3h {z17.h, z18.h, z19.h}, p5, [x19, x20, LSL #1] : st3h %z17.h %z18.h %z19.h %p5 -> (%x19,%x20,lsl #1)[96byte] -e4d676b3 : st3h {z19.h, z20.h, z21.h}, p5, [x21, x22, LSL #1] : st3h %z19.h %z20.h %z21.h %p5 -> (%x21,%x22,lsl #1)[96byte] -e4d87af5 : st3h {z21.h, z22.h, z23.h}, p6, [x23, x24, LSL #1] : st3h %z21.h %z22.h %z23.h %p6 -> (%x23,%x24,lsl #1)[96byte] -e4d97b17 : st3h {z23.h, z24.h, z25.h}, p6, 
[x24, x25, LSL #1] : st3h %z23.h %z24.h %z25.h %p6 -> (%x24,%x25,lsl #1)[96byte] -e4db7f59 : st3h {z25.h, z26.h, z27.h}, p7, [x26, x27, LSL #1] : st3h %z25.h %z26.h %z27.h %p7 -> (%x26,%x27,lsl #1)[96byte] -e4dd7f9b : st3h {z27.h, z28.h, z29.h}, p7, [x28, x29, LSL #1] : st3h %z27.h %z28.h %z29.h %p7 -> (%x28,%x29,lsl #1)[96byte] -e4de7fff : st3h {z31.h, z0.h, z1.h}, p7, [sp, x30, LSL #1] : st3h %z31.h %z0.h %z1.h %p7 -> (%sp,%x30,lsl #1)[96byte] +e4c06000 : st3h {z0.h, z1.h, z2.h}, p0, [x0, x0, LSL #1] : st3h %z0.h %z1.h %z2.h %p0 -> (%x0,%x0,lsl #1)[2byte] +e4c56482 : st3h {z2.h, z3.h, z4.h}, p1, [x4, x5, LSL #1] : st3h %z2.h %z3.h %z4.h %p1 -> (%x4,%x5,lsl #1)[2byte] +e4c768c4 : st3h {z4.h, z5.h, z6.h}, p2, [x6, x7, LSL #1] : st3h %z4.h %z5.h %z6.h %p2 -> (%x6,%x7,lsl #1)[2byte] +e4c96906 : st3h {z6.h, z7.h, z8.h}, p2, [x8, x9, LSL #1] : st3h %z6.h %z7.h %z8.h %p2 -> (%x8,%x9,lsl #1)[2byte] +e4cb6d48 : st3h {z8.h, z9.h, z10.h}, p3, [x10, x11, LSL #1] : st3h %z8.h %z9.h %z10.h %p3 -> (%x10,%x11,lsl #1)[2byte] +e4cc6d6a : st3h {z10.h, z11.h, z12.h}, p3, [x11, x12, LSL #1] : st3h %z10.h %z11.h %z12.h %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ce71ac : st3h {z12.h, z13.h, z14.h}, p4, [x13, x14, LSL #1] : st3h %z12.h %z13.h %z14.h %p4 -> (%x13,%x14,lsl #1)[2byte] +e4d071ee : st3h {z14.h, z15.h, z16.h}, p4, [x15, x16, LSL #1] : st3h %z14.h %z15.h %z16.h %p4 -> (%x15,%x16,lsl #1)[2byte] +e4d27630 : st3h {z16.h, z17.h, z18.h}, p5, [x17, x18, LSL #1] : st3h %z16.h %z17.h %z18.h %p5 -> (%x17,%x18,lsl #1)[2byte] +e4d47671 : st3h {z17.h, z18.h, z19.h}, p5, [x19, x20, LSL #1] : st3h %z17.h %z18.h %z19.h %p5 -> (%x19,%x20,lsl #1)[2byte] +e4d676b3 : st3h {z19.h, z20.h, z21.h}, p5, [x21, x22, LSL #1] : st3h %z19.h %z20.h %z21.h %p5 -> (%x21,%x22,lsl #1)[2byte] +e4d87af5 : st3h {z21.h, z22.h, z23.h}, p6, [x23, x24, LSL #1] : st3h %z21.h %z22.h %z23.h %p6 -> (%x23,%x24,lsl #1)[2byte] +e4d97b17 : st3h {z23.h, z24.h, z25.h}, p6, [x24, x25, LSL #1] : st3h %z23.h %z24.h %z25.h %p6 -> 
(%x24,%x25,lsl #1)[2byte] +e4db7f59 : st3h {z25.h, z26.h, z27.h}, p7, [x26, x27, LSL #1] : st3h %z25.h %z26.h %z27.h %p7 -> (%x26,%x27,lsl #1)[2byte] +e4dd7f9b : st3h {z27.h, z28.h, z29.h}, p7, [x28, x29, LSL #1] : st3h %z27.h %z28.h %z29.h %p7 -> (%x28,%x29,lsl #1)[2byte] +e4de7fff : st3h {z31.h, z0.h, z1.h}, p7, [sp, x30, LSL #1] : st3h %z31.h %z0.h %z1.h %p7 -> (%sp,%x30,lsl #1)[2byte] # ST3H { .H, .H, .H }, , [{, #, MUL VL}] (ST3H-Z.P.BI-Contiguous) -e4d8e000 : st3h {z0.h, z1.h, z2.h}, p0, [x0, #-24, MUL VL] : st3h %z0.h %z1.h %z2.h %p0 -> -0x0300(%x0)[96byte] -e4d9e482 : st3h {z2.h, z3.h, z4.h}, p1, [x4, #-21, MUL VL] : st3h %z2.h %z3.h %z4.h %p1 -> -0x02a0(%x4)[96byte] -e4dae8c4 : st3h {z4.h, z5.h, z6.h}, p2, [x6, #-18, MUL VL] : st3h %z4.h %z5.h %z6.h %p2 -> -0x0240(%x6)[96byte] -e4dbe906 : st3h {z6.h, z7.h, z8.h}, p2, [x8, #-15, MUL VL] : st3h %z6.h %z7.h %z8.h %p2 -> -0x01e0(%x8)[96byte] -e4dced48 : st3h {z8.h, z9.h, z10.h}, p3, [x10, #-12, MUL VL] : st3h %z8.h %z9.h %z10.h %p3 -> -0x0180(%x10)[96byte] -e4dded6a : st3h {z10.h, z11.h, z12.h}, p3, [x11, #-9, MUL VL] : st3h %z10.h %z11.h %z12.h %p3 -> -0x0120(%x11)[96byte] -e4def1ac : st3h {z12.h, z13.h, z14.h}, p4, [x13, #-6, MUL VL] : st3h %z12.h %z13.h %z14.h %p4 -> -0xc0(%x13)[96byte] -e4dff1ee : st3h {z14.h, z15.h, z16.h}, p4, [x15, #-3, MUL VL] : st3h %z14.h %z15.h %z16.h %p4 -> -0x60(%x15)[96byte] -e4d0f630 : st3h {z16.h, z17.h, z18.h}, p5, [x17, #0, MUL VL] : st3h %z16.h %z17.h %z18.h %p5 -> (%x17)[96byte] -e4d0f671 : st3h {z17.h, z18.h, z19.h}, p5, [x19, #0, MUL VL] : st3h %z17.h %z18.h %z19.h %p5 -> (%x19)[96byte] -e4d1f6b3 : st3h {z19.h, z20.h, z21.h}, p5, [x21, #3, MUL VL] : st3h %z19.h %z20.h %z21.h %p5 -> +0x60(%x21)[96byte] -e4d2faf5 : st3h {z21.h, z22.h, z23.h}, p6, [x23, #6, MUL VL] : st3h %z21.h %z22.h %z23.h %p6 -> +0xc0(%x23)[96byte] -e4d3fb17 : st3h {z23.h, z24.h, z25.h}, p6, [x24, #9, MUL VL] : st3h %z23.h %z24.h %z25.h %p6 -> +0x0120(%x24)[96byte] -e4d4ff59 : st3h {z25.h, z26.h, z27.h}, 
p7, [x26, #12, MUL VL] : st3h %z25.h %z26.h %z27.h %p7 -> +0x0180(%x26)[96byte] -e4d5ff9b : st3h {z27.h, z28.h, z29.h}, p7, [x28, #15, MUL VL] : st3h %z27.h %z28.h %z29.h %p7 -> +0x01e0(%x28)[96byte] -e4d7ffff : st3h {z31.h, z0.h, z1.h}, p7, [sp, #21, MUL VL] : st3h %z31.h %z0.h %z1.h %p7 -> +0x02a0(%sp)[96byte] +e4d8e000 : st3h {z0.h, z1.h, z2.h}, p0, [x0, #-24, MUL VL] : st3h %z0.h %z1.h %z2.h %p0 -> -0x0300(%x0)[2byte] +e4d9e482 : st3h {z2.h, z3.h, z4.h}, p1, [x4, #-21, MUL VL] : st3h %z2.h %z3.h %z4.h %p1 -> -0x02a0(%x4)[2byte] +e4dae8c4 : st3h {z4.h, z5.h, z6.h}, p2, [x6, #-18, MUL VL] : st3h %z4.h %z5.h %z6.h %p2 -> -0x0240(%x6)[2byte] +e4dbe906 : st3h {z6.h, z7.h, z8.h}, p2, [x8, #-15, MUL VL] : st3h %z6.h %z7.h %z8.h %p2 -> -0x01e0(%x8)[2byte] +e4dced48 : st3h {z8.h, z9.h, z10.h}, p3, [x10, #-12, MUL VL] : st3h %z8.h %z9.h %z10.h %p3 -> -0x0180(%x10)[2byte] +e4dded6a : st3h {z10.h, z11.h, z12.h}, p3, [x11, #-9, MUL VL] : st3h %z10.h %z11.h %z12.h %p3 -> -0x0120(%x11)[2byte] +e4def1ac : st3h {z12.h, z13.h, z14.h}, p4, [x13, #-6, MUL VL] : st3h %z12.h %z13.h %z14.h %p4 -> -0xc0(%x13)[2byte] +e4dff1ee : st3h {z14.h, z15.h, z16.h}, p4, [x15, #-3, MUL VL] : st3h %z14.h %z15.h %z16.h %p4 -> -0x60(%x15)[2byte] +e4d0f630 : st3h {z16.h, z17.h, z18.h}, p5, [x17, #0, MUL VL] : st3h %z16.h %z17.h %z18.h %p5 -> (%x17)[2byte] +e4d0f671 : st3h {z17.h, z18.h, z19.h}, p5, [x19, #0, MUL VL] : st3h %z17.h %z18.h %z19.h %p5 -> (%x19)[2byte] +e4d1f6b3 : st3h {z19.h, z20.h, z21.h}, p5, [x21, #3, MUL VL] : st3h %z19.h %z20.h %z21.h %p5 -> +0x60(%x21)[2byte] +e4d2faf5 : st3h {z21.h, z22.h, z23.h}, p6, [x23, #6, MUL VL] : st3h %z21.h %z22.h %z23.h %p6 -> +0xc0(%x23)[2byte] +e4d3fb17 : st3h {z23.h, z24.h, z25.h}, p6, [x24, #9, MUL VL] : st3h %z23.h %z24.h %z25.h %p6 -> +0x0120(%x24)[2byte] +e4d4ff59 : st3h {z25.h, z26.h, z27.h}, p7, [x26, #12, MUL VL] : st3h %z25.h %z26.h %z27.h %p7 -> +0x0180(%x26)[2byte] +e4d5ff9b : st3h {z27.h, z28.h, z29.h}, p7, [x28, #15, MUL VL] : st3h %z27.h 
%z28.h %z29.h %p7 -> +0x01e0(%x28)[2byte] +e4d7ffff : st3h {z31.h, z0.h, z1.h}, p7, [sp, #21, MUL VL] : st3h %z31.h %z0.h %z1.h %p7 -> +0x02a0(%sp)[2byte] # ST3W { .S, .S, .S }, , [, , LSL #2] (ST3W-Z.P.BR-Contiguous) -e5406000 : st3w {z0.s, z1.s, z2.s}, p0, [x0, x0, LSL #2] : st3w %z0.s %z1.s %z2.s %p0 -> (%x0,%x0,lsl #2)[96byte] -e5456482 : st3w {z2.s, z3.s, z4.s}, p1, [x4, x5, LSL #2] : st3w %z2.s %z3.s %z4.s %p1 -> (%x4,%x5,lsl #2)[96byte] -e54768c4 : st3w {z4.s, z5.s, z6.s}, p2, [x6, x7, LSL #2] : st3w %z4.s %z5.s %z6.s %p2 -> (%x6,%x7,lsl #2)[96byte] -e5496906 : st3w {z6.s, z7.s, z8.s}, p2, [x8, x9, LSL #2] : st3w %z6.s %z7.s %z8.s %p2 -> (%x8,%x9,lsl #2)[96byte] -e54b6d48 : st3w {z8.s, z9.s, z10.s}, p3, [x10, x11, LSL #2] : st3w %z8.s %z9.s %z10.s %p3 -> (%x10,%x11,lsl #2)[96byte] -e54c6d6a : st3w {z10.s, z11.s, z12.s}, p3, [x11, x12, LSL #2] : st3w %z10.s %z11.s %z12.s %p3 -> (%x11,%x12,lsl #2)[96byte] -e54e71ac : st3w {z12.s, z13.s, z14.s}, p4, [x13, x14, LSL #2] : st3w %z12.s %z13.s %z14.s %p4 -> (%x13,%x14,lsl #2)[96byte] -e55071ee : st3w {z14.s, z15.s, z16.s}, p4, [x15, x16, LSL #2] : st3w %z14.s %z15.s %z16.s %p4 -> (%x15,%x16,lsl #2)[96byte] -e5527630 : st3w {z16.s, z17.s, z18.s}, p5, [x17, x18, LSL #2] : st3w %z16.s %z17.s %z18.s %p5 -> (%x17,%x18,lsl #2)[96byte] -e5547671 : st3w {z17.s, z18.s, z19.s}, p5, [x19, x20, LSL #2] : st3w %z17.s %z18.s %z19.s %p5 -> (%x19,%x20,lsl #2)[96byte] -e55676b3 : st3w {z19.s, z20.s, z21.s}, p5, [x21, x22, LSL #2] : st3w %z19.s %z20.s %z21.s %p5 -> (%x21,%x22,lsl #2)[96byte] -e5587af5 : st3w {z21.s, z22.s, z23.s}, p6, [x23, x24, LSL #2] : st3w %z21.s %z22.s %z23.s %p6 -> (%x23,%x24,lsl #2)[96byte] -e5597b17 : st3w {z23.s, z24.s, z25.s}, p6, [x24, x25, LSL #2] : st3w %z23.s %z24.s %z25.s %p6 -> (%x24,%x25,lsl #2)[96byte] -e55b7f59 : st3w {z25.s, z26.s, z27.s}, p7, [x26, x27, LSL #2] : st3w %z25.s %z26.s %z27.s %p7 -> (%x26,%x27,lsl #2)[96byte] -e55d7f9b : st3w {z27.s, z28.s, z29.s}, p7, [x28, x29, LSL #2] : st3w 
%z27.s %z28.s %z29.s %p7 -> (%x28,%x29,lsl #2)[96byte] -e55e7fff : st3w {z31.s, z0.s, z1.s}, p7, [sp, x30, LSL #2] : st3w %z31.s %z0.s %z1.s %p7 -> (%sp,%x30,lsl #2)[96byte] +e5406000 : st3w {z0.s, z1.s, z2.s}, p0, [x0, x0, LSL #2] : st3w %z0.s %z1.s %z2.s %p0 -> (%x0,%x0,lsl #2)[4byte] +e5456482 : st3w {z2.s, z3.s, z4.s}, p1, [x4, x5, LSL #2] : st3w %z2.s %z3.s %z4.s %p1 -> (%x4,%x5,lsl #2)[4byte] +e54768c4 : st3w {z4.s, z5.s, z6.s}, p2, [x6, x7, LSL #2] : st3w %z4.s %z5.s %z6.s %p2 -> (%x6,%x7,lsl #2)[4byte] +e5496906 : st3w {z6.s, z7.s, z8.s}, p2, [x8, x9, LSL #2] : st3w %z6.s %z7.s %z8.s %p2 -> (%x8,%x9,lsl #2)[4byte] +e54b6d48 : st3w {z8.s, z9.s, z10.s}, p3, [x10, x11, LSL #2] : st3w %z8.s %z9.s %z10.s %p3 -> (%x10,%x11,lsl #2)[4byte] +e54c6d6a : st3w {z10.s, z11.s, z12.s}, p3, [x11, x12, LSL #2] : st3w %z10.s %z11.s %z12.s %p3 -> (%x11,%x12,lsl #2)[4byte] +e54e71ac : st3w {z12.s, z13.s, z14.s}, p4, [x13, x14, LSL #2] : st3w %z12.s %z13.s %z14.s %p4 -> (%x13,%x14,lsl #2)[4byte] +e55071ee : st3w {z14.s, z15.s, z16.s}, p4, [x15, x16, LSL #2] : st3w %z14.s %z15.s %z16.s %p4 -> (%x15,%x16,lsl #2)[4byte] +e5527630 : st3w {z16.s, z17.s, z18.s}, p5, [x17, x18, LSL #2] : st3w %z16.s %z17.s %z18.s %p5 -> (%x17,%x18,lsl #2)[4byte] +e5547671 : st3w {z17.s, z18.s, z19.s}, p5, [x19, x20, LSL #2] : st3w %z17.s %z18.s %z19.s %p5 -> (%x19,%x20,lsl #2)[4byte] +e55676b3 : st3w {z19.s, z20.s, z21.s}, p5, [x21, x22, LSL #2] : st3w %z19.s %z20.s %z21.s %p5 -> (%x21,%x22,lsl #2)[4byte] +e5587af5 : st3w {z21.s, z22.s, z23.s}, p6, [x23, x24, LSL #2] : st3w %z21.s %z22.s %z23.s %p6 -> (%x23,%x24,lsl #2)[4byte] +e5597b17 : st3w {z23.s, z24.s, z25.s}, p6, [x24, x25, LSL #2] : st3w %z23.s %z24.s %z25.s %p6 -> (%x24,%x25,lsl #2)[4byte] +e55b7f59 : st3w {z25.s, z26.s, z27.s}, p7, [x26, x27, LSL #2] : st3w %z25.s %z26.s %z27.s %p7 -> (%x26,%x27,lsl #2)[4byte] +e55d7f9b : st3w {z27.s, z28.s, z29.s}, p7, [x28, x29, LSL #2] : st3w %z27.s %z28.s %z29.s %p7 -> (%x28,%x29,lsl #2)[4byte] +e55e7fff 
: st3w {z31.s, z0.s, z1.s}, p7, [sp, x30, LSL #2] : st3w %z31.s %z0.s %z1.s %p7 -> (%sp,%x30,lsl #2)[4byte] # ST3W { .S, .S, .S }, , [{, #, MUL VL}] (ST3W-Z.P.BI-Contiguous) -e558e000 : st3w {z0.s, z1.s, z2.s}, p0, [x0, #-24, MUL VL] : st3w %z0.s %z1.s %z2.s %p0 -> -0x0300(%x0)[96byte] -e559e482 : st3w {z2.s, z3.s, z4.s}, p1, [x4, #-21, MUL VL] : st3w %z2.s %z3.s %z4.s %p1 -> -0x02a0(%x4)[96byte] -e55ae8c4 : st3w {z4.s, z5.s, z6.s}, p2, [x6, #-18, MUL VL] : st3w %z4.s %z5.s %z6.s %p2 -> -0x0240(%x6)[96byte] -e55be906 : st3w {z6.s, z7.s, z8.s}, p2, [x8, #-15, MUL VL] : st3w %z6.s %z7.s %z8.s %p2 -> -0x01e0(%x8)[96byte] -e55ced48 : st3w {z8.s, z9.s, z10.s}, p3, [x10, #-12, MUL VL] : st3w %z8.s %z9.s %z10.s %p3 -> -0x0180(%x10)[96byte] -e55ded6a : st3w {z10.s, z11.s, z12.s}, p3, [x11, #-9, MUL VL] : st3w %z10.s %z11.s %z12.s %p3 -> -0x0120(%x11)[96byte] -e55ef1ac : st3w {z12.s, z13.s, z14.s}, p4, [x13, #-6, MUL VL] : st3w %z12.s %z13.s %z14.s %p4 -> -0xc0(%x13)[96byte] -e55ff1ee : st3w {z14.s, z15.s, z16.s}, p4, [x15, #-3, MUL VL] : st3w %z14.s %z15.s %z16.s %p4 -> -0x60(%x15)[96byte] -e550f630 : st3w {z16.s, z17.s, z18.s}, p5, [x17, #0, MUL VL] : st3w %z16.s %z17.s %z18.s %p5 -> (%x17)[96byte] -e550f671 : st3w {z17.s, z18.s, z19.s}, p5, [x19, #0, MUL VL] : st3w %z17.s %z18.s %z19.s %p5 -> (%x19)[96byte] -e551f6b3 : st3w {z19.s, z20.s, z21.s}, p5, [x21, #3, MUL VL] : st3w %z19.s %z20.s %z21.s %p5 -> +0x60(%x21)[96byte] -e552faf5 : st3w {z21.s, z22.s, z23.s}, p6, [x23, #6, MUL VL] : st3w %z21.s %z22.s %z23.s %p6 -> +0xc0(%x23)[96byte] -e553fb17 : st3w {z23.s, z24.s, z25.s}, p6, [x24, #9, MUL VL] : st3w %z23.s %z24.s %z25.s %p6 -> +0x0120(%x24)[96byte] -e554ff59 : st3w {z25.s, z26.s, z27.s}, p7, [x26, #12, MUL VL] : st3w %z25.s %z26.s %z27.s %p7 -> +0x0180(%x26)[96byte] -e555ff9b : st3w {z27.s, z28.s, z29.s}, p7, [x28, #15, MUL VL] : st3w %z27.s %z28.s %z29.s %p7 -> +0x01e0(%x28)[96byte] -e557ffff : st3w {z31.s, z0.s, z1.s}, p7, [sp, #21, MUL VL] : st3w %z31.s %z0.s 
%z1.s %p7 -> +0x02a0(%sp)[96byte] +e558e000 : st3w {z0.s, z1.s, z2.s}, p0, [x0, #-24, MUL VL] : st3w %z0.s %z1.s %z2.s %p0 -> -0x0300(%x0)[4byte] +e559e482 : st3w {z2.s, z3.s, z4.s}, p1, [x4, #-21, MUL VL] : st3w %z2.s %z3.s %z4.s %p1 -> -0x02a0(%x4)[4byte] +e55ae8c4 : st3w {z4.s, z5.s, z6.s}, p2, [x6, #-18, MUL VL] : st3w %z4.s %z5.s %z6.s %p2 -> -0x0240(%x6)[4byte] +e55be906 : st3w {z6.s, z7.s, z8.s}, p2, [x8, #-15, MUL VL] : st3w %z6.s %z7.s %z8.s %p2 -> -0x01e0(%x8)[4byte] +e55ced48 : st3w {z8.s, z9.s, z10.s}, p3, [x10, #-12, MUL VL] : st3w %z8.s %z9.s %z10.s %p3 -> -0x0180(%x10)[4byte] +e55ded6a : st3w {z10.s, z11.s, z12.s}, p3, [x11, #-9, MUL VL] : st3w %z10.s %z11.s %z12.s %p3 -> -0x0120(%x11)[4byte] +e55ef1ac : st3w {z12.s, z13.s, z14.s}, p4, [x13, #-6, MUL VL] : st3w %z12.s %z13.s %z14.s %p4 -> -0xc0(%x13)[4byte] +e55ff1ee : st3w {z14.s, z15.s, z16.s}, p4, [x15, #-3, MUL VL] : st3w %z14.s %z15.s %z16.s %p4 -> -0x60(%x15)[4byte] +e550f630 : st3w {z16.s, z17.s, z18.s}, p5, [x17, #0, MUL VL] : st3w %z16.s %z17.s %z18.s %p5 -> (%x17)[4byte] +e550f671 : st3w {z17.s, z18.s, z19.s}, p5, [x19, #0, MUL VL] : st3w %z17.s %z18.s %z19.s %p5 -> (%x19)[4byte] +e551f6b3 : st3w {z19.s, z20.s, z21.s}, p5, [x21, #3, MUL VL] : st3w %z19.s %z20.s %z21.s %p5 -> +0x60(%x21)[4byte] +e552faf5 : st3w {z21.s, z22.s, z23.s}, p6, [x23, #6, MUL VL] : st3w %z21.s %z22.s %z23.s %p6 -> +0xc0(%x23)[4byte] +e553fb17 : st3w {z23.s, z24.s, z25.s}, p6, [x24, #9, MUL VL] : st3w %z23.s %z24.s %z25.s %p6 -> +0x0120(%x24)[4byte] +e554ff59 : st3w {z25.s, z26.s, z27.s}, p7, [x26, #12, MUL VL] : st3w %z25.s %z26.s %z27.s %p7 -> +0x0180(%x26)[4byte] +e555ff9b : st3w {z27.s, z28.s, z29.s}, p7, [x28, #15, MUL VL] : st3w %z27.s %z28.s %z29.s %p7 -> +0x01e0(%x28)[4byte] +e557ffff : st3w {z31.s, z0.s, z1.s}, p7, [sp, #21, MUL VL] : st3w %z31.s %z0.s %z1.s %p7 -> +0x02a0(%sp)[4byte] # ST4B { .B, .B, .B, .B }, , [, ] (ST4B-Z.P.BR-Contiguous) -e4606000 : st4b {z0.b, z1.b, z2.b, z3.b}, p0, [x0, x0] : st4b 
%z0.b %z1.b %z2.b %z3.b %p0 -> (%x0,%x0)[128byte] -e4656482 : st4b {z2.b, z3.b, z4.b, z5.b}, p1, [x4, x5] : st4b %z2.b %z3.b %z4.b %z5.b %p1 -> (%x4,%x5)[128byte] -e46768c4 : st4b {z4.b, z5.b, z6.b, z7.b}, p2, [x6, x7] : st4b %z4.b %z5.b %z6.b %z7.b %p2 -> (%x6,%x7)[128byte] -e4696906 : st4b {z6.b, z7.b, z8.b, z9.b}, p2, [x8, x9] : st4b %z6.b %z7.b %z8.b %z9.b %p2 -> (%x8,%x9)[128byte] -e46b6d48 : st4b {z8.b, z9.b, z10.b, z11.b}, p3, [x10, x11] : st4b %z8.b %z9.b %z10.b %z11.b %p3 -> (%x10,%x11)[128byte] -e46c6d6a : st4b {z10.b, z11.b, z12.b, z13.b}, p3, [x11, x12] : st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x11,%x12)[128byte] -e46e71ac : st4b {z12.b, z13.b, z14.b, z15.b}, p4, [x13, x14] : st4b %z12.b %z13.b %z14.b %z15.b %p4 -> (%x13,%x14)[128byte] -e47071ee : st4b {z14.b, z15.b, z16.b, z17.b}, p4, [x15, x16] : st4b %z14.b %z15.b %z16.b %z17.b %p4 -> (%x15,%x16)[128byte] -e4727630 : st4b {z16.b, z17.b, z18.b, z19.b}, p5, [x17, x18] : st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17,%x18)[128byte] -e4747671 : st4b {z17.b, z18.b, z19.b, z20.b}, p5, [x19, x20] : st4b %z17.b %z18.b %z19.b %z20.b %p5 -> (%x19,%x20)[128byte] -e47676b3 : st4b {z19.b, z20.b, z21.b, z22.b}, p5, [x21, x22] : st4b %z19.b %z20.b %z21.b %z22.b %p5 -> (%x21,%x22)[128byte] -e4787af5 : st4b {z21.b, z22.b, z23.b, z24.b}, p6, [x23, x24] : st4b %z21.b %z22.b %z23.b %z24.b %p6 -> (%x23,%x24)[128byte] -e4797b17 : st4b {z23.b, z24.b, z25.b, z26.b}, p6, [x24, x25] : st4b %z23.b %z24.b %z25.b %z26.b %p6 -> (%x24,%x25)[128byte] -e47b7f59 : st4b {z25.b, z26.b, z27.b, z28.b}, p7, [x26, x27] : st4b %z25.b %z26.b %z27.b %z28.b %p7 -> (%x26,%x27)[128byte] -e47d7f9b : st4b {z27.b, z28.b, z29.b, z30.b}, p7, [x28, x29] : st4b %z27.b %z28.b %z29.b %z30.b %p7 -> (%x28,%x29)[128byte] -e47e7fff : st4b {z31.b, z0.b, z1.b, z2.b}, p7, [sp, x30] : st4b %z31.b %z0.b %z1.b %z2.b %p7 -> (%sp,%x30)[128byte] +e4606000 : st4b {z0.b, z1.b, z2.b, z3.b}, p0, [x0, x0] : st4b %z0.b %z1.b %z2.b %z3.b %p0 -> (%x0,%x0)[1byte] +e4656482 : 
st4b {z2.b, z3.b, z4.b, z5.b}, p1, [x4, x5] : st4b %z2.b %z3.b %z4.b %z5.b %p1 -> (%x4,%x5)[1byte] +e46768c4 : st4b {z4.b, z5.b, z6.b, z7.b}, p2, [x6, x7] : st4b %z4.b %z5.b %z6.b %z7.b %p2 -> (%x6,%x7)[1byte] +e4696906 : st4b {z6.b, z7.b, z8.b, z9.b}, p2, [x8, x9] : st4b %z6.b %z7.b %z8.b %z9.b %p2 -> (%x8,%x9)[1byte] +e46b6d48 : st4b {z8.b, z9.b, z10.b, z11.b}, p3, [x10, x11] : st4b %z8.b %z9.b %z10.b %z11.b %p3 -> (%x10,%x11)[1byte] +e46c6d6a : st4b {z10.b, z11.b, z12.b, z13.b}, p3, [x11, x12] : st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x11,%x12)[1byte] +e46e71ac : st4b {z12.b, z13.b, z14.b, z15.b}, p4, [x13, x14] : st4b %z12.b %z13.b %z14.b %z15.b %p4 -> (%x13,%x14)[1byte] +e47071ee : st4b {z14.b, z15.b, z16.b, z17.b}, p4, [x15, x16] : st4b %z14.b %z15.b %z16.b %z17.b %p4 -> (%x15,%x16)[1byte] +e4727630 : st4b {z16.b, z17.b, z18.b, z19.b}, p5, [x17, x18] : st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17,%x18)[1byte] +e4747671 : st4b {z17.b, z18.b, z19.b, z20.b}, p5, [x19, x20] : st4b %z17.b %z18.b %z19.b %z20.b %p5 -> (%x19,%x20)[1byte] +e47676b3 : st4b {z19.b, z20.b, z21.b, z22.b}, p5, [x21, x22] : st4b %z19.b %z20.b %z21.b %z22.b %p5 -> (%x21,%x22)[1byte] +e4787af5 : st4b {z21.b, z22.b, z23.b, z24.b}, p6, [x23, x24] : st4b %z21.b %z22.b %z23.b %z24.b %p6 -> (%x23,%x24)[1byte] +e4797b17 : st4b {z23.b, z24.b, z25.b, z26.b}, p6, [x24, x25] : st4b %z23.b %z24.b %z25.b %z26.b %p6 -> (%x24,%x25)[1byte] +e47b7f59 : st4b {z25.b, z26.b, z27.b, z28.b}, p7, [x26, x27] : st4b %z25.b %z26.b %z27.b %z28.b %p7 -> (%x26,%x27)[1byte] +e47d7f9b : st4b {z27.b, z28.b, z29.b, z30.b}, p7, [x28, x29] : st4b %z27.b %z28.b %z29.b %z30.b %p7 -> (%x28,%x29)[1byte] +e47e7fff : st4b {z31.b, z0.b, z1.b, z2.b}, p7, [sp, x30] : st4b %z31.b %z0.b %z1.b %z2.b %p7 -> (%sp,%x30)[1byte] # ST4B { .B, .B, .B, .B }, , [{, #, MUL VL}] (ST4B-Z.P.BI-Contiguous) -e478e000 : st4b {z0.b, z1.b, z2.b, z3.b}, p0, [x0, #-32, MUL VL] : st4b %z0.b %z1.b %z2.b %z3.b %p0 -> -0x0400(%x0)[128byte] -e479e482 : st4b 
{z2.b, z3.b, z4.b, z5.b}, p1, [x4, #-28, MUL VL] : st4b %z2.b %z3.b %z4.b %z5.b %p1 -> -0x0380(%x4)[128byte] -e47ae8c4 : st4b {z4.b, z5.b, z6.b, z7.b}, p2, [x6, #-24, MUL VL] : st4b %z4.b %z5.b %z6.b %z7.b %p2 -> -0x0300(%x6)[128byte] -e47be906 : st4b {z6.b, z7.b, z8.b, z9.b}, p2, [x8, #-20, MUL VL] : st4b %z6.b %z7.b %z8.b %z9.b %p2 -> -0x0280(%x8)[128byte] -e47ced48 : st4b {z8.b, z9.b, z10.b, z11.b}, p3, [x10, #-16, MUL VL] : st4b %z8.b %z9.b %z10.b %z11.b %p3 -> -0x0200(%x10)[128byte] -e47ded6a : st4b {z10.b, z11.b, z12.b, z13.b}, p3, [x11, #-12, MUL VL] : st4b %z10.b %z11.b %z12.b %z13.b %p3 -> -0x0180(%x11)[128byte] -e47ef1ac : st4b {z12.b, z13.b, z14.b, z15.b}, p4, [x13, #-8, MUL VL] : st4b %z12.b %z13.b %z14.b %z15.b %p4 -> -0x0100(%x13)[128byte] -e47ff1ee : st4b {z14.b, z15.b, z16.b, z17.b}, p4, [x15, #-4, MUL VL] : st4b %z14.b %z15.b %z16.b %z17.b %p4 -> -0x80(%x15)[128byte] -e470f630 : st4b {z16.b, z17.b, z18.b, z19.b}, p5, [x17, #0, MUL VL] : st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17)[128byte] -e470f671 : st4b {z17.b, z18.b, z19.b, z20.b}, p5, [x19, #0, MUL VL] : st4b %z17.b %z18.b %z19.b %z20.b %p5 -> (%x19)[128byte] -e471f6b3 : st4b {z19.b, z20.b, z21.b, z22.b}, p5, [x21, #4, MUL VL] : st4b %z19.b %z20.b %z21.b %z22.b %p5 -> +0x80(%x21)[128byte] -e472faf5 : st4b {z21.b, z22.b, z23.b, z24.b}, p6, [x23, #8, MUL VL] : st4b %z21.b %z22.b %z23.b %z24.b %p6 -> +0x0100(%x23)[128byte] -e473fb17 : st4b {z23.b, z24.b, z25.b, z26.b}, p6, [x24, #12, MUL VL] : st4b %z23.b %z24.b %z25.b %z26.b %p6 -> +0x0180(%x24)[128byte] -e474ff59 : st4b {z25.b, z26.b, z27.b, z28.b}, p7, [x26, #16, MUL VL] : st4b %z25.b %z26.b %z27.b %z28.b %p7 -> +0x0200(%x26)[128byte] -e475ff9b : st4b {z27.b, z28.b, z29.b, z30.b}, p7, [x28, #20, MUL VL] : st4b %z27.b %z28.b %z29.b %z30.b %p7 -> +0x0280(%x28)[128byte] -e477ffff : st4b {z31.b, z0.b, z1.b, z2.b}, p7, [sp, #28, MUL VL] : st4b %z31.b %z0.b %z1.b %z2.b %p7 -> +0x0380(%sp)[128byte] +e478e000 : st4b {z0.b, z1.b, z2.b, z3.b}, p0, 
[x0, #-32, MUL VL] : st4b %z0.b %z1.b %z2.b %z3.b %p0 -> -0x0400(%x0)[1byte] +e479e482 : st4b {z2.b, z3.b, z4.b, z5.b}, p1, [x4, #-28, MUL VL] : st4b %z2.b %z3.b %z4.b %z5.b %p1 -> -0x0380(%x4)[1byte] +e47ae8c4 : st4b {z4.b, z5.b, z6.b, z7.b}, p2, [x6, #-24, MUL VL] : st4b %z4.b %z5.b %z6.b %z7.b %p2 -> -0x0300(%x6)[1byte] +e47be906 : st4b {z6.b, z7.b, z8.b, z9.b}, p2, [x8, #-20, MUL VL] : st4b %z6.b %z7.b %z8.b %z9.b %p2 -> -0x0280(%x8)[1byte] +e47ced48 : st4b {z8.b, z9.b, z10.b, z11.b}, p3, [x10, #-16, MUL VL] : st4b %z8.b %z9.b %z10.b %z11.b %p3 -> -0x0200(%x10)[1byte] +e47ded6a : st4b {z10.b, z11.b, z12.b, z13.b}, p3, [x11, #-12, MUL VL] : st4b %z10.b %z11.b %z12.b %z13.b %p3 -> -0x0180(%x11)[1byte] +e47ef1ac : st4b {z12.b, z13.b, z14.b, z15.b}, p4, [x13, #-8, MUL VL] : st4b %z12.b %z13.b %z14.b %z15.b %p4 -> -0x0100(%x13)[1byte] +e47ff1ee : st4b {z14.b, z15.b, z16.b, z17.b}, p4, [x15, #-4, MUL VL] : st4b %z14.b %z15.b %z16.b %z17.b %p4 -> -0x80(%x15)[1byte] +e470f630 : st4b {z16.b, z17.b, z18.b, z19.b}, p5, [x17, #0, MUL VL] : st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17)[1byte] +e470f671 : st4b {z17.b, z18.b, z19.b, z20.b}, p5, [x19, #0, MUL VL] : st4b %z17.b %z18.b %z19.b %z20.b %p5 -> (%x19)[1byte] +e471f6b3 : st4b {z19.b, z20.b, z21.b, z22.b}, p5, [x21, #4, MUL VL] : st4b %z19.b %z20.b %z21.b %z22.b %p5 -> +0x80(%x21)[1byte] +e472faf5 : st4b {z21.b, z22.b, z23.b, z24.b}, p6, [x23, #8, MUL VL] : st4b %z21.b %z22.b %z23.b %z24.b %p6 -> +0x0100(%x23)[1byte] +e473fb17 : st4b {z23.b, z24.b, z25.b, z26.b}, p6, [x24, #12, MUL VL] : st4b %z23.b %z24.b %z25.b %z26.b %p6 -> +0x0180(%x24)[1byte] +e474ff59 : st4b {z25.b, z26.b, z27.b, z28.b}, p7, [x26, #16, MUL VL] : st4b %z25.b %z26.b %z27.b %z28.b %p7 -> +0x0200(%x26)[1byte] +e475ff9b : st4b {z27.b, z28.b, z29.b, z30.b}, p7, [x28, #20, MUL VL] : st4b %z27.b %z28.b %z29.b %z30.b %p7 -> +0x0280(%x28)[1byte] +e477ffff : st4b {z31.b, z0.b, z1.b, z2.b}, p7, [sp, #28, MUL VL] : st4b %z31.b %z0.b %z1.b %z2.b %p7 -> 
+0x0380(%sp)[1byte] # ST4D { .D, .D, .D, .D }, , [, , LSL #3] (ST4D-Z.P.BR-Contiguous) -e5e06000 : st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x0, x0, LSL #3] : st4d %z0.d %z1.d %z2.d %z3.d %p0 -> (%x0,%x0,lsl #3)[128byte] -e5e56482 : st4d {z2.d, z3.d, z4.d, z5.d}, p1, [x4, x5, LSL #3] : st4d %z2.d %z3.d %z4.d %z5.d %p1 -> (%x4,%x5,lsl #3)[128byte] -e5e768c4 : st4d {z4.d, z5.d, z6.d, z7.d}, p2, [x6, x7, LSL #3] : st4d %z4.d %z5.d %z6.d %z7.d %p2 -> (%x6,%x7,lsl #3)[128byte] -e5e96906 : st4d {z6.d, z7.d, z8.d, z9.d}, p2, [x8, x9, LSL #3] : st4d %z6.d %z7.d %z8.d %z9.d %p2 -> (%x8,%x9,lsl #3)[128byte] -e5eb6d48 : st4d {z8.d, z9.d, z10.d, z11.d}, p3, [x10, x11, LSL #3] : st4d %z8.d %z9.d %z10.d %z11.d %p3 -> (%x10,%x11,lsl #3)[128byte] -e5ec6d6a : st4d {z10.d, z11.d, z12.d, z13.d}, p3, [x11, x12, LSL #3] : st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x11,%x12,lsl #3)[128byte] -e5ee71ac : st4d {z12.d, z13.d, z14.d, z15.d}, p4, [x13, x14, LSL #3] : st4d %z12.d %z13.d %z14.d %z15.d %p4 -> (%x13,%x14,lsl #3)[128byte] -e5f071ee : st4d {z14.d, z15.d, z16.d, z17.d}, p4, [x15, x16, LSL #3] : st4d %z14.d %z15.d %z16.d %z17.d %p4 -> (%x15,%x16,lsl #3)[128byte] -e5f27630 : st4d {z16.d, z17.d, z18.d, z19.d}, p5, [x17, x18, LSL #3] : st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17,%x18,lsl #3)[128byte] -e5f47671 : st4d {z17.d, z18.d, z19.d, z20.d}, p5, [x19, x20, LSL #3] : st4d %z17.d %z18.d %z19.d %z20.d %p5 -> (%x19,%x20,lsl #3)[128byte] -e5f676b3 : st4d {z19.d, z20.d, z21.d, z22.d}, p5, [x21, x22, LSL #3] : st4d %z19.d %z20.d %z21.d %z22.d %p5 -> (%x21,%x22,lsl #3)[128byte] -e5f87af5 : st4d {z21.d, z22.d, z23.d, z24.d}, p6, [x23, x24, LSL #3] : st4d %z21.d %z22.d %z23.d %z24.d %p6 -> (%x23,%x24,lsl #3)[128byte] -e5f97b17 : st4d {z23.d, z24.d, z25.d, z26.d}, p6, [x24, x25, LSL #3] : st4d %z23.d %z24.d %z25.d %z26.d %p6 -> (%x24,%x25,lsl #3)[128byte] -e5fb7f59 : st4d {z25.d, z26.d, z27.d, z28.d}, p7, [x26, x27, LSL #3] : st4d %z25.d %z26.d %z27.d %z28.d %p7 -> (%x26,%x27,lsl #3)[128byte] 
-e5fd7f9b : st4d {z27.d, z28.d, z29.d, z30.d}, p7, [x28, x29, LSL #3] : st4d %z27.d %z28.d %z29.d %z30.d %p7 -> (%x28,%x29,lsl #3)[128byte] -e5fe7fff : st4d {z31.d, z0.d, z1.d, z2.d}, p7, [sp, x30, LSL #3] : st4d %z31.d %z0.d %z1.d %z2.d %p7 -> (%sp,%x30,lsl #3)[128byte] +e5e06000 : st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x0, x0, LSL #3] : st4d %z0.d %z1.d %z2.d %z3.d %p0 -> (%x0,%x0,lsl #3)[8byte] +e5e56482 : st4d {z2.d, z3.d, z4.d, z5.d}, p1, [x4, x5, LSL #3] : st4d %z2.d %z3.d %z4.d %z5.d %p1 -> (%x4,%x5,lsl #3)[8byte] +e5e768c4 : st4d {z4.d, z5.d, z6.d, z7.d}, p2, [x6, x7, LSL #3] : st4d %z4.d %z5.d %z6.d %z7.d %p2 -> (%x6,%x7,lsl #3)[8byte] +e5e96906 : st4d {z6.d, z7.d, z8.d, z9.d}, p2, [x8, x9, LSL #3] : st4d %z6.d %z7.d %z8.d %z9.d %p2 -> (%x8,%x9,lsl #3)[8byte] +e5eb6d48 : st4d {z8.d, z9.d, z10.d, z11.d}, p3, [x10, x11, LSL #3] : st4d %z8.d %z9.d %z10.d %z11.d %p3 -> (%x10,%x11,lsl #3)[8byte] +e5ec6d6a : st4d {z10.d, z11.d, z12.d, z13.d}, p3, [x11, x12, LSL #3] : st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x11,%x12,lsl #3)[8byte] +e5ee71ac : st4d {z12.d, z13.d, z14.d, z15.d}, p4, [x13, x14, LSL #3] : st4d %z12.d %z13.d %z14.d %z15.d %p4 -> (%x13,%x14,lsl #3)[8byte] +e5f071ee : st4d {z14.d, z15.d, z16.d, z17.d}, p4, [x15, x16, LSL #3] : st4d %z14.d %z15.d %z16.d %z17.d %p4 -> (%x15,%x16,lsl #3)[8byte] +e5f27630 : st4d {z16.d, z17.d, z18.d, z19.d}, p5, [x17, x18, LSL #3] : st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17,%x18,lsl #3)[8byte] +e5f47671 : st4d {z17.d, z18.d, z19.d, z20.d}, p5, [x19, x20, LSL #3] : st4d %z17.d %z18.d %z19.d %z20.d %p5 -> (%x19,%x20,lsl #3)[8byte] +e5f676b3 : st4d {z19.d, z20.d, z21.d, z22.d}, p5, [x21, x22, LSL #3] : st4d %z19.d %z20.d %z21.d %z22.d %p5 -> (%x21,%x22,lsl #3)[8byte] +e5f87af5 : st4d {z21.d, z22.d, z23.d, z24.d}, p6, [x23, x24, LSL #3] : st4d %z21.d %z22.d %z23.d %z24.d %p6 -> (%x23,%x24,lsl #3)[8byte] +e5f97b17 : st4d {z23.d, z24.d, z25.d, z26.d}, p6, [x24, x25, LSL #3] : st4d %z23.d %z24.d %z25.d %z26.d %p6 -> 
(%x24,%x25,lsl #3)[8byte] +e5fb7f59 : st4d {z25.d, z26.d, z27.d, z28.d}, p7, [x26, x27, LSL #3] : st4d %z25.d %z26.d %z27.d %z28.d %p7 -> (%x26,%x27,lsl #3)[8byte] +e5fd7f9b : st4d {z27.d, z28.d, z29.d, z30.d}, p7, [x28, x29, LSL #3] : st4d %z27.d %z28.d %z29.d %z30.d %p7 -> (%x28,%x29,lsl #3)[8byte] +e5fe7fff : st4d {z31.d, z0.d, z1.d, z2.d}, p7, [sp, x30, LSL #3] : st4d %z31.d %z0.d %z1.d %z2.d %p7 -> (%sp,%x30,lsl #3)[8byte] # ST4D { .D, .D, .D, .D }, , [{, #, MUL VL}] (ST4D-Z.P.BI-Contiguous) -e5f8e000 : st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x0, #-32, MUL VL] : st4d %z0.d %z1.d %z2.d %z3.d %p0 -> -0x0400(%x0)[128byte] -e5f9e482 : st4d {z2.d, z3.d, z4.d, z5.d}, p1, [x4, #-28, MUL VL] : st4d %z2.d %z3.d %z4.d %z5.d %p1 -> -0x0380(%x4)[128byte] -e5fae8c4 : st4d {z4.d, z5.d, z6.d, z7.d}, p2, [x6, #-24, MUL VL] : st4d %z4.d %z5.d %z6.d %z7.d %p2 -> -0x0300(%x6)[128byte] -e5fbe906 : st4d {z6.d, z7.d, z8.d, z9.d}, p2, [x8, #-20, MUL VL] : st4d %z6.d %z7.d %z8.d %z9.d %p2 -> -0x0280(%x8)[128byte] -e5fced48 : st4d {z8.d, z9.d, z10.d, z11.d}, p3, [x10, #-16, MUL VL] : st4d %z8.d %z9.d %z10.d %z11.d %p3 -> -0x0200(%x10)[128byte] -e5fded6a : st4d {z10.d, z11.d, z12.d, z13.d}, p3, [x11, #-12, MUL VL] : st4d %z10.d %z11.d %z12.d %z13.d %p3 -> -0x0180(%x11)[128byte] -e5fef1ac : st4d {z12.d, z13.d, z14.d, z15.d}, p4, [x13, #-8, MUL VL] : st4d %z12.d %z13.d %z14.d %z15.d %p4 -> -0x0100(%x13)[128byte] -e5fff1ee : st4d {z14.d, z15.d, z16.d, z17.d}, p4, [x15, #-4, MUL VL] : st4d %z14.d %z15.d %z16.d %z17.d %p4 -> -0x80(%x15)[128byte] -e5f0f630 : st4d {z16.d, z17.d, z18.d, z19.d}, p5, [x17, #0, MUL VL] : st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17)[128byte] -e5f0f671 : st4d {z17.d, z18.d, z19.d, z20.d}, p5, [x19, #0, MUL VL] : st4d %z17.d %z18.d %z19.d %z20.d %p5 -> (%x19)[128byte] -e5f1f6b3 : st4d {z19.d, z20.d, z21.d, z22.d}, p5, [x21, #4, MUL VL] : st4d %z19.d %z20.d %z21.d %z22.d %p5 -> +0x80(%x21)[128byte] -e5f2faf5 : st4d {z21.d, z22.d, z23.d, z24.d}, p6, [x23, #8, MUL VL] : 
st4d %z21.d %z22.d %z23.d %z24.d %p6 -> +0x0100(%x23)[128byte] -e5f3fb17 : st4d {z23.d, z24.d, z25.d, z26.d}, p6, [x24, #12, MUL VL] : st4d %z23.d %z24.d %z25.d %z26.d %p6 -> +0x0180(%x24)[128byte] -e5f4ff59 : st4d {z25.d, z26.d, z27.d, z28.d}, p7, [x26, #16, MUL VL] : st4d %z25.d %z26.d %z27.d %z28.d %p7 -> +0x0200(%x26)[128byte] -e5f5ff9b : st4d {z27.d, z28.d, z29.d, z30.d}, p7, [x28, #20, MUL VL] : st4d %z27.d %z28.d %z29.d %z30.d %p7 -> +0x0280(%x28)[128byte] -e5f7ffff : st4d {z31.d, z0.d, z1.d, z2.d}, p7, [sp, #28, MUL VL] : st4d %z31.d %z0.d %z1.d %z2.d %p7 -> +0x0380(%sp)[128byte] +e5f8e000 : st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x0, #-32, MUL VL] : st4d %z0.d %z1.d %z2.d %z3.d %p0 -> -0x0400(%x0)[8byte] +e5f9e482 : st4d {z2.d, z3.d, z4.d, z5.d}, p1, [x4, #-28, MUL VL] : st4d %z2.d %z3.d %z4.d %z5.d %p1 -> -0x0380(%x4)[8byte] +e5fae8c4 : st4d {z4.d, z5.d, z6.d, z7.d}, p2, [x6, #-24, MUL VL] : st4d %z4.d %z5.d %z6.d %z7.d %p2 -> -0x0300(%x6)[8byte] +e5fbe906 : st4d {z6.d, z7.d, z8.d, z9.d}, p2, [x8, #-20, MUL VL] : st4d %z6.d %z7.d %z8.d %z9.d %p2 -> -0x0280(%x8)[8byte] +e5fced48 : st4d {z8.d, z9.d, z10.d, z11.d}, p3, [x10, #-16, MUL VL] : st4d %z8.d %z9.d %z10.d %z11.d %p3 -> -0x0200(%x10)[8byte] +e5fded6a : st4d {z10.d, z11.d, z12.d, z13.d}, p3, [x11, #-12, MUL VL] : st4d %z10.d %z11.d %z12.d %z13.d %p3 -> -0x0180(%x11)[8byte] +e5fef1ac : st4d {z12.d, z13.d, z14.d, z15.d}, p4, [x13, #-8, MUL VL] : st4d %z12.d %z13.d %z14.d %z15.d %p4 -> -0x0100(%x13)[8byte] +e5fff1ee : st4d {z14.d, z15.d, z16.d, z17.d}, p4, [x15, #-4, MUL VL] : st4d %z14.d %z15.d %z16.d %z17.d %p4 -> -0x80(%x15)[8byte] +e5f0f630 : st4d {z16.d, z17.d, z18.d, z19.d}, p5, [x17, #0, MUL VL] : st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17)[8byte] +e5f0f671 : st4d {z17.d, z18.d, z19.d, z20.d}, p5, [x19, #0, MUL VL] : st4d %z17.d %z18.d %z19.d %z20.d %p5 -> (%x19)[8byte] +e5f1f6b3 : st4d {z19.d, z20.d, z21.d, z22.d}, p5, [x21, #4, MUL VL] : st4d %z19.d %z20.d %z21.d %z22.d %p5 -> +0x80(%x21)[8byte] 
+e5f2faf5 : st4d {z21.d, z22.d, z23.d, z24.d}, p6, [x23, #8, MUL VL] : st4d %z21.d %z22.d %z23.d %z24.d %p6 -> +0x0100(%x23)[8byte] +e5f3fb17 : st4d {z23.d, z24.d, z25.d, z26.d}, p6, [x24, #12, MUL VL] : st4d %z23.d %z24.d %z25.d %z26.d %p6 -> +0x0180(%x24)[8byte] +e5f4ff59 : st4d {z25.d, z26.d, z27.d, z28.d}, p7, [x26, #16, MUL VL] : st4d %z25.d %z26.d %z27.d %z28.d %p7 -> +0x0200(%x26)[8byte] +e5f5ff9b : st4d {z27.d, z28.d, z29.d, z30.d}, p7, [x28, #20, MUL VL] : st4d %z27.d %z28.d %z29.d %z30.d %p7 -> +0x0280(%x28)[8byte] +e5f7ffff : st4d {z31.d, z0.d, z1.d, z2.d}, p7, [sp, #28, MUL VL] : st4d %z31.d %z0.d %z1.d %z2.d %p7 -> +0x0380(%sp)[8byte] # ST4H { .H, .H, .H, .H }, , [, , LSL #1] (ST4H-Z.P.BR-Contiguous) -e4e06000 : st4h {z0.h, z1.h, z2.h, z3.h}, p0, [x0, x0, LSL #1] : st4h %z0.h %z1.h %z2.h %z3.h %p0 -> (%x0,%x0,lsl #1)[128byte] -e4e56482 : st4h {z2.h, z3.h, z4.h, z5.h}, p1, [x4, x5, LSL #1] : st4h %z2.h %z3.h %z4.h %z5.h %p1 -> (%x4,%x5,lsl #1)[128byte] -e4e768c4 : st4h {z4.h, z5.h, z6.h, z7.h}, p2, [x6, x7, LSL #1] : st4h %z4.h %z5.h %z6.h %z7.h %p2 -> (%x6,%x7,lsl #1)[128byte] -e4e96906 : st4h {z6.h, z7.h, z8.h, z9.h}, p2, [x8, x9, LSL #1] : st4h %z6.h %z7.h %z8.h %z9.h %p2 -> (%x8,%x9,lsl #1)[128byte] -e4eb6d48 : st4h {z8.h, z9.h, z10.h, z11.h}, p3, [x10, x11, LSL #1] : st4h %z8.h %z9.h %z10.h %z11.h %p3 -> (%x10,%x11,lsl #1)[128byte] -e4ec6d6a : st4h {z10.h, z11.h, z12.h, z13.h}, p3, [x11, x12, LSL #1] : st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x11,%x12,lsl #1)[128byte] -e4ee71ac : st4h {z12.h, z13.h, z14.h, z15.h}, p4, [x13, x14, LSL #1] : st4h %z12.h %z13.h %z14.h %z15.h %p4 -> (%x13,%x14,lsl #1)[128byte] -e4f071ee : st4h {z14.h, z15.h, z16.h, z17.h}, p4, [x15, x16, LSL #1] : st4h %z14.h %z15.h %z16.h %z17.h %p4 -> (%x15,%x16,lsl #1)[128byte] -e4f27630 : st4h {z16.h, z17.h, z18.h, z19.h}, p5, [x17, x18, LSL #1] : st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17,%x18,lsl #1)[128byte] -e4f47671 : st4h {z17.h, z18.h, z19.h, z20.h}, p5, [x19, x20, LSL 
#1] : st4h %z17.h %z18.h %z19.h %z20.h %p5 -> (%x19,%x20,lsl #1)[128byte] -e4f676b3 : st4h {z19.h, z20.h, z21.h, z22.h}, p5, [x21, x22, LSL #1] : st4h %z19.h %z20.h %z21.h %z22.h %p5 -> (%x21,%x22,lsl #1)[128byte] -e4f87af5 : st4h {z21.h, z22.h, z23.h, z24.h}, p6, [x23, x24, LSL #1] : st4h %z21.h %z22.h %z23.h %z24.h %p6 -> (%x23,%x24,lsl #1)[128byte] -e4f97b17 : st4h {z23.h, z24.h, z25.h, z26.h}, p6, [x24, x25, LSL #1] : st4h %z23.h %z24.h %z25.h %z26.h %p6 -> (%x24,%x25,lsl #1)[128byte] -e4fb7f59 : st4h {z25.h, z26.h, z27.h, z28.h}, p7, [x26, x27, LSL #1] : st4h %z25.h %z26.h %z27.h %z28.h %p7 -> (%x26,%x27,lsl #1)[128byte] -e4fd7f9b : st4h {z27.h, z28.h, z29.h, z30.h}, p7, [x28, x29, LSL #1] : st4h %z27.h %z28.h %z29.h %z30.h %p7 -> (%x28,%x29,lsl #1)[128byte] -e4fe7fff : st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x30, LSL #1] : st4h %z31.h %z0.h %z1.h %z2.h %p7 -> (%sp,%x30,lsl #1)[128byte] +e4e06000 : st4h {z0.h, z1.h, z2.h, z3.h}, p0, [x0, x0, LSL #1] : st4h %z0.h %z1.h %z2.h %z3.h %p0 -> (%x0,%x0,lsl #1)[2byte] +e4e56482 : st4h {z2.h, z3.h, z4.h, z5.h}, p1, [x4, x5, LSL #1] : st4h %z2.h %z3.h %z4.h %z5.h %p1 -> (%x4,%x5,lsl #1)[2byte] +e4e768c4 : st4h {z4.h, z5.h, z6.h, z7.h}, p2, [x6, x7, LSL #1] : st4h %z4.h %z5.h %z6.h %z7.h %p2 -> (%x6,%x7,lsl #1)[2byte] +e4e96906 : st4h {z6.h, z7.h, z8.h, z9.h}, p2, [x8, x9, LSL #1] : st4h %z6.h %z7.h %z8.h %z9.h %p2 -> (%x8,%x9,lsl #1)[2byte] +e4eb6d48 : st4h {z8.h, z9.h, z10.h, z11.h}, p3, [x10, x11, LSL #1] : st4h %z8.h %z9.h %z10.h %z11.h %p3 -> (%x10,%x11,lsl #1)[2byte] +e4ec6d6a : st4h {z10.h, z11.h, z12.h, z13.h}, p3, [x11, x12, LSL #1] : st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x11,%x12,lsl #1)[2byte] +e4ee71ac : st4h {z12.h, z13.h, z14.h, z15.h}, p4, [x13, x14, LSL #1] : st4h %z12.h %z13.h %z14.h %z15.h %p4 -> (%x13,%x14,lsl #1)[2byte] +e4f071ee : st4h {z14.h, z15.h, z16.h, z17.h}, p4, [x15, x16, LSL #1] : st4h %z14.h %z15.h %z16.h %z17.h %p4 -> (%x15,%x16,lsl #1)[2byte] +e4f27630 : st4h {z16.h, z17.h, z18.h, 
z19.h}, p5, [x17, x18, LSL #1] : st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17,%x18,lsl #1)[2byte] +e4f47671 : st4h {z17.h, z18.h, z19.h, z20.h}, p5, [x19, x20, LSL #1] : st4h %z17.h %z18.h %z19.h %z20.h %p5 -> (%x19,%x20,lsl #1)[2byte] +e4f676b3 : st4h {z19.h, z20.h, z21.h, z22.h}, p5, [x21, x22, LSL #1] : st4h %z19.h %z20.h %z21.h %z22.h %p5 -> (%x21,%x22,lsl #1)[2byte] +e4f87af5 : st4h {z21.h, z22.h, z23.h, z24.h}, p6, [x23, x24, LSL #1] : st4h %z21.h %z22.h %z23.h %z24.h %p6 -> (%x23,%x24,lsl #1)[2byte] +e4f97b17 : st4h {z23.h, z24.h, z25.h, z26.h}, p6, [x24, x25, LSL #1] : st4h %z23.h %z24.h %z25.h %z26.h %p6 -> (%x24,%x25,lsl #1)[2byte] +e4fb7f59 : st4h {z25.h, z26.h, z27.h, z28.h}, p7, [x26, x27, LSL #1] : st4h %z25.h %z26.h %z27.h %z28.h %p7 -> (%x26,%x27,lsl #1)[2byte] +e4fd7f9b : st4h {z27.h, z28.h, z29.h, z30.h}, p7, [x28, x29, LSL #1] : st4h %z27.h %z28.h %z29.h %z30.h %p7 -> (%x28,%x29,lsl #1)[2byte] +e4fe7fff : st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x30, LSL #1] : st4h %z31.h %z0.h %z1.h %z2.h %p7 -> (%sp,%x30,lsl #1)[2byte] # ST4H { .H, .H, .H, .H }, , [{, #, MUL VL}] (ST4H-Z.P.BI-Contiguous) -e4f8e000 : st4h {z0.h, z1.h, z2.h, z3.h}, p0, [x0, #-32, MUL VL] : st4h %z0.h %z1.h %z2.h %z3.h %p0 -> -0x0400(%x0)[128byte] -e4f9e482 : st4h {z2.h, z3.h, z4.h, z5.h}, p1, [x4, #-28, MUL VL] : st4h %z2.h %z3.h %z4.h %z5.h %p1 -> -0x0380(%x4)[128byte] -e4fae8c4 : st4h {z4.h, z5.h, z6.h, z7.h}, p2, [x6, #-24, MUL VL] : st4h %z4.h %z5.h %z6.h %z7.h %p2 -> -0x0300(%x6)[128byte] -e4fbe906 : st4h {z6.h, z7.h, z8.h, z9.h}, p2, [x8, #-20, MUL VL] : st4h %z6.h %z7.h %z8.h %z9.h %p2 -> -0x0280(%x8)[128byte] -e4fced48 : st4h {z8.h, z9.h, z10.h, z11.h}, p3, [x10, #-16, MUL VL] : st4h %z8.h %z9.h %z10.h %z11.h %p3 -> -0x0200(%x10)[128byte] -e4fded6a : st4h {z10.h, z11.h, z12.h, z13.h}, p3, [x11, #-12, MUL VL] : st4h %z10.h %z11.h %z12.h %z13.h %p3 -> -0x0180(%x11)[128byte] -e4fef1ac : st4h {z12.h, z13.h, z14.h, z15.h}, p4, [x13, #-8, MUL VL] : st4h %z12.h %z13.h %z14.h 
%z15.h %p4 -> -0x0100(%x13)[128byte] -e4fff1ee : st4h {z14.h, z15.h, z16.h, z17.h}, p4, [x15, #-4, MUL VL] : st4h %z14.h %z15.h %z16.h %z17.h %p4 -> -0x80(%x15)[128byte] -e4f0f630 : st4h {z16.h, z17.h, z18.h, z19.h}, p5, [x17, #0, MUL VL] : st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17)[128byte] -e4f0f671 : st4h {z17.h, z18.h, z19.h, z20.h}, p5, [x19, #0, MUL VL] : st4h %z17.h %z18.h %z19.h %z20.h %p5 -> (%x19)[128byte] -e4f1f6b3 : st4h {z19.h, z20.h, z21.h, z22.h}, p5, [x21, #4, MUL VL] : st4h %z19.h %z20.h %z21.h %z22.h %p5 -> +0x80(%x21)[128byte] -e4f2faf5 : st4h {z21.h, z22.h, z23.h, z24.h}, p6, [x23, #8, MUL VL] : st4h %z21.h %z22.h %z23.h %z24.h %p6 -> +0x0100(%x23)[128byte] -e4f3fb17 : st4h {z23.h, z24.h, z25.h, z26.h}, p6, [x24, #12, MUL VL] : st4h %z23.h %z24.h %z25.h %z26.h %p6 -> +0x0180(%x24)[128byte] -e4f4ff59 : st4h {z25.h, z26.h, z27.h, z28.h}, p7, [x26, #16, MUL VL] : st4h %z25.h %z26.h %z27.h %z28.h %p7 -> +0x0200(%x26)[128byte] -e4f5ff9b : st4h {z27.h, z28.h, z29.h, z30.h}, p7, [x28, #20, MUL VL] : st4h %z27.h %z28.h %z29.h %z30.h %p7 -> +0x0280(%x28)[128byte] -e4f7ffff : st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, #28, MUL VL] : st4h %z31.h %z0.h %z1.h %z2.h %p7 -> +0x0380(%sp)[128byte] +e4f8e000 : st4h {z0.h, z1.h, z2.h, z3.h}, p0, [x0, #-32, MUL VL] : st4h %z0.h %z1.h %z2.h %z3.h %p0 -> -0x0400(%x0)[2byte] +e4f9e482 : st4h {z2.h, z3.h, z4.h, z5.h}, p1, [x4, #-28, MUL VL] : st4h %z2.h %z3.h %z4.h %z5.h %p1 -> -0x0380(%x4)[2byte] +e4fae8c4 : st4h {z4.h, z5.h, z6.h, z7.h}, p2, [x6, #-24, MUL VL] : st4h %z4.h %z5.h %z6.h %z7.h %p2 -> -0x0300(%x6)[2byte] +e4fbe906 : st4h {z6.h, z7.h, z8.h, z9.h}, p2, [x8, #-20, MUL VL] : st4h %z6.h %z7.h %z8.h %z9.h %p2 -> -0x0280(%x8)[2byte] +e4fced48 : st4h {z8.h, z9.h, z10.h, z11.h}, p3, [x10, #-16, MUL VL] : st4h %z8.h %z9.h %z10.h %z11.h %p3 -> -0x0200(%x10)[2byte] +e4fded6a : st4h {z10.h, z11.h, z12.h, z13.h}, p3, [x11, #-12, MUL VL] : st4h %z10.h %z11.h %z12.h %z13.h %p3 -> -0x0180(%x11)[2byte] +e4fef1ac : st4h 
{z12.h, z13.h, z14.h, z15.h}, p4, [x13, #-8, MUL VL] : st4h %z12.h %z13.h %z14.h %z15.h %p4 -> -0x0100(%x13)[2byte] +e4fff1ee : st4h {z14.h, z15.h, z16.h, z17.h}, p4, [x15, #-4, MUL VL] : st4h %z14.h %z15.h %z16.h %z17.h %p4 -> -0x80(%x15)[2byte] +e4f0f630 : st4h {z16.h, z17.h, z18.h, z19.h}, p5, [x17, #0, MUL VL] : st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17)[2byte] +e4f0f671 : st4h {z17.h, z18.h, z19.h, z20.h}, p5, [x19, #0, MUL VL] : st4h %z17.h %z18.h %z19.h %z20.h %p5 -> (%x19)[2byte] +e4f1f6b3 : st4h {z19.h, z20.h, z21.h, z22.h}, p5, [x21, #4, MUL VL] : st4h %z19.h %z20.h %z21.h %z22.h %p5 -> +0x80(%x21)[2byte] +e4f2faf5 : st4h {z21.h, z22.h, z23.h, z24.h}, p6, [x23, #8, MUL VL] : st4h %z21.h %z22.h %z23.h %z24.h %p6 -> +0x0100(%x23)[2byte] +e4f3fb17 : st4h {z23.h, z24.h, z25.h, z26.h}, p6, [x24, #12, MUL VL] : st4h %z23.h %z24.h %z25.h %z26.h %p6 -> +0x0180(%x24)[2byte] +e4f4ff59 : st4h {z25.h, z26.h, z27.h, z28.h}, p7, [x26, #16, MUL VL] : st4h %z25.h %z26.h %z27.h %z28.h %p7 -> +0x0200(%x26)[2byte] +e4f5ff9b : st4h {z27.h, z28.h, z29.h, z30.h}, p7, [x28, #20, MUL VL] : st4h %z27.h %z28.h %z29.h %z30.h %p7 -> +0x0280(%x28)[2byte] +e4f7ffff : st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, #28, MUL VL] : st4h %z31.h %z0.h %z1.h %z2.h %p7 -> +0x0380(%sp)[2byte] # ST4W { .S, .S, .S, .S }, , [, , LSL #2] (ST4W-Z.P.BR-Contiguous) -e5606000 : st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x0, x0, LSL #2] : st4w %z0.s %z1.s %z2.s %z3.s %p0 -> (%x0,%x0,lsl #2)[128byte] -e5656482 : st4w {z2.s, z3.s, z4.s, z5.s}, p1, [x4, x5, LSL #2] : st4w %z2.s %z3.s %z4.s %z5.s %p1 -> (%x4,%x5,lsl #2)[128byte] -e56768c4 : st4w {z4.s, z5.s, z6.s, z7.s}, p2, [x6, x7, LSL #2] : st4w %z4.s %z5.s %z6.s %z7.s %p2 -> (%x6,%x7,lsl #2)[128byte] -e5696906 : st4w {z6.s, z7.s, z8.s, z9.s}, p2, [x8, x9, LSL #2] : st4w %z6.s %z7.s %z8.s %z9.s %p2 -> (%x8,%x9,lsl #2)[128byte] -e56b6d48 : st4w {z8.s, z9.s, z10.s, z11.s}, p3, [x10, x11, LSL #2] : st4w %z8.s %z9.s %z10.s %z11.s %p3 -> (%x10,%x11,lsl #2)[128byte] 
-e56c6d6a : st4w {z10.s, z11.s, z12.s, z13.s}, p3, [x11, x12, LSL #2] : st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x11,%x12,lsl #2)[128byte] -e56e71ac : st4w {z12.s, z13.s, z14.s, z15.s}, p4, [x13, x14, LSL #2] : st4w %z12.s %z13.s %z14.s %z15.s %p4 -> (%x13,%x14,lsl #2)[128byte] -e57071ee : st4w {z14.s, z15.s, z16.s, z17.s}, p4, [x15, x16, LSL #2] : st4w %z14.s %z15.s %z16.s %z17.s %p4 -> (%x15,%x16,lsl #2)[128byte] -e5727630 : st4w {z16.s, z17.s, z18.s, z19.s}, p5, [x17, x18, LSL #2] : st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17,%x18,lsl #2)[128byte] -e5747671 : st4w {z17.s, z18.s, z19.s, z20.s}, p5, [x19, x20, LSL #2] : st4w %z17.s %z18.s %z19.s %z20.s %p5 -> (%x19,%x20,lsl #2)[128byte] -e57676b3 : st4w {z19.s, z20.s, z21.s, z22.s}, p5, [x21, x22, LSL #2] : st4w %z19.s %z20.s %z21.s %z22.s %p5 -> (%x21,%x22,lsl #2)[128byte] -e5787af5 : st4w {z21.s, z22.s, z23.s, z24.s}, p6, [x23, x24, LSL #2] : st4w %z21.s %z22.s %z23.s %z24.s %p6 -> (%x23,%x24,lsl #2)[128byte] -e5797b17 : st4w {z23.s, z24.s, z25.s, z26.s}, p6, [x24, x25, LSL #2] : st4w %z23.s %z24.s %z25.s %z26.s %p6 -> (%x24,%x25,lsl #2)[128byte] -e57b7f59 : st4w {z25.s, z26.s, z27.s, z28.s}, p7, [x26, x27, LSL #2] : st4w %z25.s %z26.s %z27.s %z28.s %p7 -> (%x26,%x27,lsl #2)[128byte] -e57d7f9b : st4w {z27.s, z28.s, z29.s, z30.s}, p7, [x28, x29, LSL #2] : st4w %z27.s %z28.s %z29.s %z30.s %p7 -> (%x28,%x29,lsl #2)[128byte] -e57e7fff : st4w {z31.s, z0.s, z1.s, z2.s}, p7, [sp, x30, LSL #2] : st4w %z31.s %z0.s %z1.s %z2.s %p7 -> (%sp,%x30,lsl #2)[128byte] +e5606000 : st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x0, x0, LSL #2] : st4w %z0.s %z1.s %z2.s %z3.s %p0 -> (%x0,%x0,lsl #2)[4byte] +e5656482 : st4w {z2.s, z3.s, z4.s, z5.s}, p1, [x4, x5, LSL #2] : st4w %z2.s %z3.s %z4.s %z5.s %p1 -> (%x4,%x5,lsl #2)[4byte] +e56768c4 : st4w {z4.s, z5.s, z6.s, z7.s}, p2, [x6, x7, LSL #2] : st4w %z4.s %z5.s %z6.s %z7.s %p2 -> (%x6,%x7,lsl #2)[4byte] +e5696906 : st4w {z6.s, z7.s, z8.s, z9.s}, p2, [x8, x9, LSL #2] : st4w %z6.s %z7.s %z8.s 
%z9.s %p2 -> (%x8,%x9,lsl #2)[4byte] +e56b6d48 : st4w {z8.s, z9.s, z10.s, z11.s}, p3, [x10, x11, LSL #2] : st4w %z8.s %z9.s %z10.s %z11.s %p3 -> (%x10,%x11,lsl #2)[4byte] +e56c6d6a : st4w {z10.s, z11.s, z12.s, z13.s}, p3, [x11, x12, LSL #2] : st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x11,%x12,lsl #2)[4byte] +e56e71ac : st4w {z12.s, z13.s, z14.s, z15.s}, p4, [x13, x14, LSL #2] : st4w %z12.s %z13.s %z14.s %z15.s %p4 -> (%x13,%x14,lsl #2)[4byte] +e57071ee : st4w {z14.s, z15.s, z16.s, z17.s}, p4, [x15, x16, LSL #2] : st4w %z14.s %z15.s %z16.s %z17.s %p4 -> (%x15,%x16,lsl #2)[4byte] +e5727630 : st4w {z16.s, z17.s, z18.s, z19.s}, p5, [x17, x18, LSL #2] : st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17,%x18,lsl #2)[4byte] +e5747671 : st4w {z17.s, z18.s, z19.s, z20.s}, p5, [x19, x20, LSL #2] : st4w %z17.s %z18.s %z19.s %z20.s %p5 -> (%x19,%x20,lsl #2)[4byte] +e57676b3 : st4w {z19.s, z20.s, z21.s, z22.s}, p5, [x21, x22, LSL #2] : st4w %z19.s %z20.s %z21.s %z22.s %p5 -> (%x21,%x22,lsl #2)[4byte] +e5787af5 : st4w {z21.s, z22.s, z23.s, z24.s}, p6, [x23, x24, LSL #2] : st4w %z21.s %z22.s %z23.s %z24.s %p6 -> (%x23,%x24,lsl #2)[4byte] +e5797b17 : st4w {z23.s, z24.s, z25.s, z26.s}, p6, [x24, x25, LSL #2] : st4w %z23.s %z24.s %z25.s %z26.s %p6 -> (%x24,%x25,lsl #2)[4byte] +e57b7f59 : st4w {z25.s, z26.s, z27.s, z28.s}, p7, [x26, x27, LSL #2] : st4w %z25.s %z26.s %z27.s %z28.s %p7 -> (%x26,%x27,lsl #2)[4byte] +e57d7f9b : st4w {z27.s, z28.s, z29.s, z30.s}, p7, [x28, x29, LSL #2] : st4w %z27.s %z28.s %z29.s %z30.s %p7 -> (%x28,%x29,lsl #2)[4byte] +e57e7fff : st4w {z31.s, z0.s, z1.s, z2.s}, p7, [sp, x30, LSL #2] : st4w %z31.s %z0.s %z1.s %z2.s %p7 -> (%sp,%x30,lsl #2)[4byte] # ST4W { .S, .S, .S, .S }, , [{, #, MUL VL}] (ST4W-Z.P.BI-Contiguous) -e578e000 : st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x0, #-32, MUL VL] : st4w %z0.s %z1.s %z2.s %z3.s %p0 -> -0x0400(%x0)[128byte] -e579e482 : st4w {z2.s, z3.s, z4.s, z5.s}, p1, [x4, #-28, MUL VL] : st4w %z2.s %z3.s %z4.s %z5.s %p1 -> 
-0x0380(%x4)[128byte] -e57ae8c4 : st4w {z4.s, z5.s, z6.s, z7.s}, p2, [x6, #-24, MUL VL] : st4w %z4.s %z5.s %z6.s %z7.s %p2 -> -0x0300(%x6)[128byte] -e57be906 : st4w {z6.s, z7.s, z8.s, z9.s}, p2, [x8, #-20, MUL VL] : st4w %z6.s %z7.s %z8.s %z9.s %p2 -> -0x0280(%x8)[128byte] -e57ced48 : st4w {z8.s, z9.s, z10.s, z11.s}, p3, [x10, #-16, MUL VL] : st4w %z8.s %z9.s %z10.s %z11.s %p3 -> -0x0200(%x10)[128byte] -e57ded6a : st4w {z10.s, z11.s, z12.s, z13.s}, p3, [x11, #-12, MUL VL] : st4w %z10.s %z11.s %z12.s %z13.s %p3 -> -0x0180(%x11)[128byte] -e57ef1ac : st4w {z12.s, z13.s, z14.s, z15.s}, p4, [x13, #-8, MUL VL] : st4w %z12.s %z13.s %z14.s %z15.s %p4 -> -0x0100(%x13)[128byte] -e57ff1ee : st4w {z14.s, z15.s, z16.s, z17.s}, p4, [x15, #-4, MUL VL] : st4w %z14.s %z15.s %z16.s %z17.s %p4 -> -0x80(%x15)[128byte] -e570f630 : st4w {z16.s, z17.s, z18.s, z19.s}, p5, [x17, #0, MUL VL] : st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17)[128byte] -e570f671 : st4w {z17.s, z18.s, z19.s, z20.s}, p5, [x19, #0, MUL VL] : st4w %z17.s %z18.s %z19.s %z20.s %p5 -> (%x19)[128byte] -e571f6b3 : st4w {z19.s, z20.s, z21.s, z22.s}, p5, [x21, #4, MUL VL] : st4w %z19.s %z20.s %z21.s %z22.s %p5 -> +0x80(%x21)[128byte] -e572faf5 : st4w {z21.s, z22.s, z23.s, z24.s}, p6, [x23, #8, MUL VL] : st4w %z21.s %z22.s %z23.s %z24.s %p6 -> +0x0100(%x23)[128byte] -e573fb17 : st4w {z23.s, z24.s, z25.s, z26.s}, p6, [x24, #12, MUL VL] : st4w %z23.s %z24.s %z25.s %z26.s %p6 -> +0x0180(%x24)[128byte] -e574ff59 : st4w {z25.s, z26.s, z27.s, z28.s}, p7, [x26, #16, MUL VL] : st4w %z25.s %z26.s %z27.s %z28.s %p7 -> +0x0200(%x26)[128byte] -e575ff9b : st4w {z27.s, z28.s, z29.s, z30.s}, p7, [x28, #20, MUL VL] : st4w %z27.s %z28.s %z29.s %z30.s %p7 -> +0x0280(%x28)[128byte] -e577ffff : st4w {z31.s, z0.s, z1.s, z2.s}, p7, [sp, #28, MUL VL] : st4w %z31.s %z0.s %z1.s %z2.s %p7 -> +0x0380(%sp)[128byte] +e578e000 : st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x0, #-32, MUL VL] : st4w %z0.s %z1.s %z2.s %z3.s %p0 -> -0x0400(%x0)[4byte] +e579e482 : 
st4w {z2.s, z3.s, z4.s, z5.s}, p1, [x4, #-28, MUL VL] : st4w %z2.s %z3.s %z4.s %z5.s %p1 -> -0x0380(%x4)[4byte] +e57ae8c4 : st4w {z4.s, z5.s, z6.s, z7.s}, p2, [x6, #-24, MUL VL] : st4w %z4.s %z5.s %z6.s %z7.s %p2 -> -0x0300(%x6)[4byte] +e57be906 : st4w {z6.s, z7.s, z8.s, z9.s}, p2, [x8, #-20, MUL VL] : st4w %z6.s %z7.s %z8.s %z9.s %p2 -> -0x0280(%x8)[4byte] +e57ced48 : st4w {z8.s, z9.s, z10.s, z11.s}, p3, [x10, #-16, MUL VL] : st4w %z8.s %z9.s %z10.s %z11.s %p3 -> -0x0200(%x10)[4byte] +e57ded6a : st4w {z10.s, z11.s, z12.s, z13.s}, p3, [x11, #-12, MUL VL] : st4w %z10.s %z11.s %z12.s %z13.s %p3 -> -0x0180(%x11)[4byte] +e57ef1ac : st4w {z12.s, z13.s, z14.s, z15.s}, p4, [x13, #-8, MUL VL] : st4w %z12.s %z13.s %z14.s %z15.s %p4 -> -0x0100(%x13)[4byte] +e57ff1ee : st4w {z14.s, z15.s, z16.s, z17.s}, p4, [x15, #-4, MUL VL] : st4w %z14.s %z15.s %z16.s %z17.s %p4 -> -0x80(%x15)[4byte] +e570f630 : st4w {z16.s, z17.s, z18.s, z19.s}, p5, [x17, #0, MUL VL] : st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17)[4byte] +e570f671 : st4w {z17.s, z18.s, z19.s, z20.s}, p5, [x19, #0, MUL VL] : st4w %z17.s %z18.s %z19.s %z20.s %p5 -> (%x19)[4byte] +e571f6b3 : st4w {z19.s, z20.s, z21.s, z22.s}, p5, [x21, #4, MUL VL] : st4w %z19.s %z20.s %z21.s %z22.s %p5 -> +0x80(%x21)[4byte] +e572faf5 : st4w {z21.s, z22.s, z23.s, z24.s}, p6, [x23, #8, MUL VL] : st4w %z21.s %z22.s %z23.s %z24.s %p6 -> +0x0100(%x23)[4byte] +e573fb17 : st4w {z23.s, z24.s, z25.s, z26.s}, p6, [x24, #12, MUL VL] : st4w %z23.s %z24.s %z25.s %z26.s %p6 -> +0x0180(%x24)[4byte] +e574ff59 : st4w {z25.s, z26.s, z27.s, z28.s}, p7, [x26, #16, MUL VL] : st4w %z25.s %z26.s %z27.s %z28.s %p7 -> +0x0200(%x26)[4byte] +e575ff9b : st4w {z27.s, z28.s, z29.s, z30.s}, p7, [x28, #20, MUL VL] : st4w %z27.s %z28.s %z29.s %z30.s %p7 -> +0x0280(%x28)[4byte] +e577ffff : st4w {z31.s, z0.s, z1.s, z2.s}, p7, [sp, #28, MUL VL] : st4w %z31.s %z0.s %z1.s %z2.s %p7 -> +0x0380(%sp)[4byte] # STNT1B { .B }, , [, ] (STNT1B-Z.P.BR-Contiguous) -e4006000 : stnt1b z0.b, 
p0, [x0, x0] : stnt1b %z0.b %p0 -> (%x0,%x0)[32byte] -e4056482 : stnt1b z2.b, p1, [x4, x5] : stnt1b %z2.b %p1 -> (%x4,%x5)[32byte] -e40768c4 : stnt1b z4.b, p2, [x6, x7] : stnt1b %z4.b %p2 -> (%x6,%x7)[32byte] -e4096906 : stnt1b z6.b, p2, [x8, x9] : stnt1b %z6.b %p2 -> (%x8,%x9)[32byte] -e40b6d48 : stnt1b z8.b, p3, [x10, x11] : stnt1b %z8.b %p3 -> (%x10,%x11)[32byte] -e40c6d6a : stnt1b z10.b, p3, [x11, x12] : stnt1b %z10.b %p3 -> (%x11,%x12)[32byte] -e40e71ac : stnt1b z12.b, p4, [x13, x14] : stnt1b %z12.b %p4 -> (%x13,%x14)[32byte] -e41071ee : stnt1b z14.b, p4, [x15, x16] : stnt1b %z14.b %p4 -> (%x15,%x16)[32byte] -e4127630 : stnt1b z16.b, p5, [x17, x18] : stnt1b %z16.b %p5 -> (%x17,%x18)[32byte] -e4147671 : stnt1b z17.b, p5, [x19, x20] : stnt1b %z17.b %p5 -> (%x19,%x20)[32byte] -e41676b3 : stnt1b z19.b, p5, [x21, x22] : stnt1b %z19.b %p5 -> (%x21,%x22)[32byte] -e4187af5 : stnt1b z21.b, p6, [x23, x24] : stnt1b %z21.b %p6 -> (%x23,%x24)[32byte] -e4197b17 : stnt1b z23.b, p6, [x24, x25] : stnt1b %z23.b %p6 -> (%x24,%x25)[32byte] -e41b7f59 : stnt1b z25.b, p7, [x26, x27] : stnt1b %z25.b %p7 -> (%x26,%x27)[32byte] -e41d7f9b : stnt1b z27.b, p7, [x28, x29] : stnt1b %z27.b %p7 -> (%x28,%x29)[32byte] -e41e7fff : stnt1b z31.b, p7, [sp, x30] : stnt1b %z31.b %p7 -> (%sp,%x30)[32byte] +e4006000 : stnt1b z0.b, p0, [x0, x0] : stnt1b %z0.b %p0 -> (%x0,%x0)[1byte] +e4056482 : stnt1b z2.b, p1, [x4, x5] : stnt1b %z2.b %p1 -> (%x4,%x5)[1byte] +e40768c4 : stnt1b z4.b, p2, [x6, x7] : stnt1b %z4.b %p2 -> (%x6,%x7)[1byte] +e4096906 : stnt1b z6.b, p2, [x8, x9] : stnt1b %z6.b %p2 -> (%x8,%x9)[1byte] +e40b6d48 : stnt1b z8.b, p3, [x10, x11] : stnt1b %z8.b %p3 -> (%x10,%x11)[1byte] +e40c6d6a : stnt1b z10.b, p3, [x11, x12] : stnt1b %z10.b %p3 -> (%x11,%x12)[1byte] +e40e71ac : stnt1b z12.b, p4, [x13, x14] : stnt1b %z12.b %p4 -> (%x13,%x14)[1byte] +e41071ee : stnt1b z14.b, p4, [x15, x16] : stnt1b %z14.b %p4 -> (%x15,%x16)[1byte] +e4127630 : stnt1b z16.b, p5, [x17, x18] : stnt1b %z16.b %p5 -> 
(%x17,%x18)[1byte] +e4147671 : stnt1b z17.b, p5, [x19, x20] : stnt1b %z17.b %p5 -> (%x19,%x20)[1byte] +e41676b3 : stnt1b z19.b, p5, [x21, x22] : stnt1b %z19.b %p5 -> (%x21,%x22)[1byte] +e4187af5 : stnt1b z21.b, p6, [x23, x24] : stnt1b %z21.b %p6 -> (%x23,%x24)[1byte] +e4197b17 : stnt1b z23.b, p6, [x24, x25] : stnt1b %z23.b %p6 -> (%x24,%x25)[1byte] +e41b7f59 : stnt1b z25.b, p7, [x26, x27] : stnt1b %z25.b %p7 -> (%x26,%x27)[1byte] +e41d7f9b : stnt1b z27.b, p7, [x28, x29] : stnt1b %z27.b %p7 -> (%x28,%x29)[1byte] +e41e7fff : stnt1b z31.b, p7, [sp, x30] : stnt1b %z31.b %p7 -> (%sp,%x30)[1byte] # STNT1B { .B }, , [{, #, MUL VL}] (STNT1B-Z.P.BI-Contiguous) -e418e000 : stnt1b z0.b, p0, [x0, #-8, MUL VL] : stnt1b %z0.b %p0 -> -0x0100(%x0)[32byte] -e419e482 : stnt1b z2.b, p1, [x4, #-7, MUL VL] : stnt1b %z2.b %p1 -> -0xe0(%x4)[32byte] -e41ae8c4 : stnt1b z4.b, p2, [x6, #-6, MUL VL] : stnt1b %z4.b %p2 -> -0xc0(%x6)[32byte] -e41be906 : stnt1b z6.b, p2, [x8, #-5, MUL VL] : stnt1b %z6.b %p2 -> -0xa0(%x8)[32byte] -e41ced48 : stnt1b z8.b, p3, [x10, #-4, MUL VL] : stnt1b %z8.b %p3 -> -0x80(%x10)[32byte] -e41ded6a : stnt1b z10.b, p3, [x11, #-3, MUL VL] : stnt1b %z10.b %p3 -> -0x60(%x11)[32byte] -e41ef1ac : stnt1b z12.b, p4, [x13, #-2, MUL VL] : stnt1b %z12.b %p4 -> -0x40(%x13)[32byte] -e41ff1ee : stnt1b z14.b, p4, [x15, #-1, MUL VL] : stnt1b %z14.b %p4 -> -0x20(%x15)[32byte] -e410f630 : stnt1b z16.b, p5, [x17, #0, MUL VL] : stnt1b %z16.b %p5 -> (%x17)[32byte] -e410f671 : stnt1b z17.b, p5, [x19, #0, MUL VL] : stnt1b %z17.b %p5 -> (%x19)[32byte] -e411f6b3 : stnt1b z19.b, p5, [x21, #1, MUL VL] : stnt1b %z19.b %p5 -> +0x20(%x21)[32byte] -e412faf5 : stnt1b z21.b, p6, [x23, #2, MUL VL] : stnt1b %z21.b %p6 -> +0x40(%x23)[32byte] -e413fb17 : stnt1b z23.b, p6, [x24, #3, MUL VL] : stnt1b %z23.b %p6 -> +0x60(%x24)[32byte] -e414ff59 : stnt1b z25.b, p7, [x26, #4, MUL VL] : stnt1b %z25.b %p7 -> +0x80(%x26)[32byte] -e415ff9b : stnt1b z27.b, p7, [x28, #5, MUL VL] : stnt1b %z27.b %p7 -> 
+0xa0(%x28)[32byte] -e417ffff : stnt1b z31.b, p7, [sp, #7, MUL VL] : stnt1b %z31.b %p7 -> +0xe0(%sp)[32byte] +e418e000 : stnt1b z0.b, p0, [x0, #-8, MUL VL] : stnt1b %z0.b %p0 -> -0x0100(%x0)[1byte] +e419e482 : stnt1b z2.b, p1, [x4, #-7, MUL VL] : stnt1b %z2.b %p1 -> -0xe0(%x4)[1byte] +e41ae8c4 : stnt1b z4.b, p2, [x6, #-6, MUL VL] : stnt1b %z4.b %p2 -> -0xc0(%x6)[1byte] +e41be906 : stnt1b z6.b, p2, [x8, #-5, MUL VL] : stnt1b %z6.b %p2 -> -0xa0(%x8)[1byte] +e41ced48 : stnt1b z8.b, p3, [x10, #-4, MUL VL] : stnt1b %z8.b %p3 -> -0x80(%x10)[1byte] +e41ded6a : stnt1b z10.b, p3, [x11, #-3, MUL VL] : stnt1b %z10.b %p3 -> -0x60(%x11)[1byte] +e41ef1ac : stnt1b z12.b, p4, [x13, #-2, MUL VL] : stnt1b %z12.b %p4 -> -0x40(%x13)[1byte] +e41ff1ee : stnt1b z14.b, p4, [x15, #-1, MUL VL] : stnt1b %z14.b %p4 -> -0x20(%x15)[1byte] +e410f630 : stnt1b z16.b, p5, [x17, #0, MUL VL] : stnt1b %z16.b %p5 -> (%x17)[1byte] +e410f671 : stnt1b z17.b, p5, [x19, #0, MUL VL] : stnt1b %z17.b %p5 -> (%x19)[1byte] +e411f6b3 : stnt1b z19.b, p5, [x21, #1, MUL VL] : stnt1b %z19.b %p5 -> +0x20(%x21)[1byte] +e412faf5 : stnt1b z21.b, p6, [x23, #2, MUL VL] : stnt1b %z21.b %p6 -> +0x40(%x23)[1byte] +e413fb17 : stnt1b z23.b, p6, [x24, #3, MUL VL] : stnt1b %z23.b %p6 -> +0x60(%x24)[1byte] +e414ff59 : stnt1b z25.b, p7, [x26, #4, MUL VL] : stnt1b %z25.b %p7 -> +0x80(%x26)[1byte] +e415ff9b : stnt1b z27.b, p7, [x28, #5, MUL VL] : stnt1b %z27.b %p7 -> +0xa0(%x28)[1byte] +e417ffff : stnt1b z31.b, p7, [sp, #7, MUL VL] : stnt1b %z31.b %p7 -> +0xe0(%sp)[1byte] # STNT1D { .D }, , [, , LSL #3] (STNT1D-Z.P.BR-Contiguous) -e5806000 : stnt1d z0.d, p0, [x0, x0, LSL #3] : stnt1d %z0.d %p0 -> (%x0,%x0,lsl #3)[32byte] -e5856482 : stnt1d z2.d, p1, [x4, x5, LSL #3] : stnt1d %z2.d %p1 -> (%x4,%x5,lsl #3)[32byte] -e58768c4 : stnt1d z4.d, p2, [x6, x7, LSL #3] : stnt1d %z4.d %p2 -> (%x6,%x7,lsl #3)[32byte] -e5896906 : stnt1d z6.d, p2, [x8, x9, LSL #3] : stnt1d %z6.d %p2 -> (%x8,%x9,lsl #3)[32byte] -e58b6d48 : stnt1d z8.d, p3, [x10, x11, 
LSL #3] : stnt1d %z8.d %p3 -> (%x10,%x11,lsl #3)[32byte] -e58c6d6a : stnt1d z10.d, p3, [x11, x12, LSL #3] : stnt1d %z10.d %p3 -> (%x11,%x12,lsl #3)[32byte] -e58e71ac : stnt1d z12.d, p4, [x13, x14, LSL #3] : stnt1d %z12.d %p4 -> (%x13,%x14,lsl #3)[32byte] -e59071ee : stnt1d z14.d, p4, [x15, x16, LSL #3] : stnt1d %z14.d %p4 -> (%x15,%x16,lsl #3)[32byte] -e5927630 : stnt1d z16.d, p5, [x17, x18, LSL #3] : stnt1d %z16.d %p5 -> (%x17,%x18,lsl #3)[32byte] -e5947671 : stnt1d z17.d, p5, [x19, x20, LSL #3] : stnt1d %z17.d %p5 -> (%x19,%x20,lsl #3)[32byte] -e59676b3 : stnt1d z19.d, p5, [x21, x22, LSL #3] : stnt1d %z19.d %p5 -> (%x21,%x22,lsl #3)[32byte] -e5987af5 : stnt1d z21.d, p6, [x23, x24, LSL #3] : stnt1d %z21.d %p6 -> (%x23,%x24,lsl #3)[32byte] -e5997b17 : stnt1d z23.d, p6, [x24, x25, LSL #3] : stnt1d %z23.d %p6 -> (%x24,%x25,lsl #3)[32byte] -e59b7f59 : stnt1d z25.d, p7, [x26, x27, LSL #3] : stnt1d %z25.d %p7 -> (%x26,%x27,lsl #3)[32byte] -e59d7f9b : stnt1d z27.d, p7, [x28, x29, LSL #3] : stnt1d %z27.d %p7 -> (%x28,%x29,lsl #3)[32byte] -e59e7fff : stnt1d z31.d, p7, [sp, x30, LSL #3] : stnt1d %z31.d %p7 -> (%sp,%x30,lsl #3)[32byte] +e5806000 : stnt1d z0.d, p0, [x0, x0, LSL #3] : stnt1d %z0.d %p0 -> (%x0,%x0,lsl #3)[8byte] +e5856482 : stnt1d z2.d, p1, [x4, x5, LSL #3] : stnt1d %z2.d %p1 -> (%x4,%x5,lsl #3)[8byte] +e58768c4 : stnt1d z4.d, p2, [x6, x7, LSL #3] : stnt1d %z4.d %p2 -> (%x6,%x7,lsl #3)[8byte] +e5896906 : stnt1d z6.d, p2, [x8, x9, LSL #3] : stnt1d %z6.d %p2 -> (%x8,%x9,lsl #3)[8byte] +e58b6d48 : stnt1d z8.d, p3, [x10, x11, LSL #3] : stnt1d %z8.d %p3 -> (%x10,%x11,lsl #3)[8byte] +e58c6d6a : stnt1d z10.d, p3, [x11, x12, LSL #3] : stnt1d %z10.d %p3 -> (%x11,%x12,lsl #3)[8byte] +e58e71ac : stnt1d z12.d, p4, [x13, x14, LSL #3] : stnt1d %z12.d %p4 -> (%x13,%x14,lsl #3)[8byte] +e59071ee : stnt1d z14.d, p4, [x15, x16, LSL #3] : stnt1d %z14.d %p4 -> (%x15,%x16,lsl #3)[8byte] +e5927630 : stnt1d z16.d, p5, [x17, x18, LSL #3] : stnt1d %z16.d %p5 -> (%x17,%x18,lsl #3)[8byte] 
+e5947671 : stnt1d z17.d, p5, [x19, x20, LSL #3] : stnt1d %z17.d %p5 -> (%x19,%x20,lsl #3)[8byte] +e59676b3 : stnt1d z19.d, p5, [x21, x22, LSL #3] : stnt1d %z19.d %p5 -> (%x21,%x22,lsl #3)[8byte] +e5987af5 : stnt1d z21.d, p6, [x23, x24, LSL #3] : stnt1d %z21.d %p6 -> (%x23,%x24,lsl #3)[8byte] +e5997b17 : stnt1d z23.d, p6, [x24, x25, LSL #3] : stnt1d %z23.d %p6 -> (%x24,%x25,lsl #3)[8byte] +e59b7f59 : stnt1d z25.d, p7, [x26, x27, LSL #3] : stnt1d %z25.d %p7 -> (%x26,%x27,lsl #3)[8byte] +e59d7f9b : stnt1d z27.d, p7, [x28, x29, LSL #3] : stnt1d %z27.d %p7 -> (%x28,%x29,lsl #3)[8byte] +e59e7fff : stnt1d z31.d, p7, [sp, x30, LSL #3] : stnt1d %z31.d %p7 -> (%sp,%x30,lsl #3)[8byte] # STNT1D { .D }, , [{, #, MUL VL}] (STNT1D-Z.P.BI-Contiguous) -e598e000 : stnt1d z0.d, p0, [x0, #-8, MUL VL] : stnt1d %z0.d %p0 -> -0x0100(%x0)[32byte] -e599e482 : stnt1d z2.d, p1, [x4, #-7, MUL VL] : stnt1d %z2.d %p1 -> -0xe0(%x4)[32byte] -e59ae8c4 : stnt1d z4.d, p2, [x6, #-6, MUL VL] : stnt1d %z4.d %p2 -> -0xc0(%x6)[32byte] -e59be906 : stnt1d z6.d, p2, [x8, #-5, MUL VL] : stnt1d %z6.d %p2 -> -0xa0(%x8)[32byte] -e59ced48 : stnt1d z8.d, p3, [x10, #-4, MUL VL] : stnt1d %z8.d %p3 -> -0x80(%x10)[32byte] -e59ded6a : stnt1d z10.d, p3, [x11, #-3, MUL VL] : stnt1d %z10.d %p3 -> -0x60(%x11)[32byte] -e59ef1ac : stnt1d z12.d, p4, [x13, #-2, MUL VL] : stnt1d %z12.d %p4 -> -0x40(%x13)[32byte] -e59ff1ee : stnt1d z14.d, p4, [x15, #-1, MUL VL] : stnt1d %z14.d %p4 -> -0x20(%x15)[32byte] -e590f630 : stnt1d z16.d, p5, [x17, #0, MUL VL] : stnt1d %z16.d %p5 -> (%x17)[32byte] -e590f671 : stnt1d z17.d, p5, [x19, #0, MUL VL] : stnt1d %z17.d %p5 -> (%x19)[32byte] -e591f6b3 : stnt1d z19.d, p5, [x21, #1, MUL VL] : stnt1d %z19.d %p5 -> +0x20(%x21)[32byte] -e592faf5 : stnt1d z21.d, p6, [x23, #2, MUL VL] : stnt1d %z21.d %p6 -> +0x40(%x23)[32byte] -e593fb17 : stnt1d z23.d, p6, [x24, #3, MUL VL] : stnt1d %z23.d %p6 -> +0x60(%x24)[32byte] -e594ff59 : stnt1d z25.d, p7, [x26, #4, MUL VL] : stnt1d %z25.d %p7 -> 
+0x80(%x26)[32byte] -e595ff9b : stnt1d z27.d, p7, [x28, #5, MUL VL] : stnt1d %z27.d %p7 -> +0xa0(%x28)[32byte] -e597ffff : stnt1d z31.d, p7, [sp, #7, MUL VL] : stnt1d %z31.d %p7 -> +0xe0(%sp)[32byte] +e598e000 : stnt1d z0.d, p0, [x0, #-8, MUL VL] : stnt1d %z0.d %p0 -> -0x0100(%x0)[8byte] +e599e482 : stnt1d z2.d, p1, [x4, #-7, MUL VL] : stnt1d %z2.d %p1 -> -0xe0(%x4)[8byte] +e59ae8c4 : stnt1d z4.d, p2, [x6, #-6, MUL VL] : stnt1d %z4.d %p2 -> -0xc0(%x6)[8byte] +e59be906 : stnt1d z6.d, p2, [x8, #-5, MUL VL] : stnt1d %z6.d %p2 -> -0xa0(%x8)[8byte] +e59ced48 : stnt1d z8.d, p3, [x10, #-4, MUL VL] : stnt1d %z8.d %p3 -> -0x80(%x10)[8byte] +e59ded6a : stnt1d z10.d, p3, [x11, #-3, MUL VL] : stnt1d %z10.d %p3 -> -0x60(%x11)[8byte] +e59ef1ac : stnt1d z12.d, p4, [x13, #-2, MUL VL] : stnt1d %z12.d %p4 -> -0x40(%x13)[8byte] +e59ff1ee : stnt1d z14.d, p4, [x15, #-1, MUL VL] : stnt1d %z14.d %p4 -> -0x20(%x15)[8byte] +e590f630 : stnt1d z16.d, p5, [x17, #0, MUL VL] : stnt1d %z16.d %p5 -> (%x17)[8byte] +e590f671 : stnt1d z17.d, p5, [x19, #0, MUL VL] : stnt1d %z17.d %p5 -> (%x19)[8byte] +e591f6b3 : stnt1d z19.d, p5, [x21, #1, MUL VL] : stnt1d %z19.d %p5 -> +0x20(%x21)[8byte] +e592faf5 : stnt1d z21.d, p6, [x23, #2, MUL VL] : stnt1d %z21.d %p6 -> +0x40(%x23)[8byte] +e593fb17 : stnt1d z23.d, p6, [x24, #3, MUL VL] : stnt1d %z23.d %p6 -> +0x60(%x24)[8byte] +e594ff59 : stnt1d z25.d, p7, [x26, #4, MUL VL] : stnt1d %z25.d %p7 -> +0x80(%x26)[8byte] +e595ff9b : stnt1d z27.d, p7, [x28, #5, MUL VL] : stnt1d %z27.d %p7 -> +0xa0(%x28)[8byte] +e597ffff : stnt1d z31.d, p7, [sp, #7, MUL VL] : stnt1d %z31.d %p7 -> +0xe0(%sp)[8byte] # STNT1H { .H }, , [, , LSL #1] (STNT1H-Z.P.BR-Contiguous) -e4806000 : stnt1h z0.h, p0, [x0, x0, LSL #1] : stnt1h %z0.h %p0 -> (%x0,%x0,lsl #1)[32byte] -e4856482 : stnt1h z2.h, p1, [x4, x5, LSL #1] : stnt1h %z2.h %p1 -> (%x4,%x5,lsl #1)[32byte] -e48768c4 : stnt1h z4.h, p2, [x6, x7, LSL #1] : stnt1h %z4.h %p2 -> (%x6,%x7,lsl #1)[32byte] -e4896906 : stnt1h z6.h, p2, [x8, x9, LSL 
#1] : stnt1h %z6.h %p2 -> (%x8,%x9,lsl #1)[32byte] -e48b6d48 : stnt1h z8.h, p3, [x10, x11, LSL #1] : stnt1h %z8.h %p3 -> (%x10,%x11,lsl #1)[32byte] -e48c6d6a : stnt1h z10.h, p3, [x11, x12, LSL #1] : stnt1h %z10.h %p3 -> (%x11,%x12,lsl #1)[32byte] -e48e71ac : stnt1h z12.h, p4, [x13, x14, LSL #1] : stnt1h %z12.h %p4 -> (%x13,%x14,lsl #1)[32byte] -e49071ee : stnt1h z14.h, p4, [x15, x16, LSL #1] : stnt1h %z14.h %p4 -> (%x15,%x16,lsl #1)[32byte] -e4927630 : stnt1h z16.h, p5, [x17, x18, LSL #1] : stnt1h %z16.h %p5 -> (%x17,%x18,lsl #1)[32byte] -e4947671 : stnt1h z17.h, p5, [x19, x20, LSL #1] : stnt1h %z17.h %p5 -> (%x19,%x20,lsl #1)[32byte] -e49676b3 : stnt1h z19.h, p5, [x21, x22, LSL #1] : stnt1h %z19.h %p5 -> (%x21,%x22,lsl #1)[32byte] -e4987af5 : stnt1h z21.h, p6, [x23, x24, LSL #1] : stnt1h %z21.h %p6 -> (%x23,%x24,lsl #1)[32byte] -e4997b17 : stnt1h z23.h, p6, [x24, x25, LSL #1] : stnt1h %z23.h %p6 -> (%x24,%x25,lsl #1)[32byte] -e49b7f59 : stnt1h z25.h, p7, [x26, x27, LSL #1] : stnt1h %z25.h %p7 -> (%x26,%x27,lsl #1)[32byte] -e49d7f9b : stnt1h z27.h, p7, [x28, x29, LSL #1] : stnt1h %z27.h %p7 -> (%x28,%x29,lsl #1)[32byte] -e49e7fff : stnt1h z31.h, p7, [sp, x30, LSL #1] : stnt1h %z31.h %p7 -> (%sp,%x30,lsl #1)[32byte] +e4806000 : stnt1h z0.h, p0, [x0, x0, LSL #1] : stnt1h %z0.h %p0 -> (%x0,%x0,lsl #1)[2byte] +e4856482 : stnt1h z2.h, p1, [x4, x5, LSL #1] : stnt1h %z2.h %p1 -> (%x4,%x5,lsl #1)[2byte] +e48768c4 : stnt1h z4.h, p2, [x6, x7, LSL #1] : stnt1h %z4.h %p2 -> (%x6,%x7,lsl #1)[2byte] +e4896906 : stnt1h z6.h, p2, [x8, x9, LSL #1] : stnt1h %z6.h %p2 -> (%x8,%x9,lsl #1)[2byte] +e48b6d48 : stnt1h z8.h, p3, [x10, x11, LSL #1] : stnt1h %z8.h %p3 -> (%x10,%x11,lsl #1)[2byte] +e48c6d6a : stnt1h z10.h, p3, [x11, x12, LSL #1] : stnt1h %z10.h %p3 -> (%x11,%x12,lsl #1)[2byte] +e48e71ac : stnt1h z12.h, p4, [x13, x14, LSL #1] : stnt1h %z12.h %p4 -> (%x13,%x14,lsl #1)[2byte] +e49071ee : stnt1h z14.h, p4, [x15, x16, LSL #1] : stnt1h %z14.h %p4 -> (%x15,%x16,lsl #1)[2byte] 
+e4927630 : stnt1h z16.h, p5, [x17, x18, LSL #1] : stnt1h %z16.h %p5 -> (%x17,%x18,lsl #1)[2byte] +e4947671 : stnt1h z17.h, p5, [x19, x20, LSL #1] : stnt1h %z17.h %p5 -> (%x19,%x20,lsl #1)[2byte] +e49676b3 : stnt1h z19.h, p5, [x21, x22, LSL #1] : stnt1h %z19.h %p5 -> (%x21,%x22,lsl #1)[2byte] +e4987af5 : stnt1h z21.h, p6, [x23, x24, LSL #1] : stnt1h %z21.h %p6 -> (%x23,%x24,lsl #1)[2byte] +e4997b17 : stnt1h z23.h, p6, [x24, x25, LSL #1] : stnt1h %z23.h %p6 -> (%x24,%x25,lsl #1)[2byte] +e49b7f59 : stnt1h z25.h, p7, [x26, x27, LSL #1] : stnt1h %z25.h %p7 -> (%x26,%x27,lsl #1)[2byte] +e49d7f9b : stnt1h z27.h, p7, [x28, x29, LSL #1] : stnt1h %z27.h %p7 -> (%x28,%x29,lsl #1)[2byte] +e49e7fff : stnt1h z31.h, p7, [sp, x30, LSL #1] : stnt1h %z31.h %p7 -> (%sp,%x30,lsl #1)[2byte] # STNT1H { .H }, , [{, #, MUL VL}] (STNT1H-Z.P.BI-Contiguous) -e498e000 : stnt1h z0.h, p0, [x0, #-8, MUL VL] : stnt1h %z0.h %p0 -> -0x0100(%x0)[32byte] -e499e482 : stnt1h z2.h, p1, [x4, #-7, MUL VL] : stnt1h %z2.h %p1 -> -0xe0(%x4)[32byte] -e49ae8c4 : stnt1h z4.h, p2, [x6, #-6, MUL VL] : stnt1h %z4.h %p2 -> -0xc0(%x6)[32byte] -e49be906 : stnt1h z6.h, p2, [x8, #-5, MUL VL] : stnt1h %z6.h %p2 -> -0xa0(%x8)[32byte] -e49ced48 : stnt1h z8.h, p3, [x10, #-4, MUL VL] : stnt1h %z8.h %p3 -> -0x80(%x10)[32byte] -e49ded6a : stnt1h z10.h, p3, [x11, #-3, MUL VL] : stnt1h %z10.h %p3 -> -0x60(%x11)[32byte] -e49ef1ac : stnt1h z12.h, p4, [x13, #-2, MUL VL] : stnt1h %z12.h %p4 -> -0x40(%x13)[32byte] -e49ff1ee : stnt1h z14.h, p4, [x15, #-1, MUL VL] : stnt1h %z14.h %p4 -> -0x20(%x15)[32byte] -e490f630 : stnt1h z16.h, p5, [x17, #0, MUL VL] : stnt1h %z16.h %p5 -> (%x17)[32byte] -e490f671 : stnt1h z17.h, p5, [x19, #0, MUL VL] : stnt1h %z17.h %p5 -> (%x19)[32byte] -e491f6b3 : stnt1h z19.h, p5, [x21, #1, MUL VL] : stnt1h %z19.h %p5 -> +0x20(%x21)[32byte] -e492faf5 : stnt1h z21.h, p6, [x23, #2, MUL VL] : stnt1h %z21.h %p6 -> +0x40(%x23)[32byte] -e493fb17 : stnt1h z23.h, p6, [x24, #3, MUL VL] : stnt1h %z23.h %p6 -> 
+0x60(%x24)[32byte] -e494ff59 : stnt1h z25.h, p7, [x26, #4, MUL VL] : stnt1h %z25.h %p7 -> +0x80(%x26)[32byte] -e495ff9b : stnt1h z27.h, p7, [x28, #5, MUL VL] : stnt1h %z27.h %p7 -> +0xa0(%x28)[32byte] -e497ffff : stnt1h z31.h, p7, [sp, #7, MUL VL] : stnt1h %z31.h %p7 -> +0xe0(%sp)[32byte] +e498e000 : stnt1h z0.h, p0, [x0, #-8, MUL VL] : stnt1h %z0.h %p0 -> -0x0100(%x0)[2byte] +e499e482 : stnt1h z2.h, p1, [x4, #-7, MUL VL] : stnt1h %z2.h %p1 -> -0xe0(%x4)[2byte] +e49ae8c4 : stnt1h z4.h, p2, [x6, #-6, MUL VL] : stnt1h %z4.h %p2 -> -0xc0(%x6)[2byte] +e49be906 : stnt1h z6.h, p2, [x8, #-5, MUL VL] : stnt1h %z6.h %p2 -> -0xa0(%x8)[2byte] +e49ced48 : stnt1h z8.h, p3, [x10, #-4, MUL VL] : stnt1h %z8.h %p3 -> -0x80(%x10)[2byte] +e49ded6a : stnt1h z10.h, p3, [x11, #-3, MUL VL] : stnt1h %z10.h %p3 -> -0x60(%x11)[2byte] +e49ef1ac : stnt1h z12.h, p4, [x13, #-2, MUL VL] : stnt1h %z12.h %p4 -> -0x40(%x13)[2byte] +e49ff1ee : stnt1h z14.h, p4, [x15, #-1, MUL VL] : stnt1h %z14.h %p4 -> -0x20(%x15)[2byte] +e490f630 : stnt1h z16.h, p5, [x17, #0, MUL VL] : stnt1h %z16.h %p5 -> (%x17)[2byte] +e490f671 : stnt1h z17.h, p5, [x19, #0, MUL VL] : stnt1h %z17.h %p5 -> (%x19)[2byte] +e491f6b3 : stnt1h z19.h, p5, [x21, #1, MUL VL] : stnt1h %z19.h %p5 -> +0x20(%x21)[2byte] +e492faf5 : stnt1h z21.h, p6, [x23, #2, MUL VL] : stnt1h %z21.h %p6 -> +0x40(%x23)[2byte] +e493fb17 : stnt1h z23.h, p6, [x24, #3, MUL VL] : stnt1h %z23.h %p6 -> +0x60(%x24)[2byte] +e494ff59 : stnt1h z25.h, p7, [x26, #4, MUL VL] : stnt1h %z25.h %p7 -> +0x80(%x26)[2byte] +e495ff9b : stnt1h z27.h, p7, [x28, #5, MUL VL] : stnt1h %z27.h %p7 -> +0xa0(%x28)[2byte] +e497ffff : stnt1h z31.h, p7, [sp, #7, MUL VL] : stnt1h %z31.h %p7 -> +0xe0(%sp)[2byte] # STNT1W { .S }, , [, , LSL #2] (STNT1W-Z.P.BR-Contiguous) -e5006000 : stnt1w z0.s, p0, [x0, x0, LSL #2] : stnt1w %z0.s %p0 -> (%x0,%x0,lsl #2)[32byte] -e5056482 : stnt1w z2.s, p1, [x4, x5, LSL #2] : stnt1w %z2.s %p1 -> (%x4,%x5,lsl #2)[32byte] -e50768c4 : stnt1w z4.s, p2, [x6, x7, LSL 
#2] : stnt1w %z4.s %p2 -> (%x6,%x7,lsl #2)[32byte] -e5096906 : stnt1w z6.s, p2, [x8, x9, LSL #2] : stnt1w %z6.s %p2 -> (%x8,%x9,lsl #2)[32byte] -e50b6d48 : stnt1w z8.s, p3, [x10, x11, LSL #2] : stnt1w %z8.s %p3 -> (%x10,%x11,lsl #2)[32byte] -e50c6d6a : stnt1w z10.s, p3, [x11, x12, LSL #2] : stnt1w %z10.s %p3 -> (%x11,%x12,lsl #2)[32byte] -e50e71ac : stnt1w z12.s, p4, [x13, x14, LSL #2] : stnt1w %z12.s %p4 -> (%x13,%x14,lsl #2)[32byte] -e51071ee : stnt1w z14.s, p4, [x15, x16, LSL #2] : stnt1w %z14.s %p4 -> (%x15,%x16,lsl #2)[32byte] -e5127630 : stnt1w z16.s, p5, [x17, x18, LSL #2] : stnt1w %z16.s %p5 -> (%x17,%x18,lsl #2)[32byte] -e5147671 : stnt1w z17.s, p5, [x19, x20, LSL #2] : stnt1w %z17.s %p5 -> (%x19,%x20,lsl #2)[32byte] -e51676b3 : stnt1w z19.s, p5, [x21, x22, LSL #2] : stnt1w %z19.s %p5 -> (%x21,%x22,lsl #2)[32byte] -e5187af5 : stnt1w z21.s, p6, [x23, x24, LSL #2] : stnt1w %z21.s %p6 -> (%x23,%x24,lsl #2)[32byte] -e5197b17 : stnt1w z23.s, p6, [x24, x25, LSL #2] : stnt1w %z23.s %p6 -> (%x24,%x25,lsl #2)[32byte] -e51b7f59 : stnt1w z25.s, p7, [x26, x27, LSL #2] : stnt1w %z25.s %p7 -> (%x26,%x27,lsl #2)[32byte] -e51d7f9b : stnt1w z27.s, p7, [x28, x29, LSL #2] : stnt1w %z27.s %p7 -> (%x28,%x29,lsl #2)[32byte] -e51e7fff : stnt1w z31.s, p7, [sp, x30, LSL #2] : stnt1w %z31.s %p7 -> (%sp,%x30,lsl #2)[32byte] +e5006000 : stnt1w z0.s, p0, [x0, x0, LSL #2] : stnt1w %z0.s %p0 -> (%x0,%x0,lsl #2)[4byte] +e5056482 : stnt1w z2.s, p1, [x4, x5, LSL #2] : stnt1w %z2.s %p1 -> (%x4,%x5,lsl #2)[4byte] +e50768c4 : stnt1w z4.s, p2, [x6, x7, LSL #2] : stnt1w %z4.s %p2 -> (%x6,%x7,lsl #2)[4byte] +e5096906 : stnt1w z6.s, p2, [x8, x9, LSL #2] : stnt1w %z6.s %p2 -> (%x8,%x9,lsl #2)[4byte] +e50b6d48 : stnt1w z8.s, p3, [x10, x11, LSL #2] : stnt1w %z8.s %p3 -> (%x10,%x11,lsl #2)[4byte] +e50c6d6a : stnt1w z10.s, p3, [x11, x12, LSL #2] : stnt1w %z10.s %p3 -> (%x11,%x12,lsl #2)[4byte] +e50e71ac : stnt1w z12.s, p4, [x13, x14, LSL #2] : stnt1w %z12.s %p4 -> (%x13,%x14,lsl #2)[4byte] +e51071ee : 
stnt1w z14.s, p4, [x15, x16, LSL #2] : stnt1w %z14.s %p4 -> (%x15,%x16,lsl #2)[4byte] +e5127630 : stnt1w z16.s, p5, [x17, x18, LSL #2] : stnt1w %z16.s %p5 -> (%x17,%x18,lsl #2)[4byte] +e5147671 : stnt1w z17.s, p5, [x19, x20, LSL #2] : stnt1w %z17.s %p5 -> (%x19,%x20,lsl #2)[4byte] +e51676b3 : stnt1w z19.s, p5, [x21, x22, LSL #2] : stnt1w %z19.s %p5 -> (%x21,%x22,lsl #2)[4byte] +e5187af5 : stnt1w z21.s, p6, [x23, x24, LSL #2] : stnt1w %z21.s %p6 -> (%x23,%x24,lsl #2)[4byte] +e5197b17 : stnt1w z23.s, p6, [x24, x25, LSL #2] : stnt1w %z23.s %p6 -> (%x24,%x25,lsl #2)[4byte] +e51b7f59 : stnt1w z25.s, p7, [x26, x27, LSL #2] : stnt1w %z25.s %p7 -> (%x26,%x27,lsl #2)[4byte] +e51d7f9b : stnt1w z27.s, p7, [x28, x29, LSL #2] : stnt1w %z27.s %p7 -> (%x28,%x29,lsl #2)[4byte] +e51e7fff : stnt1w z31.s, p7, [sp, x30, LSL #2] : stnt1w %z31.s %p7 -> (%sp,%x30,lsl #2)[4byte] # STNT1W { .S }, , [{, #, MUL VL}] (STNT1W-Z.P.BI-Contiguous) -e518e000 : stnt1w z0.s, p0, [x0, #-8, MUL VL] : stnt1w %z0.s %p0 -> -0x0100(%x0)[32byte] -e519e482 : stnt1w z2.s, p1, [x4, #-7, MUL VL] : stnt1w %z2.s %p1 -> -0xe0(%x4)[32byte] -e51ae8c4 : stnt1w z4.s, p2, [x6, #-6, MUL VL] : stnt1w %z4.s %p2 -> -0xc0(%x6)[32byte] -e51be906 : stnt1w z6.s, p2, [x8, #-5, MUL VL] : stnt1w %z6.s %p2 -> -0xa0(%x8)[32byte] -e51ced48 : stnt1w z8.s, p3, [x10, #-4, MUL VL] : stnt1w %z8.s %p3 -> -0x80(%x10)[32byte] -e51ded6a : stnt1w z10.s, p3, [x11, #-3, MUL VL] : stnt1w %z10.s %p3 -> -0x60(%x11)[32byte] -e51ef1ac : stnt1w z12.s, p4, [x13, #-2, MUL VL] : stnt1w %z12.s %p4 -> -0x40(%x13)[32byte] -e51ff1ee : stnt1w z14.s, p4, [x15, #-1, MUL VL] : stnt1w %z14.s %p4 -> -0x20(%x15)[32byte] -e510f630 : stnt1w z16.s, p5, [x17, #0, MUL VL] : stnt1w %z16.s %p5 -> (%x17)[32byte] -e510f671 : stnt1w z17.s, p5, [x19, #0, MUL VL] : stnt1w %z17.s %p5 -> (%x19)[32byte] -e511f6b3 : stnt1w z19.s, p5, [x21, #1, MUL VL] : stnt1w %z19.s %p5 -> +0x20(%x21)[32byte] -e512faf5 : stnt1w z21.s, p6, [x23, #2, MUL VL] : stnt1w %z21.s %p6 -> 
+0x40(%x23)[32byte] -e513fb17 : stnt1w z23.s, p6, [x24, #3, MUL VL] : stnt1w %z23.s %p6 -> +0x60(%x24)[32byte] -e514ff59 : stnt1w z25.s, p7, [x26, #4, MUL VL] : stnt1w %z25.s %p7 -> +0x80(%x26)[32byte] -e515ff9b : stnt1w z27.s, p7, [x28, #5, MUL VL] : stnt1w %z27.s %p7 -> +0xa0(%x28)[32byte] -e517ffff : stnt1w z31.s, p7, [sp, #7, MUL VL] : stnt1w %z31.s %p7 -> +0xe0(%sp)[32byte] +e518e000 : stnt1w z0.s, p0, [x0, #-8, MUL VL] : stnt1w %z0.s %p0 -> -0x0100(%x0)[4byte] +e519e482 : stnt1w z2.s, p1, [x4, #-7, MUL VL] : stnt1w %z2.s %p1 -> -0xe0(%x4)[4byte] +e51ae8c4 : stnt1w z4.s, p2, [x6, #-6, MUL VL] : stnt1w %z4.s %p2 -> -0xc0(%x6)[4byte] +e51be906 : stnt1w z6.s, p2, [x8, #-5, MUL VL] : stnt1w %z6.s %p2 -> -0xa0(%x8)[4byte] +e51ced48 : stnt1w z8.s, p3, [x10, #-4, MUL VL] : stnt1w %z8.s %p3 -> -0x80(%x10)[4byte] +e51ded6a : stnt1w z10.s, p3, [x11, #-3, MUL VL] : stnt1w %z10.s %p3 -> -0x60(%x11)[4byte] +e51ef1ac : stnt1w z12.s, p4, [x13, #-2, MUL VL] : stnt1w %z12.s %p4 -> -0x40(%x13)[4byte] +e51ff1ee : stnt1w z14.s, p4, [x15, #-1, MUL VL] : stnt1w %z14.s %p4 -> -0x20(%x15)[4byte] +e510f630 : stnt1w z16.s, p5, [x17, #0, MUL VL] : stnt1w %z16.s %p5 -> (%x17)[4byte] +e510f671 : stnt1w z17.s, p5, [x19, #0, MUL VL] : stnt1w %z17.s %p5 -> (%x19)[4byte] +e511f6b3 : stnt1w z19.s, p5, [x21, #1, MUL VL] : stnt1w %z19.s %p5 -> +0x20(%x21)[4byte] +e512faf5 : stnt1w z21.s, p6, [x23, #2, MUL VL] : stnt1w %z21.s %p6 -> +0x40(%x23)[4byte] +e513fb17 : stnt1w z23.s, p6, [x24, #3, MUL VL] : stnt1w %z23.s %p6 -> +0x60(%x24)[4byte] +e514ff59 : stnt1w z25.s, p7, [x26, #4, MUL VL] : stnt1w %z25.s %p7 -> +0x80(%x26)[4byte] +e515ff9b : stnt1w z27.s, p7, [x28, #5, MUL VL] : stnt1w %z27.s %p7 -> +0xa0(%x28)[4byte] +e517ffff : stnt1w z31.s, p7, [sp, #7, MUL VL] : stnt1w %z31.s %p7 -> +0xe0(%sp)[4byte] # STR , [{, #, MUL VL}] e58003c0 : str p0, [x30] : str %p0 -> (%x30)[4byte] diff --git a/suite/tests/api/dis-a64-sve2.txt b/suite/tests/api/dis-a64-sve2.txt index 7871cead7e0..7a92c6b98cc 100644 
--- a/suite/tests/api/dis-a64-sve2.txt +++ b/suite/tests/api/dis-a64-sve2.txt @@ -1601,220 +1601,220 @@ 453fa3ff : histseg z31.b, z31.b, z31.b : histseg %z31.b %z31.b -> %z31.b # LDNT1B { .S }, /Z, [.S{, }] (LDNT1B-Z.P.AR-S.x32.unscaled) -8400a000 : ldnt1b z0.s, p0/Z, [z0.s, x0] : ldnt1b (%z0.s,%x0)[8byte] %p0/z -> %z0.s -8405a482 : ldnt1b z2.s, p1/Z, [z4.s, x5] : ldnt1b (%z4.s,%x5)[8byte] %p1/z -> %z2.s -8407a8c4 : ldnt1b z4.s, p2/Z, [z6.s, x7] : ldnt1b (%z6.s,%x7)[8byte] %p2/z -> %z4.s -8409a906 : ldnt1b z6.s, p2/Z, [z8.s, x9] : ldnt1b (%z8.s,%x9)[8byte] %p2/z -> %z6.s -840bad48 : ldnt1b z8.s, p3/Z, [z10.s, x11] : ldnt1b (%z10.s,%x11)[8byte] %p3/z -> %z8.s -840cad8a : ldnt1b z10.s, p3/Z, [z12.s, x12] : ldnt1b (%z12.s,%x12)[8byte] %p3/z -> %z10.s -840eb1cc : ldnt1b z12.s, p4/Z, [z14.s, x14] : ldnt1b (%z14.s,%x14)[8byte] %p4/z -> %z12.s -8410b20e : ldnt1b z14.s, p4/Z, [z16.s, x16] : ldnt1b (%z16.s,%x16)[8byte] %p4/z -> %z14.s -8412b650 : ldnt1b z16.s, p5/Z, [z18.s, x18] : ldnt1b (%z18.s,%x18)[8byte] %p5/z -> %z16.s -8414b671 : ldnt1b z17.s, p5/Z, [z19.s, x20] : ldnt1b (%z19.s,%x20)[8byte] %p5/z -> %z17.s -8416b6b3 : ldnt1b z19.s, p5/Z, [z21.s, x22] : ldnt1b (%z21.s,%x22)[8byte] %p5/z -> %z19.s -8418baf5 : ldnt1b z21.s, p6/Z, [z23.s, x24] : ldnt1b (%z23.s,%x24)[8byte] %p6/z -> %z21.s -8419bb37 : ldnt1b z23.s, p6/Z, [z25.s, x25] : ldnt1b (%z25.s,%x25)[8byte] %p6/z -> %z23.s -841bbf79 : ldnt1b z25.s, p7/Z, [z27.s, x27] : ldnt1b (%z27.s,%x27)[8byte] %p7/z -> %z25.s -841dbfbb : ldnt1b z27.s, p7/Z, [z29.s, x29] : ldnt1b (%z29.s,%x29)[8byte] %p7/z -> %z27.s -841ebfff : ldnt1b z31.s, p7/Z, [z31.s, x30] : ldnt1b (%z31.s,%x30)[8byte] %p7/z -> %z31.s +8400a000 : ldnt1b z0.s, p0/Z, [z0.s, x0] : ldnt1b (%z0.s,%x0)[1byte] %p0/z -> %z0.s +8405a482 : ldnt1b z2.s, p1/Z, [z4.s, x5] : ldnt1b (%z4.s,%x5)[1byte] %p1/z -> %z2.s +8407a8c4 : ldnt1b z4.s, p2/Z, [z6.s, x7] : ldnt1b (%z6.s,%x7)[1byte] %p2/z -> %z4.s +8409a906 : ldnt1b z6.s, p2/Z, [z8.s, x9] : ldnt1b (%z8.s,%x9)[1byte] %p2/z 
-> %z6.s +840bad48 : ldnt1b z8.s, p3/Z, [z10.s, x11] : ldnt1b (%z10.s,%x11)[1byte] %p3/z -> %z8.s +840cad8a : ldnt1b z10.s, p3/Z, [z12.s, x12] : ldnt1b (%z12.s,%x12)[1byte] %p3/z -> %z10.s +840eb1cc : ldnt1b z12.s, p4/Z, [z14.s, x14] : ldnt1b (%z14.s,%x14)[1byte] %p4/z -> %z12.s +8410b20e : ldnt1b z14.s, p4/Z, [z16.s, x16] : ldnt1b (%z16.s,%x16)[1byte] %p4/z -> %z14.s +8412b650 : ldnt1b z16.s, p5/Z, [z18.s, x18] : ldnt1b (%z18.s,%x18)[1byte] %p5/z -> %z16.s +8414b671 : ldnt1b z17.s, p5/Z, [z19.s, x20] : ldnt1b (%z19.s,%x20)[1byte] %p5/z -> %z17.s +8416b6b3 : ldnt1b z19.s, p5/Z, [z21.s, x22] : ldnt1b (%z21.s,%x22)[1byte] %p5/z -> %z19.s +8418baf5 : ldnt1b z21.s, p6/Z, [z23.s, x24] : ldnt1b (%z23.s,%x24)[1byte] %p6/z -> %z21.s +8419bb37 : ldnt1b z23.s, p6/Z, [z25.s, x25] : ldnt1b (%z25.s,%x25)[1byte] %p6/z -> %z23.s +841bbf79 : ldnt1b z25.s, p7/Z, [z27.s, x27] : ldnt1b (%z27.s,%x27)[1byte] %p7/z -> %z25.s +841dbfbb : ldnt1b z27.s, p7/Z, [z29.s, x29] : ldnt1b (%z29.s,%x29)[1byte] %p7/z -> %z27.s +841ebfff : ldnt1b z31.s, p7/Z, [z31.s, x30] : ldnt1b (%z31.s,%x30)[1byte] %p7/z -> %z31.s # LDNT1B { .D }, /Z, [.D{, }] (LDNT1B-Z.P.AR-D.64.unscaled) -c400c000 : ldnt1b z0.d, p0/Z, [z0.d, x0] : ldnt1b (%z0.d,%x0)[4byte] %p0/z -> %z0.d -c405c482 : ldnt1b z2.d, p1/Z, [z4.d, x5] : ldnt1b (%z4.d,%x5)[4byte] %p1/z -> %z2.d -c407c8c4 : ldnt1b z4.d, p2/Z, [z6.d, x7] : ldnt1b (%z6.d,%x7)[4byte] %p2/z -> %z4.d -c409c906 : ldnt1b z6.d, p2/Z, [z8.d, x9] : ldnt1b (%z8.d,%x9)[4byte] %p2/z -> %z6.d -c40bcd48 : ldnt1b z8.d, p3/Z, [z10.d, x11] : ldnt1b (%z10.d,%x11)[4byte] %p3/z -> %z8.d -c40ccd8a : ldnt1b z10.d, p3/Z, [z12.d, x12] : ldnt1b (%z12.d,%x12)[4byte] %p3/z -> %z10.d -c40ed1cc : ldnt1b z12.d, p4/Z, [z14.d, x14] : ldnt1b (%z14.d,%x14)[4byte] %p4/z -> %z12.d -c410d20e : ldnt1b z14.d, p4/Z, [z16.d, x16] : ldnt1b (%z16.d,%x16)[4byte] %p4/z -> %z14.d -c412d650 : ldnt1b z16.d, p5/Z, [z18.d, x18] : ldnt1b (%z18.d,%x18)[4byte] %p5/z -> %z16.d -c414d671 : ldnt1b z17.d, p5/Z, [z19.d, x20] : 
ldnt1b (%z19.d,%x20)[4byte] %p5/z -> %z17.d -c416d6b3 : ldnt1b z19.d, p5/Z, [z21.d, x22] : ldnt1b (%z21.d,%x22)[4byte] %p5/z -> %z19.d -c418daf5 : ldnt1b z21.d, p6/Z, [z23.d, x24] : ldnt1b (%z23.d,%x24)[4byte] %p6/z -> %z21.d -c419db37 : ldnt1b z23.d, p6/Z, [z25.d, x25] : ldnt1b (%z25.d,%x25)[4byte] %p6/z -> %z23.d -c41bdf79 : ldnt1b z25.d, p7/Z, [z27.d, x27] : ldnt1b (%z27.d,%x27)[4byte] %p7/z -> %z25.d -c41ddfbb : ldnt1b z27.d, p7/Z, [z29.d, x29] : ldnt1b (%z29.d,%x29)[4byte] %p7/z -> %z27.d -c41edfff : ldnt1b z31.d, p7/Z, [z31.d, x30] : ldnt1b (%z31.d,%x30)[4byte] %p7/z -> %z31.d +c400c000 : ldnt1b z0.d, p0/Z, [z0.d, x0] : ldnt1b (%z0.d,%x0)[1byte] %p0/z -> %z0.d +c405c482 : ldnt1b z2.d, p1/Z, [z4.d, x5] : ldnt1b (%z4.d,%x5)[1byte] %p1/z -> %z2.d +c407c8c4 : ldnt1b z4.d, p2/Z, [z6.d, x7] : ldnt1b (%z6.d,%x7)[1byte] %p2/z -> %z4.d +c409c906 : ldnt1b z6.d, p2/Z, [z8.d, x9] : ldnt1b (%z8.d,%x9)[1byte] %p2/z -> %z6.d +c40bcd48 : ldnt1b z8.d, p3/Z, [z10.d, x11] : ldnt1b (%z10.d,%x11)[1byte] %p3/z -> %z8.d +c40ccd8a : ldnt1b z10.d, p3/Z, [z12.d, x12] : ldnt1b (%z12.d,%x12)[1byte] %p3/z -> %z10.d +c40ed1cc : ldnt1b z12.d, p4/Z, [z14.d, x14] : ldnt1b (%z14.d,%x14)[1byte] %p4/z -> %z12.d +c410d20e : ldnt1b z14.d, p4/Z, [z16.d, x16] : ldnt1b (%z16.d,%x16)[1byte] %p4/z -> %z14.d +c412d650 : ldnt1b z16.d, p5/Z, [z18.d, x18] : ldnt1b (%z18.d,%x18)[1byte] %p5/z -> %z16.d +c414d671 : ldnt1b z17.d, p5/Z, [z19.d, x20] : ldnt1b (%z19.d,%x20)[1byte] %p5/z -> %z17.d +c416d6b3 : ldnt1b z19.d, p5/Z, [z21.d, x22] : ldnt1b (%z21.d,%x22)[1byte] %p5/z -> %z19.d +c418daf5 : ldnt1b z21.d, p6/Z, [z23.d, x24] : ldnt1b (%z23.d,%x24)[1byte] %p6/z -> %z21.d +c419db37 : ldnt1b z23.d, p6/Z, [z25.d, x25] : ldnt1b (%z25.d,%x25)[1byte] %p6/z -> %z23.d +c41bdf79 : ldnt1b z25.d, p7/Z, [z27.d, x27] : ldnt1b (%z27.d,%x27)[1byte] %p7/z -> %z25.d +c41ddfbb : ldnt1b z27.d, p7/Z, [z29.d, x29] : ldnt1b (%z29.d,%x29)[1byte] %p7/z -> %z27.d +c41edfff : ldnt1b z31.d, p7/Z, [z31.d, x30] : ldnt1b 
(%z31.d,%x30)[1byte] %p7/z -> %z31.d # LDNT1D { .D }, /Z, [.D{, }] (LDNT1D-Z.P.AR-D.64.unscaled) -c580c000 : ldnt1d z0.d, p0/Z, [z0.d, x0] : ldnt1d (%z0.d,%x0)[32byte] %p0/z -> %z0.d -c585c482 : ldnt1d z2.d, p1/Z, [z4.d, x5] : ldnt1d (%z4.d,%x5)[32byte] %p1/z -> %z2.d -c587c8c4 : ldnt1d z4.d, p2/Z, [z6.d, x7] : ldnt1d (%z6.d,%x7)[32byte] %p2/z -> %z4.d -c589c906 : ldnt1d z6.d, p2/Z, [z8.d, x9] : ldnt1d (%z8.d,%x9)[32byte] %p2/z -> %z6.d -c58bcd48 : ldnt1d z8.d, p3/Z, [z10.d, x11] : ldnt1d (%z10.d,%x11)[32byte] %p3/z -> %z8.d -c58ccd8a : ldnt1d z10.d, p3/Z, [z12.d, x12] : ldnt1d (%z12.d,%x12)[32byte] %p3/z -> %z10.d -c58ed1cc : ldnt1d z12.d, p4/Z, [z14.d, x14] : ldnt1d (%z14.d,%x14)[32byte] %p4/z -> %z12.d -c590d20e : ldnt1d z14.d, p4/Z, [z16.d, x16] : ldnt1d (%z16.d,%x16)[32byte] %p4/z -> %z14.d -c592d650 : ldnt1d z16.d, p5/Z, [z18.d, x18] : ldnt1d (%z18.d,%x18)[32byte] %p5/z -> %z16.d -c594d671 : ldnt1d z17.d, p5/Z, [z19.d, x20] : ldnt1d (%z19.d,%x20)[32byte] %p5/z -> %z17.d -c596d6b3 : ldnt1d z19.d, p5/Z, [z21.d, x22] : ldnt1d (%z21.d,%x22)[32byte] %p5/z -> %z19.d -c598daf5 : ldnt1d z21.d, p6/Z, [z23.d, x24] : ldnt1d (%z23.d,%x24)[32byte] %p6/z -> %z21.d -c599db37 : ldnt1d z23.d, p6/Z, [z25.d, x25] : ldnt1d (%z25.d,%x25)[32byte] %p6/z -> %z23.d -c59bdf79 : ldnt1d z25.d, p7/Z, [z27.d, x27] : ldnt1d (%z27.d,%x27)[32byte] %p7/z -> %z25.d -c59ddfbb : ldnt1d z27.d, p7/Z, [z29.d, x29] : ldnt1d (%z29.d,%x29)[32byte] %p7/z -> %z27.d -c59edfff : ldnt1d z31.d, p7/Z, [z31.d, x30] : ldnt1d (%z31.d,%x30)[32byte] %p7/z -> %z31.d +c580c000 : ldnt1d z0.d, p0/Z, [z0.d, x0] : ldnt1d (%z0.d,%x0)[8byte] %p0/z -> %z0.d +c585c482 : ldnt1d z2.d, p1/Z, [z4.d, x5] : ldnt1d (%z4.d,%x5)[8byte] %p1/z -> %z2.d +c587c8c4 : ldnt1d z4.d, p2/Z, [z6.d, x7] : ldnt1d (%z6.d,%x7)[8byte] %p2/z -> %z4.d +c589c906 : ldnt1d z6.d, p2/Z, [z8.d, x9] : ldnt1d (%z8.d,%x9)[8byte] %p2/z -> %z6.d +c58bcd48 : ldnt1d z8.d, p3/Z, [z10.d, x11] : ldnt1d (%z10.d,%x11)[8byte] %p3/z -> %z8.d +c58ccd8a : ldnt1d z10.d, 
p3/Z, [z12.d, x12] : ldnt1d (%z12.d,%x12)[8byte] %p3/z -> %z10.d +c58ed1cc : ldnt1d z12.d, p4/Z, [z14.d, x14] : ldnt1d (%z14.d,%x14)[8byte] %p4/z -> %z12.d +c590d20e : ldnt1d z14.d, p4/Z, [z16.d, x16] : ldnt1d (%z16.d,%x16)[8byte] %p4/z -> %z14.d +c592d650 : ldnt1d z16.d, p5/Z, [z18.d, x18] : ldnt1d (%z18.d,%x18)[8byte] %p5/z -> %z16.d +c594d671 : ldnt1d z17.d, p5/Z, [z19.d, x20] : ldnt1d (%z19.d,%x20)[8byte] %p5/z -> %z17.d +c596d6b3 : ldnt1d z19.d, p5/Z, [z21.d, x22] : ldnt1d (%z21.d,%x22)[8byte] %p5/z -> %z19.d +c598daf5 : ldnt1d z21.d, p6/Z, [z23.d, x24] : ldnt1d (%z23.d,%x24)[8byte] %p6/z -> %z21.d +c599db37 : ldnt1d z23.d, p6/Z, [z25.d, x25] : ldnt1d (%z25.d,%x25)[8byte] %p6/z -> %z23.d +c59bdf79 : ldnt1d z25.d, p7/Z, [z27.d, x27] : ldnt1d (%z27.d,%x27)[8byte] %p7/z -> %z25.d +c59ddfbb : ldnt1d z27.d, p7/Z, [z29.d, x29] : ldnt1d (%z29.d,%x29)[8byte] %p7/z -> %z27.d +c59edfff : ldnt1d z31.d, p7/Z, [z31.d, x30] : ldnt1d (%z31.d,%x30)[8byte] %p7/z -> %z31.d # LDNT1H { .S }, /Z, [.S{, }] (LDNT1H-Z.P.AR-S.x32.unscaled) -8480a000 : ldnt1h z0.s, p0/Z, [z0.s, x0] : ldnt1h (%z0.s,%x0)[16byte] %p0/z -> %z0.s -8485a482 : ldnt1h z2.s, p1/Z, [z4.s, x5] : ldnt1h (%z4.s,%x5)[16byte] %p1/z -> %z2.s -8487a8c4 : ldnt1h z4.s, p2/Z, [z6.s, x7] : ldnt1h (%z6.s,%x7)[16byte] %p2/z -> %z4.s -8489a906 : ldnt1h z6.s, p2/Z, [z8.s, x9] : ldnt1h (%z8.s,%x9)[16byte] %p2/z -> %z6.s -848bad48 : ldnt1h z8.s, p3/Z, [z10.s, x11] : ldnt1h (%z10.s,%x11)[16byte] %p3/z -> %z8.s -848cad8a : ldnt1h z10.s, p3/Z, [z12.s, x12] : ldnt1h (%z12.s,%x12)[16byte] %p3/z -> %z10.s -848eb1cc : ldnt1h z12.s, p4/Z, [z14.s, x14] : ldnt1h (%z14.s,%x14)[16byte] %p4/z -> %z12.s -8490b20e : ldnt1h z14.s, p4/Z, [z16.s, x16] : ldnt1h (%z16.s,%x16)[16byte] %p4/z -> %z14.s -8492b650 : ldnt1h z16.s, p5/Z, [z18.s, x18] : ldnt1h (%z18.s,%x18)[16byte] %p5/z -> %z16.s -8494b671 : ldnt1h z17.s, p5/Z, [z19.s, x20] : ldnt1h (%z19.s,%x20)[16byte] %p5/z -> %z17.s -8496b6b3 : ldnt1h z19.s, p5/Z, [z21.s, x22] : ldnt1h 
(%z21.s,%x22)[16byte] %p5/z -> %z19.s -8498baf5 : ldnt1h z21.s, p6/Z, [z23.s, x24] : ldnt1h (%z23.s,%x24)[16byte] %p6/z -> %z21.s -8499bb37 : ldnt1h z23.s, p6/Z, [z25.s, x25] : ldnt1h (%z25.s,%x25)[16byte] %p6/z -> %z23.s -849bbf79 : ldnt1h z25.s, p7/Z, [z27.s, x27] : ldnt1h (%z27.s,%x27)[16byte] %p7/z -> %z25.s -849dbfbb : ldnt1h z27.s, p7/Z, [z29.s, x29] : ldnt1h (%z29.s,%x29)[16byte] %p7/z -> %z27.s -849ebfff : ldnt1h z31.s, p7/Z, [z31.s, x30] : ldnt1h (%z31.s,%x30)[16byte] %p7/z -> %z31.s +8480a000 : ldnt1h z0.s, p0/Z, [z0.s, x0] : ldnt1h (%z0.s,%x0)[2byte] %p0/z -> %z0.s +8485a482 : ldnt1h z2.s, p1/Z, [z4.s, x5] : ldnt1h (%z4.s,%x5)[2byte] %p1/z -> %z2.s +8487a8c4 : ldnt1h z4.s, p2/Z, [z6.s, x7] : ldnt1h (%z6.s,%x7)[2byte] %p2/z -> %z4.s +8489a906 : ldnt1h z6.s, p2/Z, [z8.s, x9] : ldnt1h (%z8.s,%x9)[2byte] %p2/z -> %z6.s +848bad48 : ldnt1h z8.s, p3/Z, [z10.s, x11] : ldnt1h (%z10.s,%x11)[2byte] %p3/z -> %z8.s +848cad8a : ldnt1h z10.s, p3/Z, [z12.s, x12] : ldnt1h (%z12.s,%x12)[2byte] %p3/z -> %z10.s +848eb1cc : ldnt1h z12.s, p4/Z, [z14.s, x14] : ldnt1h (%z14.s,%x14)[2byte] %p4/z -> %z12.s +8490b20e : ldnt1h z14.s, p4/Z, [z16.s, x16] : ldnt1h (%z16.s,%x16)[2byte] %p4/z -> %z14.s +8492b650 : ldnt1h z16.s, p5/Z, [z18.s, x18] : ldnt1h (%z18.s,%x18)[2byte] %p5/z -> %z16.s +8494b671 : ldnt1h z17.s, p5/Z, [z19.s, x20] : ldnt1h (%z19.s,%x20)[2byte] %p5/z -> %z17.s +8496b6b3 : ldnt1h z19.s, p5/Z, [z21.s, x22] : ldnt1h (%z21.s,%x22)[2byte] %p5/z -> %z19.s +8498baf5 : ldnt1h z21.s, p6/Z, [z23.s, x24] : ldnt1h (%z23.s,%x24)[2byte] %p6/z -> %z21.s +8499bb37 : ldnt1h z23.s, p6/Z, [z25.s, x25] : ldnt1h (%z25.s,%x25)[2byte] %p6/z -> %z23.s +849bbf79 : ldnt1h z25.s, p7/Z, [z27.s, x27] : ldnt1h (%z27.s,%x27)[2byte] %p7/z -> %z25.s +849dbfbb : ldnt1h z27.s, p7/Z, [z29.s, x29] : ldnt1h (%z29.s,%x29)[2byte] %p7/z -> %z27.s +849ebfff : ldnt1h z31.s, p7/Z, [z31.s, x30] : ldnt1h (%z31.s,%x30)[2byte] %p7/z -> %z31.s # LDNT1H { .D }, /Z, [.D{, }] (LDNT1H-Z.P.AR-D.64.unscaled) -c480c000 : 
ldnt1h z0.d, p0/Z, [z0.d, x0] : ldnt1h (%z0.d,%x0)[8byte] %p0/z -> %z0.d -c485c482 : ldnt1h z2.d, p1/Z, [z4.d, x5] : ldnt1h (%z4.d,%x5)[8byte] %p1/z -> %z2.d -c487c8c4 : ldnt1h z4.d, p2/Z, [z6.d, x7] : ldnt1h (%z6.d,%x7)[8byte] %p2/z -> %z4.d -c489c906 : ldnt1h z6.d, p2/Z, [z8.d, x9] : ldnt1h (%z8.d,%x9)[8byte] %p2/z -> %z6.d -c48bcd48 : ldnt1h z8.d, p3/Z, [z10.d, x11] : ldnt1h (%z10.d,%x11)[8byte] %p3/z -> %z8.d -c48ccd8a : ldnt1h z10.d, p3/Z, [z12.d, x12] : ldnt1h (%z12.d,%x12)[8byte] %p3/z -> %z10.d -c48ed1cc : ldnt1h z12.d, p4/Z, [z14.d, x14] : ldnt1h (%z14.d,%x14)[8byte] %p4/z -> %z12.d -c490d20e : ldnt1h z14.d, p4/Z, [z16.d, x16] : ldnt1h (%z16.d,%x16)[8byte] %p4/z -> %z14.d -c492d650 : ldnt1h z16.d, p5/Z, [z18.d, x18] : ldnt1h (%z18.d,%x18)[8byte] %p5/z -> %z16.d -c494d671 : ldnt1h z17.d, p5/Z, [z19.d, x20] : ldnt1h (%z19.d,%x20)[8byte] %p5/z -> %z17.d -c496d6b3 : ldnt1h z19.d, p5/Z, [z21.d, x22] : ldnt1h (%z21.d,%x22)[8byte] %p5/z -> %z19.d -c498daf5 : ldnt1h z21.d, p6/Z, [z23.d, x24] : ldnt1h (%z23.d,%x24)[8byte] %p6/z -> %z21.d -c499db37 : ldnt1h z23.d, p6/Z, [z25.d, x25] : ldnt1h (%z25.d,%x25)[8byte] %p6/z -> %z23.d -c49bdf79 : ldnt1h z25.d, p7/Z, [z27.d, x27] : ldnt1h (%z27.d,%x27)[8byte] %p7/z -> %z25.d -c49ddfbb : ldnt1h z27.d, p7/Z, [z29.d, x29] : ldnt1h (%z29.d,%x29)[8byte] %p7/z -> %z27.d -c49edfff : ldnt1h z31.d, p7/Z, [z31.d, x30] : ldnt1h (%z31.d,%x30)[8byte] %p7/z -> %z31.d +c480c000 : ldnt1h z0.d, p0/Z, [z0.d, x0] : ldnt1h (%z0.d,%x0)[2byte] %p0/z -> %z0.d +c485c482 : ldnt1h z2.d, p1/Z, [z4.d, x5] : ldnt1h (%z4.d,%x5)[2byte] %p1/z -> %z2.d +c487c8c4 : ldnt1h z4.d, p2/Z, [z6.d, x7] : ldnt1h (%z6.d,%x7)[2byte] %p2/z -> %z4.d +c489c906 : ldnt1h z6.d, p2/Z, [z8.d, x9] : ldnt1h (%z8.d,%x9)[2byte] %p2/z -> %z6.d +c48bcd48 : ldnt1h z8.d, p3/Z, [z10.d, x11] : ldnt1h (%z10.d,%x11)[2byte] %p3/z -> %z8.d +c48ccd8a : ldnt1h z10.d, p3/Z, [z12.d, x12] : ldnt1h (%z12.d,%x12)[2byte] %p3/z -> %z10.d +c48ed1cc : ldnt1h z12.d, p4/Z, [z14.d, x14] : ldnt1h 
(%z14.d,%x14)[2byte] %p4/z -> %z12.d +c490d20e : ldnt1h z14.d, p4/Z, [z16.d, x16] : ldnt1h (%z16.d,%x16)[2byte] %p4/z -> %z14.d +c492d650 : ldnt1h z16.d, p5/Z, [z18.d, x18] : ldnt1h (%z18.d,%x18)[2byte] %p5/z -> %z16.d +c494d671 : ldnt1h z17.d, p5/Z, [z19.d, x20] : ldnt1h (%z19.d,%x20)[2byte] %p5/z -> %z17.d +c496d6b3 : ldnt1h z19.d, p5/Z, [z21.d, x22] : ldnt1h (%z21.d,%x22)[2byte] %p5/z -> %z19.d +c498daf5 : ldnt1h z21.d, p6/Z, [z23.d, x24] : ldnt1h (%z23.d,%x24)[2byte] %p6/z -> %z21.d +c499db37 : ldnt1h z23.d, p6/Z, [z25.d, x25] : ldnt1h (%z25.d,%x25)[2byte] %p6/z -> %z23.d +c49bdf79 : ldnt1h z25.d, p7/Z, [z27.d, x27] : ldnt1h (%z27.d,%x27)[2byte] %p7/z -> %z25.d +c49ddfbb : ldnt1h z27.d, p7/Z, [z29.d, x29] : ldnt1h (%z29.d,%x29)[2byte] %p7/z -> %z27.d +c49edfff : ldnt1h z31.d, p7/Z, [z31.d, x30] : ldnt1h (%z31.d,%x30)[2byte] %p7/z -> %z31.d # LDNT1SB { .S }, /Z, [.S{, }] (LDNT1SB-Z.P.AR-S.x32.unscaled) -84008000 : ldnt1sb z0.s, p0/Z, [z0.s, x0] : ldnt1sb (%z0.s,%x0)[8byte] %p0/z -> %z0.s -84058482 : ldnt1sb z2.s, p1/Z, [z4.s, x5] : ldnt1sb (%z4.s,%x5)[8byte] %p1/z -> %z2.s -840788c4 : ldnt1sb z4.s, p2/Z, [z6.s, x7] : ldnt1sb (%z6.s,%x7)[8byte] %p2/z -> %z4.s -84098906 : ldnt1sb z6.s, p2/Z, [z8.s, x9] : ldnt1sb (%z8.s,%x9)[8byte] %p2/z -> %z6.s -840b8d48 : ldnt1sb z8.s, p3/Z, [z10.s, x11] : ldnt1sb (%z10.s,%x11)[8byte] %p3/z -> %z8.s -840c8d8a : ldnt1sb z10.s, p3/Z, [z12.s, x12] : ldnt1sb (%z12.s,%x12)[8byte] %p3/z -> %z10.s -840e91cc : ldnt1sb z12.s, p4/Z, [z14.s, x14] : ldnt1sb (%z14.s,%x14)[8byte] %p4/z -> %z12.s -8410920e : ldnt1sb z14.s, p4/Z, [z16.s, x16] : ldnt1sb (%z16.s,%x16)[8byte] %p4/z -> %z14.s -84129650 : ldnt1sb z16.s, p5/Z, [z18.s, x18] : ldnt1sb (%z18.s,%x18)[8byte] %p5/z -> %z16.s -84149671 : ldnt1sb z17.s, p5/Z, [z19.s, x20] : ldnt1sb (%z19.s,%x20)[8byte] %p5/z -> %z17.s -841696b3 : ldnt1sb z19.s, p5/Z, [z21.s, x22] : ldnt1sb (%z21.s,%x22)[8byte] %p5/z -> %z19.s -84189af5 : ldnt1sb z21.s, p6/Z, [z23.s, x24] : ldnt1sb (%z23.s,%x24)[8byte] %p6/z 
-> %z21.s -84199b37 : ldnt1sb z23.s, p6/Z, [z25.s, x25] : ldnt1sb (%z25.s,%x25)[8byte] %p6/z -> %z23.s -841b9f79 : ldnt1sb z25.s, p7/Z, [z27.s, x27] : ldnt1sb (%z27.s,%x27)[8byte] %p7/z -> %z25.s -841d9fbb : ldnt1sb z27.s, p7/Z, [z29.s, x29] : ldnt1sb (%z29.s,%x29)[8byte] %p7/z -> %z27.s -841e9fff : ldnt1sb z31.s, p7/Z, [z31.s, x30] : ldnt1sb (%z31.s,%x30)[8byte] %p7/z -> %z31.s +84008000 : ldnt1sb z0.s, p0/Z, [z0.s, x0] : ldnt1sb (%z0.s,%x0)[1byte] %p0/z -> %z0.s +84058482 : ldnt1sb z2.s, p1/Z, [z4.s, x5] : ldnt1sb (%z4.s,%x5)[1byte] %p1/z -> %z2.s +840788c4 : ldnt1sb z4.s, p2/Z, [z6.s, x7] : ldnt1sb (%z6.s,%x7)[1byte] %p2/z -> %z4.s +84098906 : ldnt1sb z6.s, p2/Z, [z8.s, x9] : ldnt1sb (%z8.s,%x9)[1byte] %p2/z -> %z6.s +840b8d48 : ldnt1sb z8.s, p3/Z, [z10.s, x11] : ldnt1sb (%z10.s,%x11)[1byte] %p3/z -> %z8.s +840c8d8a : ldnt1sb z10.s, p3/Z, [z12.s, x12] : ldnt1sb (%z12.s,%x12)[1byte] %p3/z -> %z10.s +840e91cc : ldnt1sb z12.s, p4/Z, [z14.s, x14] : ldnt1sb (%z14.s,%x14)[1byte] %p4/z -> %z12.s +8410920e : ldnt1sb z14.s, p4/Z, [z16.s, x16] : ldnt1sb (%z16.s,%x16)[1byte] %p4/z -> %z14.s +84129650 : ldnt1sb z16.s, p5/Z, [z18.s, x18] : ldnt1sb (%z18.s,%x18)[1byte] %p5/z -> %z16.s +84149671 : ldnt1sb z17.s, p5/Z, [z19.s, x20] : ldnt1sb (%z19.s,%x20)[1byte] %p5/z -> %z17.s +841696b3 : ldnt1sb z19.s, p5/Z, [z21.s, x22] : ldnt1sb (%z21.s,%x22)[1byte] %p5/z -> %z19.s +84189af5 : ldnt1sb z21.s, p6/Z, [z23.s, x24] : ldnt1sb (%z23.s,%x24)[1byte] %p6/z -> %z21.s +84199b37 : ldnt1sb z23.s, p6/Z, [z25.s, x25] : ldnt1sb (%z25.s,%x25)[1byte] %p6/z -> %z23.s +841b9f79 : ldnt1sb z25.s, p7/Z, [z27.s, x27] : ldnt1sb (%z27.s,%x27)[1byte] %p7/z -> %z25.s +841d9fbb : ldnt1sb z27.s, p7/Z, [z29.s, x29] : ldnt1sb (%z29.s,%x29)[1byte] %p7/z -> %z27.s +841e9fff : ldnt1sb z31.s, p7/Z, [z31.s, x30] : ldnt1sb (%z31.s,%x30)[1byte] %p7/z -> %z31.s # LDNT1SB { .D }, /Z, [.D{, }] (LDNT1SB-Z.P.AR-D.64.unscaled) -c4008000 : ldnt1sb z0.d, p0/Z, [z0.d, x0] : ldnt1sb (%z0.d,%x0)[4byte] %p0/z -> %z0.d 
-c4058482 : ldnt1sb z2.d, p1/Z, [z4.d, x5] : ldnt1sb (%z4.d,%x5)[4byte] %p1/z -> %z2.d -c40788c4 : ldnt1sb z4.d, p2/Z, [z6.d, x7] : ldnt1sb (%z6.d,%x7)[4byte] %p2/z -> %z4.d -c4098906 : ldnt1sb z6.d, p2/Z, [z8.d, x9] : ldnt1sb (%z8.d,%x9)[4byte] %p2/z -> %z6.d -c40b8d48 : ldnt1sb z8.d, p3/Z, [z10.d, x11] : ldnt1sb (%z10.d,%x11)[4byte] %p3/z -> %z8.d -c40c8d8a : ldnt1sb z10.d, p3/Z, [z12.d, x12] : ldnt1sb (%z12.d,%x12)[4byte] %p3/z -> %z10.d -c40e91cc : ldnt1sb z12.d, p4/Z, [z14.d, x14] : ldnt1sb (%z14.d,%x14)[4byte] %p4/z -> %z12.d -c410920e : ldnt1sb z14.d, p4/Z, [z16.d, x16] : ldnt1sb (%z16.d,%x16)[4byte] %p4/z -> %z14.d -c4129650 : ldnt1sb z16.d, p5/Z, [z18.d, x18] : ldnt1sb (%z18.d,%x18)[4byte] %p5/z -> %z16.d -c4149671 : ldnt1sb z17.d, p5/Z, [z19.d, x20] : ldnt1sb (%z19.d,%x20)[4byte] %p5/z -> %z17.d -c41696b3 : ldnt1sb z19.d, p5/Z, [z21.d, x22] : ldnt1sb (%z21.d,%x22)[4byte] %p5/z -> %z19.d -c4189af5 : ldnt1sb z21.d, p6/Z, [z23.d, x24] : ldnt1sb (%z23.d,%x24)[4byte] %p6/z -> %z21.d -c4199b37 : ldnt1sb z23.d, p6/Z, [z25.d, x25] : ldnt1sb (%z25.d,%x25)[4byte] %p6/z -> %z23.d -c41b9f79 : ldnt1sb z25.d, p7/Z, [z27.d, x27] : ldnt1sb (%z27.d,%x27)[4byte] %p7/z -> %z25.d -c41d9fbb : ldnt1sb z27.d, p7/Z, [z29.d, x29] : ldnt1sb (%z29.d,%x29)[4byte] %p7/z -> %z27.d -c41e9fff : ldnt1sb z31.d, p7/Z, [z31.d, x30] : ldnt1sb (%z31.d,%x30)[4byte] %p7/z -> %z31.d +c4008000 : ldnt1sb z0.d, p0/Z, [z0.d, x0] : ldnt1sb (%z0.d,%x0)[1byte] %p0/z -> %z0.d +c4058482 : ldnt1sb z2.d, p1/Z, [z4.d, x5] : ldnt1sb (%z4.d,%x5)[1byte] %p1/z -> %z2.d +c40788c4 : ldnt1sb z4.d, p2/Z, [z6.d, x7] : ldnt1sb (%z6.d,%x7)[1byte] %p2/z -> %z4.d +c4098906 : ldnt1sb z6.d, p2/Z, [z8.d, x9] : ldnt1sb (%z8.d,%x9)[1byte] %p2/z -> %z6.d +c40b8d48 : ldnt1sb z8.d, p3/Z, [z10.d, x11] : ldnt1sb (%z10.d,%x11)[1byte] %p3/z -> %z8.d +c40c8d8a : ldnt1sb z10.d, p3/Z, [z12.d, x12] : ldnt1sb (%z12.d,%x12)[1byte] %p3/z -> %z10.d +c40e91cc : ldnt1sb z12.d, p4/Z, [z14.d, x14] : ldnt1sb (%z14.d,%x14)[1byte] %p4/z -> %z12.d 
+c410920e : ldnt1sb z14.d, p4/Z, [z16.d, x16] : ldnt1sb (%z16.d,%x16)[1byte] %p4/z -> %z14.d +c4129650 : ldnt1sb z16.d, p5/Z, [z18.d, x18] : ldnt1sb (%z18.d,%x18)[1byte] %p5/z -> %z16.d +c4149671 : ldnt1sb z17.d, p5/Z, [z19.d, x20] : ldnt1sb (%z19.d,%x20)[1byte] %p5/z -> %z17.d +c41696b3 : ldnt1sb z19.d, p5/Z, [z21.d, x22] : ldnt1sb (%z21.d,%x22)[1byte] %p5/z -> %z19.d +c4189af5 : ldnt1sb z21.d, p6/Z, [z23.d, x24] : ldnt1sb (%z23.d,%x24)[1byte] %p6/z -> %z21.d +c4199b37 : ldnt1sb z23.d, p6/Z, [z25.d, x25] : ldnt1sb (%z25.d,%x25)[1byte] %p6/z -> %z23.d +c41b9f79 : ldnt1sb z25.d, p7/Z, [z27.d, x27] : ldnt1sb (%z27.d,%x27)[1byte] %p7/z -> %z25.d +c41d9fbb : ldnt1sb z27.d, p7/Z, [z29.d, x29] : ldnt1sb (%z29.d,%x29)[1byte] %p7/z -> %z27.d +c41e9fff : ldnt1sb z31.d, p7/Z, [z31.d, x30] : ldnt1sb (%z31.d,%x30)[1byte] %p7/z -> %z31.d # LDNT1SH { .S }, /Z, [.S{, }] (LDNT1SH-Z.P.AR-S.x32.unscaled) -84808000 : ldnt1sh z0.s, p0/Z, [z0.s, x0] : ldnt1sh (%z0.s,%x0)[16byte] %p0/z -> %z0.s -84858482 : ldnt1sh z2.s, p1/Z, [z4.s, x5] : ldnt1sh (%z4.s,%x5)[16byte] %p1/z -> %z2.s -848788c4 : ldnt1sh z4.s, p2/Z, [z6.s, x7] : ldnt1sh (%z6.s,%x7)[16byte] %p2/z -> %z4.s -84898906 : ldnt1sh z6.s, p2/Z, [z8.s, x9] : ldnt1sh (%z8.s,%x9)[16byte] %p2/z -> %z6.s -848b8d48 : ldnt1sh z8.s, p3/Z, [z10.s, x11] : ldnt1sh (%z10.s,%x11)[16byte] %p3/z -> %z8.s -848c8d8a : ldnt1sh z10.s, p3/Z, [z12.s, x12] : ldnt1sh (%z12.s,%x12)[16byte] %p3/z -> %z10.s -848e91cc : ldnt1sh z12.s, p4/Z, [z14.s, x14] : ldnt1sh (%z14.s,%x14)[16byte] %p4/z -> %z12.s -8490920e : ldnt1sh z14.s, p4/Z, [z16.s, x16] : ldnt1sh (%z16.s,%x16)[16byte] %p4/z -> %z14.s -84929650 : ldnt1sh z16.s, p5/Z, [z18.s, x18] : ldnt1sh (%z18.s,%x18)[16byte] %p5/z -> %z16.s -84949671 : ldnt1sh z17.s, p5/Z, [z19.s, x20] : ldnt1sh (%z19.s,%x20)[16byte] %p5/z -> %z17.s -849696b3 : ldnt1sh z19.s, p5/Z, [z21.s, x22] : ldnt1sh (%z21.s,%x22)[16byte] %p5/z -> %z19.s -84989af5 : ldnt1sh z21.s, p6/Z, [z23.s, x24] : ldnt1sh (%z23.s,%x24)[16byte] %p6/z -> 
%z21.s -84999b37 : ldnt1sh z23.s, p6/Z, [z25.s, x25] : ldnt1sh (%z25.s,%x25)[16byte] %p6/z -> %z23.s -849b9f79 : ldnt1sh z25.s, p7/Z, [z27.s, x27] : ldnt1sh (%z27.s,%x27)[16byte] %p7/z -> %z25.s -849d9fbb : ldnt1sh z27.s, p7/Z, [z29.s, x29] : ldnt1sh (%z29.s,%x29)[16byte] %p7/z -> %z27.s -849e9fff : ldnt1sh z31.s, p7/Z, [z31.s, x30] : ldnt1sh (%z31.s,%x30)[16byte] %p7/z -> %z31.s +84808000 : ldnt1sh z0.s, p0/Z, [z0.s, x0] : ldnt1sh (%z0.s,%x0)[2byte] %p0/z -> %z0.s +84858482 : ldnt1sh z2.s, p1/Z, [z4.s, x5] : ldnt1sh (%z4.s,%x5)[2byte] %p1/z -> %z2.s +848788c4 : ldnt1sh z4.s, p2/Z, [z6.s, x7] : ldnt1sh (%z6.s,%x7)[2byte] %p2/z -> %z4.s +84898906 : ldnt1sh z6.s, p2/Z, [z8.s, x9] : ldnt1sh (%z8.s,%x9)[2byte] %p2/z -> %z6.s +848b8d48 : ldnt1sh z8.s, p3/Z, [z10.s, x11] : ldnt1sh (%z10.s,%x11)[2byte] %p3/z -> %z8.s +848c8d8a : ldnt1sh z10.s, p3/Z, [z12.s, x12] : ldnt1sh (%z12.s,%x12)[2byte] %p3/z -> %z10.s +848e91cc : ldnt1sh z12.s, p4/Z, [z14.s, x14] : ldnt1sh (%z14.s,%x14)[2byte] %p4/z -> %z12.s +8490920e : ldnt1sh z14.s, p4/Z, [z16.s, x16] : ldnt1sh (%z16.s,%x16)[2byte] %p4/z -> %z14.s +84929650 : ldnt1sh z16.s, p5/Z, [z18.s, x18] : ldnt1sh (%z18.s,%x18)[2byte] %p5/z -> %z16.s +84949671 : ldnt1sh z17.s, p5/Z, [z19.s, x20] : ldnt1sh (%z19.s,%x20)[2byte] %p5/z -> %z17.s +849696b3 : ldnt1sh z19.s, p5/Z, [z21.s, x22] : ldnt1sh (%z21.s,%x22)[2byte] %p5/z -> %z19.s +84989af5 : ldnt1sh z21.s, p6/Z, [z23.s, x24] : ldnt1sh (%z23.s,%x24)[2byte] %p6/z -> %z21.s +84999b37 : ldnt1sh z23.s, p6/Z, [z25.s, x25] : ldnt1sh (%z25.s,%x25)[2byte] %p6/z -> %z23.s +849b9f79 : ldnt1sh z25.s, p7/Z, [z27.s, x27] : ldnt1sh (%z27.s,%x27)[2byte] %p7/z -> %z25.s +849d9fbb : ldnt1sh z27.s, p7/Z, [z29.s, x29] : ldnt1sh (%z29.s,%x29)[2byte] %p7/z -> %z27.s +849e9fff : ldnt1sh z31.s, p7/Z, [z31.s, x30] : ldnt1sh (%z31.s,%x30)[2byte] %p7/z -> %z31.s # LDNT1SH { .D }, /Z, [.D{, }] (LDNT1SH-Z.P.AR-D.64.unscaled) -c4808000 : ldnt1sh z0.d, p0/Z, [z0.d, x0] : ldnt1sh (%z0.d,%x0)[8byte] %p0/z -> %z0.d 
-c4858482 : ldnt1sh z2.d, p1/Z, [z4.d, x5] : ldnt1sh (%z4.d,%x5)[8byte] %p1/z -> %z2.d -c48788c4 : ldnt1sh z4.d, p2/Z, [z6.d, x7] : ldnt1sh (%z6.d,%x7)[8byte] %p2/z -> %z4.d -c4898906 : ldnt1sh z6.d, p2/Z, [z8.d, x9] : ldnt1sh (%z8.d,%x9)[8byte] %p2/z -> %z6.d -c48b8d48 : ldnt1sh z8.d, p3/Z, [z10.d, x11] : ldnt1sh (%z10.d,%x11)[8byte] %p3/z -> %z8.d -c48c8d8a : ldnt1sh z10.d, p3/Z, [z12.d, x12] : ldnt1sh (%z12.d,%x12)[8byte] %p3/z -> %z10.d -c48e91cc : ldnt1sh z12.d, p4/Z, [z14.d, x14] : ldnt1sh (%z14.d,%x14)[8byte] %p4/z -> %z12.d -c490920e : ldnt1sh z14.d, p4/Z, [z16.d, x16] : ldnt1sh (%z16.d,%x16)[8byte] %p4/z -> %z14.d -c4929650 : ldnt1sh z16.d, p5/Z, [z18.d, x18] : ldnt1sh (%z18.d,%x18)[8byte] %p5/z -> %z16.d -c4949671 : ldnt1sh z17.d, p5/Z, [z19.d, x20] : ldnt1sh (%z19.d,%x20)[8byte] %p5/z -> %z17.d -c49696b3 : ldnt1sh z19.d, p5/Z, [z21.d, x22] : ldnt1sh (%z21.d,%x22)[8byte] %p5/z -> %z19.d -c4989af5 : ldnt1sh z21.d, p6/Z, [z23.d, x24] : ldnt1sh (%z23.d,%x24)[8byte] %p6/z -> %z21.d -c4999b37 : ldnt1sh z23.d, p6/Z, [z25.d, x25] : ldnt1sh (%z25.d,%x25)[8byte] %p6/z -> %z23.d -c49b9f79 : ldnt1sh z25.d, p7/Z, [z27.d, x27] : ldnt1sh (%z27.d,%x27)[8byte] %p7/z -> %z25.d -c49d9fbb : ldnt1sh z27.d, p7/Z, [z29.d, x29] : ldnt1sh (%z29.d,%x29)[8byte] %p7/z -> %z27.d -c49e9fff : ldnt1sh z31.d, p7/Z, [z31.d, x30] : ldnt1sh (%z31.d,%x30)[8byte] %p7/z -> %z31.d +c4808000 : ldnt1sh z0.d, p0/Z, [z0.d, x0] : ldnt1sh (%z0.d,%x0)[2byte] %p0/z -> %z0.d +c4858482 : ldnt1sh z2.d, p1/Z, [z4.d, x5] : ldnt1sh (%z4.d,%x5)[2byte] %p1/z -> %z2.d +c48788c4 : ldnt1sh z4.d, p2/Z, [z6.d, x7] : ldnt1sh (%z6.d,%x7)[2byte] %p2/z -> %z4.d +c4898906 : ldnt1sh z6.d, p2/Z, [z8.d, x9] : ldnt1sh (%z8.d,%x9)[2byte] %p2/z -> %z6.d +c48b8d48 : ldnt1sh z8.d, p3/Z, [z10.d, x11] : ldnt1sh (%z10.d,%x11)[2byte] %p3/z -> %z8.d +c48c8d8a : ldnt1sh z10.d, p3/Z, [z12.d, x12] : ldnt1sh (%z12.d,%x12)[2byte] %p3/z -> %z10.d +c48e91cc : ldnt1sh z12.d, p4/Z, [z14.d, x14] : ldnt1sh (%z14.d,%x14)[2byte] %p4/z -> %z12.d 
+c490920e : ldnt1sh z14.d, p4/Z, [z16.d, x16] : ldnt1sh (%z16.d,%x16)[2byte] %p4/z -> %z14.d +c4929650 : ldnt1sh z16.d, p5/Z, [z18.d, x18] : ldnt1sh (%z18.d,%x18)[2byte] %p5/z -> %z16.d +c4949671 : ldnt1sh z17.d, p5/Z, [z19.d, x20] : ldnt1sh (%z19.d,%x20)[2byte] %p5/z -> %z17.d +c49696b3 : ldnt1sh z19.d, p5/Z, [z21.d, x22] : ldnt1sh (%z21.d,%x22)[2byte] %p5/z -> %z19.d +c4989af5 : ldnt1sh z21.d, p6/Z, [z23.d, x24] : ldnt1sh (%z23.d,%x24)[2byte] %p6/z -> %z21.d +c4999b37 : ldnt1sh z23.d, p6/Z, [z25.d, x25] : ldnt1sh (%z25.d,%x25)[2byte] %p6/z -> %z23.d +c49b9f79 : ldnt1sh z25.d, p7/Z, [z27.d, x27] : ldnt1sh (%z27.d,%x27)[2byte] %p7/z -> %z25.d +c49d9fbb : ldnt1sh z27.d, p7/Z, [z29.d, x29] : ldnt1sh (%z29.d,%x29)[2byte] %p7/z -> %z27.d +c49e9fff : ldnt1sh z31.d, p7/Z, [z31.d, x30] : ldnt1sh (%z31.d,%x30)[2byte] %p7/z -> %z31.d # LDNT1SW { .D }, /Z, [.D{, }] (LDNT1SW-Z.P.AR-D.64.unscaled) -c5008000 : ldnt1sw z0.d, p0/Z, [z0.d, x0] : ldnt1sw (%z0.d,%x0)[16byte] %p0/z -> %z0.d -c5058482 : ldnt1sw z2.d, p1/Z, [z4.d, x5] : ldnt1sw (%z4.d,%x5)[16byte] %p1/z -> %z2.d -c50788c4 : ldnt1sw z4.d, p2/Z, [z6.d, x7] : ldnt1sw (%z6.d,%x7)[16byte] %p2/z -> %z4.d -c5098906 : ldnt1sw z6.d, p2/Z, [z8.d, x9] : ldnt1sw (%z8.d,%x9)[16byte] %p2/z -> %z6.d -c50b8d48 : ldnt1sw z8.d, p3/Z, [z10.d, x11] : ldnt1sw (%z10.d,%x11)[16byte] %p3/z -> %z8.d -c50c8d8a : ldnt1sw z10.d, p3/Z, [z12.d, x12] : ldnt1sw (%z12.d,%x12)[16byte] %p3/z -> %z10.d -c50e91cc : ldnt1sw z12.d, p4/Z, [z14.d, x14] : ldnt1sw (%z14.d,%x14)[16byte] %p4/z -> %z12.d -c510920e : ldnt1sw z14.d, p4/Z, [z16.d, x16] : ldnt1sw (%z16.d,%x16)[16byte] %p4/z -> %z14.d -c5129650 : ldnt1sw z16.d, p5/Z, [z18.d, x18] : ldnt1sw (%z18.d,%x18)[16byte] %p5/z -> %z16.d -c5149671 : ldnt1sw z17.d, p5/Z, [z19.d, x20] : ldnt1sw (%z19.d,%x20)[16byte] %p5/z -> %z17.d -c51696b3 : ldnt1sw z19.d, p5/Z, [z21.d, x22] : ldnt1sw (%z21.d,%x22)[16byte] %p5/z -> %z19.d -c5189af5 : ldnt1sw z21.d, p6/Z, [z23.d, x24] : ldnt1sw (%z23.d,%x24)[16byte] %p6/z -> 
%z21.d -c5199b37 : ldnt1sw z23.d, p6/Z, [z25.d, x25] : ldnt1sw (%z25.d,%x25)[16byte] %p6/z -> %z23.d -c51b9f79 : ldnt1sw z25.d, p7/Z, [z27.d, x27] : ldnt1sw (%z27.d,%x27)[16byte] %p7/z -> %z25.d -c51d9fbb : ldnt1sw z27.d, p7/Z, [z29.d, x29] : ldnt1sw (%z29.d,%x29)[16byte] %p7/z -> %z27.d -c51e9fff : ldnt1sw z31.d, p7/Z, [z31.d, x30] : ldnt1sw (%z31.d,%x30)[16byte] %p7/z -> %z31.d +c5008000 : ldnt1sw z0.d, p0/Z, [z0.d, x0] : ldnt1sw (%z0.d,%x0)[4byte] %p0/z -> %z0.d +c5058482 : ldnt1sw z2.d, p1/Z, [z4.d, x5] : ldnt1sw (%z4.d,%x5)[4byte] %p1/z -> %z2.d +c50788c4 : ldnt1sw z4.d, p2/Z, [z6.d, x7] : ldnt1sw (%z6.d,%x7)[4byte] %p2/z -> %z4.d +c5098906 : ldnt1sw z6.d, p2/Z, [z8.d, x9] : ldnt1sw (%z8.d,%x9)[4byte] %p2/z -> %z6.d +c50b8d48 : ldnt1sw z8.d, p3/Z, [z10.d, x11] : ldnt1sw (%z10.d,%x11)[4byte] %p3/z -> %z8.d +c50c8d8a : ldnt1sw z10.d, p3/Z, [z12.d, x12] : ldnt1sw (%z12.d,%x12)[4byte] %p3/z -> %z10.d +c50e91cc : ldnt1sw z12.d, p4/Z, [z14.d, x14] : ldnt1sw (%z14.d,%x14)[4byte] %p4/z -> %z12.d +c510920e : ldnt1sw z14.d, p4/Z, [z16.d, x16] : ldnt1sw (%z16.d,%x16)[4byte] %p4/z -> %z14.d +c5129650 : ldnt1sw z16.d, p5/Z, [z18.d, x18] : ldnt1sw (%z18.d,%x18)[4byte] %p5/z -> %z16.d +c5149671 : ldnt1sw z17.d, p5/Z, [z19.d, x20] : ldnt1sw (%z19.d,%x20)[4byte] %p5/z -> %z17.d +c51696b3 : ldnt1sw z19.d, p5/Z, [z21.d, x22] : ldnt1sw (%z21.d,%x22)[4byte] %p5/z -> %z19.d +c5189af5 : ldnt1sw z21.d, p6/Z, [z23.d, x24] : ldnt1sw (%z23.d,%x24)[4byte] %p6/z -> %z21.d +c5199b37 : ldnt1sw z23.d, p6/Z, [z25.d, x25] : ldnt1sw (%z25.d,%x25)[4byte] %p6/z -> %z23.d +c51b9f79 : ldnt1sw z25.d, p7/Z, [z27.d, x27] : ldnt1sw (%z27.d,%x27)[4byte] %p7/z -> %z25.d +c51d9fbb : ldnt1sw z27.d, p7/Z, [z29.d, x29] : ldnt1sw (%z29.d,%x29)[4byte] %p7/z -> %z27.d +c51e9fff : ldnt1sw z31.d, p7/Z, [z31.d, x30] : ldnt1sw (%z31.d,%x30)[4byte] %p7/z -> %z31.d # LDNT1W { .S }, /Z, [.S{, }] (LDNT1W-Z.P.AR-S.x32.unscaled) -8500a000 : ldnt1w z0.s, p0/Z, [z0.s, x0] : ldnt1w (%z0.s,%x0)[32byte] %p0/z -> %z0.s 
-8505a482 : ldnt1w z2.s, p1/Z, [z4.s, x5] : ldnt1w (%z4.s,%x5)[32byte] %p1/z -> %z2.s -8507a8c4 : ldnt1w z4.s, p2/Z, [z6.s, x7] : ldnt1w (%z6.s,%x7)[32byte] %p2/z -> %z4.s -8509a906 : ldnt1w z6.s, p2/Z, [z8.s, x9] : ldnt1w (%z8.s,%x9)[32byte] %p2/z -> %z6.s -850bad48 : ldnt1w z8.s, p3/Z, [z10.s, x11] : ldnt1w (%z10.s,%x11)[32byte] %p3/z -> %z8.s -850cad8a : ldnt1w z10.s, p3/Z, [z12.s, x12] : ldnt1w (%z12.s,%x12)[32byte] %p3/z -> %z10.s -850eb1cc : ldnt1w z12.s, p4/Z, [z14.s, x14] : ldnt1w (%z14.s,%x14)[32byte] %p4/z -> %z12.s -8510b20e : ldnt1w z14.s, p4/Z, [z16.s, x16] : ldnt1w (%z16.s,%x16)[32byte] %p4/z -> %z14.s -8512b650 : ldnt1w z16.s, p5/Z, [z18.s, x18] : ldnt1w (%z18.s,%x18)[32byte] %p5/z -> %z16.s -8514b671 : ldnt1w z17.s, p5/Z, [z19.s, x20] : ldnt1w (%z19.s,%x20)[32byte] %p5/z -> %z17.s -8516b6b3 : ldnt1w z19.s, p5/Z, [z21.s, x22] : ldnt1w (%z21.s,%x22)[32byte] %p5/z -> %z19.s -8518baf5 : ldnt1w z21.s, p6/Z, [z23.s, x24] : ldnt1w (%z23.s,%x24)[32byte] %p6/z -> %z21.s -8519bb37 : ldnt1w z23.s, p6/Z, [z25.s, x25] : ldnt1w (%z25.s,%x25)[32byte] %p6/z -> %z23.s -851bbf79 : ldnt1w z25.s, p7/Z, [z27.s, x27] : ldnt1w (%z27.s,%x27)[32byte] %p7/z -> %z25.s -851dbfbb : ldnt1w z27.s, p7/Z, [z29.s, x29] : ldnt1w (%z29.s,%x29)[32byte] %p7/z -> %z27.s -851ebfff : ldnt1w z31.s, p7/Z, [z31.s, x30] : ldnt1w (%z31.s,%x30)[32byte] %p7/z -> %z31.s +8500a000 : ldnt1w z0.s, p0/Z, [z0.s, x0] : ldnt1w (%z0.s,%x0)[4byte] %p0/z -> %z0.s +8505a482 : ldnt1w z2.s, p1/Z, [z4.s, x5] : ldnt1w (%z4.s,%x5)[4byte] %p1/z -> %z2.s +8507a8c4 : ldnt1w z4.s, p2/Z, [z6.s, x7] : ldnt1w (%z6.s,%x7)[4byte] %p2/z -> %z4.s +8509a906 : ldnt1w z6.s, p2/Z, [z8.s, x9] : ldnt1w (%z8.s,%x9)[4byte] %p2/z -> %z6.s +850bad48 : ldnt1w z8.s, p3/Z, [z10.s, x11] : ldnt1w (%z10.s,%x11)[4byte] %p3/z -> %z8.s +850cad8a : ldnt1w z10.s, p3/Z, [z12.s, x12] : ldnt1w (%z12.s,%x12)[4byte] %p3/z -> %z10.s +850eb1cc : ldnt1w z12.s, p4/Z, [z14.s, x14] : ldnt1w (%z14.s,%x14)[4byte] %p4/z -> %z12.s +8510b20e : ldnt1w z14.s, 
p4/Z, [z16.s, x16] : ldnt1w (%z16.s,%x16)[4byte] %p4/z -> %z14.s +8512b650 : ldnt1w z16.s, p5/Z, [z18.s, x18] : ldnt1w (%z18.s,%x18)[4byte] %p5/z -> %z16.s +8514b671 : ldnt1w z17.s, p5/Z, [z19.s, x20] : ldnt1w (%z19.s,%x20)[4byte] %p5/z -> %z17.s +8516b6b3 : ldnt1w z19.s, p5/Z, [z21.s, x22] : ldnt1w (%z21.s,%x22)[4byte] %p5/z -> %z19.s +8518baf5 : ldnt1w z21.s, p6/Z, [z23.s, x24] : ldnt1w (%z23.s,%x24)[4byte] %p6/z -> %z21.s +8519bb37 : ldnt1w z23.s, p6/Z, [z25.s, x25] : ldnt1w (%z25.s,%x25)[4byte] %p6/z -> %z23.s +851bbf79 : ldnt1w z25.s, p7/Z, [z27.s, x27] : ldnt1w (%z27.s,%x27)[4byte] %p7/z -> %z25.s +851dbfbb : ldnt1w z27.s, p7/Z, [z29.s, x29] : ldnt1w (%z29.s,%x29)[4byte] %p7/z -> %z27.s +851ebfff : ldnt1w z31.s, p7/Z, [z31.s, x30] : ldnt1w (%z31.s,%x30)[4byte] %p7/z -> %z31.s # LDNT1W { .D }, /Z, [.D{, }] (LDNT1W-Z.P.AR-D.64.unscaled) -c500c000 : ldnt1w z0.d, p0/Z, [z0.d, x0] : ldnt1w (%z0.d,%x0)[16byte] %p0/z -> %z0.d -c505c482 : ldnt1w z2.d, p1/Z, [z4.d, x5] : ldnt1w (%z4.d,%x5)[16byte] %p1/z -> %z2.d -c507c8c4 : ldnt1w z4.d, p2/Z, [z6.d, x7] : ldnt1w (%z6.d,%x7)[16byte] %p2/z -> %z4.d -c509c906 : ldnt1w z6.d, p2/Z, [z8.d, x9] : ldnt1w (%z8.d,%x9)[16byte] %p2/z -> %z6.d -c50bcd48 : ldnt1w z8.d, p3/Z, [z10.d, x11] : ldnt1w (%z10.d,%x11)[16byte] %p3/z -> %z8.d -c50ccd8a : ldnt1w z10.d, p3/Z, [z12.d, x12] : ldnt1w (%z12.d,%x12)[16byte] %p3/z -> %z10.d -c50ed1cc : ldnt1w z12.d, p4/Z, [z14.d, x14] : ldnt1w (%z14.d,%x14)[16byte] %p4/z -> %z12.d -c510d20e : ldnt1w z14.d, p4/Z, [z16.d, x16] : ldnt1w (%z16.d,%x16)[16byte] %p4/z -> %z14.d -c512d650 : ldnt1w z16.d, p5/Z, [z18.d, x18] : ldnt1w (%z18.d,%x18)[16byte] %p5/z -> %z16.d -c514d671 : ldnt1w z17.d, p5/Z, [z19.d, x20] : ldnt1w (%z19.d,%x20)[16byte] %p5/z -> %z17.d -c516d6b3 : ldnt1w z19.d, p5/Z, [z21.d, x22] : ldnt1w (%z21.d,%x22)[16byte] %p5/z -> %z19.d -c518daf5 : ldnt1w z21.d, p6/Z, [z23.d, x24] : ldnt1w (%z23.d,%x24)[16byte] %p6/z -> %z21.d -c519db37 : ldnt1w z23.d, p6/Z, [z25.d, x25] : ldnt1w 
(%z25.d,%x25)[16byte] %p6/z -> %z23.d -c51bdf79 : ldnt1w z25.d, p7/Z, [z27.d, x27] : ldnt1w (%z27.d,%x27)[16byte] %p7/z -> %z25.d -c51ddfbb : ldnt1w z27.d, p7/Z, [z29.d, x29] : ldnt1w (%z29.d,%x29)[16byte] %p7/z -> %z27.d -c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[16byte] %p7/z -> %z31.d +c500c000 : ldnt1w z0.d, p0/Z, [z0.d, x0] : ldnt1w (%z0.d,%x0)[4byte] %p0/z -> %z0.d +c505c482 : ldnt1w z2.d, p1/Z, [z4.d, x5] : ldnt1w (%z4.d,%x5)[4byte] %p1/z -> %z2.d +c507c8c4 : ldnt1w z4.d, p2/Z, [z6.d, x7] : ldnt1w (%z6.d,%x7)[4byte] %p2/z -> %z4.d +c509c906 : ldnt1w z6.d, p2/Z, [z8.d, x9] : ldnt1w (%z8.d,%x9)[4byte] %p2/z -> %z6.d +c50bcd48 : ldnt1w z8.d, p3/Z, [z10.d, x11] : ldnt1w (%z10.d,%x11)[4byte] %p3/z -> %z8.d +c50ccd8a : ldnt1w z10.d, p3/Z, [z12.d, x12] : ldnt1w (%z12.d,%x12)[4byte] %p3/z -> %z10.d +c50ed1cc : ldnt1w z12.d, p4/Z, [z14.d, x14] : ldnt1w (%z14.d,%x14)[4byte] %p4/z -> %z12.d +c510d20e : ldnt1w z14.d, p4/Z, [z16.d, x16] : ldnt1w (%z16.d,%x16)[4byte] %p4/z -> %z14.d +c512d650 : ldnt1w z16.d, p5/Z, [z18.d, x18] : ldnt1w (%z18.d,%x18)[4byte] %p5/z -> %z16.d +c514d671 : ldnt1w z17.d, p5/Z, [z19.d, x20] : ldnt1w (%z19.d,%x20)[4byte] %p5/z -> %z17.d +c516d6b3 : ldnt1w z19.d, p5/Z, [z21.d, x22] : ldnt1w (%z21.d,%x22)[4byte] %p5/z -> %z19.d +c518daf5 : ldnt1w z21.d, p6/Z, [z23.d, x24] : ldnt1w (%z23.d,%x24)[4byte] %p6/z -> %z21.d +c519db37 : ldnt1w z23.d, p6/Z, [z25.d, x25] : ldnt1w (%z25.d,%x25)[4byte] %p6/z -> %z23.d +c51bdf79 : ldnt1w z25.d, p7/Z, [z27.d, x27] : ldnt1w (%z27.d,%x27)[4byte] %p7/z -> %z25.d +c51ddfbb : ldnt1w z27.d, p7/Z, [z29.d, x29] : ldnt1w (%z29.d,%x29)[4byte] %p7/z -> %z27.d +c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[4byte] %p7/z -> %z31.d # MATCH ., /Z, ., . 
(MATCH-P.P.ZZ-_) 45208000 : match p0.b, p0/Z, z0.b, z0.b : match %p0/z %z0.b %z0.b -> %p0.b @@ -4108,6 +4108,72 @@ c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[16by 45dd779b : smullt z27.d, z28.s, z29.s : smullt %z28.s %z29.s -> %z27.d 45df77ff : smullt z31.d, z31.s, z31.s : smullt %z31.s %z31.s -> %z31.d +# SPLICE ., , { ., . } (SPLICE-Z.P.ZZ-Con) +052d8000 : splice z0.b, p0, {z0.b, z1.b} : splice %p0 %z0.b %z1.b -> %z0.b +052d8482 : splice z2.b, p1, {z4.b, z5.b} : splice %p1 %z4.b %z5.b -> %z2.b +052d88c4 : splice z4.b, p2, {z6.b, z7.b} : splice %p2 %z6.b %z7.b -> %z4.b +052d8906 : splice z6.b, p2, {z8.b, z9.b} : splice %p2 %z8.b %z9.b -> %z6.b +052d8d48 : splice z8.b, p3, {z10.b, z11.b} : splice %p3 %z10.b %z11.b -> %z8.b +052d8d8a : splice z10.b, p3, {z12.b, z13.b} : splice %p3 %z12.b %z13.b -> %z10.b +052d91cc : splice z12.b, p4, {z14.b, z15.b} : splice %p4 %z14.b %z15.b -> %z12.b +052d920e : splice z14.b, p4, {z16.b, z17.b} : splice %p4 %z16.b %z17.b -> %z14.b +052d9650 : splice z16.b, p5, {z18.b, z19.b} : splice %p5 %z18.b %z19.b -> %z16.b +052d9671 : splice z17.b, p5, {z19.b, z20.b} : splice %p5 %z19.b %z20.b -> %z17.b +052d96b3 : splice z19.b, p5, {z21.b, z22.b} : splice %p5 %z21.b %z22.b -> %z19.b +052d9af5 : splice z21.b, p6, {z23.b, z24.b} : splice %p6 %z23.b %z24.b -> %z21.b +052d9b37 : splice z23.b, p6, {z25.b, z26.b} : splice %p6 %z25.b %z26.b -> %z23.b +052d9f79 : splice z25.b, p7, {z27.b, z28.b} : splice %p7 %z27.b %z28.b -> %z25.b +052d9fbb : splice z27.b, p7, {z29.b, z30.b} : splice %p7 %z29.b %z30.b -> %z27.b +052d9fff : splice z31.b, p7, {z31.b, z0.b} : splice %p7 %z31.b %z0.b -> %z31.b +056d8000 : splice z0.h, p0, {z0.h, z1.h} : splice %p0 %z0.h %z1.h -> %z0.h +056d8482 : splice z2.h, p1, {z4.h, z5.h} : splice %p1 %z4.h %z5.h -> %z2.h +056d88c4 : splice z4.h, p2, {z6.h, z7.h} : splice %p2 %z6.h %z7.h -> %z4.h +056d8906 : splice z6.h, p2, {z8.h, z9.h} : splice %p2 %z8.h %z9.h -> %z6.h +056d8d48 : splice z8.h, p3, {z10.h, 
z11.h} : splice %p3 %z10.h %z11.h -> %z8.h +056d8d8a : splice z10.h, p3, {z12.h, z13.h} : splice %p3 %z12.h %z13.h -> %z10.h +056d91cc : splice z12.h, p4, {z14.h, z15.h} : splice %p4 %z14.h %z15.h -> %z12.h +056d920e : splice z14.h, p4, {z16.h, z17.h} : splice %p4 %z16.h %z17.h -> %z14.h +056d9650 : splice z16.h, p5, {z18.h, z19.h} : splice %p5 %z18.h %z19.h -> %z16.h +056d9671 : splice z17.h, p5, {z19.h, z20.h} : splice %p5 %z19.h %z20.h -> %z17.h +056d96b3 : splice z19.h, p5, {z21.h, z22.h} : splice %p5 %z21.h %z22.h -> %z19.h +056d9af5 : splice z21.h, p6, {z23.h, z24.h} : splice %p6 %z23.h %z24.h -> %z21.h +056d9b37 : splice z23.h, p6, {z25.h, z26.h} : splice %p6 %z25.h %z26.h -> %z23.h +056d9f79 : splice z25.h, p7, {z27.h, z28.h} : splice %p7 %z27.h %z28.h -> %z25.h +056d9fbb : splice z27.h, p7, {z29.h, z30.h} : splice %p7 %z29.h %z30.h -> %z27.h +056d9fff : splice z31.h, p7, {z31.h, z0.h} : splice %p7 %z31.h %z0.h -> %z31.h +05ad8000 : splice z0.s, p0, {z0.s, z1.s} : splice %p0 %z0.s %z1.s -> %z0.s +05ad8482 : splice z2.s, p1, {z4.s, z5.s} : splice %p1 %z4.s %z5.s -> %z2.s +05ad88c4 : splice z4.s, p2, {z6.s, z7.s} : splice %p2 %z6.s %z7.s -> %z4.s +05ad8906 : splice z6.s, p2, {z8.s, z9.s} : splice %p2 %z8.s %z9.s -> %z6.s +05ad8d48 : splice z8.s, p3, {z10.s, z11.s} : splice %p3 %z10.s %z11.s -> %z8.s +05ad8d8a : splice z10.s, p3, {z12.s, z13.s} : splice %p3 %z12.s %z13.s -> %z10.s +05ad91cc : splice z12.s, p4, {z14.s, z15.s} : splice %p4 %z14.s %z15.s -> %z12.s +05ad920e : splice z14.s, p4, {z16.s, z17.s} : splice %p4 %z16.s %z17.s -> %z14.s +05ad9650 : splice z16.s, p5, {z18.s, z19.s} : splice %p5 %z18.s %z19.s -> %z16.s +05ad9671 : splice z17.s, p5, {z19.s, z20.s} : splice %p5 %z19.s %z20.s -> %z17.s +05ad96b3 : splice z19.s, p5, {z21.s, z22.s} : splice %p5 %z21.s %z22.s -> %z19.s +05ad9af5 : splice z21.s, p6, {z23.s, z24.s} : splice %p6 %z23.s %z24.s -> %z21.s +05ad9b37 : splice z23.s, p6, {z25.s, z26.s} : splice %p6 %z25.s %z26.s -> %z23.s +05ad9f79 : 
splice z25.s, p7, {z27.s, z28.s} : splice %p7 %z27.s %z28.s -> %z25.s +05ad9fbb : splice z27.s, p7, {z29.s, z30.s} : splice %p7 %z29.s %z30.s -> %z27.s +05ad9fff : splice z31.s, p7, {z31.s, z0.s} : splice %p7 %z31.s %z0.s -> %z31.s +05ed8000 : splice z0.d, p0, {z0.d, z1.d} : splice %p0 %z0.d %z1.d -> %z0.d +05ed8482 : splice z2.d, p1, {z4.d, z5.d} : splice %p1 %z4.d %z5.d -> %z2.d +05ed88c4 : splice z4.d, p2, {z6.d, z7.d} : splice %p2 %z6.d %z7.d -> %z4.d +05ed8906 : splice z6.d, p2, {z8.d, z9.d} : splice %p2 %z8.d %z9.d -> %z6.d +05ed8d48 : splice z8.d, p3, {z10.d, z11.d} : splice %p3 %z10.d %z11.d -> %z8.d +05ed8d8a : splice z10.d, p3, {z12.d, z13.d} : splice %p3 %z12.d %z13.d -> %z10.d +05ed91cc : splice z12.d, p4, {z14.d, z15.d} : splice %p4 %z14.d %z15.d -> %z12.d +05ed920e : splice z14.d, p4, {z16.d, z17.d} : splice %p4 %z16.d %z17.d -> %z14.d +05ed9650 : splice z16.d, p5, {z18.d, z19.d} : splice %p5 %z18.d %z19.d -> %z16.d +05ed9671 : splice z17.d, p5, {z19.d, z20.d} : splice %p5 %z19.d %z20.d -> %z17.d +05ed96b3 : splice z19.d, p5, {z21.d, z22.d} : splice %p5 %z21.d %z22.d -> %z19.d +05ed9af5 : splice z21.d, p6, {z23.d, z24.d} : splice %p6 %z23.d %z24.d -> %z21.d +05ed9b37 : splice z23.d, p6, {z25.d, z26.d} : splice %p6 %z25.d %z26.d -> %z23.d +05ed9f79 : splice z25.d, p7, {z27.d, z28.d} : splice %p7 %z27.d %z28.d -> %z25.d +05ed9fbb : splice z27.d, p7, {z29.d, z30.d} : splice %p7 %z29.d %z30.d -> %z27.d +05ed9fff : splice z31.d, p7, {z31.d, z0.d} : splice %p7 %z31.d %z0.d -> %z31.d + # SQABS ., /M, . 
(SQABS-Z.P.Z-_) 4408a000 : sqabs z0.b, p0/M, z0.b : sqabs %p0/m %z0.b -> %z0.b 4408a482 : sqabs z2.b, p1/M, z4.b : sqabs %p1/m %z4.b -> %z2.b @@ -7429,130 +7495,130 @@ c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[16by 45df57ff : ssubwt z31.d, z31.d, z31.s : ssubwt %z31.d %z31.s -> %z31.d # STNT1B { .D }, , [.D{, }] (STNT1B-Z.P.AR-D.64.unscaled) -e4002000 : stnt1b z0.d, p0, [z0.d, x0] : stnt1b %z0.d %p0 -> (%z0.d,%x0)[4byte] -e4052482 : stnt1b z2.d, p1, [z4.d, x5] : stnt1b %z2.d %p1 -> (%z4.d,%x5)[4byte] -e40728c4 : stnt1b z4.d, p2, [z6.d, x7] : stnt1b %z4.d %p2 -> (%z6.d,%x7)[4byte] -e4092906 : stnt1b z6.d, p2, [z8.d, x9] : stnt1b %z6.d %p2 -> (%z8.d,%x9)[4byte] -e40b2d48 : stnt1b z8.d, p3, [z10.d, x11] : stnt1b %z8.d %p3 -> (%z10.d,%x11)[4byte] -e40c2d8a : stnt1b z10.d, p3, [z12.d, x12] : stnt1b %z10.d %p3 -> (%z12.d,%x12)[4byte] -e40e31cc : stnt1b z12.d, p4, [z14.d, x14] : stnt1b %z12.d %p4 -> (%z14.d,%x14)[4byte] -e410320e : stnt1b z14.d, p4, [z16.d, x16] : stnt1b %z14.d %p4 -> (%z16.d,%x16)[4byte] -e4123650 : stnt1b z16.d, p5, [z18.d, x18] : stnt1b %z16.d %p5 -> (%z18.d,%x18)[4byte] -e4143671 : stnt1b z17.d, p5, [z19.d, x20] : stnt1b %z17.d %p5 -> (%z19.d,%x20)[4byte] -e41636b3 : stnt1b z19.d, p5, [z21.d, x22] : stnt1b %z19.d %p5 -> (%z21.d,%x22)[4byte] -e4183af5 : stnt1b z21.d, p6, [z23.d, x24] : stnt1b %z21.d %p6 -> (%z23.d,%x24)[4byte] -e4193b37 : stnt1b z23.d, p6, [z25.d, x25] : stnt1b %z23.d %p6 -> (%z25.d,%x25)[4byte] -e41b3f79 : stnt1b z25.d, p7, [z27.d, x27] : stnt1b %z25.d %p7 -> (%z27.d,%x27)[4byte] -e41d3fbb : stnt1b z27.d, p7, [z29.d, x29] : stnt1b %z27.d %p7 -> (%z29.d,%x29)[4byte] -e41e3fff : stnt1b z31.d, p7, [z31.d, x30] : stnt1b %z31.d %p7 -> (%z31.d,%x30)[4byte] +e4002000 : stnt1b z0.d, p0, [z0.d, x0] : stnt1b %z0.d %p0 -> (%z0.d,%x0)[1byte] +e4052482 : stnt1b z2.d, p1, [z4.d, x5] : stnt1b %z2.d %p1 -> (%z4.d,%x5)[1byte] +e40728c4 : stnt1b z4.d, p2, [z6.d, x7] : stnt1b %z4.d %p2 -> (%z6.d,%x7)[1byte] +e4092906 : stnt1b 
z6.d, p2, [z8.d, x9] : stnt1b %z6.d %p2 -> (%z8.d,%x9)[1byte] +e40b2d48 : stnt1b z8.d, p3, [z10.d, x11] : stnt1b %z8.d %p3 -> (%z10.d,%x11)[1byte] +e40c2d8a : stnt1b z10.d, p3, [z12.d, x12] : stnt1b %z10.d %p3 -> (%z12.d,%x12)[1byte] +e40e31cc : stnt1b z12.d, p4, [z14.d, x14] : stnt1b %z12.d %p4 -> (%z14.d,%x14)[1byte] +e410320e : stnt1b z14.d, p4, [z16.d, x16] : stnt1b %z14.d %p4 -> (%z16.d,%x16)[1byte] +e4123650 : stnt1b z16.d, p5, [z18.d, x18] : stnt1b %z16.d %p5 -> (%z18.d,%x18)[1byte] +e4143671 : stnt1b z17.d, p5, [z19.d, x20] : stnt1b %z17.d %p5 -> (%z19.d,%x20)[1byte] +e41636b3 : stnt1b z19.d, p5, [z21.d, x22] : stnt1b %z19.d %p5 -> (%z21.d,%x22)[1byte] +e4183af5 : stnt1b z21.d, p6, [z23.d, x24] : stnt1b %z21.d %p6 -> (%z23.d,%x24)[1byte] +e4193b37 : stnt1b z23.d, p6, [z25.d, x25] : stnt1b %z23.d %p6 -> (%z25.d,%x25)[1byte] +e41b3f79 : stnt1b z25.d, p7, [z27.d, x27] : stnt1b %z25.d %p7 -> (%z27.d,%x27)[1byte] +e41d3fbb : stnt1b z27.d, p7, [z29.d, x29] : stnt1b %z27.d %p7 -> (%z29.d,%x29)[1byte] +e41e3fff : stnt1b z31.d, p7, [z31.d, x30] : stnt1b %z31.d %p7 -> (%z31.d,%x30)[1byte] # STNT1B { .S }, , [.S{, }] (STNT1B-Z.P.AR-S.x32.unscaled) -e4402000 : stnt1b z0.s, p0, [z0.s, x0] : stnt1b %z0.s %p0 -> (%z0.s,%x0)[8byte] -e4452482 : stnt1b z2.s, p1, [z4.s, x5] : stnt1b %z2.s %p1 -> (%z4.s,%x5)[8byte] -e44728c4 : stnt1b z4.s, p2, [z6.s, x7] : stnt1b %z4.s %p2 -> (%z6.s,%x7)[8byte] -e4492906 : stnt1b z6.s, p2, [z8.s, x9] : stnt1b %z6.s %p2 -> (%z8.s,%x9)[8byte] -e44b2d48 : stnt1b z8.s, p3, [z10.s, x11] : stnt1b %z8.s %p3 -> (%z10.s,%x11)[8byte] -e44c2d8a : stnt1b z10.s, p3, [z12.s, x12] : stnt1b %z10.s %p3 -> (%z12.s,%x12)[8byte] -e44e31cc : stnt1b z12.s, p4, [z14.s, x14] : stnt1b %z12.s %p4 -> (%z14.s,%x14)[8byte] -e450320e : stnt1b z14.s, p4, [z16.s, x16] : stnt1b %z14.s %p4 -> (%z16.s,%x16)[8byte] -e4523650 : stnt1b z16.s, p5, [z18.s, x18] : stnt1b %z16.s %p5 -> (%z18.s,%x18)[8byte] -e4543671 : stnt1b z17.s, p5, [z19.s, x20] : stnt1b %z17.s %p5 -> 
(%z19.s,%x20)[8byte] -e45636b3 : stnt1b z19.s, p5, [z21.s, x22] : stnt1b %z19.s %p5 -> (%z21.s,%x22)[8byte] -e4583af5 : stnt1b z21.s, p6, [z23.s, x24] : stnt1b %z21.s %p6 -> (%z23.s,%x24)[8byte] -e4593b37 : stnt1b z23.s, p6, [z25.s, x25] : stnt1b %z23.s %p6 -> (%z25.s,%x25)[8byte] -e45b3f79 : stnt1b z25.s, p7, [z27.s, x27] : stnt1b %z25.s %p7 -> (%z27.s,%x27)[8byte] -e45d3fbb : stnt1b z27.s, p7, [z29.s, x29] : stnt1b %z27.s %p7 -> (%z29.s,%x29)[8byte] -e45e3fff : stnt1b z31.s, p7, [z31.s, x30] : stnt1b %z31.s %p7 -> (%z31.s,%x30)[8byte] +e4402000 : stnt1b z0.s, p0, [z0.s, x0] : stnt1b %z0.s %p0 -> (%z0.s,%x0)[1byte] +e4452482 : stnt1b z2.s, p1, [z4.s, x5] : stnt1b %z2.s %p1 -> (%z4.s,%x5)[1byte] +e44728c4 : stnt1b z4.s, p2, [z6.s, x7] : stnt1b %z4.s %p2 -> (%z6.s,%x7)[1byte] +e4492906 : stnt1b z6.s, p2, [z8.s, x9] : stnt1b %z6.s %p2 -> (%z8.s,%x9)[1byte] +e44b2d48 : stnt1b z8.s, p3, [z10.s, x11] : stnt1b %z8.s %p3 -> (%z10.s,%x11)[1byte] +e44c2d8a : stnt1b z10.s, p3, [z12.s, x12] : stnt1b %z10.s %p3 -> (%z12.s,%x12)[1byte] +e44e31cc : stnt1b z12.s, p4, [z14.s, x14] : stnt1b %z12.s %p4 -> (%z14.s,%x14)[1byte] +e450320e : stnt1b z14.s, p4, [z16.s, x16] : stnt1b %z14.s %p4 -> (%z16.s,%x16)[1byte] +e4523650 : stnt1b z16.s, p5, [z18.s, x18] : stnt1b %z16.s %p5 -> (%z18.s,%x18)[1byte] +e4543671 : stnt1b z17.s, p5, [z19.s, x20] : stnt1b %z17.s %p5 -> (%z19.s,%x20)[1byte] +e45636b3 : stnt1b z19.s, p5, [z21.s, x22] : stnt1b %z19.s %p5 -> (%z21.s,%x22)[1byte] +e4583af5 : stnt1b z21.s, p6, [z23.s, x24] : stnt1b %z21.s %p6 -> (%z23.s,%x24)[1byte] +e4593b37 : stnt1b z23.s, p6, [z25.s, x25] : stnt1b %z23.s %p6 -> (%z25.s,%x25)[1byte] +e45b3f79 : stnt1b z25.s, p7, [z27.s, x27] : stnt1b %z25.s %p7 -> (%z27.s,%x27)[1byte] +e45d3fbb : stnt1b z27.s, p7, [z29.s, x29] : stnt1b %z27.s %p7 -> (%z29.s,%x29)[1byte] +e45e3fff : stnt1b z31.s, p7, [z31.s, x30] : stnt1b %z31.s %p7 -> (%z31.s,%x30)[1byte] # STNT1D { .D }, , [.D{, }] (STNT1D-Z.P.AR-D.64.unscaled) -e5802000 : stnt1d z0.d, p0, 
[z0.d, x0] : stnt1d %z0.d %p0 -> (%z0.d,%x0)[32byte] -e5852482 : stnt1d z2.d, p1, [z4.d, x5] : stnt1d %z2.d %p1 -> (%z4.d,%x5)[32byte] -e58728c4 : stnt1d z4.d, p2, [z6.d, x7] : stnt1d %z4.d %p2 -> (%z6.d,%x7)[32byte] -e5892906 : stnt1d z6.d, p2, [z8.d, x9] : stnt1d %z6.d %p2 -> (%z8.d,%x9)[32byte] -e58b2d48 : stnt1d z8.d, p3, [z10.d, x11] : stnt1d %z8.d %p3 -> (%z10.d,%x11)[32byte] -e58c2d8a : stnt1d z10.d, p3, [z12.d, x12] : stnt1d %z10.d %p3 -> (%z12.d,%x12)[32byte] -e58e31cc : stnt1d z12.d, p4, [z14.d, x14] : stnt1d %z12.d %p4 -> (%z14.d,%x14)[32byte] -e590320e : stnt1d z14.d, p4, [z16.d, x16] : stnt1d %z14.d %p4 -> (%z16.d,%x16)[32byte] -e5923650 : stnt1d z16.d, p5, [z18.d, x18] : stnt1d %z16.d %p5 -> (%z18.d,%x18)[32byte] -e5943671 : stnt1d z17.d, p5, [z19.d, x20] : stnt1d %z17.d %p5 -> (%z19.d,%x20)[32byte] -e59636b3 : stnt1d z19.d, p5, [z21.d, x22] : stnt1d %z19.d %p5 -> (%z21.d,%x22)[32byte] -e5983af5 : stnt1d z21.d, p6, [z23.d, x24] : stnt1d %z21.d %p6 -> (%z23.d,%x24)[32byte] -e5993b37 : stnt1d z23.d, p6, [z25.d, x25] : stnt1d %z23.d %p6 -> (%z25.d,%x25)[32byte] -e59b3f79 : stnt1d z25.d, p7, [z27.d, x27] : stnt1d %z25.d %p7 -> (%z27.d,%x27)[32byte] -e59d3fbb : stnt1d z27.d, p7, [z29.d, x29] : stnt1d %z27.d %p7 -> (%z29.d,%x29)[32byte] -e59e3fff : stnt1d z31.d, p7, [z31.d, x30] : stnt1d %z31.d %p7 -> (%z31.d,%x30)[32byte] +e5802000 : stnt1d z0.d, p0, [z0.d, x0] : stnt1d %z0.d %p0 -> (%z0.d,%x0)[8byte] +e5852482 : stnt1d z2.d, p1, [z4.d, x5] : stnt1d %z2.d %p1 -> (%z4.d,%x5)[8byte] +e58728c4 : stnt1d z4.d, p2, [z6.d, x7] : stnt1d %z4.d %p2 -> (%z6.d,%x7)[8byte] +e5892906 : stnt1d z6.d, p2, [z8.d, x9] : stnt1d %z6.d %p2 -> (%z8.d,%x9)[8byte] +e58b2d48 : stnt1d z8.d, p3, [z10.d, x11] : stnt1d %z8.d %p3 -> (%z10.d,%x11)[8byte] +e58c2d8a : stnt1d z10.d, p3, [z12.d, x12] : stnt1d %z10.d %p3 -> (%z12.d,%x12)[8byte] +e58e31cc : stnt1d z12.d, p4, [z14.d, x14] : stnt1d %z12.d %p4 -> (%z14.d,%x14)[8byte] +e590320e : stnt1d z14.d, p4, [z16.d, x16] : stnt1d %z14.d %p4 
-> (%z16.d,%x16)[8byte] +e5923650 : stnt1d z16.d, p5, [z18.d, x18] : stnt1d %z16.d %p5 -> (%z18.d,%x18)[8byte] +e5943671 : stnt1d z17.d, p5, [z19.d, x20] : stnt1d %z17.d %p5 -> (%z19.d,%x20)[8byte] +e59636b3 : stnt1d z19.d, p5, [z21.d, x22] : stnt1d %z19.d %p5 -> (%z21.d,%x22)[8byte] +e5983af5 : stnt1d z21.d, p6, [z23.d, x24] : stnt1d %z21.d %p6 -> (%z23.d,%x24)[8byte] +e5993b37 : stnt1d z23.d, p6, [z25.d, x25] : stnt1d %z23.d %p6 -> (%z25.d,%x25)[8byte] +e59b3f79 : stnt1d z25.d, p7, [z27.d, x27] : stnt1d %z25.d %p7 -> (%z27.d,%x27)[8byte] +e59d3fbb : stnt1d z27.d, p7, [z29.d, x29] : stnt1d %z27.d %p7 -> (%z29.d,%x29)[8byte] +e59e3fff : stnt1d z31.d, p7, [z31.d, x30] : stnt1d %z31.d %p7 -> (%z31.d,%x30)[8byte] # STNT1H { .D }, , [.D{, }] (STNT1H-Z.P.AR-D.64.unscaled) -e4802000 : stnt1h z0.d, p0, [z0.d, x0] : stnt1h %z0.d %p0 -> (%z0.d,%x0)[8byte] -e4852482 : stnt1h z2.d, p1, [z4.d, x5] : stnt1h %z2.d %p1 -> (%z4.d,%x5)[8byte] -e48728c4 : stnt1h z4.d, p2, [z6.d, x7] : stnt1h %z4.d %p2 -> (%z6.d,%x7)[8byte] -e4892906 : stnt1h z6.d, p2, [z8.d, x9] : stnt1h %z6.d %p2 -> (%z8.d,%x9)[8byte] -e48b2d48 : stnt1h z8.d, p3, [z10.d, x11] : stnt1h %z8.d %p3 -> (%z10.d,%x11)[8byte] -e48c2d8a : stnt1h z10.d, p3, [z12.d, x12] : stnt1h %z10.d %p3 -> (%z12.d,%x12)[8byte] -e48e31cc : stnt1h z12.d, p4, [z14.d, x14] : stnt1h %z12.d %p4 -> (%z14.d,%x14)[8byte] -e490320e : stnt1h z14.d, p4, [z16.d, x16] : stnt1h %z14.d %p4 -> (%z16.d,%x16)[8byte] -e4923650 : stnt1h z16.d, p5, [z18.d, x18] : stnt1h %z16.d %p5 -> (%z18.d,%x18)[8byte] -e4943671 : stnt1h z17.d, p5, [z19.d, x20] : stnt1h %z17.d %p5 -> (%z19.d,%x20)[8byte] -e49636b3 : stnt1h z19.d, p5, [z21.d, x22] : stnt1h %z19.d %p5 -> (%z21.d,%x22)[8byte] -e4983af5 : stnt1h z21.d, p6, [z23.d, x24] : stnt1h %z21.d %p6 -> (%z23.d,%x24)[8byte] -e4993b37 : stnt1h z23.d, p6, [z25.d, x25] : stnt1h %z23.d %p6 -> (%z25.d,%x25)[8byte] -e49b3f79 : stnt1h z25.d, p7, [z27.d, x27] : stnt1h %z25.d %p7 -> (%z27.d,%x27)[8byte] -e49d3fbb : stnt1h z27.d, p7, 
[z29.d, x29] : stnt1h %z27.d %p7 -> (%z29.d,%x29)[8byte] -e49e3fff : stnt1h z31.d, p7, [z31.d, x30] : stnt1h %z31.d %p7 -> (%z31.d,%x30)[8byte] +e4802000 : stnt1h z0.d, p0, [z0.d, x0] : stnt1h %z0.d %p0 -> (%z0.d,%x0)[2byte] +e4852482 : stnt1h z2.d, p1, [z4.d, x5] : stnt1h %z2.d %p1 -> (%z4.d,%x5)[2byte] +e48728c4 : stnt1h z4.d, p2, [z6.d, x7] : stnt1h %z4.d %p2 -> (%z6.d,%x7)[2byte] +e4892906 : stnt1h z6.d, p2, [z8.d, x9] : stnt1h %z6.d %p2 -> (%z8.d,%x9)[2byte] +e48b2d48 : stnt1h z8.d, p3, [z10.d, x11] : stnt1h %z8.d %p3 -> (%z10.d,%x11)[2byte] +e48c2d8a : stnt1h z10.d, p3, [z12.d, x12] : stnt1h %z10.d %p3 -> (%z12.d,%x12)[2byte] +e48e31cc : stnt1h z12.d, p4, [z14.d, x14] : stnt1h %z12.d %p4 -> (%z14.d,%x14)[2byte] +e490320e : stnt1h z14.d, p4, [z16.d, x16] : stnt1h %z14.d %p4 -> (%z16.d,%x16)[2byte] +e4923650 : stnt1h z16.d, p5, [z18.d, x18] : stnt1h %z16.d %p5 -> (%z18.d,%x18)[2byte] +e4943671 : stnt1h z17.d, p5, [z19.d, x20] : stnt1h %z17.d %p5 -> (%z19.d,%x20)[2byte] +e49636b3 : stnt1h z19.d, p5, [z21.d, x22] : stnt1h %z19.d %p5 -> (%z21.d,%x22)[2byte] +e4983af5 : stnt1h z21.d, p6, [z23.d, x24] : stnt1h %z21.d %p6 -> (%z23.d,%x24)[2byte] +e4993b37 : stnt1h z23.d, p6, [z25.d, x25] : stnt1h %z23.d %p6 -> (%z25.d,%x25)[2byte] +e49b3f79 : stnt1h z25.d, p7, [z27.d, x27] : stnt1h %z25.d %p7 -> (%z27.d,%x27)[2byte] +e49d3fbb : stnt1h z27.d, p7, [z29.d, x29] : stnt1h %z27.d %p7 -> (%z29.d,%x29)[2byte] +e49e3fff : stnt1h z31.d, p7, [z31.d, x30] : stnt1h %z31.d %p7 -> (%z31.d,%x30)[2byte] # STNT1H { .S }, , [.S{, }] (STNT1H-Z.P.AR-S.x32.unscaled) -e4c02000 : stnt1h z0.s, p0, [z0.s, x0] : stnt1h %z0.s %p0 -> (%z0.s,%x0)[16byte] -e4c52482 : stnt1h z2.s, p1, [z4.s, x5] : stnt1h %z2.s %p1 -> (%z4.s,%x5)[16byte] -e4c728c4 : stnt1h z4.s, p2, [z6.s, x7] : stnt1h %z4.s %p2 -> (%z6.s,%x7)[16byte] -e4c92906 : stnt1h z6.s, p2, [z8.s, x9] : stnt1h %z6.s %p2 -> (%z8.s,%x9)[16byte] -e4cb2d48 : stnt1h z8.s, p3, [z10.s, x11] : stnt1h %z8.s %p3 -> (%z10.s,%x11)[16byte] -e4cc2d8a : 
stnt1h z10.s, p3, [z12.s, x12] : stnt1h %z10.s %p3 -> (%z12.s,%x12)[16byte] -e4ce31cc : stnt1h z12.s, p4, [z14.s, x14] : stnt1h %z12.s %p4 -> (%z14.s,%x14)[16byte] -e4d0320e : stnt1h z14.s, p4, [z16.s, x16] : stnt1h %z14.s %p4 -> (%z16.s,%x16)[16byte] -e4d23650 : stnt1h z16.s, p5, [z18.s, x18] : stnt1h %z16.s %p5 -> (%z18.s,%x18)[16byte] -e4d43671 : stnt1h z17.s, p5, [z19.s, x20] : stnt1h %z17.s %p5 -> (%z19.s,%x20)[16byte] -e4d636b3 : stnt1h z19.s, p5, [z21.s, x22] : stnt1h %z19.s %p5 -> (%z21.s,%x22)[16byte] -e4d83af5 : stnt1h z21.s, p6, [z23.s, x24] : stnt1h %z21.s %p6 -> (%z23.s,%x24)[16byte] -e4d93b37 : stnt1h z23.s, p6, [z25.s, x25] : stnt1h %z23.s %p6 -> (%z25.s,%x25)[16byte] -e4db3f79 : stnt1h z25.s, p7, [z27.s, x27] : stnt1h %z25.s %p7 -> (%z27.s,%x27)[16byte] -e4dd3fbb : stnt1h z27.s, p7, [z29.s, x29] : stnt1h %z27.s %p7 -> (%z29.s,%x29)[16byte] -e4de3fff : stnt1h z31.s, p7, [z31.s, x30] : stnt1h %z31.s %p7 -> (%z31.s,%x30)[16byte] +e4c02000 : stnt1h z0.s, p0, [z0.s, x0] : stnt1h %z0.s %p0 -> (%z0.s,%x0)[2byte] +e4c52482 : stnt1h z2.s, p1, [z4.s, x5] : stnt1h %z2.s %p1 -> (%z4.s,%x5)[2byte] +e4c728c4 : stnt1h z4.s, p2, [z6.s, x7] : stnt1h %z4.s %p2 -> (%z6.s,%x7)[2byte] +e4c92906 : stnt1h z6.s, p2, [z8.s, x9] : stnt1h %z6.s %p2 -> (%z8.s,%x9)[2byte] +e4cb2d48 : stnt1h z8.s, p3, [z10.s, x11] : stnt1h %z8.s %p3 -> (%z10.s,%x11)[2byte] +e4cc2d8a : stnt1h z10.s, p3, [z12.s, x12] : stnt1h %z10.s %p3 -> (%z12.s,%x12)[2byte] +e4ce31cc : stnt1h z12.s, p4, [z14.s, x14] : stnt1h %z12.s %p4 -> (%z14.s,%x14)[2byte] +e4d0320e : stnt1h z14.s, p4, [z16.s, x16] : stnt1h %z14.s %p4 -> (%z16.s,%x16)[2byte] +e4d23650 : stnt1h z16.s, p5, [z18.s, x18] : stnt1h %z16.s %p5 -> (%z18.s,%x18)[2byte] +e4d43671 : stnt1h z17.s, p5, [z19.s, x20] : stnt1h %z17.s %p5 -> (%z19.s,%x20)[2byte] +e4d636b3 : stnt1h z19.s, p5, [z21.s, x22] : stnt1h %z19.s %p5 -> (%z21.s,%x22)[2byte] +e4d83af5 : stnt1h z21.s, p6, [z23.s, x24] : stnt1h %z21.s %p6 -> (%z23.s,%x24)[2byte] +e4d93b37 : stnt1h z23.s, 
p6, [z25.s, x25] : stnt1h %z23.s %p6 -> (%z25.s,%x25)[2byte] +e4db3f79 : stnt1h z25.s, p7, [z27.s, x27] : stnt1h %z25.s %p7 -> (%z27.s,%x27)[2byte] +e4dd3fbb : stnt1h z27.s, p7, [z29.s, x29] : stnt1h %z27.s %p7 -> (%z29.s,%x29)[2byte] +e4de3fff : stnt1h z31.s, p7, [z31.s, x30] : stnt1h %z31.s %p7 -> (%z31.s,%x30)[2byte] # STNT1W { .D }, , [.D{, }] (STNT1W-Z.P.AR-D.64.unscaled) -e5002000 : stnt1w z0.d, p0, [z0.d, x0] : stnt1w %z0.d %p0 -> (%z0.d,%x0)[16byte] -e5052482 : stnt1w z2.d, p1, [z4.d, x5] : stnt1w %z2.d %p1 -> (%z4.d,%x5)[16byte] -e50728c4 : stnt1w z4.d, p2, [z6.d, x7] : stnt1w %z4.d %p2 -> (%z6.d,%x7)[16byte] -e5092906 : stnt1w z6.d, p2, [z8.d, x9] : stnt1w %z6.d %p2 -> (%z8.d,%x9)[16byte] -e50b2d48 : stnt1w z8.d, p3, [z10.d, x11] : stnt1w %z8.d %p3 -> (%z10.d,%x11)[16byte] -e50c2d8a : stnt1w z10.d, p3, [z12.d, x12] : stnt1w %z10.d %p3 -> (%z12.d,%x12)[16byte] -e50e31cc : stnt1w z12.d, p4, [z14.d, x14] : stnt1w %z12.d %p4 -> (%z14.d,%x14)[16byte] -e510320e : stnt1w z14.d, p4, [z16.d, x16] : stnt1w %z14.d %p4 -> (%z16.d,%x16)[16byte] -e5123650 : stnt1w z16.d, p5, [z18.d, x18] : stnt1w %z16.d %p5 -> (%z18.d,%x18)[16byte] -e5143671 : stnt1w z17.d, p5, [z19.d, x20] : stnt1w %z17.d %p5 -> (%z19.d,%x20)[16byte] -e51636b3 : stnt1w z19.d, p5, [z21.d, x22] : stnt1w %z19.d %p5 -> (%z21.d,%x22)[16byte] -e5183af5 : stnt1w z21.d, p6, [z23.d, x24] : stnt1w %z21.d %p6 -> (%z23.d,%x24)[16byte] -e5193b37 : stnt1w z23.d, p6, [z25.d, x25] : stnt1w %z23.d %p6 -> (%z25.d,%x25)[16byte] -e51b3f79 : stnt1w z25.d, p7, [z27.d, x27] : stnt1w %z25.d %p7 -> (%z27.d,%x27)[16byte] -e51d3fbb : stnt1w z27.d, p7, [z29.d, x29] : stnt1w %z27.d %p7 -> (%z29.d,%x29)[16byte] -e51e3fff : stnt1w z31.d, p7, [z31.d, x30] : stnt1w %z31.d %p7 -> (%z31.d,%x30)[16byte] +e5002000 : stnt1w z0.d, p0, [z0.d, x0] : stnt1w %z0.d %p0 -> (%z0.d,%x0)[4byte] +e5052482 : stnt1w z2.d, p1, [z4.d, x5] : stnt1w %z2.d %p1 -> (%z4.d,%x5)[4byte] +e50728c4 : stnt1w z4.d, p2, [z6.d, x7] : stnt1w %z4.d %p2 -> 
(%z6.d,%x7)[4byte] +e5092906 : stnt1w z6.d, p2, [z8.d, x9] : stnt1w %z6.d %p2 -> (%z8.d,%x9)[4byte] +e50b2d48 : stnt1w z8.d, p3, [z10.d, x11] : stnt1w %z8.d %p3 -> (%z10.d,%x11)[4byte] +e50c2d8a : stnt1w z10.d, p3, [z12.d, x12] : stnt1w %z10.d %p3 -> (%z12.d,%x12)[4byte] +e50e31cc : stnt1w z12.d, p4, [z14.d, x14] : stnt1w %z12.d %p4 -> (%z14.d,%x14)[4byte] +e510320e : stnt1w z14.d, p4, [z16.d, x16] : stnt1w %z14.d %p4 -> (%z16.d,%x16)[4byte] +e5123650 : stnt1w z16.d, p5, [z18.d, x18] : stnt1w %z16.d %p5 -> (%z18.d,%x18)[4byte] +e5143671 : stnt1w z17.d, p5, [z19.d, x20] : stnt1w %z17.d %p5 -> (%z19.d,%x20)[4byte] +e51636b3 : stnt1w z19.d, p5, [z21.d, x22] : stnt1w %z19.d %p5 -> (%z21.d,%x22)[4byte] +e5183af5 : stnt1w z21.d, p6, [z23.d, x24] : stnt1w %z21.d %p6 -> (%z23.d,%x24)[4byte] +e5193b37 : stnt1w z23.d, p6, [z25.d, x25] : stnt1w %z23.d %p6 -> (%z25.d,%x25)[4byte] +e51b3f79 : stnt1w z25.d, p7, [z27.d, x27] : stnt1w %z25.d %p7 -> (%z27.d,%x27)[4byte] +e51d3fbb : stnt1w z27.d, p7, [z29.d, x29] : stnt1w %z27.d %p7 -> (%z29.d,%x29)[4byte] +e51e3fff : stnt1w z31.d, p7, [z31.d, x30] : stnt1w %z31.d %p7 -> (%z31.d,%x30)[4byte] # STNT1W { .S }, , [.S{, }] (STNT1W-Z.P.AR-S.x32.unscaled) -e5402000 : stnt1w z0.s, p0, [z0.s, x0] : stnt1w %z0.s %p0 -> (%z0.s,%x0)[32byte] -e5452482 : stnt1w z2.s, p1, [z4.s, x5] : stnt1w %z2.s %p1 -> (%z4.s,%x5)[32byte] -e54728c4 : stnt1w z4.s, p2, [z6.s, x7] : stnt1w %z4.s %p2 -> (%z6.s,%x7)[32byte] -e5492906 : stnt1w z6.s, p2, [z8.s, x9] : stnt1w %z6.s %p2 -> (%z8.s,%x9)[32byte] -e54b2d48 : stnt1w z8.s, p3, [z10.s, x11] : stnt1w %z8.s %p3 -> (%z10.s,%x11)[32byte] -e54c2d8a : stnt1w z10.s, p3, [z12.s, x12] : stnt1w %z10.s %p3 -> (%z12.s,%x12)[32byte] -e54e31cc : stnt1w z12.s, p4, [z14.s, x14] : stnt1w %z12.s %p4 -> (%z14.s,%x14)[32byte] -e550320e : stnt1w z14.s, p4, [z16.s, x16] : stnt1w %z14.s %p4 -> (%z16.s,%x16)[32byte] -e5523650 : stnt1w z16.s, p5, [z18.s, x18] : stnt1w %z16.s %p5 -> (%z18.s,%x18)[32byte] -e5543671 : stnt1w z17.s, p5, 
[z19.s, x20] : stnt1w %z17.s %p5 -> (%z19.s,%x20)[32byte] -e55636b3 : stnt1w z19.s, p5, [z21.s, x22] : stnt1w %z19.s %p5 -> (%z21.s,%x22)[32byte] -e5583af5 : stnt1w z21.s, p6, [z23.s, x24] : stnt1w %z21.s %p6 -> (%z23.s,%x24)[32byte] -e5593b37 : stnt1w z23.s, p6, [z25.s, x25] : stnt1w %z23.s %p6 -> (%z25.s,%x25)[32byte] -e55b3f79 : stnt1w z25.s, p7, [z27.s, x27] : stnt1w %z25.s %p7 -> (%z27.s,%x27)[32byte] -e55d3fbb : stnt1w z27.s, p7, [z29.s, x29] : stnt1w %z27.s %p7 -> (%z29.s,%x29)[32byte] -e55e3fff : stnt1w z31.s, p7, [z31.s, x30] : stnt1w %z31.s %p7 -> (%z31.s,%x30)[32byte] +e5402000 : stnt1w z0.s, p0, [z0.s, x0] : stnt1w %z0.s %p0 -> (%z0.s,%x0)[4byte] +e5452482 : stnt1w z2.s, p1, [z4.s, x5] : stnt1w %z2.s %p1 -> (%z4.s,%x5)[4byte] +e54728c4 : stnt1w z4.s, p2, [z6.s, x7] : stnt1w %z4.s %p2 -> (%z6.s,%x7)[4byte] +e5492906 : stnt1w z6.s, p2, [z8.s, x9] : stnt1w %z6.s %p2 -> (%z8.s,%x9)[4byte] +e54b2d48 : stnt1w z8.s, p3, [z10.s, x11] : stnt1w %z8.s %p3 -> (%z10.s,%x11)[4byte] +e54c2d8a : stnt1w z10.s, p3, [z12.s, x12] : stnt1w %z10.s %p3 -> (%z12.s,%x12)[4byte] +e54e31cc : stnt1w z12.s, p4, [z14.s, x14] : stnt1w %z12.s %p4 -> (%z14.s,%x14)[4byte] +e550320e : stnt1w z14.s, p4, [z16.s, x16] : stnt1w %z14.s %p4 -> (%z16.s,%x16)[4byte] +e5523650 : stnt1w z16.s, p5, [z18.s, x18] : stnt1w %z16.s %p5 -> (%z18.s,%x18)[4byte] +e5543671 : stnt1w z17.s, p5, [z19.s, x20] : stnt1w %z17.s %p5 -> (%z19.s,%x20)[4byte] +e55636b3 : stnt1w z19.s, p5, [z21.s, x22] : stnt1w %z19.s %p5 -> (%z21.s,%x22)[4byte] +e5583af5 : stnt1w z21.s, p6, [z23.s, x24] : stnt1w %z21.s %p6 -> (%z23.s,%x24)[4byte] +e5593b37 : stnt1w z23.s, p6, [z25.s, x25] : stnt1w %z23.s %p6 -> (%z25.s,%x25)[4byte] +e55b3f79 : stnt1w z25.s, p7, [z27.s, x27] : stnt1w %z25.s %p7 -> (%z27.s,%x27)[4byte] +e55d3fbb : stnt1w z27.s, p7, [z29.s, x29] : stnt1w %z27.s %p7 -> (%z29.s,%x29)[4byte] +e55e3fff : stnt1w z31.s, p7, [z31.s, x30] : stnt1w %z31.s %p7 -> (%z31.s,%x30)[4byte] # SUBHNB ., ., . 
(SUBHNB-Z.ZZ-_) 45607000 : subhnb z0.b, z0.h, z0.h : subhnb %z0.h %z0.h -> %z0.b @@ -7720,6 +7786,72 @@ e55e3fff : stnt1w z31.s, p7, [z31.s, x30] : stnt1w %z31.s %p7 -> (%z3 44dc9fbb : suqadd z27.d, p7/M, z27.d, z29.d : suqadd %p7/m %z27.d %z29.d -> %z27.d 44dc9fff : suqadd z31.d, p7/M, z31.d, z31.d : suqadd %p7/m %z31.d %z31.d -> %z31.d +# TBL ., { ., . }, . (TBL-Z.ZZ-2) +05202800 : tbl z0.b, {z0.b, z1.b}, z0.b : tbl %z0.b %z1.b %z0.b -> %z0.b +05242862 : tbl z2.b, {z3.b, z4.b}, z4.b : tbl %z3.b %z4.b %z4.b -> %z2.b +052628a4 : tbl z4.b, {z5.b, z6.b}, z6.b : tbl %z5.b %z6.b %z6.b -> %z4.b +052828e6 : tbl z6.b, {z7.b, z8.b}, z8.b : tbl %z7.b %z8.b %z8.b -> %z6.b +052a2928 : tbl z8.b, {z9.b, z10.b}, z10.b : tbl %z9.b %z10.b %z10.b -> %z8.b +052c296a : tbl z10.b, {z11.b, z12.b}, z12.b : tbl %z11.b %z12.b %z12.b -> %z10.b +052e29ac : tbl z12.b, {z13.b, z14.b}, z14.b : tbl %z13.b %z14.b %z14.b -> %z12.b +053029ee : tbl z14.b, {z15.b, z16.b}, z16.b : tbl %z15.b %z16.b %z16.b -> %z14.b +05322a30 : tbl z16.b, {z17.b, z18.b}, z18.b : tbl %z17.b %z18.b %z18.b -> %z16.b +05332a51 : tbl z17.b, {z18.b, z19.b}, z19.b : tbl %z18.b %z19.b %z19.b -> %z17.b +05352a93 : tbl z19.b, {z20.b, z21.b}, z21.b : tbl %z20.b %z21.b %z21.b -> %z19.b +05372ad5 : tbl z21.b, {z22.b, z23.b}, z23.b : tbl %z22.b %z23.b %z23.b -> %z21.b +05392b17 : tbl z23.b, {z24.b, z25.b}, z25.b : tbl %z24.b %z25.b %z25.b -> %z23.b +053b2b59 : tbl z25.b, {z26.b, z27.b}, z27.b : tbl %z26.b %z27.b %z27.b -> %z25.b +053d2b9b : tbl z27.b, {z28.b, z29.b}, z29.b : tbl %z28.b %z29.b %z29.b -> %z27.b +053f2bff : tbl z31.b, {z31.b, z0.b}, z31.b : tbl %z31.b %z0.b %z31.b -> %z31.b +05602800 : tbl z0.h, {z0.h, z1.h}, z0.h : tbl %z0.h %z1.h %z0.h -> %z0.h +05642862 : tbl z2.h, {z3.h, z4.h}, z4.h : tbl %z3.h %z4.h %z4.h -> %z2.h +056628a4 : tbl z4.h, {z5.h, z6.h}, z6.h : tbl %z5.h %z6.h %z6.h -> %z4.h +056828e6 : tbl z6.h, {z7.h, z8.h}, z8.h : tbl %z7.h %z8.h %z8.h -> %z6.h +056a2928 : tbl z8.h, {z9.h, z10.h}, z10.h : tbl %z9.h 
%z10.h %z10.h -> %z8.h +056c296a : tbl z10.h, {z11.h, z12.h}, z12.h : tbl %z11.h %z12.h %z12.h -> %z10.h +056e29ac : tbl z12.h, {z13.h, z14.h}, z14.h : tbl %z13.h %z14.h %z14.h -> %z12.h +057029ee : tbl z14.h, {z15.h, z16.h}, z16.h : tbl %z15.h %z16.h %z16.h -> %z14.h +05722a30 : tbl z16.h, {z17.h, z18.h}, z18.h : tbl %z17.h %z18.h %z18.h -> %z16.h +05732a51 : tbl z17.h, {z18.h, z19.h}, z19.h : tbl %z18.h %z19.h %z19.h -> %z17.h +05752a93 : tbl z19.h, {z20.h, z21.h}, z21.h : tbl %z20.h %z21.h %z21.h -> %z19.h +05772ad5 : tbl z21.h, {z22.h, z23.h}, z23.h : tbl %z22.h %z23.h %z23.h -> %z21.h +05792b17 : tbl z23.h, {z24.h, z25.h}, z25.h : tbl %z24.h %z25.h %z25.h -> %z23.h +057b2b59 : tbl z25.h, {z26.h, z27.h}, z27.h : tbl %z26.h %z27.h %z27.h -> %z25.h +057d2b9b : tbl z27.h, {z28.h, z29.h}, z29.h : tbl %z28.h %z29.h %z29.h -> %z27.h +057f2bff : tbl z31.h, {z31.h, z0.h}, z31.h : tbl %z31.h %z0.h %z31.h -> %z31.h +05a02800 : tbl z0.s, {z0.s, z1.s}, z0.s : tbl %z0.s %z1.s %z0.s -> %z0.s +05a42862 : tbl z2.s, {z3.s, z4.s}, z4.s : tbl %z3.s %z4.s %z4.s -> %z2.s +05a628a4 : tbl z4.s, {z5.s, z6.s}, z6.s : tbl %z5.s %z6.s %z6.s -> %z4.s +05a828e6 : tbl z6.s, {z7.s, z8.s}, z8.s : tbl %z7.s %z8.s %z8.s -> %z6.s +05aa2928 : tbl z8.s, {z9.s, z10.s}, z10.s : tbl %z9.s %z10.s %z10.s -> %z8.s +05ac296a : tbl z10.s, {z11.s, z12.s}, z12.s : tbl %z11.s %z12.s %z12.s -> %z10.s +05ae29ac : tbl z12.s, {z13.s, z14.s}, z14.s : tbl %z13.s %z14.s %z14.s -> %z12.s +05b029ee : tbl z14.s, {z15.s, z16.s}, z16.s : tbl %z15.s %z16.s %z16.s -> %z14.s +05b22a30 : tbl z16.s, {z17.s, z18.s}, z18.s : tbl %z17.s %z18.s %z18.s -> %z16.s +05b32a51 : tbl z17.s, {z18.s, z19.s}, z19.s : tbl %z18.s %z19.s %z19.s -> %z17.s +05b52a93 : tbl z19.s, {z20.s, z21.s}, z21.s : tbl %z20.s %z21.s %z21.s -> %z19.s +05b72ad5 : tbl z21.s, {z22.s, z23.s}, z23.s : tbl %z22.s %z23.s %z23.s -> %z21.s +05b92b17 : tbl z23.s, {z24.s, z25.s}, z25.s : tbl %z24.s %z25.s %z25.s -> %z23.s +05bb2b59 : tbl z25.s, {z26.s, z27.s}, z27.s : 
tbl %z26.s %z27.s %z27.s -> %z25.s +05bd2b9b : tbl z27.s, {z28.s, z29.s}, z29.s : tbl %z28.s %z29.s %z29.s -> %z27.s +05bf2bff : tbl z31.s, {z31.s, z0.s}, z31.s : tbl %z31.s %z0.s %z31.s -> %z31.s +05e02800 : tbl z0.d, {z0.d, z1.d}, z0.d : tbl %z0.d %z1.d %z0.d -> %z0.d +05e42862 : tbl z2.d, {z3.d, z4.d}, z4.d : tbl %z3.d %z4.d %z4.d -> %z2.d +05e628a4 : tbl z4.d, {z5.d, z6.d}, z6.d : tbl %z5.d %z6.d %z6.d -> %z4.d +05e828e6 : tbl z6.d, {z7.d, z8.d}, z8.d : tbl %z7.d %z8.d %z8.d -> %z6.d +05ea2928 : tbl z8.d, {z9.d, z10.d}, z10.d : tbl %z9.d %z10.d %z10.d -> %z8.d +05ec296a : tbl z10.d, {z11.d, z12.d}, z12.d : tbl %z11.d %z12.d %z12.d -> %z10.d +05ee29ac : tbl z12.d, {z13.d, z14.d}, z14.d : tbl %z13.d %z14.d %z14.d -> %z12.d +05f029ee : tbl z14.d, {z15.d, z16.d}, z16.d : tbl %z15.d %z16.d %z16.d -> %z14.d +05f22a30 : tbl z16.d, {z17.d, z18.d}, z18.d : tbl %z17.d %z18.d %z18.d -> %z16.d +05f32a51 : tbl z17.d, {z18.d, z19.d}, z19.d : tbl %z18.d %z19.d %z19.d -> %z17.d +05f52a93 : tbl z19.d, {z20.d, z21.d}, z21.d : tbl %z20.d %z21.d %z21.d -> %z19.d +05f72ad5 : tbl z21.d, {z22.d, z23.d}, z23.d : tbl %z22.d %z23.d %z23.d -> %z21.d +05f92b17 : tbl z23.d, {z24.d, z25.d}, z25.d : tbl %z24.d %z25.d %z25.d -> %z23.d +05fb2b59 : tbl z25.d, {z26.d, z27.d}, z27.d : tbl %z26.d %z27.d %z27.d -> %z25.d +05fd2b9b : tbl z27.d, {z28.d, z29.d}, z29.d : tbl %z28.d %z29.d %z29.d -> %z27.d +05ff2bff : tbl z31.d, {z31.d, z0.d}, z31.d : tbl %z31.d %z0.d %z31.d -> %z31.d + # TBX ., ., . (TBX-Z.ZZ-_) 05202c00 : tbx z0.b, z0.b, z0.b : tbx %z0.b %z0.b %z0.b -> %z0.b 05242c62 : tbx z2.b, z3.b, z4.b : tbx %z2.b %z3.b %z4.b -> %z2.b diff --git a/suite/tests/api/dis-a64-v85.txt b/suite/tests/api/dis-a64-v85.txt new file mode 100644 index 00000000000..9bbbe2e261a --- /dev/null +++ b/suite/tests/api/dis-a64-v85.txt @@ -0,0 +1,40 @@ +# ********************************************************** +# Copyright (c) 2024 ARM Limited. All rights reserved. 
+# ********************************************************** + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of ARM Limited nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL ARM LIMITED OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. + +# Test data for DynamoRIO's AArch64 v8.5 encoder, decoder and disassembler. +# See dis-a64-sve.txt for the formatting. 
+ +# Tests: + +d503241f : bti #0 : bti $0x00 +d503245f : bti #1 : bti $0x01 +d503249f : bti #2 : bti $0x02 +d50324df : bti #3 : bti $0x03 + diff --git a/suite/tests/api/drdecode_x86.c b/suite/tests/api/drdecode_x86.c index fc53a4e3e31..1027d2233b1 100644 --- a/suite/tests/api/drdecode_x86.c +++ b/suite/tests/api/drdecode_x86.c @@ -34,14 +34,16 @@ #include "configure.h" #include "dr_api.h" +#include "tools.h" #include #include +#include #define GD GLOBAL_DCONTEXT -#define ASSERT(x) \ - ((void)((!(x)) ? (printf("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \ - abort(), 0) \ +#define ASSERT(x) \ + ((void)((!(x)) ? (print("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \ + abort(), 0) \ : 0)) #define BUFFER_SIZE_BYTES(buf) sizeof(buf) @@ -155,13 +157,25 @@ test_noalloc(void) */ } -#define CHECK_CATEGORY(dcontext, instr, pc, category) \ - ASSERT(instr_encode(dcontext, instr, pc) - pc < BUFFER_SIZE_ELEMENTS(pc)); \ - instr_reset(dcontext, instr); \ - instr_set_operands_valid(instr, true); \ - ASSERT(decode(dcontext, pc, instr) != NULL); \ - ASSERT(instr_get_category(instr) == category); \ - instr_destroy(dcontext, instr); +#define CHECK_CATEGORY(dcontext, instr, pc, categories, category_names) \ + do { \ + byte *instr_encoded_pc = instr_encode(dcontext, instr, pc); \ + ASSERT(instr_encoded_pc - pc < BUFFER_SIZE_ELEMENTS(pc)); \ + instr_reset(dcontext, instr); \ + instr_set_operands_valid(instr, true); \ + byte *instr_decoded_pc = decode(dcontext, pc, instr); \ + ASSERT(instr_decoded_pc != NULL); \ + for (int i = 0; i < BUFFER_SIZE_ELEMENTS(categories); ++i) { \ + if (categories[i] == DR_INSTR_CATEGORY_UNCATEGORIZED) { \ + ASSERT(instr_get_category(instr) == categories[i]); \ + } else { \ + ASSERT(TESTANY(categories[i], instr_get_category(instr))); \ + } \ + ASSERT(strncmp(instr_get_category_name(categories[i]), category_names[i], \ + strlen(category_names[i])) == 0); \ + } \ + instr_destroy(dcontext, instr); \ + } while (0); static void 
test_categories(void) @@ -172,17 +186,36 @@ test_categories(void) /* 55 OP_mov_ld */ instr = XINST_CREATE_load(GD, opnd_create_reg(DR_REG_XAX), OPND_CREATE_MEMPTR(DR_REG_XAX, 42)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_LOAD); + const dr_instr_category_t categories_load[] = { DR_INSTR_CATEGORY_LOAD }; + const char *category_names_load[] = { "load" }; + CHECK_CATEGORY(GD, instr, buf, categories_load, category_names_load); /* 14 OP_cmp */ instr = XINST_CREATE_cmp(GD, opnd_create_reg(DR_REG_EAX), opnd_create_reg(DR_REG_EAX)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_MATH); + const dr_instr_category_t categories_cmp[] = { DR_INSTR_CATEGORY_MATH }; + const char *category_names_cmp[] = { "math" }; + CHECK_CATEGORY(GD, instr, buf, categories_cmp, category_names_cmp); /* 46 OP_jmp */ instr_t *after_callee = INSTR_CREATE_label(GD); instr = XINST_CREATE_jump(GD, opnd_create_instr(after_callee)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_BRANCH); + const dr_instr_category_t categories_jmp[] = { DR_INSTR_CATEGORY_BRANCH }; + const char *category_names_jmp[] = { "branch" }; + CHECK_CATEGORY(GD, instr, buf, categories_jmp, category_names_jmp); + + /* OP_fwait */ + instr = INSTR_CREATE_fwait(GD); + const dr_instr_category_t categories_fwait[] = { DR_INSTR_CATEGORY_FP, + DR_INSTR_CATEGORY_STATE }; + const char *category_names_fwait[] = { "fp", "state" }; + CHECK_CATEGORY(GD, instr, buf, categories_fwait, category_names_fwait); + + /* OP_in */ + instr = INSTR_CREATE_in_1(GD); + const dr_instr_category_t categories_in[] = { DR_INSTR_CATEGORY_UNCATEGORIZED }; + const char *category_names_in[] = { "uncategorized" }; + CHECK_CATEGORY(GD, instr, buf, categories_in, category_names_in); } static void @@ -241,7 +274,7 @@ main() test_store_source(); - printf("done\n"); + print("done\n"); return 0; } diff --git a/suite/tests/api/ir_aarch64.c b/suite/tests/api/ir_aarch64.c index a0932d6feec..15ad4f97448 100644 --- a/suite/tests/api/ir_aarch64.c +++ 
b/suite/tests/api/ir_aarch64.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2023 Google, Inc. All rights reserved. + * Copyright (c) 2015-2024 Google, Inc. All rights reserved. * Copyright (c) 2016 ARM Limited. All rights reserved. * **********************************************************/ @@ -6924,6 +6924,23 @@ test_internal_encode(void *dcontext) instr_destroy(dcontext, jmp); } +static void +test_vector_length(void *dcontext) +{ + /* XXX i#6575: Add further tests. For now, make sure these are exported. */ + const int new_len = 2048; + /* XXX: Make this test work when on actual SVE hardware where this API routine + * is documented as failing. + */ + bool res = dr_set_sve_vector_length(new_len); + ASSERT(res); + ASSERT(dr_get_sve_vector_length() == new_len); + /* Ensure invalid lengths return failure. */ + ASSERT(!dr_set_sve_vector_length(0)); + ASSERT(!dr_set_sve_vector_length(1)); + ASSERT(!dr_set_sve_vector_length(4096)); +} + int main(int argc, char *argv[]) { @@ -7102,6 +7119,8 @@ main(int argc, char *argv[]) test_internal_encode(dcontext); + test_vector_length(dcontext); + print("All tests complete\n"); #ifndef STANDALONE_DECODER dr_standalone_exit(); diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index dfb18841b36..e33100dd0bc 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -8088,7 +8088,7 @@ TEST_INSTR(ext_sve) opnd_create_immed_uint(imm8_0_0[i], OPSZ_1)); } -TEST_INSTR(splice_sve) +TEST_INSTR(splice_sve_des) { /* Testing SPLICE ., , ., . 
*/ const char *const expected_0_0[6] = { @@ -8096,7 +8096,7 @@ TEST_INSTR(splice_sve) "splice %p3 %z10.b %z12.b -> %z10.b", "splice %p5 %z16.b %z18.b -> %z16.b", "splice %p6 %z21.b %z23.b -> %z21.b", "splice %p7 %z31.b %z31.b -> %z31.b", }; - TEST_LOOP(splice, splice_sve, 6, expected_0_0[i], + TEST_LOOP(splice, splice_sve_des, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1)); @@ -8106,7 +8106,7 @@ TEST_INSTR(splice_sve) "splice %p3 %z10.h %z12.h -> %z10.h", "splice %p5 %z16.h %z18.h -> %z16.h", "splice %p6 %z21.h %z23.h -> %z21.h", "splice %p7 %z31.h %z31.h -> %z31.h", }; - TEST_LOOP(splice, splice_sve, 6, expected_0_1[i], + TEST_LOOP(splice, splice_sve_des, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2)); @@ -8116,7 +8116,7 @@ TEST_INSTR(splice_sve) "splice %p3 %z10.s %z12.s -> %z10.s", "splice %p5 %z16.s %z18.s -> %z16.s", "splice %p6 %z21.s %z23.s -> %z21.s", "splice %p7 %z31.s %z31.s -> %z31.s", }; - TEST_LOOP(splice, splice_sve, 6, expected_0_2[i], + TEST_LOOP(splice, splice_sve_des, 6, expected_0_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4)); @@ -8126,7 +8126,7 @@ TEST_INSTR(splice_sve) "splice %p3 %z10.d %z12.d -> %z10.d", "splice %p5 %z16.d %z18.d -> %z16.d", "splice %p6 %z21.d %z23.d -> %z21.d", "splice %p7 %z31.d %z31.d -> %z31.d", }; - TEST_LOOP(splice, splice_sve, 6, expected_0_3[i], + TEST_LOOP(splice, splice_sve_des, 6, expected_0_3[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8)); @@ -13765,1281 +13765,1281 @@ TEST_INSTR(ldff1b_sve_pred) { /* Testing 
LDFF1B { .H }, /Z, [{, }] */ const char *const expected_0_0[6] = { - "ldff1b (%x0,%x0)[16byte] %p0/z -> %z0.h", - "ldff1b (%x7,%x8)[16byte] %p2/z -> %z5.h", - "ldff1b (%x12,%x13)[16byte] %p3/z -> %z10.h", - "ldff1b (%x17,%x18)[16byte] %p5/z -> %z16.h", - "ldff1b (%x22,%x23)[16byte] %p6/z -> %z21.h", - "ldff1b (%sp,%x30)[16byte] %p7/z -> %z31.h", + "ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.h", + "ldff1b (%x7,%x8)[1byte] %p2/z -> %z5.h", + "ldff1b (%x12,%x13)[1byte] %p3/z -> %z10.h", + "ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.h", + "ldff1b (%x22,%x23)[1byte] %p6/z -> %z21.h", + "ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1B { .S }, /Z, [{, }] */ const char *const expected_1_0[6] = { - "ldff1b (%x0,%x0)[8byte] %p0/z -> %z0.s", - "ldff1b (%x7,%x8)[8byte] %p2/z -> %z5.s", - "ldff1b (%x12,%x13)[8byte] %p3/z -> %z10.s", - "ldff1b (%x17,%x18)[8byte] %p5/z -> %z16.s", - "ldff1b (%x22,%x23)[8byte] %p6/z -> %z21.s", - "ldff1b (%sp,%x30)[8byte] %p7/z -> %z31.s", + "ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.s", + "ldff1b (%x7,%x8)[1byte] %p2/z -> %z5.s", + "ldff1b (%x12,%x13)[1byte] %p3/z -> %z10.s", + "ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.s", + "ldff1b (%x22,%x23)[1byte] %p6/z -> %z21.s", + "ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1B { .D }, /Z, [{, }] */ const char *const expected_2_0[6] = { - "ldff1b 
(%x0,%x0)[4byte] %p0/z -> %z0.d", - "ldff1b (%x7,%x8)[4byte] %p2/z -> %z5.d", - "ldff1b (%x12,%x13)[4byte] %p3/z -> %z10.d", - "ldff1b (%x17,%x18)[4byte] %p5/z -> %z16.d", - "ldff1b (%x22,%x23)[4byte] %p6/z -> %z21.d", - "ldff1b (%sp,%x30)[4byte] %p7/z -> %z31.d", + "ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.d", + "ldff1b (%x7,%x8)[1byte] %p2/z -> %z5.d", + "ldff1b (%x12,%x13)[1byte] %p3/z -> %z10.d", + "ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.d", + "ldff1b (%x22,%x23)[1byte] %p6/z -> %z21.d", + "ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1B { .B }, /Z, [{, }] */ const char *const expected_3_0[6] = { - "ldff1b (%x0,%x0)[32byte] %p0/z -> %z0.b", - "ldff1b (%x7,%x8)[32byte] %p2/z -> %z5.b", - "ldff1b (%x12,%x13)[32byte] %p3/z -> %z10.b", - "ldff1b (%x17,%x18)[32byte] %p5/z -> %z16.b", - "ldff1b (%x22,%x23)[32byte] %p6/z -> %z21.b", - "ldff1b (%sp,%x30)[32byte] %p7/z -> %z31.b", + "ldff1b (%x0,%x0)[1byte] %p0/z -> %z0.b", + "ldff1b (%x7,%x8)[1byte] %p2/z -> %z5.b", + "ldff1b (%x12,%x13)[1byte] %p3/z -> %z10.b", + "ldff1b (%x17,%x18)[1byte] %p5/z -> %z16.b", + "ldff1b (%x22,%x23)[1byte] %p6/z -> %z21.b", + "ldff1b (%sp,%x30)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1B { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 8, 13, 19, 24, 31 }; const char *const expected_4_0[6] = { - "ldff1b (%z0.s)[8byte] %p0/z -> %z0.s", 
- "ldff1b +0x08(%z7.s)[8byte] %p2/z -> %z5.s", - "ldff1b +0x0d(%z12.s)[8byte] %p3/z -> %z10.s", - "ldff1b +0x13(%z18.s)[8byte] %p5/z -> %z16.s", - "ldff1b +0x18(%z23.s)[8byte] %p6/z -> %z21.s", - "ldff1b +0x1f(%z31.s)[8byte] %p7/z -> %z31.s", + "ldff1b (%z0.s)[1byte] %p0/z -> %z0.s", + "ldff1b +0x08(%z7.s)[1byte] %p2/z -> %z5.s", + "ldff1b +0x0d(%z12.s)[1byte] %p3/z -> %z10.s", + "ldff1b +0x13(%z18.s)[1byte] %p5/z -> %z16.s", + "ldff1b +0x18(%z23.s)[1byte] %p6/z -> %z21.s", + "ldff1b +0x1f(%z31.s)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_1, 0)); /* Testing LDFF1B { .D }, /Z, [.D{, #}] */ const char *const expected_4_1[6] = { - "ldff1b (%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1b +0x08(%z7.d)[4byte] %p2/z -> %z5.d", - "ldff1b +0x0d(%z12.d)[4byte] %p3/z -> %z10.d", - "ldff1b +0x13(%z18.d)[4byte] %p5/z -> %z16.d", - "ldff1b +0x18(%z23.d)[4byte] %p6/z -> %z21.d", - "ldff1b +0x1f(%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1b (%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1b +0x08(%z7.d)[1byte] %p2/z -> %z5.d", + "ldff1b +0x0d(%z12.d)[1byte] %p3/z -> %z10.d", + "ldff1b +0x13(%z18.d)[1byte] %p5/z -> %z16.d", + "ldff1b +0x18(%z23.d)[1byte] %p6/z -> %z21.d", + "ldff1b +0x1f(%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LDFF1B { .D }, /Z, [, .D] */ const char *const expected_5_0[6] = { - "ldff1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ldff1b 
(%x12,%z13.d)[4byte] %p3/z -> %z10.d", - "ldff1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ldff1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ldff1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1b (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ldff1b (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ldff1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ldff1b (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ldff1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)); /* Testing LDFF1B { .D }, /Z, [, .D, ] */ const char *const expected_6_0[6] = { - "ldff1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ldff1b (%x12,%z13.d)[4byte] %p3/z -> %z10.d", - "ldff1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ldff1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ldff1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1b (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ldff1b (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ldff1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ldff1b (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ldff1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)); const char *const expected_6_1[6] = { - "ldff1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ldff1b (%x12,%z13.d)[4byte] %p3/z -> %z10.d", - 
"ldff1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ldff1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ldff1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1b (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ldff1b (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ldff1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ldff1b (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ldff1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)); /* Testing LDFF1B { .S }, /Z, [, .S, ] */ const char *const expected_7_0[6] = { - "ldff1b (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s", - "ldff1b (%x7,%z8.s,uxtw)[8byte] %p2/z -> %z5.s", - "ldff1b (%x12,%z13.s,uxtw)[8byte] %p3/z -> %z10.s", - "ldff1b (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s", - "ldff1b (%x22,%z24.s,uxtw)[8byte] %p6/z -> %z21.s", - "ldff1b (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s", + "ldff1b (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s", + "ldff1b (%x7,%z8.s,uxtw)[1byte] %p2/z -> %z5.s", + "ldff1b (%x12,%z13.s,uxtw)[1byte] %p3/z -> %z10.s", + "ldff1b (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s", + "ldff1b (%x22,%z24.s,uxtw)[1byte] %p6/z -> %z21.s", + "ldff1b (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_7_1[6] = { - "ldff1b (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s", - "ldff1b (%x7,%z8.s,sxtw)[8byte] %p2/z -> %z5.s", - "ldff1b 
(%x12,%z13.s,sxtw)[8byte] %p3/z -> %z10.s", - "ldff1b (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s", - "ldff1b (%x22,%z24.s,sxtw)[8byte] %p6/z -> %z21.s", - "ldff1b (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s", + "ldff1b (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s", + "ldff1b (%x7,%z8.s,sxtw)[1byte] %p2/z -> %z5.s", + "ldff1b (%x12,%z13.s,sxtw)[1byte] %p3/z -> %z10.s", + "ldff1b (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s", + "ldff1b (%x22,%z24.s,sxtw)[1byte] %p6/z -> %z21.s", + "ldff1b (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); } TEST_INSTR(ldff1d_sve_pred) { /* Testing LDFF1D { .D }, /Z, [{, , LSL #3}] */ const char *const expected_0_0[6] = { - "ldff1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%x8,lsl #3)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%x13,lsl #3)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%x23,lsl #3)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing LDFF1D { .D }, /Z, [.D{, #}] */ static const uint 
imm5[6] = { 0, 64, 104, 152, 192, 248 }; const char *const expected_1_0[6] = { - "ldff1d (%z0.d)[32byte] %p0/z -> %z0.d", - "ldff1d +0x40(%z7.d)[32byte] %p2/z -> %z5.d", - "ldff1d +0x68(%z12.d)[32byte] %p3/z -> %z10.d", - "ldff1d +0x98(%z18.d)[32byte] %p5/z -> %z16.d", - "ldff1d +0xc0(%z23.d)[32byte] %p6/z -> %z21.d", - "ldff1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d", + "ldff1d (%z0.d)[8byte] %p0/z -> %z0.d", + "ldff1d +0x40(%z7.d)[8byte] %p2/z -> %z5.d", + "ldff1d +0x68(%z12.d)[8byte] %p3/z -> %z10.d", + "ldff1d +0x98(%z18.d)[8byte] %p5/z -> %z16.d", + "ldff1d +0xc0(%z23.d)[8byte] %p6/z -> %z21.d", + "ldff1d +0xf8(%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_32, 0)); + OPSZ_8, 0)); /* Testing LDFF1D { .D }, /Z, [, .D, LSL #3] */ const char *const expected_2_0[6] = { - "ldff1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d,lsl #3)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d,lsl #3)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,lsl #3)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,lsl #3)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,lsl #3)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,lsl #3)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, 
DR_EXTEND_UXTX, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing LDFF1D { .D }, /Z, [, .D] */ const char *const expected_3_0[6] = { - "ldff1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); /* Testing LDFF1D { .D }, /Z, [, .D, #3] */ const char *const expected_4_0[6] = { - "ldff1d (%x0,%z0.d,uxtw #3)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d,uxtw #3)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d,uxtw #3)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d,uxtw #3)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d,uxtw #3)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d,uxtw #3)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d,uxtw #3)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,uxtw #3)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,uxtw #3)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,uxtw #3)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,uxtw #3)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,uxtw #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); const char *const expected_4_1[6] = { - "ldff1d (%x0,%z0.d,sxtw #3)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d,sxtw #3)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d,sxtw #3)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d,sxtw #3)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d,sxtw #3)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d,sxtw #3)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d,sxtw #3)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,sxtw #3)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,sxtw #3)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,sxtw #3)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,sxtw #3)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,sxtw #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing LDFF1D { .D }, /Z, [, .D, ] */ const char *const expected_5_0[6] = { - "ldff1d (%x0,%z0.d,uxtw)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d,uxtw)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d,uxtw)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d,uxtw)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d,uxtw)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d,uxtw)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,uxtw)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,uxtw)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,uxtw)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_5_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); const char *const expected_5_1[6] = { - "ldff1d (%x0,%z0.d,sxtw)[32byte] %p0/z -> %z0.d", - "ldff1d (%x7,%z8.d,sxtw)[32byte] %p2/z -> %z5.d", - "ldff1d (%x12,%z13.d,sxtw)[32byte] %p3/z -> %z10.d", - "ldff1d (%x17,%z19.d,sxtw)[32byte] %p5/z -> %z16.d", - "ldff1d (%x22,%z24.d,sxtw)[32byte] %p6/z -> %z21.d", - "ldff1d (%sp,%z31.d,sxtw)[32byte] %p7/z -> %z31.d", + "ldff1d (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,sxtw)[8byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,sxtw)[8byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,sxtw)[8byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(ldff1h_sve_pred) { /* Testing LDFF1H { .H }, /Z, [{, , LSL #1}] */ const char *const expected_0_0[6] = { - "ldff1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h", - "ldff1h (%x7,%x8,lsl #1)[32byte] %p2/z -> %z5.h", - "ldff1h (%x12,%x13,lsl #1)[32byte] %p3/z -> %z10.h", - "ldff1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h", - "ldff1h (%x22,%x23,lsl #1)[32byte] %p6/z -> %z21.h", - "ldff1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h", + "ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h", + "ldff1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h", + "ldff1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h", + "ldff1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h", + "ldff1h (%x22,%x23,lsl #1)[2byte] %p6/z -> 
%z21.h", + "ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_32, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .S }, /Z, [{, , LSL #1}] */ const char *const expected_1_0[6] = { - "ldff1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", - "ldff1h (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", - "ldff1h (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", - "ldff1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", - "ldff1h (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", - "ldff1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + "ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s", + "ldff1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.s", + "ldff1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.s", + "ldff1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s", + "ldff1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.s", + "ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .D }, /Z, [{, , LSL #1}] */ const char *const expected_2_0[6] = { - "ldff1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.d", + "ldff1h 
(%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 16, 26, 38, 48, 62 }; const char *const expected_3_0[6] = { - "ldff1h (%z0.s)[16byte] %p0/z -> %z0.s", - "ldff1h +0x10(%z7.s)[16byte] %p2/z -> %z5.s", - "ldff1h +0x1a(%z12.s)[16byte] %p3/z -> %z10.s", - "ldff1h +0x26(%z18.s)[16byte] %p5/z -> %z16.s", - "ldff1h +0x30(%z23.s)[16byte] %p6/z -> %z21.s", - "ldff1h +0x3e(%z31.s)[16byte] %p7/z -> %z31.s", + "ldff1h (%z0.s)[2byte] %p0/z -> %z0.s", + "ldff1h +0x10(%z7.s)[2byte] %p2/z -> %z5.s", + "ldff1h +0x1a(%z12.s)[2byte] %p3/z -> %z10.s", + "ldff1h +0x26(%z18.s)[2byte] %p5/z -> %z16.s", + "ldff1h +0x30(%z23.s)[2byte] %p6/z -> %z21.s", + "ldff1h +0x3e(%z31.s)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_2, 0)); /* Testing LDFF1H { .D }, /Z, [.D{, #}] */ const char *const expected_3_1[6] = { - "ldff1h (%z0.d)[8byte] %p0/z -> %z0.d", - "ldff1h +0x10(%z7.d)[8byte] %p2/z -> %z5.d", - "ldff1h +0x1a(%z12.d)[8byte] %p3/z -> %z10.d", - "ldff1h +0x26(%z18.d)[8byte] %p5/z -> %z16.d", - "ldff1h +0x30(%z23.d)[8byte] %p6/z -> %z21.d", - "ldff1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d", + "ldff1h (%z0.d)[2byte] %p0/z -> %z0.d", + "ldff1h +0x10(%z7.d)[2byte] %p2/z -> %z5.d", + "ldff1h +0x1a(%z12.d)[2byte] %p3/z -> 
%z10.d", + "ldff1h +0x26(%z18.d)[2byte] %p5/z -> %z16.d", + "ldff1h +0x30(%z23.d)[2byte] %p6/z -> %z21.d", + "ldff1h +0x3e(%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LDFF1H { .D }, /Z, [, .D, LSL #1] */ const char *const expected_4_0[6] = { - "ldff1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,lsl #1)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .D }, /Z, [, .D] */ const char *const expected_5_0[6] = { - "ldff1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%z0.d)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d)[2byte] %p2/z -> 
%z5.d", + "ldff1h (%x12,%z13.d)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LDFF1H { .D }, /Z, [, .D, #1] */ const char *const expected_6_0[6] = { - "ldff1h (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d,uxtw #1)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,uxtw #1)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_6_1[6] = { - "ldff1h (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d,sxtw #1)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d", + 
"ldff1h (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,sxtw #1)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .D }, /Z, [, .D, ] */ const char *const expected_7_0[6] = { - "ldff1h (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d,uxtw)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d,uxtw)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d,uxtw)[8byte] %p6/z -> %z21.d", - "ldff1h (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,uxtw)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,uxtw)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,uxtw)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_7_1[6] = { - "ldff1h (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d", - "ldff1h (%x7,%z8.d,sxtw)[8byte] %p2/z -> %z5.d", - "ldff1h (%x12,%z13.d,sxtw)[8byte] %p3/z -> %z10.d", - "ldff1h (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d", - "ldff1h (%x22,%z24.d,sxtw)[8byte] 
%p6/z -> %z21.d", - "ldff1h (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d", + "ldff1h (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,sxtw)[2byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,sxtw)[2byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,sxtw)[2byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LDFF1H { .S }, /Z, [, .S, #1] */ const char *const expected_8_0[6] = { - "ldff1h (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s", - "ldff1h (%x7,%z8.s,uxtw #1)[16byte] %p2/z -> %z5.s", - "ldff1h (%x12,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s", - "ldff1h (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s", - "ldff1h (%x22,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s", - "ldff1h (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s", + "ldff1h (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s", + "ldff1h (%x7,%z8.s,uxtw #1)[2byte] %p2/z -> %z5.s", + "ldff1h (%x12,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s", + "ldff1h (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s", + "ldff1h (%x22,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s", + "ldff1h (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_8_1[6] = { - "ldff1h (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s", - "ldff1h (%x7,%z8.s,sxtw #1)[16byte] %p2/z -> %z5.s", - "ldff1h (%x12,%z13.s,sxtw 
#1)[16byte] %p3/z -> %z10.s", - "ldff1h (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s", - "ldff1h (%x22,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s", - "ldff1h (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s", + "ldff1h (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s", + "ldff1h (%x7,%z8.s,sxtw #1)[2byte] %p2/z -> %z5.s", + "ldff1h (%x12,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s", + "ldff1h (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s", + "ldff1h (%x22,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s", + "ldff1h (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_8_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1H { .S }, /Z, [, .S, ] */ const char *const expected_9_0[6] = { - "ldff1h (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s", - "ldff1h (%x7,%z8.s,uxtw)[16byte] %p2/z -> %z5.s", - "ldff1h (%x12,%z13.s,uxtw)[16byte] %p3/z -> %z10.s", - "ldff1h (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s", - "ldff1h (%x22,%z24.s,uxtw)[16byte] %p6/z -> %z21.s", - "ldff1h (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s", + "ldff1h (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s", + "ldff1h (%x7,%z8.s,uxtw)[2byte] %p2/z -> %z5.s", + "ldff1h (%x12,%z13.s,uxtw)[2byte] %p3/z -> %z10.s", + "ldff1h (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s", + "ldff1h (%x22,%z24.s,uxtw)[2byte] %p6/z -> %z21.s", + "ldff1h (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_9_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_9_1[6] = { - "ldff1h 
(%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s", - "ldff1h (%x7,%z8.s,sxtw)[16byte] %p2/z -> %z5.s", - "ldff1h (%x12,%z13.s,sxtw)[16byte] %p3/z -> %z10.s", - "ldff1h (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s", - "ldff1h (%x22,%z24.s,sxtw)[16byte] %p6/z -> %z21.s", - "ldff1h (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s", + "ldff1h (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s", + "ldff1h (%x7,%z8.s,sxtw)[2byte] %p2/z -> %z5.s", + "ldff1h (%x12,%z13.s,sxtw)[2byte] %p3/z -> %z10.s", + "ldff1h (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s", + "ldff1h (%x22,%z24.s,sxtw)[2byte] %p6/z -> %z21.s", + "ldff1h (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_9_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); } TEST_INSTR(ldff1sb_sve_pred) { /* Testing LDFF1SB { .H }, /Z, [{, }] */ const char *const expected_0_0[6] = { - "ldff1sb (%x0,%x0)[16byte] %p0/z -> %z0.h", - "ldff1sb (%x7,%x8)[16byte] %p2/z -> %z5.h", - "ldff1sb (%x12,%x13)[16byte] %p3/z -> %z10.h", - "ldff1sb (%x17,%x18)[16byte] %p5/z -> %z16.h", - "ldff1sb (%x22,%x23)[16byte] %p6/z -> %z21.h", - "ldff1sb (%sp,%x30)[16byte] %p7/z -> %z31.h", + "ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.h", + "ldff1sb (%x7,%x8)[1byte] %p2/z -> %z5.h", + "ldff1sb (%x12,%x13)[1byte] %p3/z -> %z10.h", + "ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.h", + "ldff1sb (%x22,%x23)[1byte] %p6/z -> %z21.h", + "ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1SB { .S 
}, /Z, [{, }] */ const char *const expected_1_0[6] = { - "ldff1sb (%x0,%x0)[8byte] %p0/z -> %z0.s", - "ldff1sb (%x7,%x8)[8byte] %p2/z -> %z5.s", - "ldff1sb (%x12,%x13)[8byte] %p3/z -> %z10.s", - "ldff1sb (%x17,%x18)[8byte] %p5/z -> %z16.s", - "ldff1sb (%x22,%x23)[8byte] %p6/z -> %z21.s", - "ldff1sb (%sp,%x30)[8byte] %p7/z -> %z31.s", + "ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.s", + "ldff1sb (%x7,%x8)[1byte] %p2/z -> %z5.s", + "ldff1sb (%x12,%x13)[1byte] %p3/z -> %z10.s", + "ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.s", + "ldff1sb (%x22,%x23)[1byte] %p6/z -> %z21.s", + "ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1SB { .D }, /Z, [{, }] */ const char *const expected_2_0[6] = { - "ldff1sb (%x0,%x0)[4byte] %p0/z -> %z0.d", - "ldff1sb (%x7,%x8)[4byte] %p2/z -> %z5.d", - "ldff1sb (%x12,%x13)[4byte] %p3/z -> %z10.d", - "ldff1sb (%x17,%x18)[4byte] %p5/z -> %z16.d", - "ldff1sb (%x22,%x23)[4byte] %p6/z -> %z21.d", - "ldff1sb (%sp,%x30)[4byte] %p7/z -> %z31.d", + "ldff1sb (%x0,%x0)[1byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%x8)[1byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%x13)[1byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%x18)[1byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%x23)[1byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDFF1SB { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 8, 13, 19, 
24, 31 }; const char *const expected_3_0[6] = { - "ldff1sb (%z0.s)[8byte] %p0/z -> %z0.s", - "ldff1sb +0x08(%z7.s)[8byte] %p2/z -> %z5.s", - "ldff1sb +0x0d(%z12.s)[8byte] %p3/z -> %z10.s", - "ldff1sb +0x13(%z18.s)[8byte] %p5/z -> %z16.s", - "ldff1sb +0x18(%z23.s)[8byte] %p6/z -> %z21.s", - "ldff1sb +0x1f(%z31.s)[8byte] %p7/z -> %z31.s", + "ldff1sb (%z0.s)[1byte] %p0/z -> %z0.s", + "ldff1sb +0x08(%z7.s)[1byte] %p2/z -> %z5.s", + "ldff1sb +0x0d(%z12.s)[1byte] %p3/z -> %z10.s", + "ldff1sb +0x13(%z18.s)[1byte] %p5/z -> %z16.s", + "ldff1sb +0x18(%z23.s)[1byte] %p6/z -> %z21.s", + "ldff1sb +0x1f(%z31.s)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_1, 0)); /* Testing LDFF1SB { .D }, /Z, [.D{, #}] */ const char *const expected_3_1[6] = { - "ldff1sb (%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1sb +0x08(%z7.d)[4byte] %p2/z -> %z5.d", - "ldff1sb +0x0d(%z12.d)[4byte] %p3/z -> %z10.d", - "ldff1sb +0x13(%z18.d)[4byte] %p5/z -> %z16.d", - "ldff1sb +0x18(%z23.d)[4byte] %p6/z -> %z21.d", - "ldff1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1sb (%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1sb +0x08(%z7.d)[1byte] %p2/z -> %z5.d", + "ldff1sb +0x0d(%z12.d)[1byte] %p3/z -> %z10.d", + "ldff1sb +0x13(%z18.d)[1byte] %p5/z -> %z16.d", + "ldff1sb +0x18(%z23.d)[1byte] %p6/z -> %z21.d", + "ldff1sb +0x1f(%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LDFF1SB { .D }, /Z, [, .D] */ const char *const 
expected_4_0[6] = { - "ldff1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ldff1sb (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ldff1sb (%x12,%z13.d)[4byte] %p3/z -> %z10.d", - "ldff1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ldff1sb (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ldff1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ldff1sb (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)); /* Testing LDFF1SB { .D }, /Z, [, .D, ] */ const char *const expected_5_0[6] = { - "ldff1sb (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", - "ldff1sb (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", - "ldff1sb (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", - "ldff1sb (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", - "ldff1sb (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", - "ldff1sb (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", + "ldff1sb (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%z8.d,uxtw)[1byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%z13.d,uxtw)[1byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%z24.d,uxtw)[1byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_4, 0)); + 
false, 0, 0, OPSZ_1, 0)); const char *const expected_5_1[6] = { - "ldff1sb (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", - "ldff1sb (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", - "ldff1sb (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", - "ldff1sb (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", - "ldff1sb (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", - "ldff1sb (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", + "ldff1sb (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%z8.d,sxtw)[1byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%z13.d,sxtw)[1byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%z24.d,sxtw)[1byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LDFF1SB { .S }, /Z, [, .S, ] */ const char *const expected_6_0[6] = { - "ldff1sb (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s", - "ldff1sb (%x7,%z8.s,uxtw)[8byte] %p2/z -> %z5.s", - "ldff1sb (%x12,%z13.s,uxtw)[8byte] %p3/z -> %z10.s", - "ldff1sb (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s", - "ldff1sb (%x22,%z24.s,uxtw)[8byte] %p6/z -> %z21.s", - "ldff1sb (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s", + "ldff1sb (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s", + "ldff1sb (%x7,%z8.s,uxtw)[1byte] %p2/z -> %z5.s", + "ldff1sb (%x12,%z13.s,uxtw)[1byte] %p3/z -> %z10.s", + "ldff1sb (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s", + "ldff1sb (%x22,%z24.s,uxtw)[1byte] %p6/z -> %z21.s", + "ldff1sb (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( 
Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_6_1[6] = { - "ldff1sb (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s", - "ldff1sb (%x7,%z8.s,sxtw)[8byte] %p2/z -> %z5.s", - "ldff1sb (%x12,%z13.s,sxtw)[8byte] %p3/z -> %z10.s", - "ldff1sb (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s", - "ldff1sb (%x22,%z24.s,sxtw)[8byte] %p6/z -> %z21.s", - "ldff1sb (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s", + "ldff1sb (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s", + "ldff1sb (%x7,%z8.s,sxtw)[1byte] %p2/z -> %z5.s", + "ldff1sb (%x12,%z13.s,sxtw)[1byte] %p3/z -> %z10.s", + "ldff1sb (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s", + "ldff1sb (%x22,%z24.s,sxtw)[1byte] %p6/z -> %z21.s", + "ldff1sb (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); } TEST_INSTR(ldff1sh_sve_pred) { /* Testing LDFF1SH { .S }, /Z, [{, , LSL #1}] */ const char *const expected_0_0[6] = { - "ldff1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", - "ldff1sh (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", - "ldff1sh (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", - "ldff1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", - "ldff1sh (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", - "ldff1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + "ldff1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_0_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1SH { .D }, /Z, [{, , LSL #1}] */ const char *const expected_1_0[6] = { - "ldff1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1SH { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 16, 26, 38, 48, 62 }; const char *const expected_2_0[6] = { - "ldff1sh (%z0.s)[16byte] %p0/z -> %z0.s", - "ldff1sh +0x10(%z7.s)[16byte] %p2/z -> %z5.s", - "ldff1sh +0x1a(%z12.s)[16byte] %p3/z -> %z10.s", - "ldff1sh +0x26(%z18.s)[16byte] %p5/z -> %z16.s", - "ldff1sh +0x30(%z23.s)[16byte] %p6/z -> %z21.s", - "ldff1sh +0x3e(%z31.s)[16byte] %p7/z -> %z31.s", + "ldff1sh (%z0.s)[2byte] %p0/z -> %z0.s", + "ldff1sh +0x10(%z7.s)[2byte] %p2/z -> %z5.s", + "ldff1sh +0x1a(%z12.s)[2byte] %p3/z -> %z10.s", + "ldff1sh +0x26(%z18.s)[2byte] %p5/z -> %z16.s", + "ldff1sh +0x30(%z23.s)[2byte] %p6/z -> %z21.s", + "ldff1sh 
+0x3e(%z31.s)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_2, 0)); /* Testing LDFF1SH { .D }, /Z, [.D{, #}] */ const char *const expected_2_1[6] = { - "ldff1sh (%z0.d)[8byte] %p0/z -> %z0.d", - "ldff1sh +0x10(%z7.d)[8byte] %p2/z -> %z5.d", - "ldff1sh +0x1a(%z12.d)[8byte] %p3/z -> %z10.d", - "ldff1sh +0x26(%z18.d)[8byte] %p5/z -> %z16.d", - "ldff1sh +0x30(%z23.d)[8byte] %p6/z -> %z21.d", - "ldff1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d", + "ldff1sh (%z0.d)[2byte] %p0/z -> %z0.d", + "ldff1sh +0x10(%z7.d)[2byte] %p2/z -> %z5.d", + "ldff1sh +0x1a(%z12.d)[2byte] %p3/z -> %z10.d", + "ldff1sh +0x26(%z18.d)[2byte] %p5/z -> %z16.d", + "ldff1sh +0x30(%z23.d)[2byte] %p6/z -> %z21.d", + "ldff1sh +0x3e(%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_2_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LDFF1SH { .D }, /Z, [, .D, LSL #1] */ const char *const expected_3_0[6] = { - "ldff1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,lsl #1)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d", + "ldff1sh 
(%x22,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1SH { .D }, /Z, [, .D] */ const char *const expected_4_0[6] = { - "ldff1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%z0.d)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LDFF1SH { .D }, /Z, [, .D, #1] */ const char *const expected_5_0[6] = { - "ldff1sh (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d,uxtw #1)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,uxtw #1)[2byte] %p2/z -> %z5.d", + "ldff1sh 
(%x12,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_5_1[6] = { - "ldff1sh (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d,sxtw #1)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,sxtw #1)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1SH { .D }, /Z, [, .D, ] */ const char *const expected_6_0[6] = { - "ldff1sh (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d,uxtw)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d,uxtw)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d,uxtw)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d,uxtw)[8byte] %p7/z -> 
%z31.d", + "ldff1sh (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,uxtw)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d,uxtw)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d,uxtw)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_6_1[6] = { - "ldff1sh (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d", - "ldff1sh (%x7,%z8.d,sxtw)[8byte] %p2/z -> %z5.d", - "ldff1sh (%x12,%z13.d,sxtw)[8byte] %p3/z -> %z10.d", - "ldff1sh (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d", - "ldff1sh (%x22,%z24.d,sxtw)[8byte] %p6/z -> %z21.d", - "ldff1sh (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d", + "ldff1sh (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,sxtw)[2byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d,sxtw)[2byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d,sxtw)[2byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LDFF1SH { .S }, /Z, [, .S, #1] */ const char *const expected_7_0[6] = { - "ldff1sh (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s", - "ldff1sh (%x7,%z8.s,uxtw #1)[16byte] %p2/z -> %z5.s", - "ldff1sh (%x12,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s", - "ldff1sh (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> 
%z16.s", - "ldff1sh (%x22,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s", - "ldff1sh (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s", + "ldff1sh (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%z8.s,uxtw #1)[2byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_7_1[6] = { - "ldff1sh (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s", - "ldff1sh (%x7,%z8.s,sxtw #1)[16byte] %p2/z -> %z5.s", - "ldff1sh (%x12,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s", - "ldff1sh (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s", - "ldff1sh (%x22,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s", - "ldff1sh (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s", + "ldff1sh (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%z8.s,sxtw #1)[2byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LDFF1SH { .S }, /Z, [, .S, ] */ const char *const expected_8_0[6] = { - "ldff1sh (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s", - 
"ldff1sh (%x7,%z8.s,uxtw)[16byte] %p2/z -> %z5.s", - "ldff1sh (%x12,%z13.s,uxtw)[16byte] %p3/z -> %z10.s", - "ldff1sh (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s", - "ldff1sh (%x22,%z24.s,uxtw)[16byte] %p6/z -> %z21.s", - "ldff1sh (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s", + "ldff1sh (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%z8.s,uxtw)[2byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%z13.s,uxtw)[2byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%z24.s,uxtw)[2byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_8_1[6] = { - "ldff1sh (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s", - "ldff1sh (%x7,%z8.s,sxtw)[16byte] %p2/z -> %z5.s", - "ldff1sh (%x12,%z13.s,sxtw)[16byte] %p3/z -> %z10.s", - "ldff1sh (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s", - "ldff1sh (%x22,%z24.s,sxtw)[16byte] %p6/z -> %z21.s", - "ldff1sh (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s", + "ldff1sh (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%z8.s,sxtw)[2byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%z13.s,sxtw)[2byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%z24.s,sxtw)[2byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_8_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); } TEST_INSTR(ldff1sw_sve_pred) 
{ /* Testing LDFF1SW { .D }, /Z, [{, , LSL #2}] */ const char *const expected_0_0[6] = { - "ldff1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1SW { .D }, /Z, [.D{, #}] */ static const uint imm5[6] = { 0, 32, 52, 76, 96, 124 }; const char *const expected_1_0[6] = { - "ldff1sw (%z0.d)[16byte] %p0/z -> %z0.d", - "ldff1sw +0x20(%z7.d)[16byte] %p2/z -> %z5.d", - "ldff1sw +0x34(%z12.d)[16byte] %p3/z -> %z10.d", - "ldff1sw +0x4c(%z18.d)[16byte] %p5/z -> %z16.d", - "ldff1sw +0x60(%z23.d)[16byte] %p6/z -> %z21.d", - "ldff1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d", + "ldff1sw (%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1sw +0x20(%z7.d)[4byte] %p2/z -> %z5.d", + "ldff1sw +0x34(%z12.d)[4byte] %p3/z -> %z10.d", + "ldff1sw +0x4c(%z18.d)[4byte] %p5/z -> %z16.d", + "ldff1sw +0x60(%z23.d)[4byte] %p6/z -> %z21.d", + "ldff1sw +0x7c(%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing LDFF1SW { .D }, /Z, [, .D, LSL #2] */ const char *const expected_2_0[6] = { - "ldff1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,lsl #2)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1SW { .D }, /Z, [, .D] */ const char *const expected_3_0[6] = { - "ldff1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_3_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LDFF1SW { .D }, /Z, [, .D, #2] */ const char *const expected_4_0[6] = { - "ldff1sw (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d,uxtw #2)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,uxtw #2)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_4_1[6] = { - "ldff1sw (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d,sxtw #2)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,sxtw #2)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d", + 
"ldff1sw (%x22,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1SW { .D }, /Z, [, .D, ] */ const char *const expected_5_0[6] = { - "ldff1sw (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d,uxtw)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d,uxtw)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d,uxtw)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_5_1[6] = { - "ldff1sw (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d", - "ldff1sw (%x7,%z8.d,sxtw)[16byte] %p2/z -> %z5.d", - "ldff1sw (%x12,%z13.d,sxtw)[16byte] %p3/z -> %z10.d", - "ldff1sw (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d", - "ldff1sw (%x22,%z24.d,sxtw)[16byte] %p6/z -> %z21.d", - "ldff1sw (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d", + "ldff1sw (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", + 
"ldff1sw (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); } TEST_INSTR(ldff1w_sve_pred) { /* Testing LDFF1W { .S }, /Z, [{, , LSL #2}] */ const char *const expected_0_0[6] = { - "ldff1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s", - "ldff1w (%x7,%x8,lsl #2)[32byte] %p2/z -> %z5.s", - "ldff1w (%x12,%x13,lsl #2)[32byte] %p3/z -> %z10.s", - "ldff1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s", - "ldff1w (%x22,%x23,lsl #2)[32byte] %p6/z -> %z21.s", - "ldff1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s", + "ldff1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s", + "ldff1w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s", + "ldff1w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s", + "ldff1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s", + "ldff1w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s", + "ldff1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1W { .D }, /Z, [{, , LSL #2}] */ const char *const expected_1_0[6] = { - "ldff1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", - "ldff1w 
(%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1W { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 32, 52, 76, 96, 124 }; const char *const expected_2_0[6] = { - "ldff1w (%z0.s)[32byte] %p0/z -> %z0.s", - "ldff1w +0x20(%z7.s)[32byte] %p2/z -> %z5.s", - "ldff1w +0x34(%z12.s)[32byte] %p3/z -> %z10.s", - "ldff1w +0x4c(%z18.s)[32byte] %p5/z -> %z16.s", - "ldff1w +0x60(%z23.s)[32byte] %p6/z -> %z21.s", - "ldff1w +0x7c(%z31.s)[32byte] %p7/z -> %z31.s", + "ldff1w (%z0.s)[4byte] %p0/z -> %z0.s", + "ldff1w +0x20(%z7.s)[4byte] %p2/z -> %z5.s", + "ldff1w +0x34(%z12.s)[4byte] %p3/z -> %z10.s", + "ldff1w +0x4c(%z18.s)[4byte] %p5/z -> %z16.s", + "ldff1w +0x60(%z23.s)[4byte] %p6/z -> %z21.s", + "ldff1w +0x7c(%z31.s)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_32, 0)); + OPSZ_4, 0)); /* Testing LDFF1W { .D }, /Z, [.D{, #}] */ const char *const expected_2_1[6] = { - "ldff1w (%z0.d)[16byte] %p0/z -> %z0.d", - "ldff1w +0x20(%z7.d)[16byte] %p2/z -> %z5.d", - "ldff1w +0x34(%z12.d)[16byte] %p3/z -> %z10.d", - "ldff1w +0x4c(%z18.d)[16byte] %p5/z -> %z16.d", - "ldff1w 
+0x60(%z23.d)[16byte] %p6/z -> %z21.d", - "ldff1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d", + "ldff1w (%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1w +0x20(%z7.d)[4byte] %p2/z -> %z5.d", + "ldff1w +0x34(%z12.d)[4byte] %p3/z -> %z10.d", + "ldff1w +0x4c(%z18.d)[4byte] %p5/z -> %z16.d", + "ldff1w +0x60(%z23.d)[4byte] %p6/z -> %z21.d", + "ldff1w +0x7c(%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_2_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing LDFF1W { .D }, /Z, [, .D, LSL #2] */ const char *const expected_3_0[6] = { - "ldff1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,lsl #2)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1W { .D }, /Z, [, .D] */ const char *const expected_4_0[6] = { - "ldff1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - 
"ldff1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LDFF1W { .D }, /Z, [, .D, #2] */ const char *const expected_5_0[6] = { - "ldff1w (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d,uxtw #2)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,uxtw #2)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_5_1[6] = { - "ldff1w (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d,sxtw #2)[16byte] %p2/z 
-> %z5.d", - "ldff1w (%x12,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,sxtw #2)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1W { .D }, /Z, [, .D, ] */ const char *const expected_6_0[6] = { - "ldff1w (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d,uxtw)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%z13.d,uxtw)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d,uxtw)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const 
expected_6_1[6] = { - "ldff1w (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d", - "ldff1w (%x7,%z8.d,sxtw)[16byte] %p2/z -> %z5.d", - "ldff1w (%x12,%z13.d,sxtw)[16byte] %p3/z -> %z10.d", - "ldff1w (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d", - "ldff1w (%x22,%z24.d,sxtw)[16byte] %p6/z -> %z21.d", - "ldff1w (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d", + "ldff1w (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LDFF1W { .S }, /Z, [, .S, #2] */ const char *const expected_7_0[6] = { - "ldff1w (%x0,%z0.s,uxtw #2)[32byte] %p0/z -> %z0.s", - "ldff1w (%x7,%z8.s,uxtw #2)[32byte] %p2/z -> %z5.s", - "ldff1w (%x12,%z13.s,uxtw #2)[32byte] %p3/z -> %z10.s", - "ldff1w (%x17,%z19.s,uxtw #2)[32byte] %p5/z -> %z16.s", - "ldff1w (%x22,%z24.s,uxtw #2)[32byte] %p6/z -> %z21.s", - "ldff1w (%sp,%z31.s,uxtw #2)[32byte] %p7/z -> %z31.s", + "ldff1w (%x0,%z0.s,uxtw #2)[4byte] %p0/z -> %z0.s", + "ldff1w (%x7,%z8.s,uxtw #2)[4byte] %p2/z -> %z5.s", + "ldff1w (%x12,%z13.s,uxtw #2)[4byte] %p3/z -> %z10.s", + "ldff1w (%x17,%z19.s,uxtw #2)[4byte] %p5/z -> %z16.s", + "ldff1w (%x22,%z24.s,uxtw #2)[4byte] %p6/z -> %z21.s", + "ldff1w (%sp,%z31.s,uxtw #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], 
Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_7_1[6] = { - "ldff1w (%x0,%z0.s,sxtw #2)[32byte] %p0/z -> %z0.s", - "ldff1w (%x7,%z8.s,sxtw #2)[32byte] %p2/z -> %z5.s", - "ldff1w (%x12,%z13.s,sxtw #2)[32byte] %p3/z -> %z10.s", - "ldff1w (%x17,%z19.s,sxtw #2)[32byte] %p5/z -> %z16.s", - "ldff1w (%x22,%z24.s,sxtw #2)[32byte] %p6/z -> %z21.s", - "ldff1w (%sp,%z31.s,sxtw #2)[32byte] %p7/z -> %z31.s", + "ldff1w (%x0,%z0.s,sxtw #2)[4byte] %p0/z -> %z0.s", + "ldff1w (%x7,%z8.s,sxtw #2)[4byte] %p2/z -> %z5.s", + "ldff1w (%x12,%z13.s,sxtw #2)[4byte] %p3/z -> %z10.s", + "ldff1w (%x17,%z19.s,sxtw #2)[4byte] %p5/z -> %z16.s", + "ldff1w (%x22,%z24.s,sxtw #2)[4byte] %p6/z -> %z21.s", + "ldff1w (%sp,%z31.s,sxtw #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LDFF1W { .S }, /Z, [, .S, ] */ const char *const expected_8_0[6] = { - "ldff1w (%x0,%z0.s,uxtw)[32byte] %p0/z -> %z0.s", - "ldff1w (%x7,%z8.s,uxtw)[32byte] %p2/z -> %z5.s", - "ldff1w (%x12,%z13.s,uxtw)[32byte] %p3/z -> %z10.s", - "ldff1w (%x17,%z19.s,uxtw)[32byte] %p5/z -> %z16.s", - "ldff1w (%x22,%z24.s,uxtw)[32byte] %p6/z -> %z21.s", - "ldff1w (%sp,%z31.s,uxtw)[32byte] %p7/z -> %z31.s", + "ldff1w (%x0,%z0.s,uxtw)[4byte] %p0/z -> %z0.s", + "ldff1w (%x7,%z8.s,uxtw)[4byte] %p2/z -> %z5.s", + "ldff1w (%x12,%z13.s,uxtw)[4byte] %p3/z -> %z10.s", + "ldff1w (%x17,%z19.s,uxtw)[4byte] %p5/z -> %z16.s", + "ldff1w (%x22,%z24.s,uxtw)[4byte] %p6/z -> %z21.s", + "ldff1w (%sp,%z31.s,uxtw)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_8_1[6] = { - "ldff1w (%x0,%z0.s,sxtw)[32byte] %p0/z -> %z0.s", - "ldff1w (%x7,%z8.s,sxtw)[32byte] %p2/z -> %z5.s", - "ldff1w (%x12,%z13.s,sxtw)[32byte] %p3/z -> %z10.s", - "ldff1w (%x17,%z19.s,sxtw)[32byte] %p5/z -> %z16.s", - "ldff1w (%x22,%z24.s,sxtw)[32byte] %p6/z -> %z21.s", - "ldff1w (%sp,%z31.s,sxtw)[32byte] %p7/z -> %z31.s", + "ldff1w (%x0,%z0.s,sxtw)[4byte] %p0/z -> %z0.s", + "ldff1w (%x7,%z8.s,sxtw)[4byte] %p2/z -> %z5.s", + "ldff1w (%x12,%z13.s,sxtw)[4byte] %p3/z -> %z10.s", + "ldff1w (%x17,%z19.s,sxtw)[4byte] %p5/z -> %z16.s", + "ldff1w (%x22,%z24.s,sxtw)[4byte] %p6/z -> %z21.s", + "ldff1w (%sp,%z31.s,sxtw)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_8_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); } TEST_INSTR(fcadd_sve_pred) @@ -15185,895 +15185,894 @@ TEST_INSTR(ld1b_sve_pred) { /* Testing LD1B { .H }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1b (%x0,%x0)[16byte] %p0/z -> %z0.h", - "ld1b (%x7,%x8)[16byte] %p2/z -> %z5.h", - "ld1b (%x12,%x13)[16byte] %p3/z -> %z10.h", - "ld1b (%x17,%x18)[16byte] %p5/z -> %z16.h", - "ld1b (%x22,%x23)[16byte] %p6/z -> %z21.h", - "ld1b (%sp,%x30)[16byte] %p7/z -> %z31.h", + "ld1b (%x0,%x0)[1byte] %p0/z -> %z0.h", + "ld1b (%x7,%x8)[1byte] %p2/z -> %z5.h", + "ld1b (%x12,%x13)[1byte] %p3/z -> %z10.h", + "ld1b (%x17,%x18)[1byte] %p5/z -> %z16.h", + "ld1b (%x22,%x23)[1byte] %p6/z -> %z21.h", + "ld1b (%sp,%x30)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_0_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1B { .S }, /Z, [, ] */ const char *const expected_1_0[6] = { - "ld1b (%x0,%x0)[8byte] %p0/z -> %z0.s", - "ld1b (%x7,%x8)[8byte] %p2/z -> %z5.s", - "ld1b (%x12,%x13)[8byte] %p3/z -> %z10.s", - "ld1b (%x17,%x18)[8byte] %p5/z -> %z16.s", - "ld1b (%x22,%x23)[8byte] %p6/z -> %z21.s", - "ld1b (%sp,%x30)[8byte] %p7/z -> %z31.s", + "ld1b (%x0,%x0)[1byte] %p0/z -> %z0.s", + "ld1b (%x7,%x8)[1byte] %p2/z -> %z5.s", + "ld1b (%x12,%x13)[1byte] %p3/z -> %z10.s", + "ld1b (%x17,%x18)[1byte] %p5/z -> %z16.s", + "ld1b (%x22,%x23)[1byte] %p6/z -> %z21.s", + "ld1b (%sp,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1B { .D }, /Z, [, ] */ const char *const expected_2_0[6] = { - "ld1b (%x0,%x0)[4byte] %p0/z -> %z0.d", - "ld1b (%x7,%x8)[4byte] %p2/z -> %z5.d", - "ld1b (%x12,%x13)[4byte] %p3/z -> %z10.d", - "ld1b (%x17,%x18)[4byte] %p5/z -> %z16.d", - "ld1b (%x22,%x23)[4byte] %p6/z -> %z21.d", - "ld1b (%sp,%x30)[4byte] %p7/z -> %z31.d", + "ld1b (%x0,%x0)[1byte] %p0/z -> %z0.d", + "ld1b (%x7,%x8)[1byte] %p2/z -> %z5.d", + "ld1b (%x12,%x13)[1byte] %p3/z -> %z10.d", + "ld1b (%x17,%x18)[1byte] %p5/z -> %z16.d", + "ld1b (%x22,%x23)[1byte] %p6/z -> %z21.d", + "ld1b (%sp,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1B { .B }, /Z, [, ] */ const char *const expected_3_0[6] = { - "ld1b (%x0,%x0)[32byte] %p0/z -> %z0.b", - "ld1b (%x7,%x8)[32byte] %p2/z -> %z5.b", - "ld1b (%x12,%x13)[32byte] %p3/z -> %z10.b", - "ld1b (%x17,%x18)[32byte] %p5/z -> %z16.b", - "ld1b (%x22,%x23)[32byte] %p6/z -> %z21.b", - "ld1b (%sp,%x30)[32byte] %p7/z -> %z31.b", + "ld1b (%x0,%x0)[1byte] %p0/z -> %z0.b", + "ld1b (%x7,%x8)[1byte] %p2/z -> %z5.b", + "ld1b (%x12,%x13)[1byte] %p3/z -> %z10.b", + "ld1b (%x17,%x18)[1byte] %p5/z -> %z16.b", + "ld1b (%x22,%x23)[1byte] %p6/z -> %z21.b", + "ld1b (%sp,%x30)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1B { .D }, /Z, [.D{, #}] */ static const uint imm5[6] = { 0, 8, 13, 19, 24, 31 }; const char *const expected_4_0[6] = { - "ld1b (%z0.d)[4byte] %p0/z -> %z0.d", - "ld1b +0x08(%z7.d)[4byte] %p2/z -> %z5.d", - "ld1b +0x0d(%z12.d)[4byte] %p3/z -> %z10.d", - "ld1b +0x13(%z18.d)[4byte] %p5/z -> %z16.d", - "ld1b +0x18(%z23.d)[4byte] %p6/z -> %z21.d", - "ld1b +0x1f(%z31.d)[4byte] %p7/z -> %z31.d", + "ld1b (%z0.d)[1byte] %p0/z -> %z0.d", + "ld1b +0x08(%z7.d)[1byte] %p2/z -> %z5.d", + "ld1b +0x0d(%z12.d)[1byte] %p3/z -> %z10.d", + "ld1b +0x13(%z18.d)[1byte] %p5/z -> %z16.d", + "ld1b +0x18(%z23.d)[1byte] %p6/z -> %z21.d", + "ld1b +0x1f(%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, 
OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LD1B { .S }, /Z, [.S{, #}] */ const char *const expected_4_1[6] = { - "ld1b (%z0.s)[8byte] %p0/z -> %z0.s", - "ld1b +0x08(%z7.s)[8byte] %p2/z -> %z5.s", - "ld1b +0x0d(%z12.s)[8byte] %p3/z -> %z10.s", - "ld1b +0x13(%z18.s)[8byte] %p5/z -> %z16.s", - "ld1b +0x18(%z23.s)[8byte] %p6/z -> %z21.s", - "ld1b +0x1f(%z31.s)[8byte] %p7/z -> %z31.s", + "ld1b (%z0.s)[1byte] %p0/z -> %z0.s", + "ld1b +0x08(%z7.s)[1byte] %p2/z -> %z5.s", + "ld1b +0x0d(%z12.s)[1byte] %p3/z -> %z10.s", + "ld1b +0x13(%z18.s)[1byte] %p5/z -> %z16.s", + "ld1b +0x18(%z23.s)[1byte] %p6/z -> %z21.s", + "ld1b +0x1f(%z31.s)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_1, 0)); /* Testing LD1B { .D }, /Z, [, .D] */ const char *const expected_5_0[6] = { - "ld1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ld1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ld1b (%x12,%z13.d)[4byte] %p3/z -> %z10.d", - "ld1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ld1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ld1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ld1b (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ld1b (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ld1b (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ld1b (%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ld1b (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ld1b (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LD1B { .D }, /Z, [, 
.D, ] */ const char *const expected_6_0[6] = { - "ld1b (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", - "ld1b (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", - "ld1b (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", - "ld1b (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", - "ld1b (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", - "ld1b (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", + "ld1b (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d", + "ld1b (%x7,%z8.d,uxtw)[1byte] %p2/z -> %z5.d", + "ld1b (%x12,%z13.d,uxtw)[1byte] %p3/z -> %z10.d", + "ld1b (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d", + "ld1b (%x22,%z24.d,uxtw)[1byte] %p6/z -> %z21.d", + "ld1b (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_6_1[6] = { - "ld1b (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", - "ld1b (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", - "ld1b (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", - "ld1b (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", - "ld1b (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", - "ld1b (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", + "ld1b (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d", + "ld1b (%x7,%z8.d,sxtw)[1byte] %p2/z -> %z5.d", + "ld1b (%x12,%z13.d,sxtw)[1byte] %p3/z -> %z10.d", + "ld1b (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d", + "ld1b (%x22,%z24.d,sxtw)[1byte] %p6/z -> %z21.d", + "ld1b (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LD1B { .S }, 
/Z, [, .S, ] */ const char *const expected_7_0[6] = { - "ld1b (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s", - "ld1b (%x7,%z8.s,uxtw)[8byte] %p2/z -> %z5.s", - "ld1b (%x12,%z13.s,uxtw)[8byte] %p3/z -> %z10.s", - "ld1b (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s", - "ld1b (%x22,%z24.s,uxtw)[8byte] %p6/z -> %z21.s", - "ld1b (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s", + "ld1b (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s", + "ld1b (%x7,%z8.s,uxtw)[1byte] %p2/z -> %z5.s", + "ld1b (%x12,%z13.s,uxtw)[1byte] %p3/z -> %z10.s", + "ld1b (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s", + "ld1b (%x22,%z24.s,uxtw)[1byte] %p6/z -> %z21.s", + "ld1b (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_7_1[6] = { - "ld1b (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s", - "ld1b (%x7,%z8.s,sxtw)[8byte] %p2/z -> %z5.s", - "ld1b (%x12,%z13.s,sxtw)[8byte] %p3/z -> %z10.s", - "ld1b (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s", - "ld1b (%x22,%z24.s,sxtw)[8byte] %p6/z -> %z21.s", - "ld1b (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s", + "ld1b (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s", + "ld1b (%x7,%z8.s,sxtw)[1byte] %p2/z -> %z5.s", + "ld1b (%x12,%z13.s,sxtw)[1byte] %p3/z -> %z10.s", + "ld1b (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s", + "ld1b (%x22,%z24.s,sxtw)[1byte] %p6/z -> %z21.s", + "ld1b (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LD1B { 
.H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_8_0[6] = { - "ld1b -0x80(%x0)[16byte] %p0/z -> %z0.h", - "ld1b -0x30(%x7)[16byte] %p2/z -> %z5.h", - "ld1b (%x12)[16byte] %p3/z -> %z10.h", - "ld1b +0x30(%x17)[16byte] %p5/z -> %z16.h", - "ld1b +0x50(%x22)[16byte] %p6/z -> %z21.h", - "ld1b +0x70(%sp)[16byte] %p7/z -> %z31.h", + "ld1b -0x80(%x0)[1byte] %p0/z -> %z0.h", + "ld1b -0x30(%x7)[1byte] %p2/z -> %z5.h", + "ld1b (%x12)[1byte] %p3/z -> %z10.h", + "ld1b +0x30(%x17)[1byte] %p5/z -> %z16.h", + "ld1b +0x50(%x22)[1byte] %p6/z -> %z21.h", + "ld1b +0x70(%sp)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_1)); /* Testing LD1B { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_9_0[6] = { - "ld1b -0x40(%x0)[8byte] %p0/z -> %z0.s", - "ld1b -0x18(%x7)[8byte] %p2/z -> %z5.s", - "ld1b (%x12)[8byte] %p3/z -> %z10.s", - "ld1b +0x18(%x17)[8byte] %p5/z -> %z16.s", - "ld1b +0x28(%x22)[8byte] %p6/z -> %z21.s", - "ld1b +0x38(%sp)[8byte] %p7/z -> %z31.s", + "ld1b -0x40(%x0)[1byte] %p0/z -> %z0.s", + "ld1b -0x18(%x7)[1byte] %p2/z -> %z5.s", + "ld1b (%x12)[1byte] %p3/z -> %z10.s", + "ld1b +0x18(%x17)[1byte] %p5/z -> %z16.s", + "ld1b +0x28(%x22)[1byte] %p6/z -> %z21.s", + "ld1b +0x38(%sp)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_9_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_1)); /* Testing LD1B { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_10_0[6] = { - "ld1b -0x20(%x0)[4byte] %p0/z -> %z0.d", - "ld1b -0x0c(%x7)[4byte] %p2/z -> %z5.d", - "ld1b (%x12)[4byte] %p3/z -> 
%z10.d", - "ld1b +0x0c(%x17)[4byte] %p5/z -> %z16.d", - "ld1b +0x14(%x22)[4byte] %p6/z -> %z21.d", - "ld1b +0x1c(%sp)[4byte] %p7/z -> %z31.d", + "ld1b -0x20(%x0)[1byte] %p0/z -> %z0.d", + "ld1b -0x0c(%x7)[1byte] %p2/z -> %z5.d", + "ld1b (%x12)[1byte] %p3/z -> %z10.d", + "ld1b +0x0c(%x17)[1byte] %p5/z -> %z16.d", + "ld1b +0x14(%x22)[1byte] %p6/z -> %z21.d", + "ld1b +0x1c(%sp)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_10_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 4 * imm4[i], - OPSZ_4)); + OPSZ_1)); /* Testing LD1B { .B }, /Z, [{, #, MUL VL}] */ const char *const expected_11_0[6] = { - "ld1b -0x0100(%x0)[32byte] %p0/z -> %z0.b", - "ld1b -0x60(%x7)[32byte] %p2/z -> %z5.b", - "ld1b (%x12)[32byte] %p3/z -> %z10.b", - "ld1b +0x60(%x17)[32byte] %p5/z -> %z16.b", - "ld1b +0xa0(%x22)[32byte] %p6/z -> %z21.b", - "ld1b +0xe0(%sp)[32byte] %p7/z -> %z31.b", + "ld1b -0x0100(%x0)[1byte] %p0/z -> %z0.b", + "ld1b -0x60(%x7)[1byte] %p2/z -> %z5.b", + "ld1b (%x12)[1byte] %p3/z -> %z10.b", + "ld1b +0x60(%x17)[1byte] %p5/z -> %z16.b", + "ld1b +0xa0(%x22)[1byte] %p6/z -> %z21.b", + "ld1b +0xe0(%sp)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_11_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_1)); } TEST_INSTR(ld1rob_sve_pred) { - /* Testing LD1ROB { .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1rob (%x0,%x0)[32byte] %p0/z -> %z0.b", - "ld1rob (%x7,%x8)[32byte] %p2/z -> %z5.b", - "ld1rob (%x12,%x13)[32byte] %p3/z -> %z10.b", - "ld1rob (%x17,%x18)[32byte] %p5/z -> %z16.b", - "ld1rob (%x22,%x23)[32byte] %p6/z -> %z21.b", - "ld1rob (%sp,%x30)[32byte] %p7/z -> %z31.b", + "ld1rob (%x0,%x0)[1byte] %p0/z -> %z0.b", 
+ "ld1rob (%x7,%x8)[1byte] %p2/z -> %z5.b", + "ld1rob (%x12,%x13)[1byte] %p3/z -> %z10.b", + "ld1rob (%x17,%x18)[1byte] %p5/z -> %z16.b", + "ld1rob (%x22,%x23)[1byte] %p6/z -> %z21.b", + "ld1rob (%sp,%x30)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1rob, ld1rob_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); } TEST_INSTR(ld1rqb_sve_pred) { /* Testing LD1RQB { .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1rqb (%x0,%x0)[16byte] %p0/z -> %z0.b", - "ld1rqb (%x7,%x8)[16byte] %p2/z -> %z5.b", - "ld1rqb (%x12,%x13)[16byte] %p3/z -> %z10.b", - "ld1rqb (%x17,%x18)[16byte] %p5/z -> %z16.b", - "ld1rqb (%x22,%x23)[16byte] %p6/z -> %z21.b", - "ld1rqb (%sp,%x30)[16byte] %p7/z -> %z31.b", + "ld1rqb (%x0,%x0)[1byte] %p0/z -> %z0.b", + "ld1rqb (%x7,%x8)[1byte] %p2/z -> %z5.b", + "ld1rqb (%x12,%x13)[1byte] %p3/z -> %z10.b", + "ld1rqb (%x17,%x18)[1byte] %p5/z -> %z16.b", + "ld1rqb (%x22,%x23)[1byte] %p6/z -> %z21.b", + "ld1rqb (%sp,%x30)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1rqb, ld1rqb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1RQB { .B }, /Z, [{, #}] */ static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_1_0[6] = { - "ld1rqb -0x80(%x0)[16byte] %p0/z -> %z0.b", - "ld1rqb -0x30(%x7)[16byte] %p2/z -> %z5.b", - "ld1rqb (%x12)[16byte] %p3/z -> %z10.b", - "ld1rqb +0x30(%x17)[16byte] %p5/z -> %z16.b", - "ld1rqb +0x50(%x22)[16byte] %p6/z -> %z21.b", - "ld1rqb +0x70(%sp)[16byte] %p7/z -> %z31.b", + "ld1rqb -0x80(%x0)[1byte] %p0/z 
-> %z0.b", + "ld1rqb -0x30(%x7)[1byte] %p2/z -> %z5.b", + "ld1rqb (%x12)[1byte] %p3/z -> %z10.b", + "ld1rqb +0x30(%x17)[1byte] %p5/z -> %z16.b", + "ld1rqb +0x50(%x22)[1byte] %p6/z -> %z21.b", + "ld1rqb +0x70(%sp)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1rqb, ld1rqb_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_16)); + OPSZ_1)); } TEST_INSTR(ld1rqd_sve_pred) { /* Testing LD1RQD { .D }, /Z, [, , LSL #3] */ const char *const expected_0_0[6] = { - "ld1rqd (%x0,%x0,lsl #3)[16byte] %p0/z -> %z0.d", - "ld1rqd (%x7,%x8,lsl #3)[16byte] %p2/z -> %z5.d", - "ld1rqd (%x12,%x13,lsl #3)[16byte] %p3/z -> %z10.d", - "ld1rqd (%x17,%x18,lsl #3)[16byte] %p5/z -> %z16.d", - "ld1rqd (%x22,%x23,lsl #3)[16byte] %p6/z -> %z21.d", - "ld1rqd (%sp,%x30,lsl #3)[16byte] %p7/z -> %z31.d", + "ld1rqd (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d", + "ld1rqd (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d", + "ld1rqd (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d", + "ld1rqd (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d", + "ld1rqd (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d", + "ld1rqd (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1rqd, ld1rqd_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LD1RQD { .D }, /Z, [{, #}] */ static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_1_0[6] = { - "ld1rqd -0x80(%x0)[16byte] %p0/z -> %z0.d", - "ld1rqd -0x30(%x7)[16byte] %p2/z -> %z5.d", - "ld1rqd (%x12)[16byte] %p3/z -> %z10.d", - "ld1rqd +0x30(%x17)[16byte] %p5/z -> %z16.d", - "ld1rqd +0x50(%x22)[16byte] %p6/z -> %z21.d", - "ld1rqd +0x70(%sp)[16byte] 
%p7/z -> %z31.d", + "ld1rqd -0x80(%x0)[8byte] %p0/z -> %z0.d", + "ld1rqd -0x30(%x7)[8byte] %p2/z -> %z5.d", + "ld1rqd (%x12)[8byte] %p3/z -> %z10.d", + "ld1rqd +0x30(%x17)[8byte] %p5/z -> %z16.d", + "ld1rqd +0x50(%x22)[8byte] %p6/z -> %z21.d", + "ld1rqd +0x70(%sp)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1rqd, ld1rqd_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_16)); + OPSZ_8)); } TEST_INSTR(ld1rqh_sve_pred) { /* Testing LD1RQH { .H }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1rqh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.h", - "ld1rqh (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.h", - "ld1rqh (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.h", - "ld1rqh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.h", - "ld1rqh (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.h", - "ld1rqh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.h", + "ld1rqh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h", + "ld1rqh (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h", + "ld1rqh (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h", + "ld1rqh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h", + "ld1rqh (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h", + "ld1rqh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1rqh, ld1rqh_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1RQH { .H }, /Z, [{, #}] */ static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_1_0[6] = { - "ld1rqh -0x80(%x0)[16byte] %p0/z -> %z0.h", - "ld1rqh -0x30(%x7)[16byte] %p2/z -> %z5.h", - "ld1rqh (%x12)[16byte] %p3/z -> %z10.h", - "ld1rqh +0x30(%x17)[16byte] %p5/z -> %z16.h", - "ld1rqh +0x50(%x22)[16byte] 
%p6/z -> %z21.h", - "ld1rqh +0x70(%sp)[16byte] %p7/z -> %z31.h", + "ld1rqh -0x80(%x0)[2byte] %p0/z -> %z0.h", + "ld1rqh -0x30(%x7)[2byte] %p2/z -> %z5.h", + "ld1rqh (%x12)[2byte] %p3/z -> %z10.h", + "ld1rqh +0x30(%x17)[2byte] %p5/z -> %z16.h", + "ld1rqh +0x50(%x22)[2byte] %p6/z -> %z21.h", + "ld1rqh +0x70(%sp)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1rqh, ld1rqh_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_16)); + OPSZ_2)); } TEST_INSTR(ld1rqw_sve_pred) { /* Testing LD1RQW { .S }, /Z, [, , LSL #2] */ const char *const expected_0_0[6] = { - "ld1rqw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.s", - "ld1rqw (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.s", - "ld1rqw (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.s", - "ld1rqw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.s", - "ld1rqw (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.s", - "ld1rqw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.s", + "ld1rqw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s", + "ld1rqw (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s", + "ld1rqw (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s", + "ld1rqw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s", + "ld1rqw (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s", + "ld1rqw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1rqw, ld1rqw_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD1RQW { .S }, /Z, [{, #}] */ static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_1_0[6] = { - "ld1rqw -0x80(%x0)[16byte] %p0/z -> %z0.s", - "ld1rqw -0x30(%x7)[16byte] %p2/z -> %z5.s", - "ld1rqw (%x12)[16byte] %p3/z -> %z10.s", - "ld1rqw 
+0x30(%x17)[16byte] %p5/z -> %z16.s", - "ld1rqw +0x50(%x22)[16byte] %p6/z -> %z21.s", - "ld1rqw +0x70(%sp)[16byte] %p7/z -> %z31.s", + "ld1rqw -0x80(%x0)[4byte] %p0/z -> %z0.s", + "ld1rqw -0x30(%x7)[4byte] %p2/z -> %z5.s", + "ld1rqw (%x12)[4byte] %p3/z -> %z10.s", + "ld1rqw +0x30(%x17)[4byte] %p5/z -> %z16.s", + "ld1rqw +0x50(%x22)[4byte] %p6/z -> %z21.s", + "ld1rqw +0x70(%sp)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1rqw, ld1rqw_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_16)); + OPSZ_4)); } TEST_INSTR(ld1sb_sve_pred) { /* Testing LD1SB { .H }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1sb (%x0,%x0)[16byte] %p0/z -> %z0.h", - "ld1sb (%x7,%x8)[16byte] %p2/z -> %z5.h", - "ld1sb (%x12,%x13)[16byte] %p3/z -> %z10.h", - "ld1sb (%x17,%x18)[16byte] %p5/z -> %z16.h", - "ld1sb (%x22,%x23)[16byte] %p6/z -> %z21.h", - "ld1sb (%sp,%x30)[16byte] %p7/z -> %z31.h", + "ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.h", + "ld1sb (%x7,%x8)[1byte] %p2/z -> %z5.h", + "ld1sb (%x12,%x13)[1byte] %p3/z -> %z10.h", + "ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.h", + "ld1sb (%x22,%x23)[1byte] %p6/z -> %z21.h", + "ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1SB { .S }, /Z, [, ] */ const char *const expected_1_0[6] = { - "ld1sb (%x0,%x0)[8byte] %p0/z -> %z0.s", - "ld1sb (%x7,%x8)[8byte] %p2/z -> %z5.s", - "ld1sb (%x12,%x13)[8byte] %p3/z -> %z10.s", - "ld1sb (%x17,%x18)[8byte] %p5/z -> %z16.s", - "ld1sb (%x22,%x23)[8byte] %p6/z -> %z21.s", - "ld1sb (%sp,%x30)[8byte] %p7/z -> %z31.s", + 
"ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.s", + "ld1sb (%x7,%x8)[1byte] %p2/z -> %z5.s", + "ld1sb (%x12,%x13)[1byte] %p3/z -> %z10.s", + "ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.s", + "ld1sb (%x22,%x23)[1byte] %p6/z -> %z21.s", + "ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1SB { .D }, /Z, [, ] */ const char *const expected_2_0[6] = { - "ld1sb (%x0,%x0)[4byte] %p0/z -> %z0.d", - "ld1sb (%x7,%x8)[4byte] %p2/z -> %z5.d", - "ld1sb (%x12,%x13)[4byte] %p3/z -> %z10.d", - "ld1sb (%x17,%x18)[4byte] %p5/z -> %z16.d", - "ld1sb (%x22,%x23)[4byte] %p6/z -> %z21.d", - "ld1sb (%sp,%x30)[4byte] %p7/z -> %z31.d", + "ld1sb (%x0,%x0)[1byte] %p0/z -> %z0.d", + "ld1sb (%x7,%x8)[1byte] %p2/z -> %z5.d", + "ld1sb (%x12,%x13)[1byte] %p3/z -> %z10.d", + "ld1sb (%x17,%x18)[1byte] %p5/z -> %z16.d", + "ld1sb (%x22,%x23)[1byte] %p6/z -> %z21.d", + "ld1sb (%sp,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD1SB { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 8, 13, 19, 24, 31 }; const char *const expected_3_0[6] = { - "ld1sb (%z0.s)[8byte] %p0/z -> %z0.s", - "ld1sb +0x08(%z7.s)[8byte] %p2/z -> %z5.s", - "ld1sb +0x0d(%z12.s)[8byte] %p3/z -> %z10.s", - "ld1sb +0x13(%z18.s)[8byte] %p5/z -> %z16.s", - "ld1sb +0x18(%z23.s)[8byte] %p6/z -> %z21.s", - "ld1sb +0x1f(%z31.s)[8byte] %p7/z -> %z31.s", + "ld1sb (%z0.s)[1byte] %p0/z -> %z0.s", + "ld1sb 
+0x08(%z7.s)[1byte] %p2/z -> %z5.s", + "ld1sb +0x0d(%z12.s)[1byte] %p3/z -> %z10.s", + "ld1sb +0x13(%z18.s)[1byte] %p5/z -> %z16.s", + "ld1sb +0x18(%z23.s)[1byte] %p6/z -> %z21.s", + "ld1sb +0x1f(%z31.s)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_1, 0)); /* Testing LD1SB { .D }, /Z, [.D{, #}] */ const char *const expected_3_1[6] = { - "ld1sb (%z0.d)[4byte] %p0/z -> %z0.d", - "ld1sb +0x08(%z7.d)[4byte] %p2/z -> %z5.d", - "ld1sb +0x0d(%z12.d)[4byte] %p3/z -> %z10.d", - "ld1sb +0x13(%z18.d)[4byte] %p5/z -> %z16.d", - "ld1sb +0x18(%z23.d)[4byte] %p6/z -> %z21.d", - "ld1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d", + "ld1sb (%z0.d)[1byte] %p0/z -> %z0.d", + "ld1sb +0x08(%z7.d)[1byte] %p2/z -> %z5.d", + "ld1sb +0x0d(%z12.d)[1byte] %p3/z -> %z10.d", + "ld1sb +0x13(%z18.d)[1byte] %p5/z -> %z16.d", + "ld1sb +0x18(%z23.d)[1byte] %p6/z -> %z21.d", + "ld1sb +0x1f(%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LD1SB { .D }, /Z, [, .D] */ const char *const expected_4_0[6] = { - "ld1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d", - "ld1sb (%x7,%z8.d)[4byte] %p2/z -> %z5.d", - "ld1sb (%x12,%z13.d)[4byte] %p3/z -> %z10.d", - "ld1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d", - "ld1sb (%x22,%z24.d)[4byte] %p6/z -> %z21.d", - "ld1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + "ld1sb (%x0,%z0.d)[1byte] %p0/z -> %z0.d", + "ld1sb (%x7,%z8.d)[1byte] %p2/z -> %z5.d", + "ld1sb (%x12,%z13.d)[1byte] %p3/z -> %z10.d", + "ld1sb 
(%x17,%z19.d)[1byte] %p5/z -> %z16.d", + "ld1sb (%x22,%z24.d)[1byte] %p6/z -> %z21.d", + "ld1sb (%sp,%z31.d)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)); /* Testing LD1SB { .D }, /Z, [, .D, ] */ const char *const expected_5_0[6] = { - "ld1sb (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", - "ld1sb (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", - "ld1sb (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", - "ld1sb (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", - "ld1sb (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", - "ld1sb (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", + "ld1sb (%x0,%z0.d,uxtw)[1byte] %p0/z -> %z0.d", + "ld1sb (%x7,%z8.d,uxtw)[1byte] %p2/z -> %z5.d", + "ld1sb (%x12,%z13.d,uxtw)[1byte] %p3/z -> %z10.d", + "ld1sb (%x17,%z19.d,uxtw)[1byte] %p5/z -> %z16.d", + "ld1sb (%x22,%z24.d,uxtw)[1byte] %p6/z -> %z21.d", + "ld1sb (%sp,%z31.d,uxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_5_1[6] = { - "ld1sb (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", - "ld1sb (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", - "ld1sb (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", - "ld1sb (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", - "ld1sb (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", - "ld1sb (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", + "ld1sb (%x0,%z0.d,sxtw)[1byte] %p0/z -> %z0.d", + "ld1sb (%x7,%z8.d,sxtw)[1byte] %p2/z -> %z5.d", + "ld1sb (%x12,%z13.d,sxtw)[1byte] 
%p3/z -> %z10.d", + "ld1sb (%x17,%z19.d,sxtw)[1byte] %p5/z -> %z16.d", + "ld1sb (%x22,%z24.d,sxtw)[1byte] %p6/z -> %z21.d", + "ld1sb (%sp,%z31.d,sxtw)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LD1SB { .S }, /Z, [, .S, ] */ const char *const expected_6_0[6] = { - "ld1sb (%x0,%z0.s,uxtw)[8byte] %p0/z -> %z0.s", - "ld1sb (%x7,%z8.s,uxtw)[8byte] %p2/z -> %z5.s", - "ld1sb (%x12,%z13.s,uxtw)[8byte] %p3/z -> %z10.s", - "ld1sb (%x17,%z19.s,uxtw)[8byte] %p5/z -> %z16.s", - "ld1sb (%x22,%z24.s,uxtw)[8byte] %p6/z -> %z21.s", - "ld1sb (%sp,%z31.s,uxtw)[8byte] %p7/z -> %z31.s", + "ld1sb (%x0,%z0.s,uxtw)[1byte] %p0/z -> %z0.s", + "ld1sb (%x7,%z8.s,uxtw)[1byte] %p2/z -> %z5.s", + "ld1sb (%x12,%z13.s,uxtw)[1byte] %p3/z -> %z10.s", + "ld1sb (%x17,%z19.s,uxtw)[1byte] %p5/z -> %z16.s", + "ld1sb (%x22,%z24.s,uxtw)[1byte] %p6/z -> %z21.s", + "ld1sb (%sp,%z31.s,uxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_6_1[6] = { - "ld1sb (%x0,%z0.s,sxtw)[8byte] %p0/z -> %z0.s", - "ld1sb (%x7,%z8.s,sxtw)[8byte] %p2/z -> %z5.s", - "ld1sb (%x12,%z13.s,sxtw)[8byte] %p3/z -> %z10.s", - "ld1sb (%x17,%z19.s,sxtw)[8byte] %p5/z -> %z16.s", - "ld1sb (%x22,%z24.s,sxtw)[8byte] %p6/z -> %z21.s", - "ld1sb (%sp,%z31.s,sxtw)[8byte] %p7/z -> %z31.s", + "ld1sb (%x0,%z0.s,sxtw)[1byte] %p0/z -> %z0.s", + "ld1sb (%x7,%z8.s,sxtw)[1byte] %p2/z -> %z5.s", + 
"ld1sb (%x12,%z13.s,sxtw)[1byte] %p3/z -> %z10.s", + "ld1sb (%x17,%z19.s,sxtw)[1byte] %p5/z -> %z16.s", + "ld1sb (%x22,%z24.s,sxtw)[1byte] %p6/z -> %z21.s", + "ld1sb (%sp,%z31.s,sxtw)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing LD1SB { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_7_0[6] = { - "ld1sb -0x80(%x0)[16byte] %p0/z -> %z0.h", - "ld1sb -0x30(%x7)[16byte] %p2/z -> %z5.h", - "ld1sb (%x12)[16byte] %p3/z -> %z10.h", - "ld1sb +0x30(%x17)[16byte] %p5/z -> %z16.h", - "ld1sb +0x50(%x22)[16byte] %p6/z -> %z21.h", - "ld1sb +0x70(%sp)[16byte] %p7/z -> %z31.h", + "ld1sb -0x80(%x0)[1byte] %p0/z -> %z0.h", + "ld1sb -0x30(%x7)[1byte] %p2/z -> %z5.h", + "ld1sb (%x12)[1byte] %p3/z -> %z10.h", + "ld1sb +0x30(%x17)[1byte] %p5/z -> %z16.h", + "ld1sb +0x50(%x22)[1byte] %p6/z -> %z21.h", + "ld1sb +0x70(%sp)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_1)); /* Testing LD1SB { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_8_0[6] = { - "ld1sb -0x40(%x0)[8byte] %p0/z -> %z0.s", - "ld1sb -0x18(%x7)[8byte] %p2/z -> %z5.s", - "ld1sb (%x12)[8byte] %p3/z -> %z10.s", - "ld1sb +0x18(%x17)[8byte] %p5/z -> %z16.s", - "ld1sb +0x28(%x22)[8byte] %p6/z -> %z21.s", - "ld1sb +0x38(%sp)[8byte] %p7/z -> %z31.s", + "ld1sb -0x40(%x0)[1byte] %p0/z -> %z0.s", + "ld1sb -0x18(%x7)[1byte] %p2/z -> %z5.s", + "ld1sb (%x12)[1byte] %p3/z -> %z10.s", + "ld1sb 
+0x18(%x17)[1byte] %p5/z -> %z16.s", + "ld1sb +0x28(%x22)[1byte] %p6/z -> %z21.s", + "ld1sb +0x38(%sp)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_1)); /* Testing LD1SB { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_9_0[6] = { - "ld1sb -0x20(%x0)[4byte] %p0/z -> %z0.d", - "ld1sb -0x0c(%x7)[4byte] %p2/z -> %z5.d", - "ld1sb (%x12)[4byte] %p3/z -> %z10.d", - "ld1sb +0x0c(%x17)[4byte] %p5/z -> %z16.d", - "ld1sb +0x14(%x22)[4byte] %p6/z -> %z21.d", - "ld1sb +0x1c(%sp)[4byte] %p7/z -> %z31.d", + "ld1sb -0x20(%x0)[1byte] %p0/z -> %z0.d", + "ld1sb -0x0c(%x7)[1byte] %p2/z -> %z5.d", + "ld1sb (%x12)[1byte] %p3/z -> %z10.d", + "ld1sb +0x0c(%x17)[1byte] %p5/z -> %z16.d", + "ld1sb +0x14(%x22)[1byte] %p6/z -> %z21.d", + "ld1sb +0x1c(%sp)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_9_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 4 * imm4[i], - OPSZ_4)); + OPSZ_1)); } TEST_INSTR(ldnt1b_sve_pred) { /* Testing LDNT1B { .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ldnt1b (%x0,%x0)[32byte] %p0/z -> %z0.b", - "ldnt1b (%x7,%x8)[32byte] %p2/z -> %z5.b", - "ldnt1b (%x12,%x13)[32byte] %p3/z -> %z10.b", - "ldnt1b (%x17,%x18)[32byte] %p5/z -> %z16.b", - "ldnt1b (%x22,%x23)[32byte] %p6/z -> %z21.b", - "ldnt1b (%sp,%x30)[32byte] %p7/z -> %z31.b", + "ldnt1b (%x0,%x0)[1byte] %p0/z -> %z0.b", + "ldnt1b (%x7,%x8)[1byte] %p2/z -> %z5.b", + "ldnt1b (%x12,%x13)[1byte] %p3/z -> %z10.b", + "ldnt1b (%x17,%x18)[1byte] %p5/z -> %z16.b", + "ldnt1b (%x22,%x23)[1byte] %p6/z -> %z21.b", + "ldnt1b (%sp,%x30)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ldnt1b, ldnt1b_sve_pred, 6, 
expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LDNT1B { .B }, /Z, [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "ldnt1b -0x0100(%x0)[32byte] %p0/z -> %z0.b", - "ldnt1b -0x60(%x7)[32byte] %p2/z -> %z5.b", - "ldnt1b (%x12)[32byte] %p3/z -> %z10.b", - "ldnt1b +0x60(%x17)[32byte] %p5/z -> %z16.b", - "ldnt1b +0xa0(%x22)[32byte] %p6/z -> %z21.b", - "ldnt1b +0xe0(%sp)[32byte] %p7/z -> %z31.b", + "ldnt1b -0x0100(%x0)[1byte] %p0/z -> %z0.b", + "ldnt1b -0x60(%x7)[1byte] %p2/z -> %z5.b", + "ldnt1b (%x12)[1byte] %p3/z -> %z10.b", + "ldnt1b +0x60(%x17)[1byte] %p5/z -> %z16.b", + "ldnt1b +0xa0(%x22)[1byte] %p6/z -> %z21.b", + "ldnt1b +0xe0(%sp)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ldnt1b, ldnt1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_32)); + OPSZ_1)); } TEST_INSTR(st1b_sve_pred) { /* Testing ST1B { . 
}, , [, ] */ const char *const expected_0_0[6] = { - "st1b %z0.b %p0 -> (%x0,%x0)[32byte]", - "st1b %z5.b %p2 -> (%x7,%x8)[32byte]", - "st1b %z10.b %p3 -> (%x12,%x13)[32byte]", - "st1b %z16.b %p5 -> (%x17,%x18)[32byte]", - "st1b %z21.b %p6 -> (%x22,%x23)[32byte]", - "st1b %z31.b %p7 -> (%sp,%x30)[32byte]", + "st1b %z0.b %p0 -> (%x0,%x0)[1byte]", + "st1b %z5.b %p2 -> (%x7,%x8)[1byte]", + "st1b %z10.b %p3 -> (%x12,%x13)[1byte]", + "st1b %z16.b %p5 -> (%x17,%x18)[1byte]", + "st1b %z21.b %p6 -> (%x22,%x23)[1byte]", + "st1b %z31.b %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); const char *const expected_0_1[6] = { - "st1b %z0.h %p0 -> (%x0,%x0)[16byte]", - "st1b %z5.h %p2 -> (%x7,%x8)[16byte]", - "st1b %z10.h %p3 -> (%x12,%x13)[16byte]", - "st1b %z16.h %p5 -> (%x17,%x18)[16byte]", - "st1b %z21.h %p6 -> (%x22,%x23)[16byte]", - "st1b %z31.h %p7 -> (%sp,%x30)[16byte]", + "st1b %z0.h %p0 -> (%x0,%x0)[1byte]", + "st1b %z5.h %p2 -> (%x7,%x8)[1byte]", + "st1b %z10.h %p3 -> (%x12,%x13)[1byte]", + "st1b %z16.h %p5 -> (%x17,%x18)[1byte]", + "st1b %z21.h %p6 -> (%x22,%x23)[1byte]", + "st1b %z31.h %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); const char *const expected_0_2[6] = { - "st1b %z0.s %p0 -> (%x0,%x0)[8byte]", - "st1b %z5.s %p2 -> (%x7,%x8)[8byte]", - "st1b %z10.s %p3 -> (%x12,%x13)[8byte]", - "st1b %z16.s %p5 -> (%x17,%x18)[8byte]", - "st1b %z21.s %p6 -> (%x22,%x23)[8byte]", - "st1b %z31.s %p7 -> 
(%sp,%x30)[8byte]", + "st1b %z0.s %p0 -> (%x0,%x0)[1byte]", + "st1b %z5.s %p2 -> (%x7,%x8)[1byte]", + "st1b %z10.s %p3 -> (%x12,%x13)[1byte]", + "st1b %z16.s %p5 -> (%x17,%x18)[1byte]", + "st1b %z21.s %p6 -> (%x22,%x23)[1byte]", + "st1b %z31.s %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_0_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); const char *const expected_0_3[6] = { - "st1b %z0.d %p0 -> (%x0,%x0)[4byte]", - "st1b %z5.d %p2 -> (%x7,%x8)[4byte]", - "st1b %z10.d %p3 -> (%x12,%x13)[4byte]", - "st1b %z16.d %p5 -> (%x17,%x18)[4byte]", - "st1b %z21.d %p6 -> (%x22,%x23)[4byte]", - "st1b %z31.d %p7 -> (%sp,%x30)[4byte]", + "st1b %z0.d %p0 -> (%x0,%x0)[1byte]", + "st1b %z5.d %p2 -> (%x7,%x8)[1byte]", + "st1b %z10.d %p3 -> (%x12,%x13)[1byte]", + "st1b %z16.d %p5 -> (%x17,%x18)[1byte]", + "st1b %z21.d %p6 -> (%x22,%x23)[1byte]", + "st1b %z31.d %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_0_3[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing ST1B { .S }, , [.S{, #}] */ static const uint imm5[6] = { 0, 8, 13, 19, 24, 31 }; const char *const expected_1_0[6] = { - "st1b %z0.s %p0 -> (%z0.s)[8byte]", - "st1b %z5.s %p2 -> +0x08(%z7.s)[8byte]", - "st1b %z10.s %p3 -> +0x0d(%z12.s)[8byte]", - "st1b %z16.s %p5 -> +0x13(%z18.s)[8byte]", - "st1b %z21.s %p6 -> +0x18(%z23.s)[8byte]", - "st1b %z31.s %p7 -> +0x1f(%z31.s)[8byte]", + "st1b %z0.s %p0 -> (%z0.s)[1byte]", + "st1b %z5.s %p2 -> +0x08(%z7.s)[1byte]", + "st1b %z10.s %p3 -> +0x0d(%z12.s)[1byte]", + "st1b %z16.s %p5 -> +0x13(%z18.s)[1byte]", + "st1b %z21.s %p6 
-> +0x18(%z23.s)[1byte]", + "st1b %z31.s %p7 -> +0x1f(%z31.s)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_1, 0)); /* Testing ST1B { .D }, , [.D{, #}] */ const char *const expected_1_1[6] = { - "st1b %z0.d %p0 -> (%z0.d)[4byte]", - "st1b %z5.d %p2 -> +0x08(%z7.d)[4byte]", - "st1b %z10.d %p3 -> +0x0d(%z12.d)[4byte]", - "st1b %z16.d %p5 -> +0x13(%z18.d)[4byte]", - "st1b %z21.d %p6 -> +0x18(%z23.d)[4byte]", - "st1b %z31.d %p7 -> +0x1f(%z31.d)[4byte]", + "st1b %z0.d %p0 -> (%z0.d)[1byte]", + "st1b %z5.d %p2 -> +0x08(%z7.d)[1byte]", + "st1b %z10.d %p3 -> +0x0d(%z12.d)[1byte]", + "st1b %z16.d %p5 -> +0x13(%z18.d)[1byte]", + "st1b %z21.d %p6 -> +0x18(%z23.d)[1byte]", + "st1b %z31.d %p7 -> +0x1f(%z31.d)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_1_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing ST1B { .D }, , [, .D] */ const char *const expected_2_0[6] = { - "st1b %z0.d %p0 -> (%x0,%z0.d)[4byte]", - "st1b %z5.d %p2 -> (%x7,%z8.d)[4byte]", - "st1b %z10.d %p3 -> (%x12,%z13.d)[4byte]", - "st1b %z16.d %p5 -> (%x17,%z19.d)[4byte]", - "st1b %z21.d %p6 -> (%x22,%z24.d)[4byte]", - "st1b %z31.d %p7 -> (%sp,%z31.d)[4byte]", + "st1b %z0.d %p0 -> (%x0,%z0.d)[1byte]", + "st1b %z5.d %p2 -> (%x7,%z8.d)[1byte]", + "st1b %z10.d %p3 -> (%x12,%z13.d)[1byte]", + "st1b %z16.d %p5 -> (%x17,%z19.d)[1byte]", + "st1b %z21.d %p6 -> (%x22,%z24.d)[1byte]", + "st1b %z31.d %p7 -> (%sp,%z31.d)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), 
opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing ST1B { .D }, , [, .D, ] */ const char *const expected_3_0[6] = { - "st1b %z0.d %p0 -> (%x0,%z0.d,uxtw)[4byte]", - "st1b %z5.d %p2 -> (%x7,%z8.d,uxtw)[4byte]", - "st1b %z10.d %p3 -> (%x12,%z13.d,uxtw)[4byte]", - "st1b %z16.d %p5 -> (%x17,%z19.d,uxtw)[4byte]", - "st1b %z21.d %p6 -> (%x22,%z24.d,uxtw)[4byte]", - "st1b %z31.d %p7 -> (%sp,%z31.d,uxtw)[4byte]", + "st1b %z0.d %p0 -> (%x0,%z0.d,uxtw)[1byte]", + "st1b %z5.d %p2 -> (%x7,%z8.d,uxtw)[1byte]", + "st1b %z10.d %p3 -> (%x12,%z13.d,uxtw)[1byte]", + "st1b %z16.d %p5 -> (%x17,%z19.d,uxtw)[1byte]", + "st1b %z21.d %p6 -> (%x22,%z24.d,uxtw)[1byte]", + "st1b %z31.d %p7 -> (%sp,%z31.d,uxtw)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_3_1[6] = { - "st1b %z0.d %p0 -> (%x0,%z0.d,sxtw)[4byte]", - "st1b %z5.d %p2 -> (%x7,%z8.d,sxtw)[4byte]", - "st1b %z10.d %p3 -> (%x12,%z13.d,sxtw)[4byte]", - "st1b %z16.d %p5 -> (%x17,%z19.d,sxtw)[4byte]", - "st1b %z21.d %p6 -> (%x22,%z24.d,sxtw)[4byte]", - "st1b %z31.d %p7 -> (%sp,%z31.d,sxtw)[4byte]", + "st1b %z0.d %p0 -> (%x0,%z0.d,sxtw)[1byte]", + "st1b %z5.d %p2 -> (%x7,%z8.d,sxtw)[1byte]", + "st1b %z10.d %p3 -> (%x12,%z13.d,sxtw)[1byte]", + "st1b %z16.d %p5 -> (%x17,%z19.d,sxtw)[1byte]", + "st1b %z21.d %p6 -> (%x22,%z24.d,sxtw)[1byte]", + "st1b %z31.d %p7 -> (%sp,%z31.d,sxtw)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( 
Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_4, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing ST1B { .S }, , [, .S, ] */ const char *const expected_4_0[6] = { - "st1b %z0.s %p0 -> (%x0,%z0.s,uxtw)[8byte]", - "st1b %z5.s %p2 -> (%x7,%z8.s,uxtw)[8byte]", - "st1b %z10.s %p3 -> (%x12,%z13.s,uxtw)[8byte]", - "st1b %z16.s %p5 -> (%x17,%z19.s,uxtw)[8byte]", - "st1b %z21.s %p6 -> (%x22,%z24.s,uxtw)[8byte]", - "st1b %z31.s %p7 -> (%sp,%z31.s,uxtw)[8byte]", + "st1b %z0.s %p0 -> (%x0,%z0.s,uxtw)[1byte]", + "st1b %z5.s %p2 -> (%x7,%z8.s,uxtw)[1byte]", + "st1b %z10.s %p3 -> (%x12,%z13.s,uxtw)[1byte]", + "st1b %z16.s %p5 -> (%x17,%z19.s,uxtw)[1byte]", + "st1b %z21.s %p6 -> (%x22,%z24.s,uxtw)[1byte]", + "st1b %z31.s %p7 -> (%sp,%z31.s,uxtw)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); const char *const expected_4_1[6] = { - "st1b %z0.s %p0 -> (%x0,%z0.s,sxtw)[8byte]", - "st1b %z5.s %p2 -> (%x7,%z8.s,sxtw)[8byte]", - "st1b %z10.s %p3 -> (%x12,%z13.s,sxtw)[8byte]", - "st1b %z16.s %p5 -> (%x17,%z19.s,sxtw)[8byte]", - "st1b %z21.s %p6 -> (%x22,%z24.s,sxtw)[8byte]", - "st1b %z31.s %p7 -> (%sp,%z31.s,sxtw)[8byte]", + "st1b %z0.s %p0 -> (%x0,%z0.s,sxtw)[1byte]", + "st1b %z5.s %p2 -> (%x7,%z8.s,sxtw)[1byte]", + "st1b %z10.s %p3 -> (%x12,%z13.s,sxtw)[1byte]", + "st1b %z16.s %p5 -> (%x17,%z19.s,sxtw)[1byte]", + "st1b %z21.s %p6 -> (%x22,%z24.s,sxtw)[1byte]", + "st1b %z31.s %p7 -> (%sp,%z31.s,sxtw)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 
0, OPSZ_8, 0)); + false, 0, 0, OPSZ_1, 0)); /* Testing ST1B { . }, , [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_5_0[6] = { - "st1b %z0.b %p0 -> -0x0100(%x0)[32byte]", - "st1b %z5.b %p2 -> -0x60(%x7)[32byte]", - "st1b %z10.b %p3 -> (%x12)[32byte]", - "st1b %z16.b %p5 -> +0x60(%x17)[32byte]", - "st1b %z21.b %p6 -> +0xa0(%x22)[32byte]", - "st1b %z31.b %p7 -> +0xe0(%sp)[32byte]", + "st1b %z0.b %p0 -> -0x0100(%x0)[1byte]", + "st1b %z5.b %p2 -> -0x60(%x7)[1byte]", + "st1b %z10.b %p3 -> (%x12)[1byte]", + "st1b %z16.b %p5 -> +0x60(%x17)[1byte]", + "st1b %z21.b %p6 -> +0xa0(%x22)[1byte]", + "st1b %z31.b %p7 -> +0xe0(%sp)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_1)); const char *const expected_5_1[6] = { - "st1b %z0.h %p0 -> -0x80(%x0)[16byte]", - "st1b %z5.h %p2 -> -0x30(%x7)[16byte]", - "st1b %z10.h %p3 -> (%x12)[16byte]", - "st1b %z16.h %p5 -> +0x30(%x17)[16byte]", - "st1b %z21.h %p6 -> +0x50(%x22)[16byte]", - "st1b %z31.h %p7 -> +0x70(%sp)[16byte]", + "st1b %z0.h %p0 -> -0x80(%x0)[1byte]", + "st1b %z5.h %p2 -> -0x30(%x7)[1byte]", + "st1b %z10.h %p3 -> (%x12)[1byte]", + "st1b %z16.h %p5 -> +0x30(%x17)[1byte]", + "st1b %z21.h %p6 -> +0x50(%x22)[1byte]", + "st1b %z31.h %p7 -> +0x70(%sp)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_1)); const char *const expected_5_2[6] = { - "st1b %z0.s %p0 -> -0x40(%x0)[8byte]", - "st1b %z5.s %p2 -> -0x18(%x7)[8byte]", - "st1b %z10.s %p3 -> (%x12)[8byte]", - "st1b %z16.s %p5 -> +0x18(%x17)[8byte]", - "st1b %z21.s %p6 -> +0x28(%x22)[8byte]", - "st1b %z31.s %p7 
-> +0x38(%sp)[8byte]", + "st1b %z0.s %p0 -> -0x40(%x0)[1byte]", + "st1b %z5.s %p2 -> -0x18(%x7)[1byte]", + "st1b %z10.s %p3 -> (%x12)[1byte]", + "st1b %z16.s %p5 -> +0x18(%x17)[1byte]", + "st1b %z21.s %p6 -> +0x28(%x22)[1byte]", + "st1b %z31.s %p7 -> +0x38(%sp)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_5_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_1)); const char *const expected_5_3[6] = { - "st1b %z0.d %p0 -> -0x20(%x0)[4byte]", - "st1b %z5.d %p2 -> -0x0c(%x7)[4byte]", - "st1b %z10.d %p3 -> (%x12)[4byte]", - "st1b %z16.d %p5 -> +0x0c(%x17)[4byte]", - "st1b %z21.d %p6 -> +0x14(%x22)[4byte]", - "st1b %z31.d %p7 -> +0x1c(%sp)[4byte]", + "st1b %z0.d %p0 -> -0x20(%x0)[1byte]", + "st1b %z5.d %p2 -> -0x0c(%x7)[1byte]", + "st1b %z10.d %p3 -> (%x12)[1byte]", + "st1b %z16.d %p5 -> +0x0c(%x17)[1byte]", + "st1b %z21.d %p6 -> +0x14(%x22)[1byte]", + "st1b %z31.d %p7 -> +0x1c(%sp)[1byte]", }; TEST_LOOP(st1b, st1b_sve_pred, 6, expected_5_3[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 4 * imm4[i], - OPSZ_4)); + OPSZ_1)); } TEST_INSTR(stnt1b_sve_pred) { /* Testing STNT1B { .B }, , [, ] */ const char *const expected_0_0[6] = { - "stnt1b %z0.b %p0 -> (%x0,%x0)[32byte]", - "stnt1b %z5.b %p2 -> (%x7,%x8)[32byte]", - "stnt1b %z10.b %p3 -> (%x12,%x13)[32byte]", - "stnt1b %z16.b %p5 -> (%x17,%x18)[32byte]", - "stnt1b %z21.b %p6 -> (%x22,%x23)[32byte]", - "stnt1b %z31.b %p7 -> (%sp,%x30)[32byte]", + "stnt1b %z0.b %p0 -> (%x0,%x0)[1byte]", + "stnt1b %z5.b %p2 -> (%x7,%x8)[1byte]", + "stnt1b %z10.b %p3 -> (%x12,%x13)[1byte]", + "stnt1b %z16.b %p5 -> (%x17,%x18)[1byte]", + "stnt1b %z21.b %p6 -> (%x22,%x23)[1byte]", + "stnt1b %z31.b %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(stnt1b, stnt1b_sve_pred, 6, 
expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing STNT1B { .B }, , [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "stnt1b %z0.b %p0 -> -0x0100(%x0)[32byte]", - "stnt1b %z5.b %p2 -> -0x60(%x7)[32byte]", - "stnt1b %z10.b %p3 -> (%x12)[32byte]", - "stnt1b %z16.b %p5 -> +0x60(%x17)[32byte]", - "stnt1b %z21.b %p6 -> +0xa0(%x22)[32byte]", - "stnt1b %z31.b %p7 -> +0xe0(%sp)[32byte]", + "stnt1b %z0.b %p0 -> -0x0100(%x0)[1byte]", + "stnt1b %z5.b %p2 -> -0x60(%x7)[1byte]", + "stnt1b %z10.b %p3 -> (%x12)[1byte]", + "stnt1b %z16.b %p5 -> +0x60(%x17)[1byte]", + "stnt1b %z21.b %p6 -> +0xa0(%x22)[1byte]", + "stnt1b %z31.b %p7 -> +0xe0(%sp)[1byte]", }; TEST_LOOP(stnt1b, stnt1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_32)); + OPSZ_1)); } TEST_INSTR(bfcvt_sve_pred) @@ -17060,69 +17059,69 @@ TEST_INSTR(ld2b_sve_pred) { /* Testing LD2B { .B, .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld2b (%x0,%x0)[64byte] %p0/z -> %z0.b %z1.b", - "ld2b (%x7,%x8)[64byte] %p2/z -> %z5.b %z6.b", - "ld2b (%x12,%x13)[64byte] %p3/z -> %z10.b %z11.b", - "ld2b (%x17,%x18)[64byte] %p5/z -> %z16.b %z17.b", - "ld2b (%x22,%x23)[64byte] %p6/z -> %z21.b %z22.b", - "ld2b (%sp,%x30)[64byte] %p7/z -> %z31.b %z0.b", + "ld2b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b", + "ld2b (%x7,%x8)[1byte] %p2/z -> %z5.b %z6.b", + "ld2b (%x12,%x13)[1byte] %p3/z -> %z10.b %z11.b", + "ld2b (%x17,%x18)[1byte] %p5/z -> %z16.b %z17.b", + "ld2b (%x22,%x23)[1byte] %p6/z -> %z21.b %z22.b", + "ld2b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b", }; TEST_LOOP(ld2b, 
ld2b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_64)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD2B { .B, .B }, /Z, [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "ld2b -0x0200(%x0)[64byte] %p0/z -> %z0.b %z1.b", - "ld2b -0xc0(%x7)[64byte] %p2/z -> %z5.b %z6.b", - "ld2b (%x12)[64byte] %p3/z -> %z10.b %z11.b", - "ld2b +0xc0(%x17)[64byte] %p5/z -> %z16.b %z17.b", - "ld2b +0x0140(%x22)[64byte] %p6/z -> %z21.b %z22.b", - "ld2b +0x01c0(%sp)[64byte] %p7/z -> %z31.b %z0.b", + "ld2b -0x0200(%x0)[1byte] %p0/z -> %z0.b %z1.b", + "ld2b -0xc0(%x7)[1byte] %p2/z -> %z5.b %z6.b", + "ld2b (%x12)[1byte] %p3/z -> %z10.b %z11.b", + "ld2b +0xc0(%x17)[1byte] %p5/z -> %z16.b %z17.b", + "ld2b +0x0140(%x22)[1byte] %p6/z -> %z21.b %z22.b", + "ld2b +0x01c0(%sp)[1byte] %p7/z -> %z31.b %z0.b", }; TEST_LOOP(ld2b, ld2b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_64)); + OPSZ_1)); } TEST_INSTR(ld3b_sve_pred) { /* Testing LD3B { .B, .B, .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld3b (%x0,%x0)[96byte] %p0/z -> %z0.b %z1.b %z2.b", - "ld3b (%x7,%x8)[96byte] %p2/z -> %z5.b %z6.b %z7.b", - "ld3b (%x12,%x13)[96byte] %p3/z -> %z10.b %z11.b %z12.b", - "ld3b (%x17,%x18)[96byte] %p5/z -> %z16.b %z17.b %z18.b", - "ld3b (%x22,%x23)[96byte] %p6/z -> %z21.b %z22.b %z23.b", - "ld3b (%sp,%x30)[96byte] %p7/z -> %z31.b %z0.b %z1.b", + "ld3b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b", + "ld3b (%x7,%x8)[1byte] %p2/z -> %z5.b %z6.b %z7.b", + "ld3b (%x12,%x13)[1byte] %p3/z -> %z10.b %z11.b %z12.b", + "ld3b (%x17,%x18)[1byte] 
%p5/z -> %z16.b %z17.b %z18.b", + "ld3b (%x22,%x23)[1byte] %p6/z -> %z21.b %z22.b %z23.b", + "ld3b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b %z1.b", }; TEST_LOOP(ld3b, ld3b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_96)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD3B { .B, .B, .B }, /Z, [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "ld3b -0x0300(%x0)[96byte] %p0/z -> %z0.b %z1.b %z2.b", - "ld3b -0x0120(%x7)[96byte] %p2/z -> %z5.b %z6.b %z7.b", - "ld3b (%x12)[96byte] %p3/z -> %z10.b %z11.b %z12.b", - "ld3b +0x0120(%x17)[96byte] %p5/z -> %z16.b %z17.b %z18.b", - "ld3b +0x01e0(%x22)[96byte] %p6/z -> %z21.b %z22.b %z23.b", - "ld3b +0x02a0(%sp)[96byte] %p7/z -> %z31.b %z0.b %z1.b", + "ld3b -0x0300(%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b", + "ld3b -0x0120(%x7)[1byte] %p2/z -> %z5.b %z6.b %z7.b", + "ld3b (%x12)[1byte] %p3/z -> %z10.b %z11.b %z12.b", + "ld3b +0x0120(%x17)[1byte] %p5/z -> %z16.b %z17.b %z18.b", + "ld3b +0x01e0(%x22)[1byte] %p6/z -> %z21.b %z22.b %z23.b", + "ld3b +0x02a0(%sp)[1byte] %p7/z -> %z31.b %z0.b %z1.b", }; TEST_LOOP(ld3b, ld3b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_96)); + OPSZ_1)); } TEST_INSTR(ld4b_sve_pred) @@ -17130,140 +17129,139 @@ TEST_INSTR(ld4b_sve_pred) /* Testing LD4B { .B, .B, .B, .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld4b (%x0,%x0)[128byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b", - "ld4b (%x7,%x8)[128byte] %p2/z -> %z5.b %z6.b %z7.b %z8.b", - "ld4b (%x12,%x13)[128byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b", - "ld4b (%x17,%x18)[128byte] %p5/z -> 
%z16.b %z17.b %z18.b %z19.b", - "ld4b (%x22,%x23)[128byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b", - "ld4b (%sp,%x30)[128byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b", + "ld4b (%x0,%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b", + "ld4b (%x7,%x8)[1byte] %p2/z -> %z5.b %z6.b %z7.b %z8.b", + "ld4b (%x12,%x13)[1byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b", + "ld4b (%x17,%x18)[1byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b", + "ld4b (%x22,%x23)[1byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b", + "ld4b (%sp,%x30)[1byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b", }; TEST_LOOP(ld4b, ld4b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_128)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing LD4B { .B, .B, .B, .B }, /Z, [{, #, * MUL VL}] */ static const int imm4_1_0[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "ld4b -0x0400(%x0)[128byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b", - "ld4b -0x0180(%x7)[128byte] %p2/z -> %z5.b %z6.b %z7.b %z8.b", - "ld4b (%x12)[128byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b", - "ld4b +0x0180(%x17)[128byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b", - "ld4b +0x0280(%x22)[128byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b", - "ld4b +0x0380(%sp)[128byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b", + "ld4b -0x0400(%x0)[1byte] %p0/z -> %z0.b %z1.b %z2.b %z3.b", + "ld4b -0x0180(%x7)[1byte] %p2/z -> %z5.b %z6.b %z7.b %z8.b", + "ld4b (%x12)[1byte] %p3/z -> %z10.b %z11.b %z12.b %z13.b", + "ld4b +0x0180(%x17)[1byte] %p5/z -> %z16.b %z17.b %z18.b %z19.b", + "ld4b +0x0280(%x22)[1byte] %p6/z -> %z21.b %z22.b %z23.b %z24.b", + "ld4b +0x0380(%sp)[1byte] %p7/z -> %z31.b %z0.b %z1.b %z2.b", }; TEST_LOOP(ld4b, ld4b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_128)); + OPSZ_1)); } TEST_INSTR(st2b_sve_pred) { /* Testing ST2B { .B, .B }, , [, ] */ const char *const expected_0_0[6] = { - "st2b %z0.b %z1.b %p0 -> (%x0,%x0)[64byte]", - "st2b %z5.b %z6.b %p2 -> (%x7,%x8)[64byte]", - "st2b %z10.b %z11.b %p3 -> (%x12,%x13)[64byte]", - "st2b %z16.b %z17.b %p5 -> (%x17,%x18)[64byte]", - "st2b %z21.b %z22.b %p6 -> (%x22,%x23)[64byte]", - "st2b %z31.b %z0.b %p7 -> (%sp,%x30)[64byte]", + "st2b %z0.b %z1.b %p0 -> (%x0,%x0)[1byte]", + "st2b %z5.b %z6.b %p2 -> (%x7,%x8)[1byte]", + "st2b %z10.b %z11.b %p3 -> (%x12,%x13)[1byte]", + "st2b %z16.b %z17.b %p5 -> (%x17,%x18)[1byte]", + "st2b %z21.b %z22.b %p6 -> (%x22,%x23)[1byte]", + "st2b %z31.b %z0.b %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st2b, st2b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_64)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing ST2B { .B, .B }, , [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "st2b %z0.b %z1.b %p0 -> -0x0200(%x0)[64byte]", - "st2b %z5.b %z6.b %p2 -> -0xc0(%x7)[64byte]", - "st2b %z10.b %z11.b %p3 -> (%x12)[64byte]", - "st2b %z16.b %z17.b %p5 -> +0xc0(%x17)[64byte]", - "st2b %z21.b %z22.b %p6 -> +0x0140(%x22)[64byte]", - "st2b %z31.b %z0.b %p7 -> +0x01c0(%sp)[64byte]", + "st2b %z0.b %z1.b %p0 -> -0x0200(%x0)[1byte]", + "st2b %z5.b %z6.b %p2 -> -0xc0(%x7)[1byte]", + "st2b %z10.b %z11.b %p3 -> (%x12)[1byte]", + "st2b %z16.b %z17.b %p5 -> +0xc0(%x17)[1byte]", + "st2b %z21.b %z22.b %p6 -> +0x0140(%x22)[1byte]", + "st2b %z31.b %z0.b %p7 -> +0x01c0(%sp)[1byte]", }; TEST_LOOP(st2b, st2b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_64)); + OPSZ_1)); } TEST_INSTR(st3b_sve_pred) { /* Testing ST3B { .B, .B, .B }, , [, ] */ const char *const expected_0_0[6] = { - "st3b %z0.b %z1.b %z2.b %p0 -> (%x0,%x0)[96byte]", - "st3b %z5.b %z6.b %z7.b %p2 -> (%x7,%x8)[96byte]", - "st3b %z10.b %z11.b %z12.b %p3 -> (%x12,%x13)[96byte]", - "st3b %z16.b %z17.b %z18.b %p5 -> (%x17,%x18)[96byte]", - "st3b %z21.b %z22.b %z23.b %p6 -> (%x22,%x23)[96byte]", - "st3b %z31.b %z0.b %z1.b %p7 -> (%sp,%x30)[96byte]", + "st3b %z0.b %z1.b %z2.b %p0 -> (%x0,%x0)[1byte]", + "st3b %z5.b %z6.b %z7.b %p2 -> (%x7,%x8)[1byte]", + "st3b %z10.b %z11.b %z12.b %p3 -> (%x12,%x13)[1byte]", + "st3b %z16.b %z17.b %z18.b %p5 -> (%x17,%x18)[1byte]", + "st3b %z21.b %z22.b %z23.b %p6 -> (%x22,%x23)[1byte]", + "st3b %z31.b %z0.b %z1.b %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st3b, st3b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_96)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing ST3B { .B, .B, .B }, , [{, #, MUL VL}] */ static const int imm4_1_0[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "st3b %z0.b %z1.b %z2.b %p0 -> -0x0300(%x0)[96byte]", - "st3b %z5.b %z6.b %z7.b %p2 -> -0x0120(%x7)[96byte]", - "st3b %z10.b %z11.b %z12.b %p3 -> (%x12)[96byte]", - "st3b %z16.b %z17.b %z18.b %p5 -> +0x0120(%x17)[96byte]", - "st3b %z21.b %z22.b %z23.b %p6 -> +0x01e0(%x22)[96byte]", - "st3b %z31.b %z0.b %z1.b %p7 -> +0x02a0(%sp)[96byte]", + "st3b %z0.b %z1.b %z2.b %p0 -> -0x0300(%x0)[1byte]", + "st3b %z5.b %z6.b %z7.b %p2 -> -0x0120(%x7)[1byte]", + "st3b %z10.b %z11.b %z12.b %p3 -> (%x12)[1byte]", + "st3b %z16.b %z17.b %z18.b %p5 -> +0x0120(%x17)[1byte]", + "st3b %z21.b %z22.b %z23.b %p6 -> +0x01e0(%x22)[1byte]", + "st3b %z31.b %z0.b %z1.b %p7 -> 
+0x02a0(%sp)[1byte]", }; TEST_LOOP(st3b, st3b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_96)); + OPSZ_1)); } TEST_INSTR(st4b_sve_pred) { - /* Testing ST4B { .B, .B, .B, .B }, , [, ] */ const char *const expected_0_0[6] = { - "st4b %z0.b %z1.b %z2.b %z3.b %p0 -> (%x0,%x0)[128byte]", - "st4b %z5.b %z6.b %z7.b %z8.b %p2 -> (%x7,%x8)[128byte]", - "st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x12,%x13)[128byte]", - "st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17,%x18)[128byte]", - "st4b %z21.b %z22.b %z23.b %z24.b %p6 -> (%x22,%x23)[128byte]", - "st4b %z31.b %z0.b %z1.b %z2.b %p7 -> (%sp,%x30)[128byte]", + "st4b %z0.b %z1.b %z2.b %z3.b %p0 -> (%x0,%x0)[1byte]", + "st4b %z5.b %z6.b %z7.b %z8.b %p2 -> (%x7,%x8)[1byte]", + "st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x12,%x13)[1byte]", + "st4b %z16.b %z17.b %z18.b %z19.b %p5 -> (%x17,%x18)[1byte]", + "st4b %z21.b %z22.b %z23.b %z24.b %p6 -> (%x22,%x23)[1byte]", + "st4b %z31.b %z0.b %z1.b %z2.b %p7 -> (%sp,%x30)[1byte]", }; TEST_LOOP(st4b, st4b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_128)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)); /* Testing ST4B { .B, .B, .B, .B }, , [{, #, * MUL VL}] */ static const int imm4_1_0[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "st4b %z0.b %z1.b %z2.b %z3.b %p0 -> -0x0400(%x0)[128byte]", - "st4b %z5.b %z6.b %z7.b %z8.b %p2 -> -0x0180(%x7)[128byte]", - "st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x12)[128byte]", - "st4b %z16.b %z17.b %z18.b %z19.b %p5 -> +0x0180(%x17)[128byte]", - "st4b %z21.b %z22.b %z23.b %z24.b %p6 -> +0x0280(%x22)[128byte]", - "st4b %z31.b %z0.b %z1.b %z2.b %p7 -> +0x0380(%sp)[128byte]", + 
"st4b %z0.b %z1.b %z2.b %z3.b %p0 -> -0x0400(%x0)[1byte]", + "st4b %z5.b %z6.b %z7.b %z8.b %p2 -> -0x0180(%x7)[1byte]", + "st4b %z10.b %z11.b %z12.b %z13.b %p3 -> (%x12)[1byte]", + "st4b %z16.b %z17.b %z18.b %z19.b %p5 -> +0x0180(%x17)[1byte]", + "st4b %z21.b %z22.b %z23.b %z24.b %p6 -> +0x0280(%x22)[1byte]", + "st4b %z31.b %z0.b %z1.b %z2.b %p7 -> +0x0380(%sp)[1byte]", }; TEST_LOOP(st4b, st4b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], - OPSZ_128)); + OPSZ_1)); } TEST_INSTR(ld1h_sve_pred) @@ -17271,285 +17269,285 @@ TEST_INSTR(ld1h_sve_pred) /* Testing LD1H { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 16, 26, 38, 48, 62 }; const char *const expected_0_0[6] = { - "ld1h (%z0.s)[16byte] %p0/z -> %z0.s", - "ld1h +0x10(%z7.s)[16byte] %p2/z -> %z5.s", - "ld1h +0x1a(%z12.s)[16byte] %p3/z -> %z10.s", - "ld1h +0x26(%z18.s)[16byte] %p5/z -> %z16.s", - "ld1h +0x30(%z23.s)[16byte] %p6/z -> %z21.s", - "ld1h +0x3e(%z31.s)[16byte] %p7/z -> %z31.s", + "ld1h (%z0.s)[2byte] %p0/z -> %z0.s", + "ld1h +0x10(%z7.s)[2byte] %p2/z -> %z5.s", + "ld1h +0x1a(%z12.s)[2byte] %p3/z -> %z10.s", + "ld1h +0x26(%z18.s)[2byte] %p5/z -> %z16.s", + "ld1h +0x30(%z23.s)[2byte] %p6/z -> %z21.s", + "ld1h +0x3e(%z31.s)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_2, 0)); /* Testing LD1H { .D }, /Z, [.D{, #}] */ const char *const expected_0_1[6] = { - "ld1h (%z0.d)[8byte] %p0/z -> %z0.d", - "ld1h +0x10(%z7.d)[8byte] %p2/z -> %z5.d", - "ld1h +0x1a(%z12.d)[8byte] %p3/z -> %z10.d", - "ld1h +0x26(%z18.d)[8byte] %p5/z -> %z16.d", - "ld1h +0x30(%z23.d)[8byte] 
%p6/z -> %z21.d", - "ld1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d", + "ld1h (%z0.d)[2byte] %p0/z -> %z0.d", + "ld1h +0x10(%z7.d)[2byte] %p2/z -> %z5.d", + "ld1h +0x1a(%z12.d)[2byte] %p3/z -> %z10.d", + "ld1h +0x26(%z18.d)[2byte] %p5/z -> %z16.d", + "ld1h +0x30(%z23.d)[2byte] %p6/z -> %z21.d", + "ld1h +0x3e(%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LD1H { .D }, /Z, [, .D, LSL #1] */ const char *const expected_1_0[6] = { - "ld1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", - "ld1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,lsl #1)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1H { .D }, /Z, [, .D] */ const char *const expected_2_0[6] = { - "ld1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%z24.d)[8byte] %p6/z -> %z21.d", 
- "ld1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1H { .D }, /Z, [, .D, #1] */ const char *const expected_3_0[6] = { - "ld1h (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d,uxtw #1)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d", - "ld1h (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,uxtw #1)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_3_1[6] = { - "ld1h (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d,sxtw #1)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%z24.d,sxtw #1)[8byte] 
%p6/z -> %z21.d", - "ld1h (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,sxtw #1)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1H { .D }, /Z, [, .D, ] */ const char *const expected_4_0[6] = { - "ld1h (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d,uxtw)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d,uxtw)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%z24.d,uxtw)[8byte] %p6/z -> %z21.d", - "ld1h (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,uxtw)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,uxtw)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,uxtw)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_4_1[6] = { - "ld1h (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%z8.d,sxtw)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%z13.d,sxtw)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d", - "ld1h 
(%x22,%z24.d,sxtw)[8byte] %p6/z -> %z21.d", - "ld1h (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,sxtw)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,sxtw)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,sxtw)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1H { .S }, /Z, [, .S, #1] */ const char *const expected_5_0[6] = { - "ld1h (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s", - "ld1h (%x7,%z8.s,uxtw #1)[16byte] %p2/z -> %z5.s", - "ld1h (%x12,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s", - "ld1h (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s", - "ld1h (%x22,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s", - "ld1h (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s", + "ld1h (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s", + "ld1h (%x7,%z8.s,uxtw #1)[2byte] %p2/z -> %z5.s", + "ld1h (%x12,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s", + "ld1h (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s", + "ld1h (%x22,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s", + "ld1h (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_5_1[6] = { - "ld1h (%x0,%z0.s,sxtw #1)[16byte] %p0/z -> %z0.s", - "ld1h (%x7,%z8.s,sxtw #1)[16byte] %p2/z -> %z5.s", - "ld1h (%x12,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s", - 
"ld1h (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s", - "ld1h (%x22,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s", - "ld1h (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s", + "ld1h (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s", + "ld1h (%x7,%z8.s,sxtw #1)[2byte] %p2/z -> %z5.s", + "ld1h (%x12,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s", + "ld1h (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s", + "ld1h (%x22,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s", + "ld1h (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1H { .S }, /Z, [, .S, ] */ const char *const expected_6_0[6] = { - "ld1h (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s", - "ld1h (%x7,%z8.s,uxtw)[16byte] %p2/z -> %z5.s", - "ld1h (%x12,%z13.s,uxtw)[16byte] %p3/z -> %z10.s", - "ld1h (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s", - "ld1h (%x22,%z24.s,uxtw)[16byte] %p6/z -> %z21.s", - "ld1h (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s", + "ld1h (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s", + "ld1h (%x7,%z8.s,uxtw)[2byte] %p2/z -> %z5.s", + "ld1h (%x12,%z13.s,uxtw)[2byte] %p3/z -> %z10.s", + "ld1h (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s", + "ld1h (%x22,%z24.s,uxtw)[2byte] %p6/z -> %z21.s", + "ld1h (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_6_1[6] = { - "ld1h (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s", - "ld1h (%x7,%z8.s,sxtw)[16byte] %p2/z -> %z5.s", - "ld1h 
(%x12,%z13.s,sxtw)[16byte] %p3/z -> %z10.s", - "ld1h (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s", - "ld1h (%x22,%z24.s,sxtw)[16byte] %p6/z -> %z21.s", - "ld1h (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s", + "ld1h (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s", + "ld1h (%x7,%z8.s,sxtw)[2byte] %p2/z -> %z5.s", + "ld1h (%x12,%z13.s,sxtw)[2byte] %p3/z -> %z10.s", + "ld1h (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s", + "ld1h (%x22,%z24.s,sxtw)[2byte] %p6/z -> %z21.s", + "ld1h (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1H { .H }, /Z, [, , LSL #1] */ const char *const expected_7_0[6] = { - "ld1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h", - "ld1h (%x7,%x8,lsl #1)[32byte] %p2/z -> %z5.h", - "ld1h (%x12,%x13,lsl #1)[32byte] %p3/z -> %z10.h", - "ld1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h", - "ld1h (%x22,%x23,lsl #1)[32byte] %p6/z -> %z21.h", - "ld1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h", + "ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h", + "ld1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h", + "ld1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h", + "ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h", + "ld1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h", + "ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1H { .S }, /Z, [, , LSL #1] */ const char *const expected_8_0[6] = { - "ld1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", - 
"ld1h (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", - "ld1h (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", - "ld1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", - "ld1h (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", - "ld1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + "ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s", + "ld1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.s", + "ld1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.s", + "ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s", + "ld1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.s", + "ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1H { .D }, /Z, [, , LSL #1] */ const char *const expected_9_0[6] = { - "ld1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", - "ld1h (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", - "ld1h (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", - "ld1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", - "ld1h (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", - "ld1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + "ld1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d", + "ld1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.d", + "ld1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.d", + "ld1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d", + "ld1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.d", + "ld1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_9_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_8, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1H { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; 
const char *const expected_10_0[6] = { - "ld1h -0x0100(%x0)[32byte] %p0/z -> %z0.h", - "ld1h -0x60(%x7)[32byte] %p2/z -> %z5.h", - "ld1h (%x12)[32byte] %p3/z -> %z10.h", - "ld1h +0x60(%x17)[32byte] %p5/z -> %z16.h", - "ld1h +0xa0(%x22)[32byte] %p6/z -> %z21.h", - "ld1h +0xe0(%sp)[32byte] %p7/z -> %z31.h", + "ld1h -0x0100(%x0)[2byte] %p0/z -> %z0.h", + "ld1h -0x60(%x7)[2byte] %p2/z -> %z5.h", + "ld1h (%x12)[2byte] %p3/z -> %z10.h", + "ld1h +0x60(%x17)[2byte] %p5/z -> %z16.h", + "ld1h +0xa0(%x22)[2byte] %p6/z -> %z21.h", + "ld1h +0xe0(%sp)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_10_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_2)); /* Testing LD1H { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_11_0[6] = { - "ld1h -0x80(%x0)[16byte] %p0/z -> %z0.s", - "ld1h -0x30(%x7)[16byte] %p2/z -> %z5.s", - "ld1h (%x12)[16byte] %p3/z -> %z10.s", - "ld1h +0x30(%x17)[16byte] %p5/z -> %z16.s", - "ld1h +0x50(%x22)[16byte] %p6/z -> %z21.s", - "ld1h +0x70(%sp)[16byte] %p7/z -> %z31.s", + "ld1h -0x80(%x0)[2byte] %p0/z -> %z0.s", + "ld1h -0x30(%x7)[2byte] %p2/z -> %z5.s", + "ld1h (%x12)[2byte] %p3/z -> %z10.s", + "ld1h +0x30(%x17)[2byte] %p5/z -> %z16.s", + "ld1h +0x50(%x22)[2byte] %p6/z -> %z21.s", + "ld1h +0x70(%sp)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_11_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_2)); /* Testing LD1H { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_12_0[6] = { - "ld1h -0x40(%x0)[8byte] %p0/z -> %z0.d", - "ld1h -0x18(%x7)[8byte] %p2/z -> %z5.d", - "ld1h (%x12)[8byte] %p3/z -> %z10.d", - "ld1h +0x18(%x17)[8byte] %p5/z -> %z16.d", - "ld1h 
+0x28(%x22)[8byte] %p6/z -> %z21.d", - "ld1h +0x38(%sp)[8byte] %p7/z -> %z31.d", + "ld1h -0x40(%x0)[2byte] %p0/z -> %z0.d", + "ld1h -0x18(%x7)[2byte] %p2/z -> %z5.d", + "ld1h (%x12)[2byte] %p3/z -> %z10.d", + "ld1h +0x18(%x17)[2byte] %p5/z -> %z16.d", + "ld1h +0x28(%x22)[2byte] %p6/z -> %z21.d", + "ld1h +0x38(%sp)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_12_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_2)); } TEST_INSTR(ld1sh_sve_pred) @@ -17557,508 +17555,509 @@ TEST_INSTR(ld1sh_sve_pred) /* Testing LD1SH { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 16, 26, 38, 48, 62 }; const char *const expected_0_0[6] = { - "ld1sh (%z0.s)[16byte] %p0/z -> %z0.s", - "ld1sh +0x10(%z7.s)[16byte] %p2/z -> %z5.s", - "ld1sh +0x1a(%z12.s)[16byte] %p3/z -> %z10.s", - "ld1sh +0x26(%z18.s)[16byte] %p5/z -> %z16.s", - "ld1sh +0x30(%z23.s)[16byte] %p6/z -> %z21.s", - "ld1sh +0x3e(%z31.s)[16byte] %p7/z -> %z31.s", + "ld1sh (%z0.s)[2byte] %p0/z -> %z0.s", + "ld1sh +0x10(%z7.s)[2byte] %p2/z -> %z5.s", + "ld1sh +0x1a(%z12.s)[2byte] %p3/z -> %z10.s", + "ld1sh +0x26(%z18.s)[2byte] %p5/z -> %z16.s", + "ld1sh +0x30(%z23.s)[2byte] %p6/z -> %z21.s", + "ld1sh +0x3e(%z31.s)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_2, 0)); /* Testing LD1SH { .D }, /Z, [.D{, #}] */ const char *const expected_0_1[6] = { - "ld1sh (%z0.d)[8byte] %p0/z -> %z0.d", - "ld1sh +0x10(%z7.d)[8byte] %p2/z -> %z5.d", - "ld1sh +0x1a(%z12.d)[8byte] %p3/z -> %z10.d", - "ld1sh +0x26(%z18.d)[8byte] %p5/z -> %z16.d", - "ld1sh 
+0x30(%z23.d)[8byte] %p6/z -> %z21.d", - "ld1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d", + "ld1sh (%z0.d)[2byte] %p0/z -> %z0.d", + "ld1sh +0x10(%z7.d)[2byte] %p2/z -> %z5.d", + "ld1sh +0x1a(%z12.d)[2byte] %p3/z -> %z10.d", + "ld1sh +0x26(%z18.d)[2byte] %p5/z -> %z16.d", + "ld1sh +0x30(%z23.d)[2byte] %p6/z -> %z21.d", + "ld1sh +0x3e(%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LD1SH { .D }, /Z, [, .D, LSL #1] */ const char *const expected_1_0[6] = { - "ld1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d,lsl #1)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,lsl #1)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,lsl #1)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,lsl #1)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,lsl #1)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1SH { .D }, /Z, [, .D] */ const char *const expected_2_0[6] = { - "ld1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%z13.d)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%z19.d)[8byte] %p5/z -> 
%z16.d", - "ld1sh (%x22,%z24.d)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1SH { .D }, /Z, [, .D, #1] */ const char *const expected_3_0[6] = { - "ld1sh (%x0,%z0.d,uxtw #1)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d,uxtw #1)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%z13.d,uxtw #1)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%z19.d,uxtw #1)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%z24.d,uxtw #1)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d,uxtw #1)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d,uxtw #1)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,uxtw #1)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,uxtw #1)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,uxtw #1)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,uxtw #1)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,uxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_3_1[6] = { - "ld1sh (%x0,%z0.d,sxtw #1)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d,sxtw #1)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%z13.d,sxtw #1)[8byte] %p3/z -> %z10.d", - "ld1sh 
(%x17,%z19.d,sxtw #1)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%z24.d,sxtw #1)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d,sxtw #1)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d,sxtw #1)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,sxtw #1)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,sxtw #1)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,sxtw #1)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,sxtw #1)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,sxtw #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1SH { .D }, /Z, [, .D, ] */ const char *const expected_4_0[6] = { - "ld1sh (%x0,%z0.d,uxtw)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d,uxtw)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%z13.d,uxtw)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%z19.d,uxtw)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%z24.d,uxtw)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d,uxtw)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d,uxtw)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,uxtw)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,uxtw)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,uxtw)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,uxtw)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,uxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_4_1[6] = { - "ld1sh (%x0,%z0.d,sxtw)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%z8.d,sxtw)[8byte] %p2/z -> %z5.d", - "ld1sh 
(%x12,%z13.d,sxtw)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%z19.d,sxtw)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%z24.d,sxtw)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%z31.d,sxtw)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%z0.d,sxtw)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,sxtw)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,sxtw)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,sxtw)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,sxtw)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,sxtw)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1SH { .S }, /Z, [, .S, #1] */ const char *const expected_5_0[6] = { - "ld1sh (%x0,%z0.s,uxtw #1)[16byte] %p0/z -> %z0.s", - "ld1sh (%x7,%z8.s,uxtw #1)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12,%z13.s,uxtw #1)[16byte] %p3/z -> %z10.s", - "ld1sh (%x17,%z19.s,uxtw #1)[16byte] %p5/z -> %z16.s", - "ld1sh (%x22,%z24.s,uxtw #1)[16byte] %p6/z -> %z21.s", - "ld1sh (%sp,%z31.s,uxtw #1)[16byte] %p7/z -> %z31.s", + "ld1sh (%x0,%z0.s,uxtw #1)[2byte] %p0/z -> %z0.s", + "ld1sh (%x7,%z8.s,uxtw #1)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12,%z13.s,uxtw #1)[2byte] %p3/z -> %z10.s", + "ld1sh (%x17,%z19.s,uxtw #1)[2byte] %p5/z -> %z16.s", + "ld1sh (%x22,%z24.s,uxtw #1)[2byte] %p6/z -> %z21.s", + "ld1sh (%sp,%z31.s,uxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_5_1[6] = { - "ld1sh (%x0,%z0.s,sxtw #1)[16byte] 
%p0/z -> %z0.s", - "ld1sh (%x7,%z8.s,sxtw #1)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12,%z13.s,sxtw #1)[16byte] %p3/z -> %z10.s", - "ld1sh (%x17,%z19.s,sxtw #1)[16byte] %p5/z -> %z16.s", - "ld1sh (%x22,%z24.s,sxtw #1)[16byte] %p6/z -> %z21.s", - "ld1sh (%sp,%z31.s,sxtw #1)[16byte] %p7/z -> %z31.s", + "ld1sh (%x0,%z0.s,sxtw #1)[2byte] %p0/z -> %z0.s", + "ld1sh (%x7,%z8.s,sxtw #1)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12,%z13.s,sxtw #1)[2byte] %p3/z -> %z10.s", + "ld1sh (%x17,%z19.s,sxtw #1)[2byte] %p5/z -> %z16.s", + "ld1sh (%x22,%z24.s,sxtw #1)[2byte] %p6/z -> %z21.s", + "ld1sh (%sp,%z31.s,sxtw #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing LD1SH { .S }, /Z, [, .S, ] */ const char *const expected_6_0[6] = { - "ld1sh (%x0,%z0.s,uxtw)[16byte] %p0/z -> %z0.s", - "ld1sh (%x7,%z8.s,uxtw)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12,%z13.s,uxtw)[16byte] %p3/z -> %z10.s", - "ld1sh (%x17,%z19.s,uxtw)[16byte] %p5/z -> %z16.s", - "ld1sh (%x22,%z24.s,uxtw)[16byte] %p6/z -> %z21.s", - "ld1sh (%sp,%z31.s,uxtw)[16byte] %p7/z -> %z31.s", + "ld1sh (%x0,%z0.s,uxtw)[2byte] %p0/z -> %z0.s", + "ld1sh (%x7,%z8.s,uxtw)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12,%z13.s,uxtw)[2byte] %p3/z -> %z10.s", + "ld1sh (%x17,%z19.s,uxtw)[2byte] %p5/z -> %z16.s", + "ld1sh (%x22,%z24.s,uxtw)[2byte] %p6/z -> %z21.s", + "ld1sh (%sp,%z31.s,uxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); 
const char *const expected_6_1[6] = { - "ld1sh (%x0,%z0.s,sxtw)[16byte] %p0/z -> %z0.s", - "ld1sh (%x7,%z8.s,sxtw)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12,%z13.s,sxtw)[16byte] %p3/z -> %z10.s", - "ld1sh (%x17,%z19.s,sxtw)[16byte] %p5/z -> %z16.s", - "ld1sh (%x22,%z24.s,sxtw)[16byte] %p6/z -> %z21.s", - "ld1sh (%sp,%z31.s,sxtw)[16byte] %p7/z -> %z31.s", + "ld1sh (%x0,%z0.s,sxtw)[2byte] %p0/z -> %z0.s", + "ld1sh (%x7,%z8.s,sxtw)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12,%z13.s,sxtw)[2byte] %p3/z -> %z10.s", + "ld1sh (%x17,%z19.s,sxtw)[2byte] %p5/z -> %z16.s", + "ld1sh (%x22,%z24.s,sxtw)[2byte] %p6/z -> %z21.s", + "ld1sh (%sp,%z31.s,sxtw)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing LD1SH { .S }, /Z, [, , LSL #1] */ const char *const expected_7_0[6] = { - "ld1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", - "ld1sh (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", - "ld1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", - "ld1sh (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", - "ld1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + "ld1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.s", + "ld1sh (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.s", + "ld1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.s", + "ld1sh (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.s", + "ld1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, 
OPSZ_16, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1SH { .D }, /Z, [, , LSL #1] */ const char *const expected_8_0[6] = { - "ld1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", - "ld1sh (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", - "ld1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", - "ld1sh (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", - "ld1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + "ld1sh (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.d", + "ld1sh (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.d", + "ld1sh (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.d", + "ld1sh (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.d", + "ld1sh (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_8, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD1SH { .S }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_9_0[6] = { - "ld1sh -0x80(%x0)[16byte] %p0/z -> %z0.s", - "ld1sh -0x30(%x7)[16byte] %p2/z -> %z5.s", - "ld1sh (%x12)[16byte] %p3/z -> %z10.s", - "ld1sh +0x30(%x17)[16byte] %p5/z -> %z16.s", - "ld1sh +0x50(%x22)[16byte] %p6/z -> %z21.s", - "ld1sh +0x70(%sp)[16byte] %p7/z -> %z31.s", + "ld1sh -0x80(%x0)[2byte] %p0/z -> %z0.s", + "ld1sh -0x30(%x7)[2byte] %p2/z -> %z5.s", + "ld1sh (%x12)[2byte] %p3/z -> %z10.s", + "ld1sh +0x30(%x17)[2byte] %p5/z -> %z16.s", + "ld1sh +0x50(%x22)[2byte] %p6/z -> %z21.s", + "ld1sh +0x70(%sp)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_9_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - 
OPSZ_16)); + OPSZ_2)); /* Testing LD1SH { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_10_0[6] = { - "ld1sh -0x40(%x0)[8byte] %p0/z -> %z0.d", - "ld1sh -0x18(%x7)[8byte] %p2/z -> %z5.d", - "ld1sh (%x12)[8byte] %p3/z -> %z10.d", - "ld1sh +0x18(%x17)[8byte] %p5/z -> %z16.d", - "ld1sh +0x28(%x22)[8byte] %p6/z -> %z21.d", - "ld1sh +0x38(%sp)[8byte] %p7/z -> %z31.d", + "ld1sh -0x40(%x0)[2byte] %p0/z -> %z0.d", + "ld1sh -0x18(%x7)[2byte] %p2/z -> %z5.d", + "ld1sh (%x12)[2byte] %p3/z -> %z10.d", + "ld1sh +0x18(%x17)[2byte] %p5/z -> %z16.d", + "ld1sh +0x28(%x22)[2byte] %p6/z -> %z21.d", + "ld1sh +0x38(%sp)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_10_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_2)); } TEST_INSTR(ld1w_sve_pred) { + /* Testing LD1W { .S }, /Z, [.S{, #}] */ static const uint imm5[6] = { 0, 32, 52, 76, 96, 124 }; const char *const expected_0_0[6] = { - "ld1w (%z0.s)[32byte] %p0/z -> %z0.s", - "ld1w +0x20(%z7.s)[32byte] %p2/z -> %z5.s", - "ld1w +0x34(%z12.s)[32byte] %p3/z -> %z10.s", - "ld1w +0x4c(%z18.s)[32byte] %p5/z -> %z16.s", - "ld1w +0x60(%z23.s)[32byte] %p6/z -> %z21.s", - "ld1w +0x7c(%z31.s)[32byte] %p7/z -> %z31.s", + "ld1w (%z0.s)[4byte] %p0/z -> %z0.s", + "ld1w +0x20(%z7.s)[4byte] %p2/z -> %z5.s", + "ld1w +0x34(%z12.s)[4byte] %p3/z -> %z10.s", + "ld1w +0x4c(%z18.s)[4byte] %p5/z -> %z16.s", + "ld1w +0x60(%z23.s)[4byte] %p6/z -> %z21.s", + "ld1w +0x7c(%z31.s)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_32, 0)); + OPSZ_4, 0)); /* Testing LD1W { .D }, /Z, [.D{, #}] */ const char 
*const expected_0_1[6] = { - "ld1w (%z0.d)[16byte] %p0/z -> %z0.d", - "ld1w +0x20(%z7.d)[16byte] %p2/z -> %z5.d", - "ld1w +0x34(%z12.d)[16byte] %p3/z -> %z10.d", - "ld1w +0x4c(%z18.d)[16byte] %p5/z -> %z16.d", - "ld1w +0x60(%z23.d)[16byte] %p6/z -> %z21.d", - "ld1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d", + "ld1w (%z0.d)[4byte] %p0/z -> %z0.d", + "ld1w +0x20(%z7.d)[4byte] %p2/z -> %z5.d", + "ld1w +0x34(%z12.d)[4byte] %p3/z -> %z10.d", + "ld1w +0x4c(%z18.d)[4byte] %p5/z -> %z16.d", + "ld1w +0x60(%z23.d)[4byte] %p6/z -> %z21.d", + "ld1w +0x7c(%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing LD1W { .D }, /Z, [, .D, LSL #2] */ const char *const expected_1_0[6] = { - "ld1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,lsl #2)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,lsl #2)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LD1W { .D }, /Z, [, .D] */ const char *const 
expected_2_0[6] = { - "ld1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LD1W { .D }, /Z, [, .D, #2] */ const char *const expected_3_0[6] = { - "ld1w (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d,uxtw #2)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,uxtw #2)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,uxtw #2)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); const char *const 
expected_3_1[6] = { - "ld1w (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d,sxtw #2)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,sxtw #2)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LD1W { .D }, /Z, [, .D, ] */ const char *const expected_4_0[6] = { - "ld1w (%x0,%z0.d,uxtw)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d,uxtw)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d,uxtw)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d,uxtw)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d,uxtw)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d,uxtw)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d,uxtw)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,uxtw)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,uxtw)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,uxtw)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,uxtw)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,uxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); 
+ false, 0, 0, OPSZ_4, 0)); const char *const expected_4_1[6] = { - "ld1w (%x0,%z0.d,sxtw)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%z8.d,sxtw)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%z13.d,sxtw)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%z19.d,sxtw)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%z24.d,sxtw)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%z31.d,sxtw)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%z0.d,sxtw)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,sxtw)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,sxtw)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,sxtw)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,sxtw)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,sxtw)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LD1W { .S }, /Z, [, .S, #2] */ const char *const expected_5_0[6] = { - "ld1w (%x0,%z0.s,uxtw #2)[32byte] %p0/z -> %z0.s", - "ld1w (%x7,%z8.s,uxtw #2)[32byte] %p2/z -> %z5.s", - "ld1w (%x12,%z13.s,uxtw #2)[32byte] %p3/z -> %z10.s", - "ld1w (%x17,%z19.s,uxtw #2)[32byte] %p5/z -> %z16.s", - "ld1w (%x22,%z24.s,uxtw #2)[32byte] %p6/z -> %z21.s", - "ld1w (%sp,%z31.s,uxtw #2)[32byte] %p7/z -> %z31.s", + "ld1w (%x0,%z0.s,uxtw #2)[4byte] %p0/z -> %z0.s", + "ld1w (%x7,%z8.s,uxtw #2)[4byte] %p2/z -> %z5.s", + "ld1w (%x12,%z13.s,uxtw #2)[4byte] %p3/z -> %z10.s", + "ld1w (%x17,%z19.s,uxtw #2)[4byte] %p5/z -> %z16.s", + "ld1w (%x22,%z24.s,uxtw #2)[4byte] %p6/z -> %z21.s", + "ld1w (%sp,%z31.s,uxtw #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], 
Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_5_1[6] = { - "ld1w (%x0,%z0.s,sxtw #2)[32byte] %p0/z -> %z0.s", - "ld1w (%x7,%z8.s,sxtw #2)[32byte] %p2/z -> %z5.s", - "ld1w (%x12,%z13.s,sxtw #2)[32byte] %p3/z -> %z10.s", - "ld1w (%x17,%z19.s,sxtw #2)[32byte] %p5/z -> %z16.s", - "ld1w (%x22,%z24.s,sxtw #2)[32byte] %p6/z -> %z21.s", - "ld1w (%sp,%z31.s,sxtw #2)[32byte] %p7/z -> %z31.s", + "ld1w (%x0,%z0.s,sxtw #2)[4byte] %p0/z -> %z0.s", + "ld1w (%x7,%z8.s,sxtw #2)[4byte] %p2/z -> %z5.s", + "ld1w (%x12,%z13.s,sxtw #2)[4byte] %p3/z -> %z10.s", + "ld1w (%x17,%z19.s,sxtw #2)[4byte] %p5/z -> %z16.s", + "ld1w (%x22,%z24.s,sxtw #2)[4byte] %p6/z -> %z21.s", + "ld1w (%sp,%z31.s,sxtw #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LD1W { .S }, /Z, [, .S, ] */ const char *const expected_6_0[6] = { - "ld1w (%x0,%z0.s,uxtw)[32byte] %p0/z -> %z0.s", - "ld1w (%x7,%z8.s,uxtw)[32byte] %p2/z -> %z5.s", - "ld1w (%x12,%z13.s,uxtw)[32byte] %p3/z -> %z10.s", - "ld1w (%x17,%z19.s,uxtw)[32byte] %p5/z -> %z16.s", - "ld1w (%x22,%z24.s,uxtw)[32byte] %p6/z -> %z21.s", - "ld1w (%sp,%z31.s,uxtw)[32byte] %p7/z -> %z31.s", + "ld1w (%x0,%z0.s,uxtw)[4byte] %p0/z -> %z0.s", + "ld1w (%x7,%z8.s,uxtw)[4byte] %p2/z -> %z5.s", + "ld1w (%x12,%z13.s,uxtw)[4byte] %p3/z -> %z10.s", + "ld1w (%x17,%z19.s,uxtw)[4byte] %p5/z -> %z16.s", + "ld1w (%x22,%z24.s,uxtw)[4byte] %p6/z -> %z21.s", + "ld1w (%sp,%z31.s,uxtw)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_6_1[6] = { - "ld1w (%x0,%z0.s,sxtw)[32byte] %p0/z -> %z0.s", - "ld1w (%x7,%z8.s,sxtw)[32byte] %p2/z -> %z5.s", - "ld1w (%x12,%z13.s,sxtw)[32byte] %p3/z -> %z10.s", - "ld1w (%x17,%z19.s,sxtw)[32byte] %p5/z -> %z16.s", - "ld1w (%x22,%z24.s,sxtw)[32byte] %p6/z -> %z21.s", - "ld1w (%sp,%z31.s,sxtw)[32byte] %p7/z -> %z31.s", + "ld1w (%x0,%z0.s,sxtw)[4byte] %p0/z -> %z0.s", + "ld1w (%x7,%z8.s,sxtw)[4byte] %p2/z -> %z5.s", + "ld1w (%x12,%z13.s,sxtw)[4byte] %p3/z -> %z10.s", + "ld1w (%x17,%z19.s,sxtw)[4byte] %p5/z -> %z16.s", + "ld1w (%x22,%z24.s,sxtw)[4byte] %p6/z -> %z21.s", + "ld1w (%sp,%z31.s,sxtw)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LD1W { .S }, /Z, [, , LSL #2] */ const char *const expected_7_0[6] = { - "ld1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s", - "ld1w (%x7,%x8,lsl #2)[32byte] %p2/z -> %z5.s", - "ld1w (%x12,%x13,lsl #2)[32byte] %p3/z -> %z10.s", - "ld1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s", - "ld1w (%x22,%x23,lsl #2)[32byte] %p6/z -> %z21.s", - "ld1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s", + "ld1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s", + "ld1w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s", + "ld1w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s", + "ld1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s", + "ld1w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s", + "ld1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD1W { .D }, /Z, [, , LSL #2] */ const char *const expected_8_0[6] = { - "ld1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", - "ld1w (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", - "ld1w (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", - "ld1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", - "ld1w (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", - "ld1w (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + "ld1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d", + "ld1w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.d", + "ld1w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.d", + "ld1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d", + "ld1w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.d", + "ld1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD1W { .S }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_9_0[6] = { - "ld1w -0x0100(%x0)[32byte] %p0/z -> %z0.s", - "ld1w -0x60(%x7)[32byte] %p2/z -> %z5.s", - "ld1w (%x12)[32byte] %p3/z -> %z10.s", - "ld1w +0x60(%x17)[32byte] %p5/z -> %z16.s", - "ld1w +0xa0(%x22)[32byte] %p6/z -> %z21.s", - "ld1w +0xe0(%sp)[32byte] %p7/z -> %z31.s", + "ld1w -0x0100(%x0)[4byte] %p0/z -> %z0.s", + "ld1w -0x60(%x7)[4byte] %p2/z -> %z5.s", + "ld1w (%x12)[4byte] %p3/z -> %z10.s", + "ld1w +0x60(%x17)[4byte] %p5/z -> %z16.s", + "ld1w +0xa0(%x22)[4byte] %p6/z -> %z21.s", + "ld1w +0xe0(%sp)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_9_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_4)); /* Testing LD1W { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_10_0[6] = { - "ld1w -0x80(%x0)[16byte] %p0/z -> %z0.d", - "ld1w -0x30(%x7)[16byte] %p2/z -> %z5.d", - "ld1w (%x12)[16byte] %p3/z -> %z10.d", - "ld1w +0x30(%x17)[16byte] %p5/z -> %z16.d", - "ld1w +0x50(%x22)[16byte] %p6/z -> %z21.d", - "ld1w +0x70(%sp)[16byte] %p7/z -> %z31.d", + "ld1w -0x80(%x0)[4byte] %p0/z -> %z0.d", + "ld1w -0x30(%x7)[4byte] %p2/z -> %z5.d", + "ld1w (%x12)[4byte] %p3/z -> %z10.d", + "ld1w +0x30(%x17)[4byte] %p5/z -> %z16.d", + "ld1w +0x50(%x22)[4byte] %p6/z -> %z21.d", + "ld1w +0x70(%sp)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_10_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_4)); } TEST_INSTR(ld1d_sve_pred) @@ -18066,145 +18065,145 @@ TEST_INSTR(ld1d_sve_pred) /* Testing LD1D { .D }, /Z, [.D{, #}] */ static const uint imm5[6] = { 0, 64, 104, 152, 192, 248 }; const char *const expected_0_0[6] = { - "ld1d (%z0.d)[32byte] %p0/z -> %z0.d", - "ld1d +0x40(%z7.d)[32byte] %p2/z -> %z5.d", - "ld1d +0x68(%z12.d)[32byte] %p3/z -> %z10.d", - "ld1d +0x98(%z18.d)[32byte] %p5/z -> %z16.d", - "ld1d +0xc0(%z23.d)[32byte] %p6/z -> %z21.d", - "ld1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d", + "ld1d (%z0.d)[8byte] %p0/z -> %z0.d", + "ld1d +0x40(%z7.d)[8byte] %p2/z -> %z5.d", + "ld1d +0x68(%z12.d)[8byte] %p3/z -> %z10.d", + "ld1d +0x98(%z18.d)[8byte] %p5/z -> %z16.d", + "ld1d +0xc0(%z23.d)[8byte] %p6/z -> %z21.d", + "ld1d +0xf8(%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_32, 0)); + OPSZ_8, 0)); /* Testing LD1D { .D }, /Z, [, .D, LSL #3] */ const char *const expected_1_0[6] = { - "ld1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d,lsl #3)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d,lsl #3)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d,lsl #3)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d,lsl #3)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d,lsl #3)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d,lsl #3)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing LD1D { .D }, /Z, [, .D] */ const char *const expected_2_0[6] = { - "ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); /* Testing LD1D { .D }, /Z, [, .D, #3] */ const char *const expected_3_0[6] = { - "ld1d (%x0,%z0.d,uxtw #3)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d,uxtw #3)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d,uxtw #3)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d,uxtw #3)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d,uxtw #3)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d,uxtw #3)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d,uxtw #3)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d,uxtw #3)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d,uxtw #3)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d,uxtw #3)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d,uxtw #3)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d,uxtw #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); const char *const expected_3_1[6] = { - "ld1d (%x0,%z0.d,sxtw #3)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d,sxtw #3)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d,sxtw #3)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d,sxtw #3)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d,sxtw #3)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d,sxtw #3)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d,sxtw #3)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d,sxtw #3)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d,sxtw #3)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d,sxtw #3)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d,sxtw #3)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d,sxtw #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, 
ld1d_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing LD1D { .D }, /Z, [, .D, ] */ const char *const expected_4_0[6] = { - "ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); const char *const expected_4_1[6] = { - "ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); /* Testing LD1D { .D }, /Z, [, , LSL #3] */ const char *const expected_5_0[6] = { - "ld1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d", - "ld1d (%x7,%x8,lsl #3)[32byte] %p2/z -> %z5.d", - "ld1d (%x12,%x13,lsl #3)[32byte] %p3/z -> %z10.d", - "ld1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d", - "ld1d (%x22,%x23,lsl #3)[32byte] %p6/z -> %z21.d", - "ld1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d", + "ld1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d", + "ld1d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d", + "ld1d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d", + "ld1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d", + "ld1d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d", + "ld1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LD1D { .D }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_6_0[6] = { - "ld1d -0x0100(%x0)[32byte] %p0/z -> %z0.d", - "ld1d -0x60(%x7)[32byte] %p2/z -> %z5.d", - "ld1d (%x12)[32byte] %p3/z -> %z10.d", - "ld1d +0x60(%x17)[32byte] %p5/z -> %z16.d", - "ld1d +0xa0(%x22)[32byte] %p6/z -> %z21.d", - "ld1d +0xe0(%sp)[32byte] %p7/z -> %z31.d", + "ld1d -0x0100(%x0)[8byte] %p0/z -> %z0.d", + "ld1d -0x60(%x7)[8byte] %p2/z -> %z5.d", + "ld1d (%x12)[8byte] %p3/z -> %z10.d", + "ld1d +0x60(%x17)[8byte] %p5/z -> %z16.d", + "ld1d +0xa0(%x22)[8byte] %p6/z -> %z21.d", + "ld1d +0xe0(%sp)[8byte] %p7/z -> %z31.d", }; TEST_LOOP( ld1d, ld1d_sve_pred, 6, expected_6_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ld1sw_sve_pred) @@ -18212,145 +18211,145 @@ TEST_INSTR(ld1sw_sve_pred) /* Testing LD1SW { .D }, /Z, [.D{, #}] */ static const uint imm5[6] = { 0, 32, 52, 76, 96, 124 }; const char *const expected_0_0[6] = { - "ld1sw (%z0.d)[16byte] %p0/z -> %z0.d", - "ld1sw +0x20(%z7.d)[16byte] %p2/z -> %z5.d", - "ld1sw +0x34(%z12.d)[16byte] %p3/z -> %z10.d", - "ld1sw +0x4c(%z18.d)[16byte] %p5/z -> %z16.d", - "ld1sw +0x60(%z23.d)[16byte] %p6/z -> %z21.d", - "ld1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d", + "ld1sw (%z0.d)[4byte] %p0/z -> %z0.d", + "ld1sw +0x20(%z7.d)[4byte] %p2/z -> %z5.d", + "ld1sw +0x34(%z12.d)[4byte] %p3/z -> %z10.d", + "ld1sw +0x4c(%z18.d)[4byte] %p5/z -> %z16.d", + "ld1sw +0x60(%z23.d)[4byte] %p6/z -> %z21.d", + "ld1sw +0x7c(%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing LD1SW { .D }, /Z, [, .D, LSL #2] */ const char *const expected_1_0[6] = { - "ld1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d,lsl #2)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d,lsl #2)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d,lsl #2)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d,lsl #2)[4byte] %p5/z 
-> %z16.d", + "ld1sw (%x22,%z24.d,lsl #2)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LD1SW { .D }, /Z, [, .D] */ const char *const expected_2_0[6] = { - "ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LD1SW { .D }, /Z, [, .D, #2] */ const char *const expected_3_0[6] = { - "ld1sw (%x0,%z0.d,uxtw #2)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d,uxtw #2)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d,uxtw #2)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d,uxtw #2)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d,uxtw #2)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d,uxtw #2)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d,uxtw #2)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d,uxtw #2)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d,uxtw #2)[4byte] %p3/z 
-> %z10.d", + "ld1sw (%x17,%z19.d,uxtw #2)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d,uxtw #2)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d,uxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_3_1[6] = { - "ld1sw (%x0,%z0.d,sxtw #2)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d,sxtw #2)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d,sxtw #2)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d,sxtw #2)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d,sxtw #2)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d,sxtw #2)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d,sxtw #2)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d,sxtw #2)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d,sxtw #2)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d,sxtw #2)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d,sxtw #2)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d,sxtw #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing LD1SW { .D }, /Z, [, .D, ] */ const char *const expected_4_0[6] = { - "ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + 
"ld1sw (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_4_1[6] = { - "ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing LD1SW { .D }, /Z, [, , LSL #2] */ const char *const expected_5_0[6] = { - "ld1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", - "ld1sw (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", - "ld1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", - "ld1sw (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", - "ld1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + "ld1sw (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.d", + "ld1sw (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12,%x13,lsl 
#2)[4byte] %p3/z -> %z10.d", + "ld1sw (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.d", + "ld1sw (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.d", + "ld1sw (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD1SW { .D }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_6_0[6] = { - "ld1sw -0x80(%x0)[16byte] %p0/z -> %z0.d", - "ld1sw -0x30(%x7)[16byte] %p2/z -> %z5.d", - "ld1sw (%x12)[16byte] %p3/z -> %z10.d", - "ld1sw +0x30(%x17)[16byte] %p5/z -> %z16.d", - "ld1sw +0x50(%x22)[16byte] %p6/z -> %z21.d", - "ld1sw +0x70(%sp)[16byte] %p7/z -> %z31.d", + "ld1sw -0x80(%x0)[4byte] %p0/z -> %z0.d", + "ld1sw -0x30(%x7)[4byte] %p2/z -> %z5.d", + "ld1sw (%x12)[4byte] %p3/z -> %z10.d", + "ld1sw +0x30(%x17)[4byte] %p5/z -> %z16.d", + "ld1sw +0x50(%x22)[4byte] %p6/z -> %z21.d", + "ld1sw +0x70(%sp)[4byte] %p7/z -> %z31.d", }; TEST_LOOP( ld1sw, ld1sw_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_16)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(st1h_sve_pred) @@ -18358,281 +18357,281 @@ TEST_INSTR(st1h_sve_pred) /* Testing ST1H { .S }, , [.S{, #}] */ static const uint imm5[6] = { 0, 16, 26, 38, 48, 62 }; const char *const expected_0_0[6] = { - "st1h %z0.s %p0 -> (%z0.s)[16byte]", - "st1h %z5.s %p2 -> +0x10(%z7.s)[16byte]", - "st1h %z10.s %p3 -> +0x1a(%z12.s)[16byte]", - "st1h %z16.s %p5 -> +0x26(%z18.s)[16byte]", - "st1h %z21.s %p6 -> +0x30(%z23.s)[16byte]", - "st1h %z31.s %p7 -> 
+0x3e(%z31.s)[16byte]", + "st1h %z0.s %p0 -> (%z0.s)[2byte]", + "st1h %z5.s %p2 -> +0x10(%z7.s)[2byte]", + "st1h %z10.s %p3 -> +0x1a(%z12.s)[2byte]", + "st1h %z16.s %p5 -> +0x26(%z18.s)[2byte]", + "st1h %z21.s %p6 -> +0x30(%z23.s)[2byte]", + "st1h %z31.s %p7 -> +0x3e(%z31.s)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_2, 0)); /* Testing ST1H { .D }, , [.D{, #}] */ const char *const expected_0_1[6] = { - "st1h %z0.d %p0 -> (%z0.d)[8byte]", - "st1h %z5.d %p2 -> +0x10(%z7.d)[8byte]", - "st1h %z10.d %p3 -> +0x1a(%z12.d)[8byte]", - "st1h %z16.d %p5 -> +0x26(%z18.d)[8byte]", - "st1h %z21.d %p6 -> +0x30(%z23.d)[8byte]", - "st1h %z31.d %p7 -> +0x3e(%z31.d)[8byte]", + "st1h %z0.d %p0 -> (%z0.d)[2byte]", + "st1h %z5.d %p2 -> +0x10(%z7.d)[2byte]", + "st1h %z10.d %p3 -> +0x1a(%z12.d)[2byte]", + "st1h %z16.d %p5 -> +0x26(%z18.d)[2byte]", + "st1h %z21.d %p6 -> +0x30(%z23.d)[2byte]", + "st1h %z31.d %p7 -> +0x3e(%z31.d)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing ST1H { .D }, , [, .D, LSL #1] */ const char *const expected_1_0[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d,lsl #1)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d,lsl #1)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d,lsl #1)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,lsl #1)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,lsl 
#1)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[2byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d,lsl #1)[2byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing ST1H { .D }, , [, .D] */ const char *const expected_2_0[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d)[2byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d)[2byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing ST1H { .D }, , [, .D, #1] */ const char *const expected_3_0[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d,uxtw #1)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d,uxtw #1)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d,uxtw #1)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d,uxtw #1)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d,uxtw #1)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d,uxtw #1)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d,uxtw #1)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,uxtw #1)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,uxtw #1)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,uxtw #1)[2byte]", + "st1h 
%z21.d %p6 -> (%x22,%z24.d,uxtw #1)[2byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d,uxtw #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_3_1[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d,sxtw #1)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d,sxtw #1)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d,sxtw #1)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d,sxtw #1)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d,sxtw #1)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d,sxtw #1)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d,sxtw #1)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,sxtw #1)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,sxtw #1)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,sxtw #1)[2byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d,sxtw #1)[2byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d,sxtw #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_8, 1)); + 0, 0, OPSZ_2, 1)); /* Testing ST1H { .D }, , [, .D, ] */ const char *const expected_4_0[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d,uxtw)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d,uxtw)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d,uxtw)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d,uxtw)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d,uxtw)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d,uxtw)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d,uxtw)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,uxtw)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,uxtw)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,uxtw)[2byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d,uxtw)[2byte]", + "st1h 
%z31.d %p7 -> (%sp,%z31.d,uxtw)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_4_1[6] = { - "st1h %z0.d %p0 -> (%x0,%z0.d,sxtw)[8byte]", - "st1h %z5.d %p2 -> (%x7,%z8.d,sxtw)[8byte]", - "st1h %z10.d %p3 -> (%x12,%z13.d,sxtw)[8byte]", - "st1h %z16.d %p5 -> (%x17,%z19.d,sxtw)[8byte]", - "st1h %z21.d %p6 -> (%x22,%z24.d,sxtw)[8byte]", - "st1h %z31.d %p7 -> (%sp,%z31.d,sxtw)[8byte]", + "st1h %z0.d %p0 -> (%x0,%z0.d,sxtw)[2byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,sxtw)[2byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,sxtw)[2byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,sxtw)[2byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d,sxtw)[2byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d,sxtw)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_8, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing ST1H { .S }, , [, .S, #1] */ const char *const expected_5_0[6] = { - "st1h %z0.s %p0 -> (%x0,%z0.s,uxtw #1)[16byte]", - "st1h %z5.s %p2 -> (%x7,%z8.s,uxtw #1)[16byte]", - "st1h %z10.s %p3 -> (%x12,%z13.s,uxtw #1)[16byte]", - "st1h %z16.s %p5 -> (%x17,%z19.s,uxtw #1)[16byte]", - "st1h %z21.s %p6 -> (%x22,%z24.s,uxtw #1)[16byte]", - "st1h %z31.s %p7 -> (%sp,%z31.s,uxtw #1)[16byte]", + "st1h %z0.s %p0 -> (%x0,%z0.s,uxtw #1)[2byte]", + "st1h %z5.s %p2 -> (%x7,%z8.s,uxtw #1)[2byte]", + "st1h %z10.s %p3 -> (%x12,%z13.s,uxtw #1)[2byte]", + "st1h %z16.s %p5 -> (%x17,%z19.s,uxtw #1)[2byte]", + "st1h %z21.s %p6 -> (%x22,%z24.s,uxtw #1)[2byte]", + "st1h %z31.s %p7 -> (%sp,%z31.s,uxtw 
#1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); const char *const expected_5_1[6] = { - "st1h %z0.s %p0 -> (%x0,%z0.s,sxtw #1)[16byte]", - "st1h %z5.s %p2 -> (%x7,%z8.s,sxtw #1)[16byte]", - "st1h %z10.s %p3 -> (%x12,%z13.s,sxtw #1)[16byte]", - "st1h %z16.s %p5 -> (%x17,%z19.s,sxtw #1)[16byte]", - "st1h %z21.s %p6 -> (%x22,%z24.s,sxtw #1)[16byte]", - "st1h %z31.s %p7 -> (%sp,%z31.s,sxtw #1)[16byte]", + "st1h %z0.s %p0 -> (%x0,%z0.s,sxtw #1)[2byte]", + "st1h %z5.s %p2 -> (%x7,%z8.s,sxtw #1)[2byte]", + "st1h %z10.s %p3 -> (%x12,%z13.s,sxtw #1)[2byte]", + "st1h %z16.s %p5 -> (%x17,%z19.s,sxtw #1)[2byte]", + "st1h %z21.s %p6 -> (%x22,%z24.s,sxtw #1)[2byte]", + "st1h %z31.s %p7 -> (%sp,%z31.s,sxtw #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 1)); + 0, 0, OPSZ_2, 1)); /* Testing ST1H { .S }, , [, .S, ] */ const char *const expected_6_0[6] = { - "st1h %z0.s %p0 -> (%x0,%z0.s,uxtw)[16byte]", - "st1h %z5.s %p2 -> (%x7,%z8.s,uxtw)[16byte]", - "st1h %z10.s %p3 -> (%x12,%z13.s,uxtw)[16byte]", - "st1h %z16.s %p5 -> (%x17,%z19.s,uxtw)[16byte]", - "st1h %z21.s %p6 -> (%x22,%z24.s,uxtw)[16byte]", - "st1h %z31.s %p7 -> (%sp,%z31.s,uxtw)[16byte]", + "st1h %z0.s %p0 -> (%x0,%z0.s,uxtw)[2byte]", + "st1h %z5.s %p2 -> (%x7,%z8.s,uxtw)[2byte]", + "st1h %z10.s %p3 -> (%x12,%z13.s,uxtw)[2byte]", + "st1h %z16.s %p5 -> (%x17,%z19.s,uxtw)[2byte]", + "st1h %z21.s %p6 -> (%x22,%z24.s,uxtw)[2byte]", + "st1h %z31.s %p7 -> (%sp,%z31.s,uxtw)[2byte]", }; TEST_LOOP(st1h, 
st1h_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); const char *const expected_6_1[6] = { - "st1h %z0.s %p0 -> (%x0,%z0.s,sxtw)[16byte]", - "st1h %z5.s %p2 -> (%x7,%z8.s,sxtw)[16byte]", - "st1h %z10.s %p3 -> (%x12,%z13.s,sxtw)[16byte]", - "st1h %z16.s %p5 -> (%x17,%z19.s,sxtw)[16byte]", - "st1h %z21.s %p6 -> (%x22,%z24.s,sxtw)[16byte]", - "st1h %z31.s %p7 -> (%sp,%z31.s,sxtw)[16byte]", + "st1h %z0.s %p0 -> (%x0,%z0.s,sxtw)[2byte]", + "st1h %z5.s %p2 -> (%x7,%z8.s,sxtw)[2byte]", + "st1h %z10.s %p3 -> (%x12,%z13.s,sxtw)[2byte]", + "st1h %z16.s %p5 -> (%x17,%z19.s,sxtw)[2byte]", + "st1h %z21.s %p6 -> (%x22,%z24.s,sxtw)[2byte]", + "st1h %z31.s %p7 -> (%sp,%z31.s,sxtw)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_2, 0)); /* Testing ST1H { . 
}, , [, , LSL #1] */ const char *const expected_7_0[6] = { - "st1h %z0.h %p0 -> (%x0,%x0,lsl #1)[32byte]", - "st1h %z5.h %p2 -> (%x7,%x8,lsl #1)[32byte]", - "st1h %z10.h %p3 -> (%x12,%x13,lsl #1)[32byte]", - "st1h %z16.h %p5 -> (%x17,%x18,lsl #1)[32byte]", - "st1h %z21.h %p6 -> (%x22,%x23,lsl #1)[32byte]", - "st1h %z31.h %p7 -> (%sp,%x30,lsl #1)[32byte]", + "st1h %z0.h %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st1h %z5.h %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st1h %z10.h %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st1h %z16.h %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st1h %z21.h %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st1h %z31.h %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 1)); + true, 0, 0, OPSZ_2, 1)); const char *const expected_7_1[6] = { - "st1h %z0.s %p0 -> (%x0,%x0,lsl #1)[16byte]", - "st1h %z5.s %p2 -> (%x7,%x8,lsl #1)[16byte]", - "st1h %z10.s %p3 -> (%x12,%x13,lsl #1)[16byte]", - "st1h %z16.s %p5 -> (%x17,%x18,lsl #1)[16byte]", - "st1h %z21.s %p6 -> (%x22,%x23,lsl #1)[16byte]", - "st1h %z31.s %p7 -> (%sp,%x30,lsl #1)[16byte]", + "st1h %z0.s %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st1h %z5.s %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st1h %z10.s %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st1h %z16.s %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st1h %z21.s %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st1h %z31.s %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 1)); + true, 0, 0, OPSZ_2, 1)); const char *const expected_7_2[6] = { - "st1h %z0.d %p0 -> (%x0,%x0,lsl #1)[8byte]", - "st1h %z5.d %p2 -> 
(%x7,%x8,lsl #1)[8byte]", - "st1h %z10.d %p3 -> (%x12,%x13,lsl #1)[8byte]", - "st1h %z16.d %p5 -> (%x17,%x18,lsl #1)[8byte]", - "st1h %z21.d %p6 -> (%x22,%x23,lsl #1)[8byte]", - "st1h %z31.d %p7 -> (%sp,%x30,lsl #1)[8byte]", + "st1h %z0.d %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st1h %z5.d %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st1h %z10.d %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st1h %z16.d %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st1h %z21.d %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st1h %z31.d %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_7_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_8, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing ST1H { . }, , [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_8_0[6] = { - "st1h %z0.h %p0 -> -0x0100(%x0)[32byte]", - "st1h %z5.h %p2 -> -0x60(%x7)[32byte]", - "st1h %z10.h %p3 -> (%x12)[32byte]", - "st1h %z16.h %p5 -> +0x60(%x17)[32byte]", - "st1h %z21.h %p6 -> +0xa0(%x22)[32byte]", - "st1h %z31.h %p7 -> +0xe0(%sp)[32byte]", + "st1h %z0.h %p0 -> -0x0100(%x0)[2byte]", + "st1h %z5.h %p2 -> -0x60(%x7)[2byte]", + "st1h %z10.h %p3 -> (%x12)[2byte]", + "st1h %z16.h %p5 -> +0x60(%x17)[2byte]", + "st1h %z21.h %p6 -> +0xa0(%x22)[2byte]", + "st1h %z31.h %p7 -> +0xe0(%sp)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_2)); const char *const expected_8_1[6] = { - "st1h %z0.s %p0 -> -0x80(%x0)[16byte]", - "st1h %z5.s %p2 -> -0x30(%x7)[16byte]", - "st1h %z10.s %p3 -> (%x12)[16byte]", - "st1h %z16.s %p5 -> +0x30(%x17)[16byte]", - "st1h %z21.s %p6 -> +0x50(%x22)[16byte]", - "st1h %z31.s %p7 -> 
+0x70(%sp)[16byte]", + "st1h %z0.s %p0 -> -0x80(%x0)[2byte]", + "st1h %z5.s %p2 -> -0x30(%x7)[2byte]", + "st1h %z10.s %p3 -> (%x12)[2byte]", + "st1h %z16.s %p5 -> +0x30(%x17)[2byte]", + "st1h %z21.s %p6 -> +0x50(%x22)[2byte]", + "st1h %z31.s %p7 -> +0x70(%sp)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_8_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_2)); const char *const expected_8_2[6] = { - "st1h %z0.d %p0 -> -0x40(%x0)[8byte]", - "st1h %z5.d %p2 -> -0x18(%x7)[8byte]", - "st1h %z10.d %p3 -> (%x12)[8byte]", - "st1h %z16.d %p5 -> +0x18(%x17)[8byte]", - "st1h %z21.d %p6 -> +0x28(%x22)[8byte]", - "st1h %z31.d %p7 -> +0x38(%sp)[8byte]", + "st1h %z0.d %p0 -> -0x40(%x0)[2byte]", + "st1h %z5.d %p2 -> -0x18(%x7)[2byte]", + "st1h %z10.d %p3 -> (%x12)[2byte]", + "st1h %z16.d %p5 -> +0x18(%x17)[2byte]", + "st1h %z21.d %p6 -> +0x28(%x22)[2byte]", + "st1h %z31.d %p7 -> +0x38(%sp)[2byte]", }; TEST_LOOP(st1h, st1h_sve_pred, 6, expected_8_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_2)); } TEST_INSTR(st1w_sve_pred) @@ -18640,252 +18639,252 @@ TEST_INSTR(st1w_sve_pred) /* Testing ST1W { .S }, , [.S{, #}] */ static const uint imm5[6] = { 0, 32, 52, 76, 96, 124 }; const char *const expected_0_0[6] = { - "st1w %z0.s %p0 -> (%z0.s)[32byte]", - "st1w %z5.s %p2 -> +0x20(%z7.s)[32byte]", - "st1w %z10.s %p3 -> +0x34(%z12.s)[32byte]", - "st1w %z16.s %p5 -> +0x4c(%z18.s)[32byte]", - "st1w %z21.s %p6 -> +0x60(%z23.s)[32byte]", - "st1w %z31.s %p7 -> +0x7c(%z31.s)[32byte]", + "st1w %z0.s %p0 -> (%z0.s)[4byte]", + "st1w %z5.s %p2 -> +0x20(%z7.s)[4byte]", + "st1w %z10.s %p3 -> +0x34(%z12.s)[4byte]", + "st1w %z16.s %p5 -> +0x4c(%z18.s)[4byte]", + "st1w %z21.s %p6 -> 
+0x60(%z23.s)[4byte]", + "st1w %z31.s %p7 -> +0x7c(%z31.s)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_32, 0)); + OPSZ_4, 0)); /* Testing ST1W { .D }, , [.D{, #}] */ const char *const expected_0_1[6] = { - "st1w %z0.d %p0 -> (%z0.d)[16byte]", - "st1w %z5.d %p2 -> +0x20(%z7.d)[16byte]", - "st1w %z10.d %p3 -> +0x34(%z12.d)[16byte]", - "st1w %z16.d %p5 -> +0x4c(%z18.d)[16byte]", - "st1w %z21.d %p6 -> +0x60(%z23.d)[16byte]", - "st1w %z31.d %p7 -> +0x7c(%z31.d)[16byte]", + "st1w %z0.d %p0 -> (%z0.d)[4byte]", + "st1w %z5.d %p2 -> +0x20(%z7.d)[4byte]", + "st1w %z10.d %p3 -> +0x34(%z12.d)[4byte]", + "st1w %z16.d %p5 -> +0x4c(%z18.d)[4byte]", + "st1w %z21.d %p6 -> +0x60(%z23.d)[4byte]", + "st1w %z31.d %p7 -> +0x7c(%z31.d)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing ST1W { .D }, , [, .D, LSL #2] */ const char *const expected_1_0[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d,lsl #2)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d,lsl #2)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d,lsl #2)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,lsl #2)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,lsl #2)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,lsl #2)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, 
expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing ST1W { .D }, , [, .D] */ const char *const expected_2_0[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing ST1W { .D }, , [, .D, #2] */ const char *const expected_3_0[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d,uxtw #2)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d,uxtw #2)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d,uxtw #2)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d,uxtw #2)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d,uxtw #2)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d,uxtw #2)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d,uxtw #2)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,uxtw #2)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,uxtw #2)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,uxtw #2)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,uxtw #2)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,uxtw #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_3_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_3_1[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d,sxtw #2)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d,sxtw #2)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d,sxtw #2)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d,sxtw #2)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d,sxtw #2)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d,sxtw #2)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d,sxtw #2)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,sxtw #2)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,sxtw #2)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,sxtw #2)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,sxtw #2)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,sxtw #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_16, 2)); + 0, 0, OPSZ_4, 2)); /* Testing ST1W { .D }, , [, .D, ] */ const char *const expected_4_0[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d,uxtw)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d,uxtw)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d,uxtw)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d,uxtw)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d,uxtw)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d,uxtw)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d,uxtw)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,uxtw)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,uxtw)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,uxtw)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,uxtw)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,uxtw)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_4_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_4_1[6] = { - "st1w %z0.d %p0 -> (%x0,%z0.d,sxtw)[16byte]", - "st1w %z5.d %p2 -> (%x7,%z8.d,sxtw)[16byte]", - "st1w %z10.d %p3 -> (%x12,%z13.d,sxtw)[16byte]", - "st1w %z16.d %p5 -> (%x17,%z19.d,sxtw)[16byte]", - "st1w %z21.d %p6 -> (%x22,%z24.d,sxtw)[16byte]", - "st1w %z31.d %p7 -> (%sp,%z31.d,sxtw)[16byte]", + "st1w %z0.d %p0 -> (%x0,%z0.d,sxtw)[4byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,sxtw)[4byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,sxtw)[4byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,sxtw)[4byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,sxtw)[4byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,sxtw)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_16, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing ST1W { .S }, , [, .S, #2] */ const char *const expected_5_0[6] = { - "st1w %z0.s %p0 -> (%x0,%z0.s,uxtw #2)[32byte]", - "st1w %z5.s %p2 -> (%x7,%z8.s,uxtw #2)[32byte]", - "st1w %z10.s %p3 -> (%x12,%z13.s,uxtw #2)[32byte]", - "st1w %z16.s %p5 -> (%x17,%z19.s,uxtw #2)[32byte]", - "st1w %z21.s %p6 -> (%x22,%z24.s,uxtw #2)[32byte]", - "st1w %z31.s %p7 -> (%sp,%z31.s,uxtw #2)[32byte]", + "st1w %z0.s %p0 -> (%x0,%z0.s,uxtw #2)[4byte]", + "st1w %z5.s %p2 -> (%x7,%z8.s,uxtw #2)[4byte]", + "st1w %z10.s %p3 -> (%x12,%z13.s,uxtw #2)[4byte]", + "st1w %z16.s %p5 -> (%x17,%z19.s,uxtw #2)[4byte]", + "st1w %z21.s %p6 -> (%x22,%z24.s,uxtw #2)[4byte]", + "st1w %z31.s %p7 -> (%sp,%z31.s,uxtw #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_5_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); const char *const expected_5_1[6] = { - "st1w %z0.s %p0 -> (%x0,%z0.s,sxtw #2)[32byte]", - "st1w %z5.s %p2 -> (%x7,%z8.s,sxtw #2)[32byte]", - "st1w %z10.s %p3 -> (%x12,%z13.s,sxtw #2)[32byte]", - "st1w %z16.s %p5 -> (%x17,%z19.s,sxtw #2)[32byte]", - "st1w %z21.s %p6 -> (%x22,%z24.s,sxtw #2)[32byte]", - "st1w %z31.s %p7 -> (%sp,%z31.s,sxtw #2)[32byte]", + "st1w %z0.s %p0 -> (%x0,%z0.s,sxtw #2)[4byte]", + "st1w %z5.s %p2 -> (%x7,%z8.s,sxtw #2)[4byte]", + "st1w %z10.s %p3 -> (%x12,%z13.s,sxtw #2)[4byte]", + "st1w %z16.s %p5 -> (%x17,%z19.s,sxtw #2)[4byte]", + "st1w %z21.s %p6 -> (%x22,%z24.s,sxtw #2)[4byte]", + "st1w %z31.s %p7 -> (%sp,%z31.s,sxtw #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_5_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 2)); + 0, 0, OPSZ_4, 2)); /* Testing ST1W { .S }, , [, .S, ] */ const char *const expected_6_0[6] = { - "st1w %z0.s %p0 -> (%x0,%z0.s,uxtw)[32byte]", - "st1w %z5.s %p2 -> (%x7,%z8.s,uxtw)[32byte]", - "st1w %z10.s %p3 -> (%x12,%z13.s,uxtw)[32byte]", - "st1w %z16.s %p5 -> (%x17,%z19.s,uxtw)[32byte]", - "st1w %z21.s %p6 -> (%x22,%z24.s,uxtw)[32byte]", - "st1w %z31.s %p7 -> (%sp,%z31.s,uxtw)[32byte]", + "st1w %z0.s %p0 -> (%x0,%z0.s,uxtw)[4byte]", + "st1w %z5.s %p2 -> (%x7,%z8.s,uxtw)[4byte]", + "st1w %z10.s %p3 -> (%x12,%z13.s,uxtw)[4byte]", + "st1w %z16.s %p5 -> (%x17,%z19.s,uxtw)[4byte]", + "st1w %z21.s %p6 -> (%x22,%z24.s,uxtw)[4byte]", + "st1w %z31.s %p7 -> (%sp,%z31.s,uxtw)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_6_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); const char *const expected_6_1[6] = { - "st1w %z0.s %p0 -> (%x0,%z0.s,sxtw)[32byte]", - "st1w %z5.s %p2 -> (%x7,%z8.s,sxtw)[32byte]", - "st1w %z10.s %p3 -> (%x12,%z13.s,sxtw)[32byte]", - "st1w %z16.s %p5 -> (%x17,%z19.s,sxtw)[32byte]", - "st1w %z21.s %p6 -> (%x22,%z24.s,sxtw)[32byte]", - "st1w %z31.s %p7 -> (%sp,%z31.s,sxtw)[32byte]", + "st1w %z0.s %p0 -> (%x0,%z0.s,sxtw)[4byte]", + "st1w %z5.s %p2 -> (%x7,%z8.s,sxtw)[4byte]", + "st1w %z10.s %p3 -> (%x12,%z13.s,sxtw)[4byte]", + "st1w %z16.s %p5 -> (%x17,%z19.s,sxtw)[4byte]", + "st1w %z21.s %p6 -> (%x22,%z24.s,sxtw)[4byte]", + "st1w %z31.s %p7 -> (%sp,%z31.s,sxtw)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_6_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_4, DR_EXTEND_SXTW, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_4, 0)); /* Testing ST1W { . 
}, , [, , LSL #2] */ const char *const expected_7_0[6] = { - "st1w %z0.s %p0 -> (%x0,%x0,lsl #2)[32byte]", - "st1w %z5.s %p2 -> (%x7,%x8,lsl #2)[32byte]", - "st1w %z10.s %p3 -> (%x12,%x13,lsl #2)[32byte]", - "st1w %z16.s %p5 -> (%x17,%x18,lsl #2)[32byte]", - "st1w %z21.s %p6 -> (%x22,%x23,lsl #2)[32byte]", - "st1w %z31.s %p7 -> (%sp,%x30,lsl #2)[32byte]", + "st1w %z0.s %p0 -> (%x0,%x0,lsl #2)[4byte]", + "st1w %z5.s %p2 -> (%x7,%x8,lsl #2)[4byte]", + "st1w %z10.s %p3 -> (%x12,%x13,lsl #2)[4byte]", + "st1w %z16.s %p5 -> (%x17,%x18,lsl #2)[4byte]", + "st1w %z21.s %p6 -> (%x22,%x23,lsl #2)[4byte]", + "st1w %z31.s %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_7_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 2)); + true, 0, 0, OPSZ_4, 2)); const char *const expected_7_1[6] = { - "st1w %z0.d %p0 -> (%x0,%x0,lsl #2)[16byte]", - "st1w %z5.d %p2 -> (%x7,%x8,lsl #2)[16byte]", - "st1w %z10.d %p3 -> (%x12,%x13,lsl #2)[16byte]", - "st1w %z16.d %p5 -> (%x17,%x18,lsl #2)[16byte]", - "st1w %z21.d %p6 -> (%x22,%x23,lsl #2)[16byte]", - "st1w %z31.d %p7 -> (%sp,%x30,lsl #2)[16byte]", + "st1w %z0.d %p0 -> (%x0,%x0,lsl #2)[4byte]", + "st1w %z5.d %p2 -> (%x7,%x8,lsl #2)[4byte]", + "st1w %z10.d %p3 -> (%x12,%x13,lsl #2)[4byte]", + "st1w %z16.d %p5 -> (%x17,%x18,lsl #2)[4byte]", + "st1w %z21.d %p6 -> (%x22,%x23,lsl #2)[4byte]", + "st1w %z31.d %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_7_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_16, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing ST1W { . 
}, , [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_8_0[6] = { - "st1w %z0.s %p0 -> -0x0100(%x0)[32byte]", - "st1w %z5.s %p2 -> -0x60(%x7)[32byte]", - "st1w %z10.s %p3 -> (%x12)[32byte]", - "st1w %z16.s %p5 -> +0x60(%x17)[32byte]", - "st1w %z21.s %p6 -> +0xa0(%x22)[32byte]", - "st1w %z31.s %p7 -> +0xe0(%sp)[32byte]", + "st1w %z0.s %p0 -> -0x0100(%x0)[4byte]", + "st1w %z5.s %p2 -> -0x60(%x7)[4byte]", + "st1w %z10.s %p3 -> (%x12)[4byte]", + "st1w %z16.s %p5 -> +0x60(%x17)[4byte]", + "st1w %z21.s %p6 -> +0xa0(%x22)[4byte]", + "st1w %z31.s %p7 -> +0xe0(%sp)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_8_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_4)); const char *const expected_8_1[6] = { - "st1w %z0.d %p0 -> -0x80(%x0)[16byte]", - "st1w %z5.d %p2 -> -0x30(%x7)[16byte]", - "st1w %z10.d %p3 -> (%x12)[16byte]", - "st1w %z16.d %p5 -> +0x30(%x17)[16byte]", - "st1w %z21.d %p6 -> +0x50(%x22)[16byte]", - "st1w %z31.d %p7 -> +0x70(%sp)[16byte]", + "st1w %z0.d %p0 -> -0x80(%x0)[4byte]", + "st1w %z5.d %p2 -> -0x30(%x7)[4byte]", + "st1w %z10.d %p3 -> (%x12)[4byte]", + "st1w %z16.d %p5 -> +0x30(%x17)[4byte]", + "st1w %z21.d %p6 -> +0x50(%x22)[4byte]", + "st1w %z31.d %p7 -> +0x70(%sp)[4byte]", }; TEST_LOOP(st1w, st1w_sve_pred, 6, expected_8_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_4)); } TEST_INSTR(st1d_sve_pred) @@ -18893,250 +18892,250 @@ TEST_INSTR(st1d_sve_pred) /* Testing ST1D { .D }, , [.D{, #}] */ static const uint imm5_0_0[6] = { 0, 64, 104, 152, 192, 248 }; const char *const expected_0_0[6] = { - "st1d %z0.d %p0 -> (%z0.d)[32byte]", - "st1d %z5.d %p2 -> +0x40(%z7.d)[32byte]", - "st1d %z10.d %p3 -> 
+0x68(%z12.d)[32byte]", - "st1d %z16.d %p5 -> +0x98(%z18.d)[32byte]", - "st1d %z21.d %p6 -> +0xc0(%z23.d)[32byte]", - "st1d %z31.d %p7 -> +0xf8(%z31.d)[32byte]", + "st1d %z0.d %p0 -> (%z0.d)[8byte]", + "st1d %z5.d %p2 -> +0x40(%z7.d)[8byte]", + "st1d %z10.d %p3 -> +0x68(%z12.d)[8byte]", + "st1d %z16.d %p5 -> +0x98(%z18.d)[8byte]", + "st1d %z21.d %p6 -> +0xc0(%z23.d)[8byte]", + "st1d %z31.d %p7 -> +0xf8(%z31.d)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5_0_0[i], - 0, OPSZ_32, 0)); + 0, OPSZ_8, 0)); /* Testing ST1D { .D }, , [, .D, LSL #3] */ const char *const expected_1_0[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d,lsl #3)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d,lsl #3)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[32byte]", - "st1d %z21.d %p6 -> (%x22,%z24.d,lsl #3)[32byte]", - "st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d,lsl #3)[8byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d,lsl #3)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d,lsl #3)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing ST1D { .D }, , [, .D] */ const char *const expected_2_0[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d)[32byte]", - "st1d 
%z21.d %p6 -> (%x22,%z24.d)[32byte]", - "st1d %z31.d %p7 -> (%sp,%z31.d)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d)[8byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); /* Testing ST1D { .D }, , [, .D, #3] */ const char *const expected_3_0[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d,uxtw #3)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d,uxtw #3)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d,uxtw #3)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d,uxtw #3)[32byte]", - "st1d %z21.d %p6 -> (%x22,%z24.d,uxtw #3)[32byte]", - "st1d %z31.d %p7 -> (%sp,%z31.d,uxtw #3)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d,uxtw #3)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d,uxtw #3)[8byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d,uxtw #3)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d,uxtw #3)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d,uxtw #3)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d,uxtw #3)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); const char *const expected_3_1[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d,sxtw #3)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d,sxtw #3)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d,sxtw #3)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d,sxtw #3)[32byte]", - "st1d %z21.d %p6 -> (%x22,%z24.d,sxtw #3)[32byte]", - "st1d %z31.d 
%p7 -> (%sp,%z31.d,sxtw #3)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d,sxtw #3)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d,sxtw #3)[8byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d,sxtw #3)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d,sxtw #3)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d,sxtw #3)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d,sxtw #3)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_3_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_SXTW, true, - 0, 0, OPSZ_32, 3)); + 0, 0, OPSZ_8, 3)); /* Testing ST1D { .D }, , [, .D, ] */ const char *const expected_4_0[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d)[32byte]", - "st1d %z21.d %p6 -> (%x22,%z24.d)[32byte]", - "st1d %z31.d %p7 -> (%sp,%z31.d)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d)[8byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_4_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); const char *const expected_4_1[6] = { - "st1d %z0.d %p0 -> (%x0,%z0.d)[32byte]", - "st1d %z5.d %p2 -> (%x7,%z8.d)[32byte]", - "st1d %z10.d %p3 -> (%x12,%z13.d)[32byte]", - "st1d %z16.d %p5 -> (%x17,%z19.d)[32byte]", - "st1d %z21.d %p6 -> (%x22,%z24.d)[32byte]", - "st1d %z31.d %p7 -> (%sp,%z31.d)[32byte]", + "st1d %z0.d %p0 -> (%x0,%z0.d)[8byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d)[8byte]", + "st1d %z10.d %p3 -> 
(%x12,%z13.d)[8byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d)[8byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d)[8byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_4_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64( Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, - false, 0, 0, OPSZ_32, 0)); + false, 0, 0, OPSZ_8, 0)); /* Testing ST1D { .D }, , [, , LSL #3] */ const char *const expected_5_0[6] = { - "st1d %z0.d %p0 -> (%x0,%x0,lsl #3)[32byte]", - "st1d %z5.d %p2 -> (%x7,%x8,lsl #3)[32byte]", - "st1d %z10.d %p3 -> (%x12,%x13,lsl #3)[32byte]", - "st1d %z16.d %p5 -> (%x17,%x18,lsl #3)[32byte]", - "st1d %z21.d %p6 -> (%x22,%x23,lsl #3)[32byte]", - "st1d %z31.d %p7 -> (%sp,%x30,lsl #3)[32byte]", + "st1d %z0.d %p0 -> (%x0,%x0,lsl #3)[8byte]", + "st1d %z5.d %p2 -> (%x7,%x8,lsl #3)[8byte]", + "st1d %z10.d %p3 -> (%x12,%x13,lsl #3)[8byte]", + "st1d %z16.d %p5 -> (%x17,%x18,lsl #3)[8byte]", + "st1d %z21.d %p6 -> (%x22,%x23,lsl #3)[8byte]", + "st1d %z31.d %p7 -> (%sp,%x30,lsl #3)[8byte]", }; TEST_LOOP(st1d, st1d_sve_pred, 6, expected_5_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing ST1D { .D }, , [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_6_0[6] = { - "st1d %z0.d %p0 -> -0x0100(%x0)[32byte]", - "st1d %z5.d %p2 -> -0x60(%x7)[32byte]", - "st1d %z10.d %p3 -> (%x12)[32byte]", - "st1d %z16.d %p5 -> +0x60(%x17)[32byte]", - "st1d %z21.d %p6 -> +0xa0(%x22)[32byte]", - "st1d %z31.d %p7 -> +0xe0(%sp)[32byte]", + "st1d %z0.d %p0 -> -0x0100(%x0)[8byte]", + "st1d %z5.d %p2 -> -0x60(%x7)[8byte]", + "st1d %z10.d %p3 -> (%x12)[8byte]", + "st1d %z16.d %p5 -> 
+0x60(%x17)[8byte]", + "st1d %z21.d %p6 -> +0xa0(%x22)[8byte]", + "st1d %z31.d %p7 -> +0xe0(%sp)[8byte]", }; TEST_LOOP( st1d, st1d_sve_pred, 6, expected_6_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ld2d_sve_pred) { /* Testing LD2D { .D, .D }, /Z, [, , LSL #3] */ const char *const expected_0_0[6] = { - "ld2d (%x0,%x0,lsl #3)[64byte] %p0/z -> %z0.d %z1.d", - "ld2d (%x7,%x8,lsl #3)[64byte] %p2/z -> %z5.d %z6.d", - "ld2d (%x12,%x13,lsl #3)[64byte] %p3/z -> %z10.d %z11.d", - "ld2d (%x17,%x18,lsl #3)[64byte] %p5/z -> %z16.d %z17.d", - "ld2d (%x22,%x23,lsl #3)[64byte] %p6/z -> %z21.d %z22.d", - "ld2d (%sp,%x30,lsl #3)[64byte] %p7/z -> %z31.d %z0.d", + "ld2d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d", + "ld2d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d %z6.d", + "ld2d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d %z11.d", + "ld2d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d", + "ld2d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d %z22.d", + "ld2d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d", }; TEST_LOOP(ld2d, ld2d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_64, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LD2D { .D, .D }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "ld2d -0x0200(%x0)[64byte] %p0/z -> %z0.d %z1.d", - "ld2d -0xc0(%x7)[64byte] %p2/z -> %z5.d %z6.d", - "ld2d (%x12)[64byte] %p3/z -> %z10.d %z11.d", - "ld2d +0xc0(%x17)[64byte] %p5/z -> %z16.d %z17.d", - "ld2d +0x0140(%x22)[64byte] %p6/z -> %z21.d %z22.d", - "ld2d +0x01c0(%sp)[64byte] 
%p7/z -> %z31.d %z0.d", + "ld2d -0x0200(%x0)[8byte] %p0/z -> %z0.d %z1.d", + "ld2d -0xc0(%x7)[8byte] %p2/z -> %z5.d %z6.d", + "ld2d (%x12)[8byte] %p3/z -> %z10.d %z11.d", + "ld2d +0xc0(%x17)[8byte] %p5/z -> %z16.d %z17.d", + "ld2d +0x0140(%x22)[8byte] %p6/z -> %z21.d %z22.d", + "ld2d +0x01c0(%sp)[8byte] %p7/z -> %z31.d %z0.d", }; TEST_LOOP( ld2d, ld2d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ld2h_sve_pred) { /* Testing LD2H { .H, .H }, /Z, [, , LSL #1] */ const char *const expected_0_0[6] = { - "ld2h (%x0,%x0,lsl #1)[64byte] %p0/z -> %z0.h %z1.h", - "ld2h (%x7,%x8,lsl #1)[64byte] %p2/z -> %z5.h %z6.h", - "ld2h (%x12,%x13,lsl #1)[64byte] %p3/z -> %z10.h %z11.h", - "ld2h (%x17,%x18,lsl #1)[64byte] %p5/z -> %z16.h %z17.h", - "ld2h (%x22,%x23,lsl #1)[64byte] %p6/z -> %z21.h %z22.h", - "ld2h (%sp,%x30,lsl #1)[64byte] %p7/z -> %z31.h %z0.h", + "ld2h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h %z1.h", + "ld2h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h %z6.h", + "ld2h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h %z11.h", + "ld2h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h", + "ld2h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h %z22.h", + "ld2h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h", }; TEST_LOOP(ld2h, ld2h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_64, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD2H { .H, .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "ld2h -0x0200(%x0)[64byte] %p0/z -> %z0.h 
%z1.h", - "ld2h -0xc0(%x7)[64byte] %p2/z -> %z5.h %z6.h", - "ld2h (%x12)[64byte] %p3/z -> %z10.h %z11.h", - "ld2h +0xc0(%x17)[64byte] %p5/z -> %z16.h %z17.h", - "ld2h +0x0140(%x22)[64byte] %p6/z -> %z21.h %z22.h", - "ld2h +0x01c0(%sp)[64byte] %p7/z -> %z31.h %z0.h", + "ld2h -0x0200(%x0)[2byte] %p0/z -> %z0.h %z1.h", + "ld2h -0xc0(%x7)[2byte] %p2/z -> %z5.h %z6.h", + "ld2h (%x12)[2byte] %p3/z -> %z10.h %z11.h", + "ld2h +0xc0(%x17)[2byte] %p5/z -> %z16.h %z17.h", + "ld2h +0x0140(%x22)[2byte] %p6/z -> %z21.h %z22.h", + "ld2h +0x01c0(%sp)[2byte] %p7/z -> %z31.h %z0.h", }; TEST_LOOP( ld2h, ld2h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(ld2w_sve_pred) { /* Testing LD2W { .S, .S }, /Z, [, , LSL #2] */ const char *const expected_0_0[6] = { - "ld2w (%x0,%x0,lsl #2)[64byte] %p0/z -> %z0.s %z1.s", - "ld2w (%x7,%x8,lsl #2)[64byte] %p2/z -> %z5.s %z6.s", - "ld2w (%x12,%x13,lsl #2)[64byte] %p3/z -> %z10.s %z11.s", - "ld2w (%x17,%x18,lsl #2)[64byte] %p5/z -> %z16.s %z17.s", - "ld2w (%x22,%x23,lsl #2)[64byte] %p6/z -> %z21.s %z22.s", - "ld2w (%sp,%x30,lsl #2)[64byte] %p7/z -> %z31.s %z0.s", + "ld2w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s", + "ld2w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s %z6.s", + "ld2w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s %z11.s", + "ld2w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s", + "ld2w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s %z22.s", + "ld2w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s", }; TEST_LOOP(ld2w, ld2w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 
0, OPSZ_64, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD2W { .S, .S }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "ld2w -0x0200(%x0)[64byte] %p0/z -> %z0.s %z1.s", - "ld2w -0xc0(%x7)[64byte] %p2/z -> %z5.s %z6.s", - "ld2w (%x12)[64byte] %p3/z -> %z10.s %z11.s", - "ld2w +0xc0(%x17)[64byte] %p5/z -> %z16.s %z17.s", - "ld2w +0x0140(%x22)[64byte] %p6/z -> %z21.s %z22.s", - "ld2w +0x01c0(%sp)[64byte] %p7/z -> %z31.s %z0.s", + "ld2w -0x0200(%x0)[4byte] %p0/z -> %z0.s %z1.s", + "ld2w -0xc0(%x7)[4byte] %p2/z -> %z5.s %z6.s", + "ld2w (%x12)[4byte] %p3/z -> %z10.s %z11.s", + "ld2w +0xc0(%x17)[4byte] %p5/z -> %z16.s %z17.s", + "ld2w +0x0140(%x22)[4byte] %p6/z -> %z21.s %z22.s", + "ld2w +0x01c0(%sp)[4byte] %p7/z -> %z31.s %z0.s", }; TEST_LOOP( ld2w, ld2w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(ldnf1b_sve_pred) @@ -19144,63 +19143,63 @@ TEST_INSTR(ldnf1b_sve_pred) /* Testing LDNF1B { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_0_0[6] = { - "ldnf1b -0x80(%x0)[16byte] %p0/z -> %z0.h", - "ldnf1b -0x30(%x7)[16byte] %p2/z -> %z5.h", - "ldnf1b (%x12)[16byte] %p3/z -> %z10.h", - "ldnf1b +0x30(%x17)[16byte] %p5/z -> %z16.h", - "ldnf1b +0x50(%x22)[16byte] %p6/z -> %z21.h", - "ldnf1b +0x70(%sp)[16byte] %p7/z -> %z31.h", + "ldnf1b -0x80(%x0)[1byte] %p0/z -> %z0.h", + "ldnf1b -0x30(%x7)[1byte] %p2/z -> %z5.h", + "ldnf1b (%x12)[1byte] %p3/z -> %z10.h", + "ldnf1b +0x30(%x17)[1byte] %p5/z -> %z16.h", + "ldnf1b +0x50(%x22)[1byte] %p6/z -> %z21.h", + "ldnf1b +0x70(%sp)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ldnf1b, ldnf1b_sve_pred, 6, expected_0_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_1)); /* Testing LDNF1B { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_1_0[6] = { - "ldnf1b -0x40(%x0)[8byte] %p0/z -> %z0.s", - "ldnf1b -0x18(%x7)[8byte] %p2/z -> %z5.s", - "ldnf1b (%x12)[8byte] %p3/z -> %z10.s", - "ldnf1b +0x18(%x17)[8byte] %p5/z -> %z16.s", - "ldnf1b +0x28(%x22)[8byte] %p6/z -> %z21.s", - "ldnf1b +0x38(%sp)[8byte] %p7/z -> %z31.s", + "ldnf1b -0x40(%x0)[1byte] %p0/z -> %z0.s", + "ldnf1b -0x18(%x7)[1byte] %p2/z -> %z5.s", + "ldnf1b (%x12)[1byte] %p3/z -> %z10.s", + "ldnf1b +0x18(%x17)[1byte] %p5/z -> %z16.s", + "ldnf1b +0x28(%x22)[1byte] %p6/z -> %z21.s", + "ldnf1b +0x38(%sp)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnf1b, ldnf1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_1)); /* Testing LDNF1B { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_2_0[6] = { - "ldnf1b -0x20(%x0)[4byte] %p0/z -> %z0.d", - "ldnf1b -0x0c(%x7)[4byte] %p2/z -> %z5.d", - "ldnf1b (%x12)[4byte] %p3/z -> %z10.d", - "ldnf1b +0x0c(%x17)[4byte] %p5/z -> %z16.d", - "ldnf1b +0x14(%x22)[4byte] %p6/z -> %z21.d", - "ldnf1b +0x1c(%sp)[4byte] %p7/z -> %z31.d", + "ldnf1b -0x20(%x0)[1byte] %p0/z -> %z0.d", + "ldnf1b -0x0c(%x7)[1byte] %p2/z -> %z5.d", + "ldnf1b (%x12)[1byte] %p3/z -> %z10.d", + "ldnf1b +0x0c(%x17)[1byte] %p5/z -> %z16.d", + "ldnf1b +0x14(%x22)[1byte] %p6/z -> %z21.d", + "ldnf1b +0x1c(%sp)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnf1b, ldnf1b_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 4 * 
imm4[i], - OPSZ_4)); + OPSZ_1)); /* Testing LDNF1B { .B }, /Z, [{, #, MUL VL}] */ const char *const expected_3_0[6] = { - "ldnf1b -0x0100(%x0)[32byte] %p0/z -> %z0.b", - "ldnf1b -0x60(%x7)[32byte] %p2/z -> %z5.b", - "ldnf1b (%x12)[32byte] %p3/z -> %z10.b", - "ldnf1b +0x60(%x17)[32byte] %p5/z -> %z16.b", - "ldnf1b +0xa0(%x22)[32byte] %p6/z -> %z21.b", - "ldnf1b +0xe0(%sp)[32byte] %p7/z -> %z31.b", + "ldnf1b -0x0100(%x0)[1byte] %p0/z -> %z0.b", + "ldnf1b -0x60(%x7)[1byte] %p2/z -> %z5.b", + "ldnf1b (%x12)[1byte] %p3/z -> %z10.b", + "ldnf1b +0x60(%x17)[1byte] %p5/z -> %z16.b", + "ldnf1b +0xa0(%x22)[1byte] %p6/z -> %z21.b", + "ldnf1b +0xe0(%sp)[1byte] %p7/z -> %z31.b", }; TEST_LOOP(ldnf1b, ldnf1b_sve_pred, 6, expected_3_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_1)); } TEST_INSTR(ldnf1d_sve_pred) @@ -19208,18 +19207,18 @@ TEST_INSTR(ldnf1d_sve_pred) /* Testing LDNF1D { .D }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_0_0[6] = { - "ldnf1d -0x0100(%x0)[32byte] %p0/z -> %z0.d", - "ldnf1d -0x60(%x7)[32byte] %p2/z -> %z5.d", - "ldnf1d (%x12)[32byte] %p3/z -> %z10.d", - "ldnf1d +0x60(%x17)[32byte] %p5/z -> %z16.d", - "ldnf1d +0xa0(%x22)[32byte] %p6/z -> %z21.d", - "ldnf1d +0xe0(%sp)[32byte] %p7/z -> %z31.d", + "ldnf1d -0x0100(%x0)[8byte] %p0/z -> %z0.d", + "ldnf1d -0x60(%x7)[8byte] %p2/z -> %z5.d", + "ldnf1d (%x12)[8byte] %p3/z -> %z10.d", + "ldnf1d +0x60(%x17)[8byte] %p5/z -> %z16.d", + "ldnf1d +0xa0(%x22)[8byte] %p6/z -> %z21.d", + "ldnf1d +0xe0(%sp)[8byte] %p7/z -> %z31.d", }; TEST_LOOP( ldnf1d, ldnf1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ldnf1h_sve_pred) @@ -19227,48 +19226,48 @@ TEST_INSTR(ldnf1h_sve_pred) /* Testing LDNF1H { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_0_0[6] = { - "ldnf1h -0x0100(%x0)[32byte] %p0/z -> %z0.h", - "ldnf1h -0x60(%x7)[32byte] %p2/z -> %z5.h", - "ldnf1h (%x12)[32byte] %p3/z -> %z10.h", - "ldnf1h +0x60(%x17)[32byte] %p5/z -> %z16.h", - "ldnf1h +0xa0(%x22)[32byte] %p6/z -> %z21.h", - "ldnf1h +0xe0(%sp)[32byte] %p7/z -> %z31.h", + "ldnf1h -0x0100(%x0)[2byte] %p0/z -> %z0.h", + "ldnf1h -0x60(%x7)[2byte] %p2/z -> %z5.h", + "ldnf1h (%x12)[2byte] %p3/z -> %z10.h", + "ldnf1h +0x60(%x17)[2byte] %p5/z -> %z16.h", + "ldnf1h +0xa0(%x22)[2byte] %p6/z -> %z21.h", + "ldnf1h +0xe0(%sp)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ldnf1h, ldnf1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_2)); /* Testing LDNF1H { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_1_0[6] = { - "ldnf1h -0x80(%x0)[16byte] %p0/z -> %z0.s", - "ldnf1h -0x30(%x7)[16byte] %p2/z -> %z5.s", - "ldnf1h (%x12)[16byte] %p3/z -> %z10.s", - "ldnf1h +0x30(%x17)[16byte] %p5/z -> %z16.s", - "ldnf1h +0x50(%x22)[16byte] %p6/z -> %z21.s", - "ldnf1h +0x70(%sp)[16byte] %p7/z -> %z31.s", + "ldnf1h -0x80(%x0)[2byte] %p0/z -> %z0.s", + "ldnf1h -0x30(%x7)[2byte] %p2/z -> %z5.s", + "ldnf1h (%x12)[2byte] %p3/z -> %z10.s", + "ldnf1h +0x30(%x17)[2byte] %p5/z -> %z16.s", + "ldnf1h +0x50(%x22)[2byte] %p6/z -> %z21.s", + "ldnf1h +0x70(%sp)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnf1h, ldnf1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], 
DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_2)); /* Testing LDNF1H { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_2_0[6] = { - "ldnf1h -0x40(%x0)[8byte] %p0/z -> %z0.d", - "ldnf1h -0x18(%x7)[8byte] %p2/z -> %z5.d", - "ldnf1h (%x12)[8byte] %p3/z -> %z10.d", - "ldnf1h +0x18(%x17)[8byte] %p5/z -> %z16.d", - "ldnf1h +0x28(%x22)[8byte] %p6/z -> %z21.d", - "ldnf1h +0x38(%sp)[8byte] %p7/z -> %z31.d", + "ldnf1h -0x40(%x0)[2byte] %p0/z -> %z0.d", + "ldnf1h -0x18(%x7)[2byte] %p2/z -> %z5.d", + "ldnf1h (%x12)[2byte] %p3/z -> %z10.d", + "ldnf1h +0x18(%x17)[2byte] %p5/z -> %z16.d", + "ldnf1h +0x28(%x22)[2byte] %p6/z -> %z21.d", + "ldnf1h +0x38(%sp)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnf1h, ldnf1h_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_2)); } TEST_INSTR(ldnf1sb_sve_pred) @@ -19276,48 +19275,48 @@ TEST_INSTR(ldnf1sb_sve_pred) /* Testing LDNF1SB { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_0_0[6] = { - "ldnf1sb -0x80(%x0)[16byte] %p0/z -> %z0.h", - "ldnf1sb -0x30(%x7)[16byte] %p2/z -> %z5.h", - "ldnf1sb (%x12)[16byte] %p3/z -> %z10.h", - "ldnf1sb +0x30(%x17)[16byte] %p5/z -> %z16.h", - "ldnf1sb +0x50(%x22)[16byte] %p6/z -> %z21.h", - "ldnf1sb +0x70(%sp)[16byte] %p7/z -> %z31.h", + "ldnf1sb -0x80(%x0)[1byte] %p0/z -> %z0.h", + "ldnf1sb -0x30(%x7)[1byte] %p2/z -> %z5.h", + "ldnf1sb (%x12)[1byte] %p3/z -> %z10.h", + "ldnf1sb +0x30(%x17)[1byte] %p5/z -> %z16.h", + "ldnf1sb +0x50(%x22)[1byte] %p6/z -> %z21.h", + "ldnf1sb +0x70(%sp)[1byte] %p7/z -> %z31.h", }; TEST_LOOP(ldnf1sb, ldnf1sb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * 
imm4[i], - OPSZ_16)); + OPSZ_1)); /* Testing LDNF1SB { .S }, /Z, [{, #, MUL VL}] */ const char *const expected_1_0[6] = { - "ldnf1sb -0x40(%x0)[8byte] %p0/z -> %z0.s", - "ldnf1sb -0x18(%x7)[8byte] %p2/z -> %z5.s", - "ldnf1sb (%x12)[8byte] %p3/z -> %z10.s", - "ldnf1sb +0x18(%x17)[8byte] %p5/z -> %z16.s", - "ldnf1sb +0x28(%x22)[8byte] %p6/z -> %z21.s", - "ldnf1sb +0x38(%sp)[8byte] %p7/z -> %z31.s", + "ldnf1sb -0x40(%x0)[1byte] %p0/z -> %z0.s", + "ldnf1sb -0x18(%x7)[1byte] %p2/z -> %z5.s", + "ldnf1sb (%x12)[1byte] %p3/z -> %z10.s", + "ldnf1sb +0x18(%x17)[1byte] %p5/z -> %z16.s", + "ldnf1sb +0x28(%x22)[1byte] %p6/z -> %z21.s", + "ldnf1sb +0x38(%sp)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnf1sb, ldnf1sb_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_1)); /* Testing LDNF1SB { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_2_0[6] = { - "ldnf1sb -0x20(%x0)[4byte] %p0/z -> %z0.d", - "ldnf1sb -0x0c(%x7)[4byte] %p2/z -> %z5.d", - "ldnf1sb (%x12)[4byte] %p3/z -> %z10.d", - "ldnf1sb +0x0c(%x17)[4byte] %p5/z -> %z16.d", - "ldnf1sb +0x14(%x22)[4byte] %p6/z -> %z21.d", - "ldnf1sb +0x1c(%sp)[4byte] %p7/z -> %z31.d", + "ldnf1sb -0x20(%x0)[1byte] %p0/z -> %z0.d", + "ldnf1sb -0x0c(%x7)[1byte] %p2/z -> %z5.d", + "ldnf1sb (%x12)[1byte] %p3/z -> %z10.d", + "ldnf1sb +0x0c(%x17)[1byte] %p5/z -> %z16.d", + "ldnf1sb +0x14(%x22)[1byte] %p6/z -> %z21.d", + "ldnf1sb +0x1c(%sp)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnf1sb, ldnf1sb_sve_pred, 6, expected_2_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 4 * imm4[i], - OPSZ_4)); + OPSZ_1)); } TEST_INSTR(ldnf1sh_sve_pred) @@ -19325,33 +19324,33 @@ TEST_INSTR(ldnf1sh_sve_pred) /* Testing LDNF1SH { .S }, /Z, [{, #, 
MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_0_0[6] = { - "ldnf1sh -0x80(%x0)[16byte] %p0/z -> %z0.s", - "ldnf1sh -0x30(%x7)[16byte] %p2/z -> %z5.s", - "ldnf1sh (%x12)[16byte] %p3/z -> %z10.s", - "ldnf1sh +0x30(%x17)[16byte] %p5/z -> %z16.s", - "ldnf1sh +0x50(%x22)[16byte] %p6/z -> %z21.s", - "ldnf1sh +0x70(%sp)[16byte] %p7/z -> %z31.s", + "ldnf1sh -0x80(%x0)[2byte] %p0/z -> %z0.s", + "ldnf1sh -0x30(%x7)[2byte] %p2/z -> %z5.s", + "ldnf1sh (%x12)[2byte] %p3/z -> %z10.s", + "ldnf1sh +0x30(%x17)[2byte] %p5/z -> %z16.s", + "ldnf1sh +0x50(%x22)[2byte] %p6/z -> %z21.s", + "ldnf1sh +0x70(%sp)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnf1sh, ldnf1sh_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_2)); /* Testing LDNF1SH { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_1_0[6] = { - "ldnf1sh -0x40(%x0)[8byte] %p0/z -> %z0.d", - "ldnf1sh -0x18(%x7)[8byte] %p2/z -> %z5.d", - "ldnf1sh (%x12)[8byte] %p3/z -> %z10.d", - "ldnf1sh +0x18(%x17)[8byte] %p5/z -> %z16.d", - "ldnf1sh +0x28(%x22)[8byte] %p6/z -> %z21.d", - "ldnf1sh +0x38(%sp)[8byte] %p7/z -> %z31.d", + "ldnf1sh -0x40(%x0)[2byte] %p0/z -> %z0.d", + "ldnf1sh -0x18(%x7)[2byte] %p2/z -> %z5.d", + "ldnf1sh (%x12)[2byte] %p3/z -> %z10.d", + "ldnf1sh +0x18(%x17)[2byte] %p5/z -> %z16.d", + "ldnf1sh +0x28(%x22)[2byte] %p6/z -> %z21.d", + "ldnf1sh +0x38(%sp)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnf1sh, ldnf1sh_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 8 * imm4[i], - OPSZ_8)); + OPSZ_2)); } TEST_INSTR(ldnf1sw_sve_pred) @@ -19359,18 +19358,18 @@ TEST_INSTR(ldnf1sw_sve_pred) /* Testing LDNF1SW { .D }, /Z, [{, #, MUL VL}] */ 
static const int imm4[6] = { -128, -48, 0, 48, 80, 112 }; const char *const expected_0_0[6] = { - "ldnf1sw -0x80(%x0)[16byte] %p0/z -> %z0.d", - "ldnf1sw -0x30(%x7)[16byte] %p2/z -> %z5.d", - "ldnf1sw (%x12)[16byte] %p3/z -> %z10.d", - "ldnf1sw +0x30(%x17)[16byte] %p5/z -> %z16.d", - "ldnf1sw +0x50(%x22)[16byte] %p6/z -> %z21.d", - "ldnf1sw +0x70(%sp)[16byte] %p7/z -> %z31.d", + "ldnf1sw -0x80(%x0)[4byte] %p0/z -> %z0.d", + "ldnf1sw -0x30(%x7)[4byte] %p2/z -> %z5.d", + "ldnf1sw (%x12)[4byte] %p3/z -> %z10.d", + "ldnf1sw +0x30(%x17)[4byte] %p5/z -> %z16.d", + "ldnf1sw +0x50(%x22)[4byte] %p6/z -> %z21.d", + "ldnf1sw +0x70(%sp)[4byte] %p7/z -> %z31.d", }; TEST_LOOP( ldnf1sw, ldnf1sw_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_16)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(ldnf1w_sve_pred) @@ -19378,144 +19377,144 @@ TEST_INSTR(ldnf1w_sve_pred) /* Testing LDNF1W { .S }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -8, -3, 0, 3, 5, 7 }; const char *const expected_0_0[6] = { - "ldnf1w -0x0100(%x0)[32byte] %p0/z -> %z0.s", - "ldnf1w -0x60(%x7)[32byte] %p2/z -> %z5.s", - "ldnf1w (%x12)[32byte] %p3/z -> %z10.s", - "ldnf1w +0x60(%x17)[32byte] %p5/z -> %z16.s", - "ldnf1w +0xa0(%x22)[32byte] %p6/z -> %z21.s", - "ldnf1w +0xe0(%sp)[32byte] %p7/z -> %z31.s", + "ldnf1w -0x0100(%x0)[4byte] %p0/z -> %z0.s", + "ldnf1w -0x60(%x7)[4byte] %p2/z -> %z5.s", + "ldnf1w (%x12)[4byte] %p3/z -> %z10.s", + "ldnf1w +0x60(%x17)[4byte] %p5/z -> %z16.s", + "ldnf1w +0xa0(%x22)[4byte] %p6/z -> %z21.s", + "ldnf1w +0xe0(%sp)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnf1w, ldnf1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 32 * imm4[i], - OPSZ_32)); + OPSZ_4)); /* Testing LDNF1W { .D }, /Z, [{, #, MUL VL}] */ const char *const expected_1_0[6] = { - "ldnf1w -0x80(%x0)[16byte] %p0/z -> %z0.d", - "ldnf1w -0x30(%x7)[16byte] %p2/z -> %z5.d", - "ldnf1w (%x12)[16byte] %p3/z -> %z10.d", - "ldnf1w +0x30(%x17)[16byte] %p5/z -> %z16.d", - "ldnf1w +0x50(%x22)[16byte] %p6/z -> %z21.d", - "ldnf1w +0x70(%sp)[16byte] %p7/z -> %z31.d", + "ldnf1w -0x80(%x0)[4byte] %p0/z -> %z0.d", + "ldnf1w -0x30(%x7)[4byte] %p2/z -> %z5.d", + "ldnf1w (%x12)[4byte] %p3/z -> %z10.d", + "ldnf1w +0x30(%x17)[4byte] %p5/z -> %z16.d", + "ldnf1w +0x50(%x22)[4byte] %p6/z -> %z21.d", + "ldnf1w +0x70(%sp)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnf1w, ldnf1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, 16 * imm4[i], - OPSZ_16)); + OPSZ_4)); } TEST_INSTR(ld3d_sve_pred) { /* Testing LD3D { .D, .D, .D }, /Z, [, , LSL #3] */ const char *const expected_0_0[6] = { - "ld3d (%x0,%x0,lsl #3)[96byte] %p0/z -> %z0.d %z1.d %z2.d", - "ld3d (%x7,%x8,lsl #3)[96byte] %p2/z -> %z5.d %z6.d %z7.d", - "ld3d (%x12,%x13,lsl #3)[96byte] %p3/z -> %z10.d %z11.d %z12.d", - "ld3d (%x17,%x18,lsl #3)[96byte] %p5/z -> %z16.d %z17.d %z18.d", - "ld3d (%x22,%x23,lsl #3)[96byte] %p6/z -> %z21.d %z22.d %z23.d", - "ld3d (%sp,%x30,lsl #3)[96byte] %p7/z -> %z31.d %z0.d %z1.d", + "ld3d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d %z2.d", + "ld3d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d %z6.d %z7.d", + "ld3d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d %z11.d %z12.d", + "ld3d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d %z18.d", + "ld3d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d %z22.d %z23.d", + "ld3d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d %z1.d", }; TEST_LOOP(ld3d, ld3d_sve_pred, 6, expected_0_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LD3D { .D, .D, .D }, /Z, [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "ld3d -0x0300(%x0)[96byte] %p0/z -> %z0.d %z1.d %z2.d", - "ld3d -0x0120(%x7)[96byte] %p2/z -> %z5.d %z6.d %z7.d", - "ld3d (%x12)[96byte] %p3/z -> %z10.d %z11.d %z12.d", - "ld3d +0x0120(%x17)[96byte] %p5/z -> %z16.d %z17.d %z18.d", - "ld3d +0x01e0(%x22)[96byte] %p6/z -> %z21.d %z22.d %z23.d", - "ld3d +0x02a0(%sp)[96byte] %p7/z -> %z31.d %z0.d %z1.d", + "ld3d -0x0300(%x0)[8byte] %p0/z -> %z0.d %z1.d %z2.d", + "ld3d -0x0120(%x7)[8byte] %p2/z -> %z5.d %z6.d %z7.d", + "ld3d (%x12)[8byte] %p3/z -> %z10.d %z11.d %z12.d", + "ld3d +0x0120(%x17)[8byte] %p5/z -> %z16.d %z17.d %z18.d", + "ld3d +0x01e0(%x22)[8byte] %p6/z -> %z21.d %z22.d %z23.d", + "ld3d +0x02a0(%sp)[8byte] %p7/z -> %z31.d %z0.d %z1.d", }; TEST_LOOP( ld3d, ld3d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ld3h_sve_pred) { /* Testing LD3H { .H, .H, .H }, /Z, [, , LSL #1] */ const char *const expected_0_0[6] = { - "ld3h (%x0,%x0,lsl #1)[96byte] %p0/z -> %z0.h %z1.h %z2.h", - "ld3h (%x7,%x8,lsl #1)[96byte] %p2/z -> %z5.h %z6.h %z7.h", - "ld3h (%x12,%x13,lsl #1)[96byte] %p3/z -> %z10.h %z11.h %z12.h", - "ld3h (%x17,%x18,lsl #1)[96byte] %p5/z -> %z16.h %z17.h %z18.h", - "ld3h (%x22,%x23,lsl #1)[96byte] %p6/z -> %z21.h %z22.h %z23.h", - "ld3h (%sp,%x30,lsl #1)[96byte] %p7/z -> %z31.h %z0.h %z1.h", + "ld3h (%x0,%x0,lsl 
#1)[2byte] %p0/z -> %z0.h %z1.h %z2.h", + "ld3h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h %z6.h %z7.h", + "ld3h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h %z11.h %z12.h", + "ld3h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h %z18.h", + "ld3h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h %z22.h %z23.h", + "ld3h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h %z1.h", }; TEST_LOOP(ld3h, ld3h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD3H { .H, .H, .H }, /Z, [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "ld3h -0x0300(%x0)[96byte] %p0/z -> %z0.h %z1.h %z2.h", - "ld3h -0x0120(%x7)[96byte] %p2/z -> %z5.h %z6.h %z7.h", - "ld3h (%x12)[96byte] %p3/z -> %z10.h %z11.h %z12.h", - "ld3h +0x0120(%x17)[96byte] %p5/z -> %z16.h %z17.h %z18.h", - "ld3h +0x01e0(%x22)[96byte] %p6/z -> %z21.h %z22.h %z23.h", - "ld3h +0x02a0(%sp)[96byte] %p7/z -> %z31.h %z0.h %z1.h", + "ld3h -0x0300(%x0)[2byte] %p0/z -> %z0.h %z1.h %z2.h", + "ld3h -0x0120(%x7)[2byte] %p2/z -> %z5.h %z6.h %z7.h", + "ld3h (%x12)[2byte] %p3/z -> %z10.h %z11.h %z12.h", + "ld3h +0x0120(%x17)[2byte] %p5/z -> %z16.h %z17.h %z18.h", + "ld3h +0x01e0(%x22)[2byte] %p6/z -> %z21.h %z22.h %z23.h", + "ld3h +0x02a0(%sp)[2byte] %p7/z -> %z31.h %z0.h %z1.h", }; TEST_LOOP( ld3h, ld3h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(ld3w_sve_pred) { /* Testing LD3W { .S, .S, .S }, /Z, [, , LSL #2] */ const char *const expected_0_0[6] = 
{ - "ld3w (%x0,%x0,lsl #2)[96byte] %p0/z -> %z0.s %z1.s %z2.s", - "ld3w (%x7,%x8,lsl #2)[96byte] %p2/z -> %z5.s %z6.s %z7.s", - "ld3w (%x12,%x13,lsl #2)[96byte] %p3/z -> %z10.s %z11.s %z12.s", - "ld3w (%x17,%x18,lsl #2)[96byte] %p5/z -> %z16.s %z17.s %z18.s", - "ld3w (%x22,%x23,lsl #2)[96byte] %p6/z -> %z21.s %z22.s %z23.s", - "ld3w (%sp,%x30,lsl #2)[96byte] %p7/z -> %z31.s %z0.s %z1.s", + "ld3w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s %z2.s", + "ld3w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s %z6.s %z7.s", + "ld3w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s %z11.s %z12.s", + "ld3w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s %z18.s", + "ld3w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s %z22.s %z23.s", + "ld3w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s %z1.s", }; TEST_LOOP(ld3w, ld3w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD3W { .S, .S, .S }, /Z, [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "ld3w -0x0300(%x0)[96byte] %p0/z -> %z0.s %z1.s %z2.s", - "ld3w -0x0120(%x7)[96byte] %p2/z -> %z5.s %z6.s %z7.s", - "ld3w (%x12)[96byte] %p3/z -> %z10.s %z11.s %z12.s", - "ld3w +0x0120(%x17)[96byte] %p5/z -> %z16.s %z17.s %z18.s", - "ld3w +0x01e0(%x22)[96byte] %p6/z -> %z21.s %z22.s %z23.s", - "ld3w +0x02a0(%sp)[96byte] %p7/z -> %z31.s %z0.s %z1.s", + "ld3w -0x0300(%x0)[4byte] %p0/z -> %z0.s %z1.s %z2.s", + "ld3w -0x0120(%x7)[4byte] %p2/z -> %z5.s %z6.s %z7.s", + "ld3w (%x12)[4byte] %p3/z -> %z10.s %z11.s %z12.s", + "ld3w +0x0120(%x17)[4byte] %p5/z -> %z16.s %z17.s %z18.s", + "ld3w +0x01e0(%x22)[4byte] %p6/z -> %z21.s %z22.s %z23.s", + "ld3w +0x02a0(%sp)[4byte] %p7/z -> %z31.s %z0.s %z1.s", }; TEST_LOOP( ld3w, ld3w_sve_pred, 6, 
expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(ld4d_sve_pred) @@ -19523,36 +19522,36 @@ TEST_INSTR(ld4d_sve_pred) /* Testing LD4D { .D, .D, .D, .D }, /Z, [, , LSL * #3] */ const char *const expected_0_0[6] = { - "ld4d (%x0,%x0,lsl #3)[128byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d", - "ld4d (%x7,%x8,lsl #3)[128byte] %p2/z -> %z5.d %z6.d %z7.d %z8.d", - "ld4d (%x12,%x13,lsl #3)[128byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d", - "ld4d (%x17,%x18,lsl #3)[128byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d", - "ld4d (%x22,%x23,lsl #3)[128byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d", - "ld4d (%sp,%x30,lsl #3)[128byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d", + "ld4d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d", + "ld4d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d %z6.d %z7.d %z8.d", + "ld4d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d", + "ld4d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d", + "ld4d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d", + "ld4d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d", }; TEST_LOOP(ld4d, ld4d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LD4D { .D, .D, .D, .D }, /Z, [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "ld4d -0x0400(%x0)[128byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d", - "ld4d -0x0180(%x7)[128byte] %p2/z -> %z5.d %z6.d %z7.d %z8.d", - "ld4d (%x12)[128byte] %p3/z -> %z10.d %z11.d 
%z12.d %z13.d", - "ld4d +0x0180(%x17)[128byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d", - "ld4d +0x0280(%x22)[128byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d", - "ld4d +0x0380(%sp)[128byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d", + "ld4d -0x0400(%x0)[8byte] %p0/z -> %z0.d %z1.d %z2.d %z3.d", + "ld4d -0x0180(%x7)[8byte] %p2/z -> %z5.d %z6.d %z7.d %z8.d", + "ld4d (%x12)[8byte] %p3/z -> %z10.d %z11.d %z12.d %z13.d", + "ld4d +0x0180(%x17)[8byte] %p5/z -> %z16.d %z17.d %z18.d %z19.d", + "ld4d +0x0280(%x22)[8byte] %p6/z -> %z21.d %z22.d %z23.d %z24.d", + "ld4d +0x0380(%sp)[8byte] %p7/z -> %z31.d %z0.d %z1.d %z2.d", }; TEST_LOOP( ld4d, ld4d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ld4h_sve_pred) @@ -19560,36 +19559,36 @@ TEST_INSTR(ld4h_sve_pred) /* Testing LD4H { .H, .H, .H, .H }, /Z, [, , LSL * #1] */ const char *const expected_0_0[6] = { - "ld4h (%x0,%x0,lsl #1)[128byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h", - "ld4h (%x7,%x8,lsl #1)[128byte] %p2/z -> %z5.h %z6.h %z7.h %z8.h", - "ld4h (%x12,%x13,lsl #1)[128byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h", - "ld4h (%x17,%x18,lsl #1)[128byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h", - "ld4h (%x22,%x23,lsl #1)[128byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h", - "ld4h (%sp,%x30,lsl #1)[128byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h", + "ld4h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h", + "ld4h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h %z6.h %z7.h %z8.h", + "ld4h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h", + "ld4h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h", + "ld4h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h", + "ld4h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h", }; TEST_LOOP(ld4h, 
ld4h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LD4H { .H, .H, .H, .H }, /Z, [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "ld4h -0x0400(%x0)[128byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h", - "ld4h -0x0180(%x7)[128byte] %p2/z -> %z5.h %z6.h %z7.h %z8.h", - "ld4h (%x12)[128byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h", - "ld4h +0x0180(%x17)[128byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h", - "ld4h +0x0280(%x22)[128byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h", - "ld4h +0x0380(%sp)[128byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h", + "ld4h -0x0400(%x0)[2byte] %p0/z -> %z0.h %z1.h %z2.h %z3.h", + "ld4h -0x0180(%x7)[2byte] %p2/z -> %z5.h %z6.h %z7.h %z8.h", + "ld4h (%x12)[2byte] %p3/z -> %z10.h %z11.h %z12.h %z13.h", + "ld4h +0x0180(%x17)[2byte] %p5/z -> %z16.h %z17.h %z18.h %z19.h", + "ld4h +0x0280(%x22)[2byte] %p6/z -> %z21.h %z22.h %z23.h %z24.h", + "ld4h +0x0380(%sp)[2byte] %p7/z -> %z31.h %z0.h %z1.h %z2.h", }; TEST_LOOP( ld4h, ld4h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(ld4w_sve_pred) @@ -19597,357 +19596,357 @@ TEST_INSTR(ld4w_sve_pred) /* Testing LD4W { .S, .S, .S, .S }, /Z, [, , LSL * #2] */ const char *const expected_0_0[6] = { - "ld4w (%x0,%x0,lsl #2)[128byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s", - "ld4w (%x7,%x8,lsl #2)[128byte] %p2/z -> %z5.s %z6.s %z7.s %z8.s", - "ld4w (%x12,%x13,lsl #2)[128byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s", - "ld4w 
(%x17,%x18,lsl #2)[128byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s", - "ld4w (%x22,%x23,lsl #2)[128byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s", - "ld4w (%sp,%x30,lsl #2)[128byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s", + "ld4w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s", + "ld4w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s %z6.s %z7.s %z8.s", + "ld4w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s", + "ld4w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s", + "ld4w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s", + "ld4w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s", }; TEST_LOOP(ld4w, ld4w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LD4W { .S, .S, .S, .S }, /Z, [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "ld4w -0x0400(%x0)[128byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s", - "ld4w -0x0180(%x7)[128byte] %p2/z -> %z5.s %z6.s %z7.s %z8.s", - "ld4w (%x12)[128byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s", - "ld4w +0x0180(%x17)[128byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s", - "ld4w +0x0280(%x22)[128byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s", - "ld4w +0x0380(%sp)[128byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s", + "ld4w -0x0400(%x0)[4byte] %p0/z -> %z0.s %z1.s %z2.s %z3.s", + "ld4w -0x0180(%x7)[4byte] %p2/z -> %z5.s %z6.s %z7.s %z8.s", + "ld4w (%x12)[4byte] %p3/z -> %z10.s %z11.s %z12.s %z13.s", + "ld4w +0x0180(%x17)[4byte] %p5/z -> %z16.s %z17.s %z18.s %z19.s", + "ld4w +0x0280(%x22)[4byte] %p6/z -> %z21.s %z22.s %z23.s %z24.s", + "ld4w +0x0380(%sp)[4byte] %p7/z -> %z31.s %z0.s %z1.s %z2.s", }; TEST_LOOP( ld4w, ld4w_sve_pred, 6, expected_1_0[i], 
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(ldnt1d_sve_pred) { /* Testing LDNT1D { .D }, /Z, [, , LSL #3] */ const char *const expected_0_0[6] = { - "ldnt1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d", - "ldnt1d (%x7,%x8,lsl #3)[32byte] %p2/z -> %z5.d", - "ldnt1d (%x12,%x13,lsl #3)[32byte] %p3/z -> %z10.d", - "ldnt1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d", - "ldnt1d (%x22,%x23,lsl #3)[32byte] %p6/z -> %z21.d", - "ldnt1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d", + "ldnt1d (%x0,%x0,lsl #3)[8byte] %p0/z -> %z0.d", + "ldnt1d (%x7,%x8,lsl #3)[8byte] %p2/z -> %z5.d", + "ldnt1d (%x12,%x13,lsl #3)[8byte] %p3/z -> %z10.d", + "ldnt1d (%x17,%x18,lsl #3)[8byte] %p5/z -> %z16.d", + "ldnt1d (%x22,%x23,lsl #3)[8byte] %p6/z -> %z21.d", + "ldnt1d (%sp,%x30,lsl #3)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1d, ldnt1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing LDNT1D { .D }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "ldnt1d -0x0100(%x0)[32byte] %p0/z -> %z0.d", - "ldnt1d -0x60(%x7)[32byte] %p2/z -> %z5.d", - "ldnt1d (%x12)[32byte] %p3/z -> %z10.d", - "ldnt1d +0x60(%x17)[32byte] %p5/z -> %z16.d", - "ldnt1d +0xa0(%x22)[32byte] %p6/z -> %z21.d", - "ldnt1d +0xe0(%sp)[32byte] %p7/z -> %z31.d", + "ldnt1d -0x0100(%x0)[8byte] %p0/z -> %z0.d", + "ldnt1d -0x60(%x7)[8byte] %p2/z -> %z5.d", + "ldnt1d (%x12)[8byte] %p3/z -> %z10.d", + "ldnt1d +0x60(%x17)[8byte] %p5/z -> %z16.d", + "ldnt1d 
+0xa0(%x22)[8byte] %p6/z -> %z21.d", + "ldnt1d +0xe0(%sp)[8byte] %p7/z -> %z31.d", }; TEST_LOOP( ldnt1d, ldnt1d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(ldnt1w_sve_pred) { /* Testing LDNT1W { .S }, /Z, [, , LSL #2] */ const char *const expected_0_0[6] = { - "ldnt1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s", - "ldnt1w (%x7,%x8,lsl #2)[32byte] %p2/z -> %z5.s", - "ldnt1w (%x12,%x13,lsl #2)[32byte] %p3/z -> %z10.s", - "ldnt1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s", - "ldnt1w (%x22,%x23,lsl #2)[32byte] %p6/z -> %z21.s", - "ldnt1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s", + "ldnt1w (%x0,%x0,lsl #2)[4byte] %p0/z -> %z0.s", + "ldnt1w (%x7,%x8,lsl #2)[4byte] %p2/z -> %z5.s", + "ldnt1w (%x12,%x13,lsl #2)[4byte] %p3/z -> %z10.s", + "ldnt1w (%x17,%x18,lsl #2)[4byte] %p5/z -> %z16.s", + "ldnt1w (%x22,%x23,lsl #2)[4byte] %p6/z -> %z21.s", + "ldnt1w (%sp,%x30,lsl #2)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1w, ldnt1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing LDNT1W { .S }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "ldnt1w -0x0100(%x0)[32byte] %p0/z -> %z0.s", - "ldnt1w -0x60(%x7)[32byte] %p2/z -> %z5.s", - "ldnt1w (%x12)[32byte] %p3/z -> %z10.s", - "ldnt1w +0x60(%x17)[32byte] %p5/z -> %z16.s", - "ldnt1w +0xa0(%x22)[32byte] %p6/z -> %z21.s", - "ldnt1w +0xe0(%sp)[32byte] %p7/z -> %z31.s", + "ldnt1w -0x0100(%x0)[4byte] %p0/z -> %z0.s", + "ldnt1w 
-0x60(%x7)[4byte] %p2/z -> %z5.s", + "ldnt1w (%x12)[4byte] %p3/z -> %z10.s", + "ldnt1w +0x60(%x17)[4byte] %p5/z -> %z16.s", + "ldnt1w +0xa0(%x22)[4byte] %p6/z -> %z21.s", + "ldnt1w +0xe0(%sp)[4byte] %p7/z -> %z31.s", }; TEST_LOOP( ldnt1w, ldnt1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(ldnt1h_sve_pred) { /* Testing LDNT1H { .H }, /Z, [, , LSL #1] */ const char *const expected_0_0[6] = { - "ldnt1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h", - "ldnt1h (%x7,%x8,lsl #1)[32byte] %p2/z -> %z5.h", - "ldnt1h (%x12,%x13,lsl #1)[32byte] %p3/z -> %z10.h", - "ldnt1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h", - "ldnt1h (%x22,%x23,lsl #1)[32byte] %p6/z -> %z21.h", - "ldnt1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h", + "ldnt1h (%x0,%x0,lsl #1)[2byte] %p0/z -> %z0.h", + "ldnt1h (%x7,%x8,lsl #1)[2byte] %p2/z -> %z5.h", + "ldnt1h (%x12,%x13,lsl #1)[2byte] %p3/z -> %z10.h", + "ldnt1h (%x17,%x18,lsl #1)[2byte] %p5/z -> %z16.h", + "ldnt1h (%x22,%x23,lsl #1)[2byte] %p6/z -> %z21.h", + "ldnt1h (%sp,%x30,lsl #1)[2byte] %p7/z -> %z31.h", }; TEST_LOOP(ldnt1h, ldnt1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing LDNT1H { .H }, /Z, [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "ldnt1h -0x0100(%x0)[32byte] %p0/z -> %z0.h", - "ldnt1h -0x60(%x7)[32byte] %p2/z -> %z5.h", - "ldnt1h (%x12)[32byte] %p3/z -> %z10.h", - "ldnt1h +0x60(%x17)[32byte] %p5/z -> %z16.h", - "ldnt1h 
+0xa0(%x22)[32byte] %p6/z -> %z21.h", - "ldnt1h +0xe0(%sp)[32byte] %p7/z -> %z31.h", + "ldnt1h -0x0100(%x0)[2byte] %p0/z -> %z0.h", + "ldnt1h -0x60(%x7)[2byte] %p2/z -> %z5.h", + "ldnt1h (%x12)[2byte] %p3/z -> %z10.h", + "ldnt1h +0x60(%x17)[2byte] %p5/z -> %z16.h", + "ldnt1h +0xa0(%x22)[2byte] %p6/z -> %z21.h", + "ldnt1h +0xe0(%sp)[2byte] %p7/z -> %z31.h", }; TEST_LOOP( ldnt1h, ldnt1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(st2d_sve_pred) { /* Testing ST2D { .D, .D }, , [, , LSL #3] */ const char *const expected_0_0[6] = { - "st2d %z0.d %z1.d %p0 -> (%x0,%x0,lsl #3)[64byte]", - "st2d %z5.d %z6.d %p2 -> (%x7,%x8,lsl #3)[64byte]", - "st2d %z10.d %z11.d %p3 -> (%x12,%x13,lsl #3)[64byte]", - "st2d %z16.d %z17.d %p5 -> (%x17,%x18,lsl #3)[64byte]", - "st2d %z21.d %z22.d %p6 -> (%x22,%x23,lsl #3)[64byte]", - "st2d %z31.d %z0.d %p7 -> (%sp,%x30,lsl #3)[64byte]", + "st2d %z0.d %z1.d %p0 -> (%x0,%x0,lsl #3)[8byte]", + "st2d %z5.d %z6.d %p2 -> (%x7,%x8,lsl #3)[8byte]", + "st2d %z10.d %z11.d %p3 -> (%x12,%x13,lsl #3)[8byte]", + "st2d %z16.d %z17.d %p5 -> (%x17,%x18,lsl #3)[8byte]", + "st2d %z21.d %z22.d %p6 -> (%x22,%x23,lsl #3)[8byte]", + "st2d %z31.d %z0.d %p7 -> (%sp,%x30,lsl #3)[8byte]", }; TEST_LOOP(st2d, st2d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_64, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing ST2D { .D, .D }, , [{, #, MUL VL}] */ static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "st2d %z0.d %z1.d %p0 -> -0x0200(%x0)[64byte]", - "st2d 
%z5.d %z6.d %p2 -> -0xc0(%x7)[64byte]", - "st2d %z10.d %z11.d %p3 -> (%x12)[64byte]", - "st2d %z16.d %z17.d %p5 -> +0xc0(%x17)[64byte]", - "st2d %z21.d %z22.d %p6 -> +0x0140(%x22)[64byte]", - "st2d %z31.d %z0.d %p7 -> +0x01c0(%sp)[64byte]", + "st2d %z0.d %z1.d %p0 -> -0x0200(%x0)[8byte]", + "st2d %z5.d %z6.d %p2 -> -0xc0(%x7)[8byte]", + "st2d %z10.d %z11.d %p3 -> (%x12)[8byte]", + "st2d %z16.d %z17.d %p5 -> +0xc0(%x17)[8byte]", + "st2d %z21.d %z22.d %p6 -> +0x0140(%x22)[8byte]", + "st2d %z31.d %z0.d %p7 -> +0x01c0(%sp)[8byte]", }; TEST_LOOP( st2d, st2d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(st2h_sve_pred) { /* Testing ST2H { .H, .H }, , [, , LSL #1] */ const char *const expected_0_0[6] = { - "st2h %z0.h %z1.h %p0 -> (%x0,%x0,lsl #1)[64byte]", - "st2h %z5.h %z6.h %p2 -> (%x7,%x8,lsl #1)[64byte]", - "st2h %z10.h %z11.h %p3 -> (%x12,%x13,lsl #1)[64byte]", - "st2h %z16.h %z17.h %p5 -> (%x17,%x18,lsl #1)[64byte]", - "st2h %z21.h %z22.h %p6 -> (%x22,%x23,lsl #1)[64byte]", - "st2h %z31.h %z0.h %p7 -> (%sp,%x30,lsl #1)[64byte]", + "st2h %z0.h %z1.h %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st2h %z5.h %z6.h %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st2h %z10.h %z11.h %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st2h %z16.h %z17.h %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st2h %z21.h %z22.h %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st2h %z31.h %z0.h %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st2h, st2h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_64, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing ST2H { .H, .H }, , [{, #, MUL VL}] */ 
static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "st2h %z0.h %z1.h %p0 -> -0x0200(%x0)[64byte]", - "st2h %z5.h %z6.h %p2 -> -0xc0(%x7)[64byte]", - "st2h %z10.h %z11.h %p3 -> (%x12)[64byte]", - "st2h %z16.h %z17.h %p5 -> +0xc0(%x17)[64byte]", - "st2h %z21.h %z22.h %p6 -> +0x0140(%x22)[64byte]", - "st2h %z31.h %z0.h %p7 -> +0x01c0(%sp)[64byte]", + "st2h %z0.h %z1.h %p0 -> -0x0200(%x0)[2byte]", + "st2h %z5.h %z6.h %p2 -> -0xc0(%x7)[2byte]", + "st2h %z10.h %z11.h %p3 -> (%x12)[2byte]", + "st2h %z16.h %z17.h %p5 -> +0xc0(%x17)[2byte]", + "st2h %z21.h %z22.h %p6 -> +0x0140(%x22)[2byte]", + "st2h %z31.h %z0.h %p7 -> +0x01c0(%sp)[2byte]", }; TEST_LOOP( st2h, st2h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(st2w_sve_pred) { /* Testing ST2W { .S, .S }, , [, , LSL #2] */ const char *const expected_0_0[6] = { - "st2w %z0.s %z1.s %p0 -> (%x0,%x0,lsl #2)[64byte]", - "st2w %z5.s %z6.s %p2 -> (%x7,%x8,lsl #2)[64byte]", - "st2w %z10.s %z11.s %p3 -> (%x12,%x13,lsl #2)[64byte]", - "st2w %z16.s %z17.s %p5 -> (%x17,%x18,lsl #2)[64byte]", - "st2w %z21.s %z22.s %p6 -> (%x22,%x23,lsl #2)[64byte]", - "st2w %z31.s %z0.s %p7 -> (%sp,%x30,lsl #2)[64byte]", + "st2w %z0.s %z1.s %p0 -> (%x0,%x0,lsl #2)[4byte]", + "st2w %z5.s %z6.s %p2 -> (%x7,%x8,lsl #2)[4byte]", + "st2w %z10.s %z11.s %p3 -> (%x12,%x13,lsl #2)[4byte]", + "st2w %z16.s %z17.s %p5 -> (%x17,%x18,lsl #2)[4byte]", + "st2w %z21.s %z22.s %p6 -> (%x22,%x23,lsl #2)[4byte]", + "st2w %z31.s %z0.s %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(st2w, st2w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), 
opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_64, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing ST2W { .S, .S }, , [{, #, MUL VL}] */ static const int imm4[6] = { -512, -192, 0, 192, 320, 448 }; const char *const expected_1_0[6] = { - "st2w %z0.s %z1.s %p0 -> -0x0200(%x0)[64byte]", - "st2w %z5.s %z6.s %p2 -> -0xc0(%x7)[64byte]", - "st2w %z10.s %z11.s %p3 -> (%x12)[64byte]", - "st2w %z16.s %z17.s %p5 -> +0xc0(%x17)[64byte]", - "st2w %z21.s %z22.s %p6 -> +0x0140(%x22)[64byte]", - "st2w %z31.s %z0.s %p7 -> +0x01c0(%sp)[64byte]", + "st2w %z0.s %z1.s %p0 -> -0x0200(%x0)[4byte]", + "st2w %z5.s %z6.s %p2 -> -0xc0(%x7)[4byte]", + "st2w %z10.s %z11.s %p3 -> (%x12)[4byte]", + "st2w %z16.s %z17.s %p5 -> +0xc0(%x17)[4byte]", + "st2w %z21.s %z22.s %p6 -> +0x0140(%x22)[4byte]", + "st2w %z31.s %z0.s %p7 -> +0x01c0(%sp)[4byte]", }; TEST_LOOP( st2w, st2w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_64)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(st3d_sve_pred) { /* Testing ST3D { .D, .D, .D }, , [, , LSL #3] */ const char *const expected_0_0[6] = { - "st3d %z0.d %z1.d %z2.d %p0 -> (%x0,%x0,lsl #3)[96byte]", - "st3d %z5.d %z6.d %z7.d %p2 -> (%x7,%x8,lsl #3)[96byte]", - "st3d %z10.d %z11.d %z12.d %p3 -> (%x12,%x13,lsl #3)[96byte]", - "st3d %z16.d %z17.d %z18.d %p5 -> (%x17,%x18,lsl #3)[96byte]", - "st3d %z21.d %z22.d %z23.d %p6 -> (%x22,%x23,lsl #3)[96byte]", - "st3d %z31.d %z0.d %z1.d %p7 -> (%sp,%x30,lsl #3)[96byte]", + "st3d %z0.d %z1.d %z2.d %p0 -> (%x0,%x0,lsl #3)[8byte]", + "st3d %z5.d %z6.d %z7.d %p2 -> (%x7,%x8,lsl #3)[8byte]", + "st3d %z10.d %z11.d %z12.d %p3 -> (%x12,%x13,lsl #3)[8byte]", + "st3d %z16.d %z17.d %z18.d %p5 -> (%x17,%x18,lsl #3)[8byte]", + "st3d %z21.d %z22.d %z23.d %p6 -> 
(%x22,%x23,lsl #3)[8byte]", + "st3d %z31.d %z0.d %z1.d %p7 -> (%sp,%x30,lsl #3)[8byte]", }; TEST_LOOP(st3d, st3d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing ST3D { .D, .D, .D }, , [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "st3d %z0.d %z1.d %z2.d %p0 -> -0x0300(%x0)[96byte]", - "st3d %z5.d %z6.d %z7.d %p2 -> -0x0120(%x7)[96byte]", - "st3d %z10.d %z11.d %z12.d %p3 -> (%x12)[96byte]", - "st3d %z16.d %z17.d %z18.d %p5 -> +0x0120(%x17)[96byte]", - "st3d %z21.d %z22.d %z23.d %p6 -> +0x01e0(%x22)[96byte]", - "st3d %z31.d %z0.d %z1.d %p7 -> +0x02a0(%sp)[96byte]", + "st3d %z0.d %z1.d %z2.d %p0 -> -0x0300(%x0)[8byte]", + "st3d %z5.d %z6.d %z7.d %p2 -> -0x0120(%x7)[8byte]", + "st3d %z10.d %z11.d %z12.d %p3 -> (%x12)[8byte]", + "st3d %z16.d %z17.d %z18.d %p5 -> +0x0120(%x17)[8byte]", + "st3d %z21.d %z22.d %z23.d %p6 -> +0x01e0(%x22)[8byte]", + "st3d %z31.d %z0.d %z1.d %p7 -> +0x02a0(%sp)[8byte]", }; TEST_LOOP( st3d, st3d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(st3h_sve_pred) { /* Testing ST3H { .H, .H, .H }, , [, , LSL #1] */ const char *const expected_0_0[6] = { - "st3h %z0.h %z1.h %z2.h %p0 -> (%x0,%x0,lsl #1)[96byte]", - "st3h %z5.h %z6.h %z7.h %p2 -> (%x7,%x8,lsl #1)[96byte]", - "st3h %z10.h %z11.h %z12.h %p3 -> (%x12,%x13,lsl #1)[96byte]", - "st3h %z16.h %z17.h %z18.h %p5 -> (%x17,%x18,lsl #1)[96byte]", - "st3h %z21.h %z22.h %z23.h %p6 -> (%x22,%x23,lsl #1)[96byte]", - "st3h %z31.h 
%z0.h %z1.h %p7 -> (%sp,%x30,lsl #1)[96byte]", + "st3h %z0.h %z1.h %z2.h %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st3h %z5.h %z6.h %z7.h %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st3h %z10.h %z11.h %z12.h %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st3h %z16.h %z17.h %z18.h %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st3h %z21.h %z22.h %z23.h %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st3h %z31.h %z0.h %z1.h %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st3h, st3h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing ST3H { .H, .H, .H }, , [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "st3h %z0.h %z1.h %z2.h %p0 -> -0x0300(%x0)[96byte]", - "st3h %z5.h %z6.h %z7.h %p2 -> -0x0120(%x7)[96byte]", - "st3h %z10.h %z11.h %z12.h %p3 -> (%x12)[96byte]", - "st3h %z16.h %z17.h %z18.h %p5 -> +0x0120(%x17)[96byte]", - "st3h %z21.h %z22.h %z23.h %p6 -> +0x01e0(%x22)[96byte]", - "st3h %z31.h %z0.h %z1.h %p7 -> +0x02a0(%sp)[96byte]", + "st3h %z0.h %z1.h %z2.h %p0 -> -0x0300(%x0)[2byte]", + "st3h %z5.h %z6.h %z7.h %p2 -> -0x0120(%x7)[2byte]", + "st3h %z10.h %z11.h %z12.h %p3 -> (%x12)[2byte]", + "st3h %z16.h %z17.h %z18.h %p5 -> +0x0120(%x17)[2byte]", + "st3h %z21.h %z22.h %z23.h %p6 -> +0x01e0(%x22)[2byte]", + "st3h %z31.h %z0.h %z1.h %p7 -> +0x02a0(%sp)[2byte]", }; TEST_LOOP( st3h, st3h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(st3w_sve_pred) { /* Testing ST3W { .S, .S, .S }, , [, , LSL #2] */ const char *const expected_0_0[6] = { - 
"st3w %z0.s %z1.s %z2.s %p0 -> (%x0,%x0,lsl #2)[96byte]", - "st3w %z5.s %z6.s %z7.s %p2 -> (%x7,%x8,lsl #2)[96byte]", - "st3w %z10.s %z11.s %z12.s %p3 -> (%x12,%x13,lsl #2)[96byte]", - "st3w %z16.s %z17.s %z18.s %p5 -> (%x17,%x18,lsl #2)[96byte]", - "st3w %z21.s %z22.s %z23.s %p6 -> (%x22,%x23,lsl #2)[96byte]", - "st3w %z31.s %z0.s %z1.s %p7 -> (%sp,%x30,lsl #2)[96byte]", + "st3w %z0.s %z1.s %z2.s %p0 -> (%x0,%x0,lsl #2)[4byte]", + "st3w %z5.s %z6.s %z7.s %p2 -> (%x7,%x8,lsl #2)[4byte]", + "st3w %z10.s %z11.s %z12.s %p3 -> (%x12,%x13,lsl #2)[4byte]", + "st3w %z16.s %z17.s %z18.s %p5 -> (%x17,%x18,lsl #2)[4byte]", + "st3w %z21.s %z22.s %z23.s %p6 -> (%x22,%x23,lsl #2)[4byte]", + "st3w %z31.s %z0.s %z1.s %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(st3w, st3w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_96, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing ST3W { .S, .S, .S }, , [{, #, MUL * VL}] */ static const int imm4[6] = { -768, -288, 0, 288, 480, 672 }; const char *const expected_1_0[6] = { - "st3w %z0.s %z1.s %z2.s %p0 -> -0x0300(%x0)[96byte]", - "st3w %z5.s %z6.s %z7.s %p2 -> -0x0120(%x7)[96byte]", - "st3w %z10.s %z11.s %z12.s %p3 -> (%x12)[96byte]", - "st3w %z16.s %z17.s %z18.s %p5 -> +0x0120(%x17)[96byte]", - "st3w %z21.s %z22.s %z23.s %p6 -> +0x01e0(%x22)[96byte]", - "st3w %z31.s %z0.s %z1.s %p7 -> +0x02a0(%sp)[96byte]", + "st3w %z0.s %z1.s %z2.s %p0 -> -0x0300(%x0)[4byte]", + "st3w %z5.s %z6.s %z7.s %p2 -> -0x0120(%x7)[4byte]", + "st3w %z10.s %z11.s %z12.s %p3 -> (%x12)[4byte]", + "st3w %z16.s %z17.s %z18.s %p5 -> +0x0120(%x17)[4byte]", + "st3w %z21.s %z22.s %z23.s %p6 -> +0x01e0(%x22)[4byte]", + "st3w %z31.s %z0.s %z1.s %p7 -> +0x02a0(%sp)[4byte]", }; TEST_LOOP( st3w, st3w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), 
opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_96)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(st4d_sve_pred) @@ -19955,36 +19954,36 @@ TEST_INSTR(st4d_sve_pred) /* Testing ST4D { .D, .D, .D, .D }, , [, , * LSL #3] */ const char *const expected_0_0[6] = { - "st4d %z0.d %z1.d %z2.d %z3.d %p0 -> (%x0,%x0,lsl #3)[128byte]", - "st4d %z5.d %z6.d %z7.d %z8.d %p2 -> (%x7,%x8,lsl #3)[128byte]", - "st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x12,%x13,lsl #3)[128byte]", - "st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17,%x18,lsl #3)[128byte]", - "st4d %z21.d %z22.d %z23.d %z24.d %p6 -> (%x22,%x23,lsl #3)[128byte]", - "st4d %z31.d %z0.d %z1.d %z2.d %p7 -> (%sp,%x30,lsl #3)[128byte]", + "st4d %z0.d %z1.d %z2.d %z3.d %p0 -> (%x0,%x0,lsl #3)[8byte]", + "st4d %z5.d %z6.d %z7.d %z8.d %p2 -> (%x7,%x8,lsl #3)[8byte]", + "st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x12,%x13,lsl #3)[8byte]", + "st4d %z16.d %z17.d %z18.d %z19.d %p5 -> (%x17,%x18,lsl #3)[8byte]", + "st4d %z21.d %z22.d %z23.d %z24.d %p6 -> (%x22,%x23,lsl #3)[8byte]", + "st4d %z31.d %z0.d %z1.d %z2.d %p7 -> (%sp,%x30,lsl #3)[8byte]", }; TEST_LOOP(st4d, st4d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing ST4D { .D, .D, .D, .D }, , [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "st4d %z0.d %z1.d %z2.d %z3.d %p0 -> -0x0400(%x0)[128byte]", - "st4d %z5.d %z6.d %z7.d %z8.d %p2 -> -0x0180(%x7)[128byte]", - "st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x12)[128byte]", - "st4d %z16.d %z17.d %z18.d %z19.d %p5 -> +0x0180(%x17)[128byte]", - "st4d %z21.d %z22.d %z23.d %z24.d %p6 -> 
+0x0280(%x22)[128byte]", - "st4d %z31.d %z0.d %z1.d %z2.d %p7 -> +0x0380(%sp)[128byte]", + "st4d %z0.d %z1.d %z2.d %z3.d %p0 -> -0x0400(%x0)[8byte]", + "st4d %z5.d %z6.d %z7.d %z8.d %p2 -> -0x0180(%x7)[8byte]", + "st4d %z10.d %z11.d %z12.d %z13.d %p3 -> (%x12)[8byte]", + "st4d %z16.d %z17.d %z18.d %z19.d %p5 -> +0x0180(%x17)[8byte]", + "st4d %z21.d %z22.d %z23.d %z24.d %p6 -> +0x0280(%x22)[8byte]", + "st4d %z31.d %z0.d %z1.d %z2.d %p7 -> +0x0380(%sp)[8byte]", }; TEST_LOOP( st4d, st4d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_8)); } TEST_INSTR(st4h_sve_pred) @@ -19992,36 +19991,36 @@ TEST_INSTR(st4h_sve_pred) /* Testing ST4H { .H, .H, .H, .H }, , [, , * LSL #1] */ const char *const expected_0_0[6] = { - "st4h %z0.h %z1.h %z2.h %z3.h %p0 -> (%x0,%x0,lsl #1)[128byte]", - "st4h %z5.h %z6.h %z7.h %z8.h %p2 -> (%x7,%x8,lsl #1)[128byte]", - "st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x12,%x13,lsl #1)[128byte]", - "st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17,%x18,lsl #1)[128byte]", - "st4h %z21.h %z22.h %z23.h %z24.h %p6 -> (%x22,%x23,lsl #1)[128byte]", - "st4h %z31.h %z0.h %z1.h %z2.h %p7 -> (%sp,%x30,lsl #1)[128byte]", + "st4h %z0.h %z1.h %z2.h %z3.h %p0 -> (%x0,%x0,lsl #1)[2byte]", + "st4h %z5.h %z6.h %z7.h %z8.h %p2 -> (%x7,%x8,lsl #1)[2byte]", + "st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x12,%x13,lsl #1)[2byte]", + "st4h %z16.h %z17.h %z18.h %z19.h %p5 -> (%x17,%x18,lsl #1)[2byte]", + "st4h %z21.h %z22.h %z23.h %z24.h %p6 -> (%x22,%x23,lsl #1)[2byte]", + "st4h %z31.h %z0.h %z1.h %z2.h %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(st4h, st4h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), 
opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing ST4H { .H, .H, .H, .H }, , [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "st4h %z0.h %z1.h %z2.h %z3.h %p0 -> -0x0400(%x0)[128byte]", - "st4h %z5.h %z6.h %z7.h %z8.h %p2 -> -0x0180(%x7)[128byte]", - "st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x12)[128byte]", - "st4h %z16.h %z17.h %z18.h %z19.h %p5 -> +0x0180(%x17)[128byte]", - "st4h %z21.h %z22.h %z23.h %z24.h %p6 -> +0x0280(%x22)[128byte]", - "st4h %z31.h %z0.h %z1.h %z2.h %p7 -> +0x0380(%sp)[128byte]", + "st4h %z0.h %z1.h %z2.h %z3.h %p0 -> -0x0400(%x0)[2byte]", + "st4h %z5.h %z6.h %z7.h %z8.h %p2 -> -0x0180(%x7)[2byte]", + "st4h %z10.h %z11.h %z12.h %z13.h %p3 -> (%x12)[2byte]", + "st4h %z16.h %z17.h %z18.h %z19.h %p5 -> +0x0180(%x17)[2byte]", + "st4h %z21.h %z22.h %z23.h %z24.h %p6 -> +0x0280(%x22)[2byte]", + "st4h %z31.h %z0.h %z1.h %z2.h %p7 -> +0x0380(%sp)[2byte]", }; TEST_LOOP( st4h, st4h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(st4w_sve_pred) @@ -20029,141 +20028,141 @@ TEST_INSTR(st4w_sve_pred) /* Testing ST4W { .S, .S, .S, .S }, , [, , * LSL #2] */ const char *const expected_0_0[6] = { - "st4w %z0.s %z1.s %z2.s %z3.s %p0 -> (%x0,%x0,lsl #2)[128byte]", - "st4w %z5.s %z6.s %z7.s %z8.s %p2 -> (%x7,%x8,lsl #2)[128byte]", - "st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x12,%x13,lsl #2)[128byte]", - "st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17,%x18,lsl #2)[128byte]", - "st4w %z21.s %z22.s %z23.s %z24.s %p6 -> (%x22,%x23,lsl #2)[128byte]", - "st4w %z31.s %z0.s %z1.s %z2.s %p7 -> (%sp,%x30,lsl #2)[128byte]", + 
"st4w %z0.s %z1.s %z2.s %z3.s %p0 -> (%x0,%x0,lsl #2)[4byte]", + "st4w %z5.s %z6.s %z7.s %z8.s %p2 -> (%x7,%x8,lsl #2)[4byte]", + "st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x12,%x13,lsl #2)[4byte]", + "st4w %z16.s %z17.s %z18.s %z19.s %p5 -> (%x17,%x18,lsl #2)[4byte]", + "st4w %z21.s %z22.s %z23.s %z24.s %p6 -> (%x22,%x23,lsl #2)[4byte]", + "st4w %z31.s %z0.s %z1.s %z2.s %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(st4w, st4w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_128, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing ST4W { .S, .S, .S, .S }, , [{, * #, MUL VL}] */ static const int imm4[6] = { -1024, -384, 0, 384, 640, 896 }; const char *const expected_1_0[6] = { - "st4w %z0.s %z1.s %z2.s %z3.s %p0 -> -0x0400(%x0)[128byte]", - "st4w %z5.s %z6.s %z7.s %z8.s %p2 -> -0x0180(%x7)[128byte]", - "st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x12)[128byte]", - "st4w %z16.s %z17.s %z18.s %z19.s %p5 -> +0x0180(%x17)[128byte]", - "st4w %z21.s %z22.s %z23.s %z24.s %p6 -> +0x0280(%x22)[128byte]", - "st4w %z31.s %z0.s %z1.s %z2.s %p7 -> +0x0380(%sp)[128byte]", + "st4w %z0.s %z1.s %z2.s %z3.s %p0 -> -0x0400(%x0)[4byte]", + "st4w %z5.s %z6.s %z7.s %z8.s %p2 -> -0x0180(%x7)[4byte]", + "st4w %z10.s %z11.s %z12.s %z13.s %p3 -> (%x12)[4byte]", + "st4w %z16.s %z17.s %z18.s %z19.s %p5 -> +0x0180(%x17)[4byte]", + "st4w %z21.s %z22.s %z23.s %z24.s %p6 -> +0x0280(%x22)[4byte]", + "st4w %z31.s %z0.s %z1.s %z2.s %p7 -> +0x0380(%sp)[4byte]", }; TEST_LOOP( st4w, st4w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_128)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(stnt1d_sve_pred) { /* 
Testing STNT1D { .D }, , [, , LSL #3] */ const char *const expected_0_0[6] = { - "stnt1d %z0.d %p0 -> (%x0,%x0,lsl #3)[32byte]", - "stnt1d %z5.d %p2 -> (%x7,%x8,lsl #3)[32byte]", - "stnt1d %z10.d %p3 -> (%x12,%x13,lsl #3)[32byte]", - "stnt1d %z16.d %p5 -> (%x17,%x18,lsl #3)[32byte]", - "stnt1d %z21.d %p6 -> (%x22,%x23,lsl #3)[32byte]", - "stnt1d %z31.d %p7 -> (%sp,%x30,lsl #3)[32byte]", + "stnt1d %z0.d %p0 -> (%x0,%x0,lsl #3)[8byte]", + "stnt1d %z5.d %p2 -> (%x7,%x8,lsl #3)[8byte]", + "stnt1d %z10.d %p3 -> (%x12,%x13,lsl #3)[8byte]", + "stnt1d %z16.d %p5 -> (%x17,%x18,lsl #3)[8byte]", + "stnt1d %z21.d %p6 -> (%x22,%x23,lsl #3)[8byte]", + "stnt1d %z31.d %p7 -> (%sp,%x30,lsl #3)[8byte]", }; TEST_LOOP(stnt1d, stnt1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 3)); + true, 0, 0, OPSZ_8, 3)); /* Testing STNT1D { .D }, , [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "stnt1d %z0.d %p0 -> -0x0100(%x0)[32byte]", - "stnt1d %z5.d %p2 -> -0x60(%x7)[32byte]", - "stnt1d %z10.d %p3 -> (%x12)[32byte]", - "stnt1d %z16.d %p5 -> +0x60(%x17)[32byte]", - "stnt1d %z21.d %p6 -> +0xa0(%x22)[32byte]", - "stnt1d %z31.d %p7 -> +0xe0(%sp)[32byte]", + "stnt1d %z0.d %p0 -> -0x0100(%x0)[8byte]", + "stnt1d %z5.d %p2 -> -0x60(%x7)[8byte]", + "stnt1d %z10.d %p3 -> (%x12)[8byte]", + "stnt1d %z16.d %p5 -> +0x60(%x17)[8byte]", + "stnt1d %z21.d %p6 -> +0xa0(%x22)[8byte]", + "stnt1d %z31.d %p7 -> +0xe0(%sp)[8byte]", }; TEST_LOOP( stnt1d, stnt1d_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], 
OPSZ_8)); } TEST_INSTR(stnt1h_sve_pred) { /* Testing STNT1H { .H }, , [, , LSL #1] */ const char *const expected_0_0[6] = { - "stnt1h %z0.h %p0 -> (%x0,%x0,lsl #1)[32byte]", - "stnt1h %z5.h %p2 -> (%x7,%x8,lsl #1)[32byte]", - "stnt1h %z10.h %p3 -> (%x12,%x13,lsl #1)[32byte]", - "stnt1h %z16.h %p5 -> (%x17,%x18,lsl #1)[32byte]", - "stnt1h %z21.h %p6 -> (%x22,%x23,lsl #1)[32byte]", - "stnt1h %z31.h %p7 -> (%sp,%x30,lsl #1)[32byte]", + "stnt1h %z0.h %p0 -> (%x0,%x0,lsl #1)[2byte]", + "stnt1h %z5.h %p2 -> (%x7,%x8,lsl #1)[2byte]", + "stnt1h %z10.h %p3 -> (%x12,%x13,lsl #1)[2byte]", + "stnt1h %z16.h %p5 -> (%x17,%x18,lsl #1)[2byte]", + "stnt1h %z21.h %p6 -> (%x22,%x23,lsl #1)[2byte]", + "stnt1h %z31.h %p7 -> (%sp,%x30,lsl #1)[2byte]", }; TEST_LOOP(stnt1h, stnt1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 1)); + true, 0, 0, OPSZ_2, 1)); /* Testing STNT1H { .H }, , [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "stnt1h %z0.h %p0 -> -0x0100(%x0)[32byte]", - "stnt1h %z5.h %p2 -> -0x60(%x7)[32byte]", - "stnt1h %z10.h %p3 -> (%x12)[32byte]", - "stnt1h %z16.h %p5 -> +0x60(%x17)[32byte]", - "stnt1h %z21.h %p6 -> +0xa0(%x22)[32byte]", - "stnt1h %z31.h %p7 -> +0xe0(%sp)[32byte]", + "stnt1h %z0.h %p0 -> -0x0100(%x0)[2byte]", + "stnt1h %z5.h %p2 -> -0x60(%x7)[2byte]", + "stnt1h %z10.h %p3 -> (%x12)[2byte]", + "stnt1h %z16.h %p5 -> +0x60(%x17)[2byte]", + "stnt1h %z21.h %p6 -> +0xa0(%x22)[2byte]", + "stnt1h %z31.h %p7 -> +0xe0(%sp)[2byte]", }; TEST_LOOP( stnt1h, stnt1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_reg(Pn_half_six_offset_0[i]), - opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_2)); } TEST_INSTR(stnt1w_sve_pred) { /* Testing STNT1W { .S }, , [, , LSL #2] */ const char *const expected_0_0[6] = { - "stnt1w %z0.s %p0 -> (%x0,%x0,lsl #2)[32byte]", - "stnt1w %z5.s %p2 -> (%x7,%x8,lsl #2)[32byte]", - "stnt1w %z10.s %p3 -> (%x12,%x13,lsl #2)[32byte]", - "stnt1w %z16.s %p5 -> (%x17,%x18,lsl #2)[32byte]", - "stnt1w %z21.s %p6 -> (%x22,%x23,lsl #2)[32byte]", - "stnt1w %z31.s %p7 -> (%sp,%x30,lsl #2)[32byte]", + "stnt1w %z0.s %p0 -> (%x0,%x0,lsl #2)[4byte]", + "stnt1w %z5.s %p2 -> (%x7,%x8,lsl #2)[4byte]", + "stnt1w %z10.s %p3 -> (%x12,%x13,lsl #2)[4byte]", + "stnt1w %z16.s %p5 -> (%x17,%x18,lsl #2)[4byte]", + "stnt1w %z21.s %p6 -> (%x22,%x23,lsl #2)[4byte]", + "stnt1w %z31.s %p7 -> (%sp,%x30,lsl #2)[4byte]", }; TEST_LOOP(stnt1w, stnt1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], DR_EXTEND_UXTX, - true, 0, 0, OPSZ_32, 2)); + true, 0, 0, OPSZ_4, 2)); /* Testing STNT1W { .S }, , [{, #, MUL VL}] */ static const int imm4[6] = { -256, -96, 0, 96, 160, 224 }; const char *const expected_1_0[6] = { - "stnt1w %z0.s %p0 -> -0x0100(%x0)[32byte]", - "stnt1w %z5.s %p2 -> -0x60(%x7)[32byte]", - "stnt1w %z10.s %p3 -> (%x12)[32byte]", - "stnt1w %z16.s %p5 -> +0x60(%x17)[32byte]", - "stnt1w %z21.s %p6 -> +0xa0(%x22)[32byte]", - "stnt1w %z31.s %p7 -> +0xe0(%sp)[32byte]", + "stnt1w %z0.s %p0 -> -0x0100(%x0)[4byte]", + "stnt1w %z5.s %p2 -> -0x60(%x7)[4byte]", + "stnt1w %z10.s %p3 -> (%x12)[4byte]", + "stnt1w %z16.s %p5 -> +0x60(%x17)[4byte]", + "stnt1w %z21.s %p6 -> +0xa0(%x22)[4byte]", + "stnt1w %z31.s %p7 -> +0xe0(%sp)[4byte]", }; TEST_LOOP( stnt1w, stnt1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), - 
opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_32)); + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4[i], OPSZ_4)); } TEST_INSTR(sdot_sve) @@ -20779,7 +20778,7 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(insr_sve_simd_fp); RUN_INSTR_TEST(ext_sve); - RUN_INSTR_TEST(splice_sve); + RUN_INSTR_TEST(splice_sve_des); RUN_INSTR_TEST(rev_sve_pred); RUN_INSTR_TEST(rev_sve); diff --git a/suite/tests/api/ir_aarch64_sve2.c b/suite/tests/api/ir_aarch64_sve2.c index 1fb8f6248cf..8172861ba3f 100644 --- a/suite/tests/api/ir_aarch64_sve2.c +++ b/suite/tests/api/ir_aarch64_sve2.c @@ -7664,94 +7664,91 @@ TEST_INSTR(xar_sve) TEST_INSTR(ldnt1sb_sve_pred) { - /* Testing LDNT1SB { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1sb (%z0.d,%x0)[4byte] %p0/z -> %z0.d", - "ldnt1sb (%z7.d,%x8)[4byte] %p2/z -> %z5.d", - "ldnt1sb (%z12.d,%x13)[4byte] %p3/z -> %z10.d", - "ldnt1sb (%z18.d,%x18)[4byte] %p5/z -> %z16.d", - "ldnt1sb (%z23.d,%x23)[4byte] %p6/z -> %z21.d", - "ldnt1sb (%z31.d,%x30)[4byte] %p7/z -> %z31.d", + "ldnt1sb (%z0.d,%x0)[1byte] %p0/z -> %z0.d", + "ldnt1sb (%z7.d,%x8)[1byte] %p2/z -> %z5.d", + "ldnt1sb (%z12.d,%x13)[1byte] %p3/z -> %z10.d", + "ldnt1sb (%z18.d,%x18)[1byte] %p5/z -> %z16.d", + "ldnt1sb (%z23.d,%x23)[1byte] %p6/z -> %z21.d", + "ldnt1sb (%z31.d,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1sb, ldnt1sb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LDNT1SB { .S }, /Z, [.S{, }] */ const char *const expected_1_0[6] = { - "ldnt1sb (%z0.s,%x0)[8byte] %p0/z -> %z0.s", - "ldnt1sb (%z7.s,%x8)[8byte] %p2/z -> %z5.s", - "ldnt1sb (%z12.s,%x13)[8byte] %p3/z -> %z10.s", - "ldnt1sb (%z18.s,%x18)[8byte] %p5/z -> %z16.s", - "ldnt1sb (%z23.s,%x23)[8byte] %p6/z -> 
%z21.s", - "ldnt1sb (%z31.s,%x30)[8byte] %p7/z -> %z31.s", + "ldnt1sb (%z0.s,%x0)[1byte] %p0/z -> %z0.s", + "ldnt1sb (%z7.s,%x8)[1byte] %p2/z -> %z5.s", + "ldnt1sb (%z12.s,%x13)[1byte] %p3/z -> %z10.s", + "ldnt1sb (%z18.s,%x18)[1byte] %p5/z -> %z16.s", + "ldnt1sb (%z23.s,%x23)[1byte] %p6/z -> %z21.s", + "ldnt1sb (%z31.s,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1sb, ldnt1sb_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_1, 0)); } TEST_INSTR(ldnt1sh_sve_pred) { - /* Testing LDNT1SH { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1sh (%z0.d,%x0)[8byte] %p0/z -> %z0.d", - "ldnt1sh (%z7.d,%x8)[8byte] %p2/z -> %z5.d", - "ldnt1sh (%z12.d,%x13)[8byte] %p3/z -> %z10.d", - "ldnt1sh (%z18.d,%x18)[8byte] %p5/z -> %z16.d", - "ldnt1sh (%z23.d,%x23)[8byte] %p6/z -> %z21.d", - "ldnt1sh (%z31.d,%x30)[8byte] %p7/z -> %z31.d", + "ldnt1sh (%z0.d,%x0)[2byte] %p0/z -> %z0.d", + "ldnt1sh (%z7.d,%x8)[2byte] %p2/z -> %z5.d", + "ldnt1sh (%z12.d,%x13)[2byte] %p3/z -> %z10.d", + "ldnt1sh (%z18.d,%x18)[2byte] %p5/z -> %z16.d", + "ldnt1sh (%z23.d,%x23)[2byte] %p6/z -> %z21.d", + "ldnt1sh (%z31.d,%x30)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1sh, ldnt1sh_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LDNT1SH { .S }, /Z, [.S{, }] */ const char *const expected_1_0[6] = { - "ldnt1sh (%z0.s,%x0)[16byte] %p0/z -> %z0.s", - "ldnt1sh (%z7.s,%x8)[16byte] %p2/z -> %z5.s", - "ldnt1sh (%z12.s,%x13)[16byte] %p3/z -> %z10.s", - "ldnt1sh (%z18.s,%x18)[16byte] %p5/z -> %z16.s", - "ldnt1sh 
(%z23.s,%x23)[16byte] %p6/z -> %z21.s", - "ldnt1sh (%z31.s,%x30)[16byte] %p7/z -> %z31.s", + "ldnt1sh (%z0.s,%x0)[2byte] %p0/z -> %z0.s", + "ldnt1sh (%z7.s,%x8)[2byte] %p2/z -> %z5.s", + "ldnt1sh (%z12.s,%x13)[2byte] %p3/z -> %z10.s", + "ldnt1sh (%z18.s,%x18)[2byte] %p5/z -> %z16.s", + "ldnt1sh (%z23.s,%x23)[2byte] %p6/z -> %z21.s", + "ldnt1sh (%z31.s,%x30)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1sh, ldnt1sh_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_2, 0)); } TEST_INSTR(ldnt1sw_sve_pred) { - /* Testing LDNT1SW { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1sw (%z0.d,%x0)[16byte] %p0/z -> %z0.d", - "ldnt1sw (%z7.d,%x8)[16byte] %p2/z -> %z5.d", - "ldnt1sw (%z12.d,%x13)[16byte] %p3/z -> %z10.d", - "ldnt1sw (%z18.d,%x18)[16byte] %p5/z -> %z16.d", - "ldnt1sw (%z23.d,%x23)[16byte] %p6/z -> %z21.d", - "ldnt1sw (%z31.d,%x30)[16byte] %p7/z -> %z31.d", + "ldnt1sw (%z0.d,%x0)[4byte] %p0/z -> %z0.d", + "ldnt1sw (%z7.d,%x8)[4byte] %p2/z -> %z5.d", + "ldnt1sw (%z12.d,%x13)[4byte] %p3/z -> %z10.d", + "ldnt1sw (%z18.d,%x18)[4byte] %p5/z -> %z16.d", + "ldnt1sw (%z23.d,%x23)[4byte] %p6/z -> %z21.d", + "ldnt1sw (%z31.d,%x30)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1sw, ldnt1sw_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_4, 0)); } TEST_INSTR(cdot_sve_idx_imm_vector) @@ -8362,258 +8359,250 @@ TEST_INSTR(whilewr_sve) TEST_INSTR(ldnt1b_sve_pred) { - /* Testing LDNT1B { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1b (%z0.d,%x0)[4byte] %p0/z -> %z0.d", - "ldnt1b 
(%z7.d,%x8)[4byte] %p2/z -> %z5.d", - "ldnt1b (%z12.d,%x13)[4byte] %p3/z -> %z10.d", - "ldnt1b (%z18.d,%x18)[4byte] %p5/z -> %z16.d", - "ldnt1b (%z23.d,%x23)[4byte] %p6/z -> %z21.d", - "ldnt1b (%z31.d,%x30)[4byte] %p7/z -> %z31.d", + "ldnt1b (%z0.d,%x0)[1byte] %p0/z -> %z0.d", + "ldnt1b (%z7.d,%x8)[1byte] %p2/z -> %z5.d", + "ldnt1b (%z12.d,%x13)[1byte] %p3/z -> %z10.d", + "ldnt1b (%z18.d,%x18)[1byte] %p5/z -> %z16.d", + "ldnt1b (%z23.d,%x23)[1byte] %p6/z -> %z21.d", + "ldnt1b (%z31.d,%x30)[1byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1b, ldnt1b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing LDNT1B { .S }, /Z, [.S{, }] */ const char *const expected_1_0[6] = { - "ldnt1b (%z0.s,%x0)[8byte] %p0/z -> %z0.s", - "ldnt1b (%z7.s,%x8)[8byte] %p2/z -> %z5.s", - "ldnt1b (%z12.s,%x13)[8byte] %p3/z -> %z10.s", - "ldnt1b (%z18.s,%x18)[8byte] %p5/z -> %z16.s", - "ldnt1b (%z23.s,%x23)[8byte] %p6/z -> %z21.s", - "ldnt1b (%z31.s,%x30)[8byte] %p7/z -> %z31.s", + "ldnt1b (%z0.s,%x0)[1byte] %p0/z -> %z0.s", + "ldnt1b (%z7.s,%x8)[1byte] %p2/z -> %z5.s", + "ldnt1b (%z12.s,%x13)[1byte] %p3/z -> %z10.s", + "ldnt1b (%z18.s,%x18)[1byte] %p5/z -> %z16.s", + "ldnt1b (%z23.s,%x23)[1byte] %p6/z -> %z21.s", + "ldnt1b (%z31.s,%x30)[1byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1b, ldnt1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_1, 0)); } TEST_INSTR(ldnt1d_sve_pred) { - /* Testing LDNT1D { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1d (%z0.d,%x0)[32byte] %p0/z -> %z0.d", - "ldnt1d 
(%z7.d,%x8)[32byte] %p2/z -> %z5.d", - "ldnt1d (%z12.d,%x13)[32byte] %p3/z -> %z10.d", - "ldnt1d (%z18.d,%x18)[32byte] %p5/z -> %z16.d", - "ldnt1d (%z23.d,%x23)[32byte] %p6/z -> %z21.d", - "ldnt1d (%z31.d,%x30)[32byte] %p7/z -> %z31.d", + "ldnt1d (%z0.d,%x0)[8byte] %p0/z -> %z0.d", + "ldnt1d (%z7.d,%x8)[8byte] %p2/z -> %z5.d", + "ldnt1d (%z12.d,%x13)[8byte] %p3/z -> %z10.d", + "ldnt1d (%z18.d,%x18)[8byte] %p5/z -> %z16.d", + "ldnt1d (%z23.d,%x23)[8byte] %p6/z -> %z21.d", + "ldnt1d (%z31.d,%x30)[8byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1d, ldnt1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_32, 0)); + OPSZ_8, 0)); } TEST_INSTR(ldnt1h_sve_pred) { - /* Testing LDNT1H { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1h (%z0.d,%x0)[8byte] %p0/z -> %z0.d", - "ldnt1h (%z7.d,%x8)[8byte] %p2/z -> %z5.d", - "ldnt1h (%z12.d,%x13)[8byte] %p3/z -> %z10.d", - "ldnt1h (%z18.d,%x18)[8byte] %p5/z -> %z16.d", - "ldnt1h (%z23.d,%x23)[8byte] %p6/z -> %z21.d", - "ldnt1h (%z31.d,%x30)[8byte] %p7/z -> %z31.d", + "ldnt1h (%z0.d,%x0)[2byte] %p0/z -> %z0.d", + "ldnt1h (%z7.d,%x8)[2byte] %p2/z -> %z5.d", + "ldnt1h (%z12.d,%x13)[2byte] %p3/z -> %z10.d", + "ldnt1h (%z18.d,%x18)[2byte] %p5/z -> %z16.d", + "ldnt1h (%z23.d,%x23)[2byte] %p6/z -> %z21.d", + "ldnt1h (%z31.d,%x30)[2byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1h, ldnt1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing LDNT1H { .S }, /Z, [.S{, }] */ const char *const expected_1_0[6] = { - "ldnt1h (%z0.s,%x0)[16byte] %p0/z -> %z0.s", - "ldnt1h 
(%z7.s,%x8)[16byte] %p2/z -> %z5.s", - "ldnt1h (%z12.s,%x13)[16byte] %p3/z -> %z10.s", - "ldnt1h (%z18.s,%x18)[16byte] %p5/z -> %z16.s", - "ldnt1h (%z23.s,%x23)[16byte] %p6/z -> %z21.s", - "ldnt1h (%z31.s,%x30)[16byte] %p7/z -> %z31.s", + "ldnt1h (%z0.s,%x0)[2byte] %p0/z -> %z0.s", + "ldnt1h (%z7.s,%x8)[2byte] %p2/z -> %z5.s", + "ldnt1h (%z12.s,%x13)[2byte] %p3/z -> %z10.s", + "ldnt1h (%z18.s,%x18)[2byte] %p5/z -> %z16.s", + "ldnt1h (%z23.s,%x23)[2byte] %p6/z -> %z21.s", + "ldnt1h (%z31.s,%x30)[2byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1h, ldnt1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_2, 0)); } TEST_INSTR(ldnt1w_sve_pred) { - /* Testing LDNT1W { .D }, /Z, [.D{, }] */ const char *const expected_0_0[6] = { - "ldnt1w (%z0.d,%x0)[16byte] %p0/z -> %z0.d", - "ldnt1w (%z7.d,%x8)[16byte] %p2/z -> %z5.d", - "ldnt1w (%z12.d,%x13)[16byte] %p3/z -> %z10.d", - "ldnt1w (%z18.d,%x18)[16byte] %p5/z -> %z16.d", - "ldnt1w (%z23.d,%x23)[16byte] %p6/z -> %z21.d", - "ldnt1w (%z31.d,%x30)[16byte] %p7/z -> %z31.d", + "ldnt1w (%z0.d,%x0)[4byte] %p0/z -> %z0.d", + "ldnt1w (%z7.d,%x8)[4byte] %p2/z -> %z5.d", + "ldnt1w (%z12.d,%x13)[4byte] %p3/z -> %z10.d", + "ldnt1w (%z18.d,%x18)[4byte] %p5/z -> %z16.d", + "ldnt1w (%z23.d,%x23)[4byte] %p6/z -> %z21.d", + "ldnt1w (%z31.d,%x30)[4byte] %p7/z -> %z31.d", }; TEST_LOOP(ldnt1w, ldnt1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing LDNT1W { .S }, /Z, [.S{, }] */ const char *const expected_1_0[6] = { - "ldnt1w (%z0.s,%x0)[32byte] %p0/z -> %z0.s", - "ldnt1w 
(%z7.s,%x8)[32byte] %p2/z -> %z5.s", - "ldnt1w (%z12.s,%x13)[32byte] %p3/z -> %z10.s", - "ldnt1w (%z18.s,%x18)[32byte] %p5/z -> %z16.s", - "ldnt1w (%z23.s,%x23)[32byte] %p6/z -> %z21.s", - "ldnt1w (%z31.s,%x30)[32byte] %p7/z -> %z31.s", + "ldnt1w (%z0.s,%x0)[4byte] %p0/z -> %z0.s", + "ldnt1w (%z7.s,%x8)[4byte] %p2/z -> %z5.s", + "ldnt1w (%z12.s,%x13)[4byte] %p3/z -> %z10.s", + "ldnt1w (%z18.s,%x18)[4byte] %p5/z -> %z16.s", + "ldnt1w (%z23.s,%x23)[4byte] %p6/z -> %z21.s", + "ldnt1w (%z31.s,%x30)[4byte] %p7/z -> %z31.s", }; TEST_LOOP(ldnt1w, ldnt1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_32, 0)); + OPSZ_4, 0)); } TEST_INSTR(stnt1b_sve_pred) { - /* Testing STNT1B { .D }, , [.D{, }] */ const char *const expected_0_0[6] = { - "stnt1b %z0.d %p0 -> (%z0.d,%x0)[4byte]", - "stnt1b %z5.d %p2 -> (%z7.d,%x8)[4byte]", - "stnt1b %z10.d %p3 -> (%z12.d,%x13)[4byte]", - "stnt1b %z16.d %p5 -> (%z18.d,%x18)[4byte]", - "stnt1b %z21.d %p6 -> (%z23.d,%x23)[4byte]", - "stnt1b %z31.d %p7 -> (%z31.d,%x30)[4byte]", + "stnt1b %z0.d %p0 -> (%z0.d,%x0)[1byte]", + "stnt1b %z5.d %p2 -> (%z7.d,%x8)[1byte]", + "stnt1b %z10.d %p3 -> (%z12.d,%x13)[1byte]", + "stnt1b %z16.d %p5 -> (%z18.d,%x18)[1byte]", + "stnt1b %z21.d %p6 -> (%z23.d,%x23)[1byte]", + "stnt1b %z31.d %p7 -> (%z31.d,%x30)[1byte]", }; TEST_LOOP(stnt1b, stnt1b_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_4, 0)); + OPSZ_1, 0)); /* Testing STNT1B { .S }, , [.S{, }] */ const char *const expected_1_0[6] = { - "stnt1b %z0.s %p0 -> (%z0.s,%x0)[8byte]", - "stnt1b %z5.s %p2 -> (%z7.s,%x8)[8byte]", - "stnt1b %z10.s %p3 -> 
(%z12.s,%x13)[8byte]", - "stnt1b %z16.s %p5 -> (%z18.s,%x18)[8byte]", - "stnt1b %z21.s %p6 -> (%z23.s,%x23)[8byte]", - "stnt1b %z31.s %p7 -> (%z31.s,%x30)[8byte]", + "stnt1b %z0.s %p0 -> (%z0.s,%x0)[1byte]", + "stnt1b %z5.s %p2 -> (%z7.s,%x8)[1byte]", + "stnt1b %z10.s %p3 -> (%z12.s,%x13)[1byte]", + "stnt1b %z16.s %p5 -> (%z18.s,%x18)[1byte]", + "stnt1b %z21.s %p6 -> (%z23.s,%x23)[1byte]", + "stnt1b %z31.s %p7 -> (%z31.s,%x30)[1byte]", }; TEST_LOOP(stnt1b, stnt1b_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_1, 0)); } TEST_INSTR(stnt1d_sve_pred) { - /* Testing STNT1D { .D }, , [.D{, }] */ const char *const expected_0_0[6] = { - "stnt1d %z0.d %p0 -> (%z0.d,%x0)[32byte]", - "stnt1d %z5.d %p2 -> (%z7.d,%x8)[32byte]", - "stnt1d %z10.d %p3 -> (%z12.d,%x13)[32byte]", - "stnt1d %z16.d %p5 -> (%z18.d,%x18)[32byte]", - "stnt1d %z21.d %p6 -> (%z23.d,%x23)[32byte]", - "stnt1d %z31.d %p7 -> (%z31.d,%x30)[32byte]", + "stnt1d %z0.d %p0 -> (%z0.d,%x0)[8byte]", + "stnt1d %z5.d %p2 -> (%z7.d,%x8)[8byte]", + "stnt1d %z10.d %p3 -> (%z12.d,%x13)[8byte]", + "stnt1d %z16.d %p5 -> (%z18.d,%x18)[8byte]", + "stnt1d %z21.d %p6 -> (%z23.d,%x23)[8byte]", + "stnt1d %z31.d %p7 -> (%z31.d,%x30)[8byte]", }; TEST_LOOP(stnt1d, stnt1d_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_32, 0)); + OPSZ_8, 0)); } TEST_INSTR(stnt1h_sve_pred) { - /* Testing STNT1H { .D }, , [.D{, }] */ const char *const expected_0_0[6] = { - "stnt1h %z0.d %p0 -> (%z0.d,%x0)[8byte]", - "stnt1h %z5.d %p2 -> (%z7.d,%x8)[8byte]", - "stnt1h %z10.d %p3 -> (%z12.d,%x13)[8byte]", - "stnt1h %z16.d %p5 -> 
(%z18.d,%x18)[8byte]", - "stnt1h %z21.d %p6 -> (%z23.d,%x23)[8byte]", - "stnt1h %z31.d %p7 -> (%z31.d,%x30)[8byte]", + "stnt1h %z0.d %p0 -> (%z0.d,%x0)[2byte]", + "stnt1h %z5.d %p2 -> (%z7.d,%x8)[2byte]", + "stnt1h %z10.d %p3 -> (%z12.d,%x13)[2byte]", + "stnt1h %z16.d %p5 -> (%z18.d,%x18)[2byte]", + "stnt1h %z21.d %p6 -> (%z23.d,%x23)[2byte]", + "stnt1h %z31.d %p7 -> (%z31.d,%x30)[2byte]", }; TEST_LOOP(stnt1h, stnt1h_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_8, 0)); + OPSZ_2, 0)); /* Testing STNT1H { .S }, , [.S{, }] */ const char *const expected_1_0[6] = { - "stnt1h %z0.s %p0 -> (%z0.s,%x0)[16byte]", - "stnt1h %z5.s %p2 -> (%z7.s,%x8)[16byte]", - "stnt1h %z10.s %p3 -> (%z12.s,%x13)[16byte]", - "stnt1h %z16.s %p5 -> (%z18.s,%x18)[16byte]", - "stnt1h %z21.s %p6 -> (%z23.s,%x23)[16byte]", - "stnt1h %z31.s %p7 -> (%z31.s,%x30)[16byte]", + "stnt1h %z0.s %p0 -> (%z0.s,%x0)[2byte]", + "stnt1h %z5.s %p2 -> (%z7.s,%x8)[2byte]", + "stnt1h %z10.s %p3 -> (%z12.s,%x13)[2byte]", + "stnt1h %z16.s %p5 -> (%z18.s,%x18)[2byte]", + "stnt1h %z21.s %p6 -> (%z23.s,%x23)[2byte]", + "stnt1h %z31.s %p7 -> (%z31.s,%x30)[2byte]", }; TEST_LOOP(stnt1h, stnt1h_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_2, 0)); } TEST_INSTR(stnt1w_sve_pred) { - /* Testing STNT1W { .D }, , [.D{, }] */ const char *const expected_0_0[6] = { - "stnt1w %z0.d %p0 -> (%z0.d,%x0)[16byte]", - "stnt1w %z5.d %p2 -> (%z7.d,%x8)[16byte]", - "stnt1w %z10.d %p3 -> (%z12.d,%x13)[16byte]", - "stnt1w %z16.d %p5 -> (%z18.d,%x18)[16byte]", - "stnt1w %z21.d %p6 -> (%z23.d,%x23)[16byte]", - "stnt1w %z31.d %p7 -> 
(%z31.d,%x30)[16byte]", + "stnt1w %z0.d %p0 -> (%z0.d,%x0)[4byte]", + "stnt1w %z5.d %p2 -> (%z7.d,%x8)[4byte]", + "stnt1w %z10.d %p3 -> (%z12.d,%x13)[4byte]", + "stnt1w %z16.d %p5 -> (%z18.d,%x18)[4byte]", + "stnt1w %z21.d %p6 -> (%z23.d,%x23)[4byte]", + "stnt1w %z31.d %p7 -> (%z31.d,%x30)[4byte]", }; TEST_LOOP(stnt1w, stnt1w_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_16, 0)); + OPSZ_4, 0)); /* Testing STNT1W { .S }, , [.S{, }] */ const char *const expected_1_0[6] = { - "stnt1w %z0.s %p0 -> (%z0.s,%x0)[32byte]", - "stnt1w %z5.s %p2 -> (%z7.s,%x8)[32byte]", - "stnt1w %z10.s %p3 -> (%z12.s,%x13)[32byte]", - "stnt1w %z16.s %p5 -> (%z18.s,%x18)[32byte]", - "stnt1w %z21.s %p6 -> (%z23.s,%x23)[32byte]", - "stnt1w %z31.s %p7 -> (%z31.s,%x30)[32byte]", + "stnt1w %z0.s %p0 -> (%z0.s,%x0)[4byte]", + "stnt1w %z5.s %p2 -> (%z7.s,%x8)[4byte]", + "stnt1w %z10.s %p3 -> (%z12.s,%x13)[4byte]", + "stnt1w %z16.s %p5 -> (%z18.s,%x18)[4byte]", + "stnt1w %z21.s %p6 -> (%z23.s,%x23)[4byte]", + "stnt1w %z31.s %p7 -> (%z31.s,%x30)[4byte]", }; TEST_LOOP(stnt1w, stnt1w_sve_pred, 6, expected_1_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), opnd_create_reg(Pn_half_six_offset_0[i]), opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], Xn_six_offset_3[i], OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, - OPSZ_32, 0)); + OPSZ_4, 0)); } TEST_INSTR(mul_sve_vector) @@ -8710,6 +8699,95 @@ TEST_INSTR(mul_sve_idx) opnd_create_immed_uint(i2_0_0[i], OPSZ_2b)); } +TEST_INSTR(splice_sve_con) +{ + + /* Testing SPLICE ., , { ., . 
} */ + const char *const expected_0_0[6] = { + "splice %p0 %z0.b %z1.b -> %z0.b", "splice %p2 %z7.b %z8.b -> %z5.b", + "splice %p3 %z12.b %z13.b -> %z10.b", "splice %p5 %z18.b %z19.b -> %z16.b", + "splice %p6 %z23.b %z24.b -> %z21.b", "splice %p7 %z31.b %z0.b -> %z31.b", + }; + TEST_LOOP(splice, splice_sve_con, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1)); + + const char *const expected_0_1[6] = { + "splice %p0 %z0.h %z1.h -> %z0.h", "splice %p2 %z7.h %z8.h -> %z5.h", + "splice %p3 %z12.h %z13.h -> %z10.h", "splice %p5 %z18.h %z19.h -> %z16.h", + "splice %p6 %z23.h %z24.h -> %z21.h", "splice %p7 %z31.h %z0.h -> %z31.h", + }; + TEST_LOOP(splice, splice_sve_con, 6, expected_0_1[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2)); + + const char *const expected_0_2[6] = { + "splice %p0 %z0.s %z1.s -> %z0.s", "splice %p2 %z7.s %z8.s -> %z5.s", + "splice %p3 %z12.s %z13.s -> %z10.s", "splice %p5 %z18.s %z19.s -> %z16.s", + "splice %p6 %z23.s %z24.s -> %z21.s", "splice %p7 %z31.s %z0.s -> %z31.s", + }; + TEST_LOOP(splice, splice_sve_con, 6, expected_0_2[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4)); + + const char *const expected_0_3[6] = { + "splice %p0 %z0.d %z1.d -> %z0.d", "splice %p2 %z7.d %z8.d -> %z5.d", + "splice %p3 %z12.d %z13.d -> %z10.d", "splice %p5 %z18.d %z19.d -> %z16.d", + "splice %p6 %z23.d %z24.d -> %z21.d", "splice %p7 %z31.d %z0.d -> %z31.d", + }; + TEST_LOOP(splice, splice_sve_con, 6, expected_0_3[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8)); 
+} + +TEST_INSTR(tbl_sve_mulvec) +{ + + /* Testing TBL ., { ., . }, . */ + const char *const expected_0_0[6] = { + "tbl %z0.b %z1.b %z0.b -> %z0.b", "tbl %z6.b %z7.b %z7.b -> %z5.b", + "tbl %z11.b %z12.b %z12.b -> %z10.b", "tbl %z17.b %z18.b %z18.b -> %z16.b", + "tbl %z22.b %z23.b %z23.b -> %z21.b", "tbl %z31.b %z0.b %z31.b -> %z31.b", + }; + TEST_LOOP(tbl, tbl_sve_mulvec, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_1), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1)); + + const char *const expected_0_1[6] = { + "tbl %z0.h %z1.h %z0.h -> %z0.h", "tbl %z6.h %z7.h %z7.h -> %z5.h", + "tbl %z11.h %z12.h %z12.h -> %z10.h", "tbl %z17.h %z18.h %z18.h -> %z16.h", + "tbl %z22.h %z23.h %z23.h -> %z21.h", "tbl %z31.h %z0.h %z31.h -> %z31.h", + }; + TEST_LOOP(tbl, tbl_sve_mulvec, 6, expected_0_1[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_2), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2)); + + const char *const expected_0_2[6] = { + "tbl %z0.s %z1.s %z0.s -> %z0.s", "tbl %z6.s %z7.s %z7.s -> %z5.s", + "tbl %z11.s %z12.s %z12.s -> %z10.s", "tbl %z17.s %z18.s %z18.s -> %z16.s", + "tbl %z22.s %z23.s %z23.s -> %z21.s", "tbl %z31.s %z0.s %z31.s -> %z31.s", + }; + TEST_LOOP(tbl, tbl_sve_mulvec, 6, expected_0_2[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_4), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4)); + + const char *const expected_0_3[6] = { + "tbl %z0.d %z1.d %z0.d -> %z0.d", "tbl %z6.d %z7.d %z7.d -> %z5.d", + "tbl %z11.d %z12.d %z12.d -> %z10.d", "tbl %z17.d %z18.d %z18.d -> %z16.d", + "tbl %z22.d %z23.d %z23.d -> %z21.d", "tbl %z31.d %z0.d %z31.d -> %z31.d", + }; + TEST_LOOP(tbl, tbl_sve_mulvec, 6, expected_0_3[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + 
opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_8), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8)); +} int main(int argc, char *argv[]) { @@ -8966,6 +9044,10 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(mul_sve_vector); RUN_INSTR_TEST(mul_sve_idx); + RUN_INSTR_TEST(splice_sve_con); + + RUN_INSTR_TEST(tbl_sve_mulvec); + print("All SVE2 tests complete.\n"); #ifndef STANDALONE_DECODER dr_standalone_exit(); diff --git a/suite/tests/api/ir_aarch64_v85.c b/suite/tests/api/ir_aarch64_v85.c new file mode 100644 index 00000000000..4befc2a348a --- /dev/null +++ b/suite/tests/api/ir_aarch64_v85.c @@ -0,0 +1,83 @@ +/* ********************************************************** + * Copyright (c) 2023 ARM Limited. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of ARM Limited nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* Define DR_FAST_IR to verify that everything compiles when we call the inline + * versions of these routines. + */ +#ifndef STANDALONE_DECODER +# define DR_FAST_IR 1 +#endif + +/* Uses the DR API, using DR as a standalone library, rather than + * being a client library working with DR on a target program. + */ + +#include "configure.h" +#include "dr_api.h" +#include "tools.h" + +#include "ir_aarch64.h" + +TEST_INSTR(bti) +{ + /* Testing BTI # */ + const int imm[4] = { 0, 1, 2, 3 }; + + const char *const expected_0_0[4] = { "bti $0x00", "bti $0x01", "bti $0x02", + "bti $0x03" }; + TEST_LOOP(bti, bti, 4, expected_0_0[i], opnd_create_immed_uint(imm[i], OPSZ_3b)); +} + +int +main(int argc, char *argv[]) +{ +#ifdef STANDALONE_DECODER + void *dcontext = GLOBAL_DCONTEXT; +#else + void *dcontext = dr_standalone_init(); +#endif + bool result = true; + bool test_result; + instr_t *instr; + + enable_all_test_cpu_features(); + + RUN_INSTR_TEST(bti); + + print("All v8.5 tests complete.\n"); +#ifndef STANDALONE_DECODER + dr_standalone_exit(); +#endif + if (result) + return 0; + return 1; +} diff --git a/suite/tests/api/ir_aarch64_v85.expect b/suite/tests/api/ir_aarch64_v85.expect new file mode 100644 index 00000000000..a1a8b6b27b9 --- /dev/null +++ b/suite/tests/api/ir_aarch64_v85.expect @@ -0,0 +1 @@ +All v8.5 tests complete. 
diff --git a/suite/tests/api/ir_riscv64.c b/suite/tests/api/ir_riscv64.c index 03ee76024e3..bd983a2dc54 100644 --- a/suite/tests/api/ir_riscv64.c +++ b/suite/tests/api/ir_riscv64.c @@ -306,97 +306,120 @@ test_atomic(void *dc) /* FIXME i#3544: Use [aq][rl] instead of hex number when disassembling. */ /* LR/SC */ - instr = INSTR_CREATE_lr_w(dc, opnd_create_reg(DR_REG_A0), opnd_create_reg(DR_REG_A1), + instr = INSTR_CREATE_lr_w(dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), opnd_create_immed_int(0b00, OPSZ_2b)); ASSERT(instr_is_exclusive_load(instr)); test_instr_encoding(dc, OP_lr_w, instr); - instr = INSTR_CREATE_lr_d(dc, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X31), - opnd_create_immed_int(0b10, OPSZ_2b)); + instr = + INSTR_CREATE_lr_d(dc, opnd_create_reg(DR_REG_X0), + opnd_create_base_disp(DR_REG_X31, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_immed_int(0b10, OPSZ_2b)); ASSERT(instr_is_exclusive_load(instr)); test_instr_encoding(dc, OP_lr_d, instr); - instr = INSTR_CREATE_sc_w(dc, opnd_create_reg(DR_REG_A0), opnd_create_reg(DR_REG_A1), - opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = + INSTR_CREATE_sc_w(dc, opnd_create_base_disp(DR_REG_A2, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A0), opnd_create_reg(DR_REG_A1), + opnd_create_immed_int(0b01, OPSZ_2b)); ASSERT(instr_is_exclusive_store(instr)); test_instr_encoding(dc, OP_sc_w, instr); - instr = INSTR_CREATE_sc_d(dc, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X31), - opnd_create_reg(DR_REG_A1), - opnd_create_immed_int(0b11, OPSZ_2b)); + instr = + INSTR_CREATE_sc_d(dc, opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X31), + opnd_create_immed_int(0b11, OPSZ_2b)); ASSERT(instr_is_exclusive_store(instr)); test_instr_encoding(dc, OP_sc_d, instr); /* AMO */ instr = INSTR_CREATE_amoswap_w( - dc, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1), + 
dc, opnd_create_reg(DR_REG_X0), + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, 0, OPSZ_4), opnd_create_reg(DR_REG_X31), opnd_create_immed_int(0b00, OPSZ_2b)); test_instr_encoding(dc, OP_amoswap_w, instr); - instr = INSTR_CREATE_amoswap_d(dc, opnd_create_reg(DR_REG_X31), - opnd_create_reg(DR_REG_X1), opnd_create_reg(DR_REG_X0), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoswap_d( + dc, opnd_create_reg(DR_REG_X31), + opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_X0), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoswap_d, instr); - instr = INSTR_CREATE_amoadd_w(dc, opnd_create_reg(DR_REG_X0), - opnd_create_reg(DR_REG_X31), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b10, OPSZ_2b)); + instr = INSTR_CREATE_amoadd_w( + dc, opnd_create_reg(DR_REG_X0), + opnd_create_base_disp(DR_REG_X31, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b10, OPSZ_2b)); test_instr_encoding(dc, OP_amoadd_w, instr); - instr = INSTR_CREATE_amoadd_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b11, OPSZ_2b)); + instr = INSTR_CREATE_amoadd_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b11, OPSZ_2b)); test_instr_encoding(dc, OP_amoadd_d, instr); - instr = INSTR_CREATE_amoxor_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoxor_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoxor_w, instr); - instr = INSTR_CREATE_amoxor_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, 
OPSZ_2b)); + instr = INSTR_CREATE_amoxor_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoxor_d, instr); - instr = INSTR_CREATE_amoand_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoand_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoand_w, instr); - instr = INSTR_CREATE_amoand_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoand_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoand_d, instr); - instr = INSTR_CREATE_amoor_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoor_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoor_w, instr); - instr = INSTR_CREATE_amoor_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amoor_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amoor_d, instr); - instr = INSTR_CREATE_amomin_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), 
- opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomin_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomin_w, instr); - instr = INSTR_CREATE_amomin_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomin_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomin_d, instr); - instr = INSTR_CREATE_amomax_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomax_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomax_w, instr); - instr = INSTR_CREATE_amomax_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomax_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomax_d, instr); - instr = INSTR_CREATE_amominu_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amominu_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amominu_w, instr); - instr = INSTR_CREATE_amominu_d(dc, opnd_create_reg(DR_REG_A0), - 
opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amominu_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amominu_d, instr); - instr = INSTR_CREATE_amomaxu_w(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomaxu_w( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_4), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomaxu_w, instr); - instr = INSTR_CREATE_amomaxu_d(dc, opnd_create_reg(DR_REG_A0), - opnd_create_reg(DR_REG_A1), opnd_create_reg(DR_REG_A2), - opnd_create_immed_int(0b01, OPSZ_2b)); + instr = INSTR_CREATE_amomaxu_d( + dc, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(DR_REG_A1, DR_REG_NULL, 0, 0, OPSZ_8), + opnd_create_reg(DR_REG_A2), opnd_create_immed_int(0b01, OPSZ_2b)); test_instr_encoding(dc, OP_amomaxu_d, instr); } diff --git a/suite/tests/api/ir_riscv64.expect b/suite/tests/api/ir_riscv64.expect index b7809fdb4d2..de41dff5b5a 100644 --- a/suite/tests/api/ir_riscv64.expect +++ b/suite/tests/api/ir_riscv64.expect @@ -29,28 +29,28 @@ c.fld +248(a5)[8byte] -> fs0 c.fsdsp ft11 -> (sp)[8byte] c.fsd fs0 -> +248(a5)[8byte] test_float_load_store complete -lr.w a1 0x0 -> a0 -lr.d t6 0x2 -> zero -sc.w a1 a2 0x1 -> a0 -sc.d t6 a1 0x3 -> zero -amoswap.w ra t6 0x0 -> zero -amoswap.d ra zero 0x1 -> t6 -amoadd.w t6 a2 0x2 -> zero -amoadd.d a1 a2 0x3 -> a0 -amoxor.w a1 a2 0x1 -> a0 -amoxor.d a1 a2 0x1 -> a0 -amoand.w a1 a2 0x1 -> a0 -amoand.d a1 a2 0x1 -> a0 -amoor.w a1 a2 0x1 -> a0 -amoor.d a1 a2 0x1 -> a0 -amomin.w a1 a2 0x1 -> a0 -amomin.d a1 a2 0x1 -> a0 -amomax.w a1 a2 0x1 -> a0 -amomax.d a1 a2 0x1 -> a0 -amominu.w a1 a2 0x1 -> a0 -amominu.d a1 
a2 0x1 -> a0 -amomaxu.w a1 a2 0x1 -> a0 -amomaxu.d a1 a2 0x1 -> a0 +lr.w (a1)[4byte] 0x0 -> a0 +lr.d (t6)[8byte] 0x2 -> zero +sc.w a1 0x1 -> (a2)[4byte] a0 +sc.d t6 0x3 -> (a1)[8byte] zero +amoswap.w (ra)[4byte] t6 0x0 -> (ra)[4byte] zero +amoswap.d (ra)[8byte] zero 0x1 -> (ra)[8byte] t6 +amoadd.w (t6)[4byte] a2 0x2 -> (t6)[4byte] zero +amoadd.d (a1)[8byte] a2 0x3 -> (a1)[8byte] a0 +amoxor.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amoxor.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amoand.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amoand.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amoor.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amoor.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amomin.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amomin.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amomax.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amomax.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amominu.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amominu.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 +amomaxu.w (a1)[4byte] a2 0x1 -> (a1)[4byte] a0 +amomaxu.d (a1)[8byte] a2 0x1 -> (a1)[8byte] a0 test_atomic complete fcvt.l.s 0x0 ft0 -> a0 fcvt.lu.s 0x1 ft0 -> a0 @@ -221,7 +221,7 @@ c.xor fp a5 -> fp c.sub fp a5 -> fp test_integer_arith complete lui 0x2a -> a0 - 0x0000004000018254 -> a0'> + 0x0000004000019264 -> a0'> jalr a1 42 -> a0 c.jalr a0 0 -> ra c.li zero 31 -> a1 diff --git a/suite/tests/api/ir_x86.c b/suite/tests/api/ir_x86.c index 1e59b70e474..d14cd66f654 100644 --- a/suite/tests/api/ir_x86.c +++ b/suite/tests/api/ir_x86.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2023 Google, Inc. All rights reserved. + * Copyright (c) 2011-2024 Google, Inc. All rights reserved. * Copyright (c) 2007-2008 VMware, Inc. All rights reserved. * **********************************************************/ @@ -2870,6 +2870,8 @@ main(int argc, char *argv[]) #else void *dcontext = dr_standalone_init(); + ASSERT(!dr_running_under_dynamorio()); + /* simple test of deadlock_avoidance, etc. 
being disabled in standalone */ void *x = dr_mutex_create(); dr_mutex_lock(x); diff --git a/suite/tests/client-interface/detach_test.dll.c b/suite/tests/client-interface/detach_test.dll.c index daac52d602b..a6d4cc9c3ca 100644 --- a/suite/tests/client-interface/detach_test.dll.c +++ b/suite/tests/client-interface/detach_test.dll.c @@ -39,11 +39,18 @@ static thread_id_t injection_tid; static bool first_thread = true; +static bool saw_attach_event = false; static void dr_exit(void) { + if (!saw_attach_event) + dr_fprintf(STDERR, "Error: never saw attach event!\n"); +#ifdef WINDOWS dr_fprintf(STDERR, "done\n"); +#else + /* The app prints 'done' for us. */ +#endif } static void @@ -87,7 +94,8 @@ dr_exception_event(void *drcontext, dr_exception_t *excpt) static void event_post_attach(void) { - dr_fprintf(STDERR, "attach\n"); + // We do not print here as the ordering is non-deterministic vs thread init. + saw_attach_event = true; } static void diff --git a/suite/tests/client-interface/detach_test.template b/suite/tests/client-interface/detach_test.template index 1fe679addcb..fd8496ea131 100644 --- a/suite/tests/client-interface/detach_test.template +++ b/suite/tests/client-interface/detach_test.template @@ -1,6 +1,9 @@ +#ifdef WINDOWS starting attachee +#else +starting +#endif thank you for testing detach -attach thread init detach done diff --git a/suite/tests/client-interface/dr_options.dll.c b/suite/tests/client-interface/dr_options.dll.c index 14992843056..0096a2b8a2e 100644 --- a/suite/tests/client-interface/dr_options.dll.c +++ b/suite/tests/client-interface/dr_options.dll.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2012 Google, Inc. All rights reserved. + * Copyright (c) 2012-2024 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -64,6 +64,8 @@ dr_init(client_id_t client_id) */ ASSERT(dr_using_all_private_caches()); + ASSERT(dr_running_under_dynamorio()); + /* Query non-existent options. */ int_option = 1; success = dr_get_string_option("opt_does_not_exist", buf, sizeof(buf)); diff --git a/suite/tests/client-interface/drsyms-test.dll.cpp b/suite/tests/client-interface/drsyms-test.dll.cpp index acab0814385..d1670c35be7 100644 --- a/suite/tests/client-interface/drsyms-test.dll.cpp +++ b/suite/tests/client-interface/drsyms-test.dll.cpp @@ -580,7 +580,7 @@ lookup_dll_syms(void *dc, const module_data_t *dll_data, bool loaded) dll_base = dll_data->start; #ifdef UNIX - if (strstr(dll_path, "/libc-")) { + if (strstr(dll_path, "/libc-") != nullptr || strstr(dll_path, "/libc.") != nullptr) { lookup_glibc_syms(dc, dll_data); return; } diff --git a/suite/tests/client-interface/drx-scattergather-aarch64.cpp b/suite/tests/client-interface/drx-scattergather-aarch64.cpp index 7661cc862a8..f840217a38e 100644 --- a/suite/tests/client-interface/drx-scattergather-aarch64.cpp +++ b/suite/tests/client-interface/drx-scattergather-aarch64.cpp @@ -5817,6 +5817,453 @@ test_st4_scalar_plus_immediate() } #endif // defined(__ARM_FEATURE_SVE) + +#if defined(__ARM_FEATURE_SVE2) + +struct test_ptrs_with_index_t : public basic_test_ptrs_t { + int64_t index; // Scalar index used for the test instruction. 
+ + test_ptrs_with_index_t(const void *z_restore_base_, const void *p_restore_base_, + void *z_save_base_, void *p_save_base_, int64_t index_) + : basic_test_ptrs_t { z_restore_base_, p_restore_base_, z_save_base_, + p_save_base_ } + , index(index_) + { + } +}; + +struct vector_plus_scalar_load_test_case_t + : public test_case_base_t { + vector_reg_value128_t reference_data_; + vector_reg_value128_t base_data_; + + struct registers_used_t { + unsigned dest_z; + unsigned governing_p; + unsigned base_z; + } registers_used_; + + int64_t index_; // The scalar index used for the test instruction. + // This gets copied to the test_ptrs_t object to pass to the test + // function. + + template + vector_plus_scalar_load_test_case_t( + std::string name, test_func_t func, registers_used_t registers_used, + std::array reference_data, + std::array base, int64_t index) + : test_case_base_t(std::move(name), std::move(func), + registers_used.governing_p, + static_cast(sizeof(BASE_T))) + , registers_used_(registers_used) + , index_(index) + + { + std::memcpy(reference_data_.data(), reference_data.data(), + reference_data_.size()); + std::memcpy(base_data_.data(), base.data(), base_data_.size()); + } + + void + setup(sve_register_file_t &register_values) override + { + // Set the value for the base vector register. 
+ register_values.set_z_register_value(registers_used_.base_z, base_data_); + } + + void + check_output(predicate_reg_value128_t pred, + const test_register_data_t &register_data) override + { + const auto vl_bytes = get_vl_bytes(); + + std::vector expected_output_data; + expected_output_data.resize(vl_bytes); + + assert(reference_data_.size() == TEST_VL_BYTES); + for (size_t i = 0; i < vl_bytes / TEST_VL_BYTES; i++) { + memcpy(&expected_output_data[TEST_VL_BYTES * i], reference_data_.data(), + TEST_VL_BYTES); + } + apply_predicate_mask(expected_output_data, pred, element_size_); + const scalable_reg_value_t expected_output { + expected_output_data.data(), + vl_bytes, + }; + + const auto output_value = + register_data.after.get_z_register_value(registers_used_.dest_z); + + if (output_value != expected_output) { + test_failed(); + print("predicate: "); + print_predicate( + register_data.before.get_p_register_value(registers_used_.governing_p)); + print("\nexpected: "); + print_vector(expected_output); + print("\nactual: "); + print_vector(output_value); + print("\n"); + } + + // Check that the values of the other Z registers have been preserved. + for (size_t i = 0; i < NUM_Z_REGS; i++) { + if (i == registers_used_.dest_z) + continue; + check_z_reg(i, register_data); + } + // Check that the values of the P registers have been preserved. 
+ for (size_t i = 0; i < NUM_P_REGS; i++) { + check_p_reg(i, register_data); + } + } + + test_ptrs_t + create_test_ptrs(test_register_data_t &register_data) override + { + return { + register_data.before.z.data(), + register_data.before.p.data(), + register_data.after.z.data(), + register_data.after.p.data(), + index_, + }; + } +}; + +test_result_t +test_ld1_vector_plus_scalar() +{ +# define TEST_FUNC(ld_instruction) \ + [](vector_plus_scalar_load_test_case_t::test_ptrs_t &ptrs) { \ + asm(/* clang-format off */ \ + RESTORE_Z_REGISTERS(z_restore_base) \ + RESTORE_P_REGISTERS(p_restore_base) \ + ld_instruction "\n" \ + SAVE_Z_REGISTERS(z_save_base) \ + SAVE_P_REGISTERS(p_save_base) /* clang-format on */ \ + : \ + : [z_restore_base] "r"(ptrs.z_restore_base), \ + [z_save_base] "r"(ptrs.z_save_base), \ + [p_restore_base] "r"(ptrs.p_restore_base), \ + [p_save_base] "r"(ptrs.p_save_base), [index] "r"(ptrs.index) \ + : ALL_Z_REGS, ALL_P_REGS, "memory"); \ + } + + const auto get_base_ptr = [&](element_size_t element_size, size_t offset) { + void *start = INPUT_DATA.base_addr_for_data_size(element_size); + switch (element_size) { + case element_size_t::BYTE: + return reinterpret_cast(&static_cast(start)[offset]); + case element_size_t::HALF: + return reinterpret_cast(&static_cast(start)[offset]); + case element_size_t::SINGLE: + return reinterpret_cast(&static_cast(start)[offset]); + case element_size_t::DOUBLE: + return reinterpret_cast(&static_cast(start)[offset]); + } + assert(false); // unreachable + return uintptr_t(0); + }; + return run_tests({ + /* { + * Test name, + * Function that executes the test instruction, + * Registers used {zt, pg, zn}, + * Expected output data, + * Base data (value for zn), + * Index value, + * }, + */ + /* TODO i#5036: Add tests for 32-bit element variants. + * For example: ldnt1b z0.s, p0/z, [z31.s, x2]. 
+ * These instructions require 32-bit base pointers and I'm not sure + * how we can reliably and portably guarantee that allocated memory + * has an address that fits into 32-bits. + */ + { + "ldnt1b vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1b z0.d, p0/z, [z31.d, %[index]]"), + { /*zt=*/0, /*pg=*/0, /*zn=*/31 }, + std::array { 0x00, 0x16 }, + std::array { + get_base_ptr(element_size_t::BYTE, 0), + get_base_ptr(element_size_t::BYTE, 16), + }, + 0, + }, + { + "ldnt1sb vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1sb z7.d, p1/z, [z24.d, %[index]]"), + { /*zt=*/7, /*pg=*/1, /*zn=*/24 }, + std::array { -15, 0x15 }, + std::array { + get_base_ptr(element_size_t::BYTE, 0), + get_base_ptr(element_size_t::BYTE, 16), + }, + -1, + }, + { + "ldnt1h vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1h z14.d, p2/z, [z17.d, %[index]]"), + { /*zt=*/14, /*pg=*/2, /*zn=*/17 }, + std::array { 0x12, 0x14 }, + std::array { + get_base_ptr(element_size_t::HALF, 8), + get_base_ptr(element_size_t::HALF, 10), + }, + 8, + }, + { + "ldnt1sh vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1sh z21.d, p3/z, [z10.d, %[index]]"), + { /*zt=*/21, /*pg=*/3, /*zn=*/10 }, + std::array { -15, 0x17 }, + std::array { + get_base_ptr(element_size_t::HALF, 2), + get_base_ptr(element_size_t::HALF, 20), + }, + -6, + }, + { + "ldnt1w vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1w z28.d, p4/z, [z3.d, %[index]]"), + { /*zt=*/28, /*pg=*/4, /*zn=*/3 }, + std::array { 0xfffffff4, 0xfffffff3 }, + std::array { + get_base_ptr(element_size_t::SINGLE, 4), + get_base_ptr(element_size_t::SINGLE, 5), + }, + -32, + }, + { + "ldnt1sw vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1sw z29.d, p5/z, [z4.d, %[index]]"), + { /*zt=*/29, /*pg=*/5, /*zn=*/4 }, + std::array { -12, -13 }, + std::array { + get_base_ptr(element_size_t::SINGLE, 4), + get_base_ptr(element_size_t::SINGLE, 5), + }, + -32, + }, + { + "ldnt1d vector+scalar 64bit unscaled offset", + TEST_FUNC("ldnt1d z22.d, 
p6/z, [z11.d, %[index]]"), + { /*zt=*/22, /*pg=*/6, /*zn=*/11 }, + std::array { 0x03, 0x19 }, + std::array { + get_base_ptr(element_size_t::DOUBLE, 0), + get_base_ptr(element_size_t::DOUBLE, 16), + }, + 24, + }, + }); +# undef TEST_FUNC +} + +struct vector_plus_scalar_store_test_case_t + : public test_case_base_t { + vector_reg_value128_t base_data_; + std::array base_ptrs_; + + struct registers_used_t { + unsigned src_z; + unsigned governing_p; + unsigned base_z; + } registers_used_; + + element_size_t stored_value_size_; + + expected_values_t expected_values_; + + int64_t index_; // The scalar index used for the test instruction. + // This gets copied to the test_ptrs_t object to pass to the test + // function. + + vector_plus_scalar_store_test_case_t(std::string name, test_func_t func, + registers_used_t registers_used, + std::array base_offsets, + element_size_t stored_value_size, + std::ptrdiff_t offset) + : test_case_base_t(std::move(name), std::move(func), + registers_used.governing_p, + element_size_t::DOUBLE) + , registers_used_(registers_used) + , stored_value_size_(stored_value_size) + , expected_values_(std::array { offset, offset }, + stored_value_size) + , index_(static_cast(offset)) + { + base_ptrs_[0] = + static_cast(OUTPUT_DATA.base_addr()) + base_offsets[0]; + base_ptrs_[1] = + static_cast(OUTPUT_DATA.base_addr()) + base_offsets[1]; + std::memcpy(base_data_.data(), base_ptrs_.data(), base_data_.size()); + } + + void + setup(sve_register_file_t &register_values) override + { + // Set the value for the base register. 
+ register_values.set_z_register_value(registers_used_.base_z, base_data_); + + register_values.set_z_register_value(registers_used_.src_z, + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, + 0x07, 0x08, 0x09, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15 }); + OUTPUT_DATA.reset(); + } + + void + check_output(predicate_reg_value128_t pred, + const test_register_data_t &register_data) override + { + // Check that the values of the Z registers have been preserved. + for (size_t i = 0; i < NUM_Z_REGS; i++) { + check_z_reg(i, register_data); + } + // Check that the values of the P registers have been preserved. + for (size_t i = 0; i < NUM_P_REGS; i++) { + check_p_reg(i, register_data); + } + + const bool scaled = false; + assert(element_size_ == element_size_t::DOUBLE); + + switch (stored_value_size_) { + case element_size_t::BYTE: + check_expected_values(expected_values_.u8x2, pred, base_ptrs_, scaled); + break; + case element_size_t::HALF: + check_expected_values(expected_values_.u16x2, pred, base_ptrs_, scaled); + break; + case element_size_t::SINGLE: + check_expected_values(expected_values_.u32x2, pred, base_ptrs_, scaled); + break; + case element_size_t::DOUBLE: + check_expected_values(expected_values_.u64x2, pred, base_ptrs_, scaled); + break; + } + } + + test_ptrs_t + create_test_ptrs(test_register_data_t &register_data) override + { + return { + register_data.before.z.data(), + register_data.before.p.data(), + register_data.after.z.data(), + register_data.after.p.data(), + index_, + }; + } +# undef TEST_FUNC +}; + +test_result_t +test_st1_vector_plus_scalar() +{ +# define TEST_FUNC(st_instruction) \ + [](vector_plus_scalar_load_test_case_t::test_ptrs_t &ptrs) { \ + asm(/* clang-format off */ \ + RESTORE_Z_REGISTERS(z_restore_base) \ + RESTORE_P_REGISTERS(p_restore_base) \ + st_instruction "\n" \ + SAVE_Z_REGISTERS(z_save_base) \ + SAVE_P_REGISTERS(p_save_base) /* clang-format on */ \ + : \ + : [z_restore_base] "r"(ptrs.z_restore_base), \ + [z_save_base] "r"(ptrs.z_save_base), \ 
+ [p_restore_base] "r"(ptrs.p_restore_base), \ + [p_save_base] "r"(ptrs.p_save_base), [index] "r"(ptrs.index) \ + : ALL_Z_REGS, ALL_P_REGS, "memory"); \ + } + + return run_tests({ + /* { + * Test name, + * Function that executes the test instruction, + * Registers used {zt, pg, zn}, + * Offsets + * Stored value size + * index value + * }, + */ + /* TODO i#5036: Add tests for 32-bit element variants. + * For example: stnt1b z0.s, p0/z, [z31.s, x5]. + * These instructions require 32-bit base pointers and I'm not sure + * how we can reliably and portably guarantee that allocated memory + * has an address that fits into 32-bits. + */ + { + "stnt1b vector+scalar 64bit unscaled offset", + TEST_FUNC("stnt1b z0.d, p7, [z28.d, %[index]]"), + { /*zt=*/0, /*pg=*/7, /*zn=*/28 }, + std::array { 0, 16 }, + element_size_t::BYTE, + 0, + }, + { + "stnt1b vector+scalar 64bit unscaled offset (repeated base)", + TEST_FUNC("stnt1b z3.d, p6, [z24.d, %[index]]"), + { /*zt=*/3, /*pg=*/6, /*zn=*/24 }, + std::array { 7, 7 }, + element_size_t::BYTE, + 0, + }, + { + "stnt1h vector+scalar 64bit unscaled offset", + TEST_FUNC("stnt1h z7.d, p5, [z20.d, %[index]]"), + { /*zt=*/7, /*pg=*/5, /*zn=*/20 }, + std::array { -32, -16 }, + element_size_t::HALF, + -10, + }, + { + "stnt1h vector+scalar 64bit unscaled offset (repeated base)", + TEST_FUNC("stnt1h z11.d, p4, [z16.d, %[index]]"), + { /*zt=*/11, /*pg=*/4, /*zn=*/16 }, + std::array { -32, -32 }, + element_size_t::HALF, + -10, + }, + { + "stnt1w vector+scalar 64bit unscaled offset", + TEST_FUNC("stnt1w z15.d, p3, [z12.d, %[index]]"), + { /*zt=*/15, /*pg=*/3, /*zn=*/12 }, + std::array { 14, 100 }, + element_size_t::SINGLE, + 32, + }, + { + "stnt1w vector+scalar 64bit unscaled offset (repeated base)", + TEST_FUNC("stnt1w z19.d, p2, [z8.d, %[index]]"), + { /*zt=*/19, /*pg=*/2, /*zn=*/8 }, + std::array { 14, 14 }, + element_size_t::SINGLE, + 32, + }, + { + "stnt1d vector+scalar 64bit unscaled offset", + TEST_FUNC("stnt1d z23.d, p1, [z4.d, %[index]]"), 
+ { /*zt=*/23, /*pg=*/1, /*zn=*/4 }, + std::array { -16, 16 }, + element_size_t::DOUBLE, + 50, + }, + { + "stnt1d vector+scalar 64bit unscaled offset (repeated base)", + TEST_FUNC("stnt1d z27.d, p0, [z0.d, %[index]]"), + { /*zt=*/27, /*pg=*/0, /*zn=*/0 }, + std::array { -16, 16 }, + element_size_t::DOUBLE, + 50, + }, + }); +} + +#endif // defined(__ARM_FEATURE_SVE2) } // namespace int @@ -5865,6 +6312,12 @@ main(int argc, char **argv) if (test_st4_scalar_plus_immediate() == FAIL) status = FAIL; #endif +#if defined(__ARM_FEATURE_SVE2) + if (test_ld1_vector_plus_scalar() == FAIL) + status = FAIL; + if (test_st1_vector_plus_scalar() == FAIL) + status = FAIL; +#endif return status == PASS ? 0 : 1; } diff --git a/suite/tests/client-interface/drx-scattergather-aarch64.templatex b/suite/tests/client-interface/drx-scattergather-aarch64.templatex index 12bdc719316..0c574e31f37 100644 --- a/suite/tests/client-interface/drx-scattergather-aarch64.templatex +++ b/suite/tests/client-interface/drx-scattergather-aarch64.templatex @@ -277,8 +277,27 @@ st4d scalar\+immediate: PASS st4d scalar\+immediate \(min index\): PASS st4d scalar\+immediate \(max index\): PASS #endif /* __ARM_FEATURE_SVE */ +#ifdef __ARM_FEATURE_SVE2 +ldnt1b vector\+scalar 64bit unscaled offset: PASS +ldnt1sb vector\+scalar 64bit unscaled offset: PASS +ldnt1h vector\+scalar 64bit unscaled offset: PASS +ldnt1sh vector\+scalar 64bit unscaled offset: PASS +ldnt1w vector\+scalar 64bit unscaled offset: PASS +ldnt1sw vector\+scalar 64bit unscaled offset: PASS +ldnt1d vector\+scalar 64bit unscaled offset: PASS +stnt1b vector\+scalar 64bit unscaled offset: PASS +stnt1b vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1h vector\+scalar 64bit unscaled offset: PASS +stnt1h vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1w vector\+scalar 64bit unscaled offset: PASS +stnt1w vector\+scalar 64bit unscaled offset \(repeated base\): PASS +stnt1d vector\+scalar 64bit unscaled offset: PASS 
+stnt1d vector\+scalar 64bit unscaled offset \(repeated base\): PASS +#endif /* __ARM_FEATURE_SVE2 */ #ifndef TEST_SAMPLE_CLIENT -#ifdef __ARM_FEATURE_SVE +#if defined(__ARM_FEATURE_SVE2) +event_exit, 1168 scatter/gather instructions +#elif defined( __ARM_FEATURE_SVE) event_exit, 1108 scatter/gather instructions #else event_exit, 0 scatter/gather instructions diff --git a/suite/tests/linux/bad-signal-stack.c b/suite/tests/linux/bad-signal-stack.c index 9067fca86e2..00a3434c296 100644 --- a/suite/tests/linux/bad-signal-stack.c +++ b/suite/tests/linux/bad-signal-stack.c @@ -30,13 +30,14 @@ * DAMAGE. */ -#include "tools.h" #include #include -#include #include #include #include +/* i#6615 include tools.h after signal.h to avoid issues on + * Ubuntu 22.04 caused by _GNU_SOURCE being defined */ +#include "tools.h" #define ALT_STACK_SIZE (SIGSTKSZ * 4) diff --git a/suite/tests/linux/clone.c b/suite/tests/linux/clone.c index 6ef8d4e2503..2d422e336c7 100644 --- a/suite/tests/linux/clone.c +++ b/suite/tests/linux/clone.c @@ -47,7 +47,8 @@ #include #include -#include "tools.h" /* for nolibc_* wrappers. */ +#include "tools.h" /* for nolibc_* wrappers. */ +#include "../../core/unix/include/clone3.h" /* for clone3_syscall_args_t */ #ifdef ANDROID typedef unsigned long ulong; @@ -56,7 +57,7 @@ typedef unsigned long ulong; /* The first published clone_args had all fields till 'tls'. A clone3 * syscall made by the user must have a struct of at least this size. */ -#define CLONE_ARGS_SIZE_MIN_POSSIBLE 64 +#define CLONE_ARGS_SIZE_MIN_POSSIBLE CLONE_ARGS_SIZE_VER0 /* We define this constant so that we can try to make the clone3 * syscall on systems where it is not available, to verify that it @@ -64,10 +65,6 @@ typedef unsigned long ulong; */ #define CLONE3_SYSCALL_NUM 435 -/* i#762: Hard to get clone() from sched.h, so copy prototype. 
*/ -extern int -clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg, ...); - #define THREAD_STACK_SIZE (32 * 1024) #ifdef X64 @@ -78,14 +75,18 @@ clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg, ...); /* forward declarations */ static int +make_clone_syscall(uint flags, byte *newsp, void *ptid, void *tls, void *ctid, + void (*fcn)(void)); +static int make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void)); static pid_t -create_thread(int (*fcn)(void *), void *arg, void **stack, bool share_sighand, - bool clone_vm); -#ifdef SYS_clone3 +create_thread(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm); + static pid_t create_thread_clone3(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm); -#endif + +static bool clone3_available = false; + static void delete_thread(pid_t pid, void *stack); int @@ -104,24 +105,72 @@ static void *stack; void test_thread(bool share_sighand, bool clone_vm, bool use_clone3) { - if (use_clone3) { -#ifdef SYS_clone3 - child = create_thread_clone3(run_with_exit, &stack, share_sighand, clone_vm); -#else - /* If SYS_clone3 is not defined, we simply use SYS_clone instead, so that - * the expected output is the same in both cases. - */ - child = create_thread(run, NULL, &stack, share_sighand, clone_vm); -#endif - } else - child = create_thread(run, NULL, &stack, share_sighand, clone_vm); + print("%s(share_sighand %d, clone_vm %d, use_clone3 %d)\n", __FUNCTION__, + share_sighand, clone_vm, use_clone3); + + /* Use create_thread when clone3 is asked for but not available so that + * the output is the same. + */ + pid_t (*create_thread_func)(void (*fcn)(void), void **stack, bool share_sighand, + bool clone_vm) = + (use_clone3 && clone3_available) ? 
create_thread_clone3 : create_thread; + + child = create_thread_func(run_with_exit, &stack, share_sighand, clone_vm); + assert(child > -1); delete_thread(child, stack); } +#ifdef X86 /* i#6514: dynamorio_clone needs to be updated for other arches. */ + +/* i#6514: Test passing NULL for the stack pointer to the syscall. */ +void +test_with_null_stack_pointer(bool clone_vm, bool use_clone3) +{ + print("%s(clone_vm %d, use_clone3 %d)\n", __FUNCTION__, clone_vm, use_clone3); + int flags = clone_vm ? (CLONE_VFORK | CLONE_VM) : 0; + int ret; + /* If we don't have clone3, keep expected output the same and just use clone. */ + if (use_clone3 && clone3_available) { + clone3_syscall_args_t cl_args = { 0 }; + cl_args.flags = flags; + cl_args.exit_signal = SIGCHLD; + ret = make_clone3_syscall(&cl_args, sizeof(cl_args), run_with_exit); + } else { + flags = flags | SIGCHLD; + ret = make_clone_syscall(flags, /*stack=*/NULL, /*parent_tid=*/NULL, + /*tls=*/NULL, /*child_tid=*/NULL, run_with_exit); + } + if (ret == -1) { + perror("Error calling clone"); + return; + } + delete_thread(ret, NULL); +} + +#endif + int main() { + /* Try using clone3 when it is possibly not defined. This is done for two + * reasons: test whether the kernel supports it, and our handling of clone3 + * when it doesn't. + */ + int ret_failure_clone3 = make_clone3_syscall(NULL, 0, NULL); + assert(ret_failure_clone3 == -1); + + /* In some environments, we see that the kernel supports clone3 even though + * SYS_clone3 is not defined by glibc. So we don't predicate our efforts on + * whether SYS_clone3 is defined. Plus in some scenarios SYS_clone3 is + * defined but clone3 returns ENOSYS. + * E.g., when running in a container under Ubuntu 22.04 i#6596 + * see https://github.com/moby/moby/pull/42681 + */ + assert(errno == ENOSYS || errno == EINVAL); + if (errno != ENOSYS) + clone3_available = true; + /* First test a thread that does not share signal handlers * (xref i#2089). 
*/ @@ -138,19 +187,12 @@ main() test_thread(true /*share_sighand*/, true /*clone_vm*/, false /*use_clone3*/); test_thread(true /*share_sighand*/, true /*clone_vm*/, true /*use_clone3*/); - /* Try using clone3 when it is possibly not defined. */ - int ret_failure_clone3 = make_clone3_syscall(NULL, 0, NULL); - assert(ret_failure_clone3 == -1); -#ifdef SYS_clone3 - /* Though there's no guarantee, we assume that the kernel supports clone3 if - * SYS_clone3 is defined. - */ - assert(errno == EINVAL); -#else - /* On some environments, we see that the kernel supports clone3 even though - * SYS_clone3 is not defined by glibc. - */ - assert(errno == ENOSYS || errno == EINVAL); +#if defined(X86) + /* Test passing NULL for the stack pointer (xref i#6514). */ + test_with_null_stack_pointer(/*clone_vm=*/false, /*use_clone3=*/false); + test_with_null_stack_pointer(/*clone_vm=*/false, /*use_clone3=*/true); + test_with_null_stack_pointer(/*clone_vm=*/true, /*use_clone3=*/false); + test_with_null_stack_pointer(/*clone_vm=*/true, /*use_clone3=*/true); #endif } @@ -183,6 +225,19 @@ run_with_exit(void) exit(run(NULL)); } +/* A wrapper on dynamorio_clone to set errno. */ +static int +make_clone_syscall(uint flags, byte *newsp, void *ptid, void *tls, void *ctid, + void (*fcn)(void)) +{ + int ret = dynamorio_clone(flags, newsp, ptid, tls, ctid, fcn); + if (ret < 0) { + errno = -ret; + return -1; + } + return ret; +} + void *p_tid, *c_tid; /* Create a new thread. It should be passed "fcn", a function which @@ -190,14 +245,14 @@ void *p_tid, *c_tid; * first argument is passed in "arg". Returns the PID of the new * thread */ static pid_t -create_thread(int (*fcn)(void *), void *arg, void **stack, bool share_sighand, - bool clone_vm) +create_thread(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm) { /* !clone_vm && share_sighand is not supported. 
*/ assert(clone_vm || !share_sighand); pid_t newpid; int flags; void *my_stack; + void *stack_ptr; my_stack = stack_alloc(THREAD_STACK_SIZE); @@ -209,10 +264,10 @@ create_thread(int (*fcn)(void *), void *arg, void **stack, bool share_sighand, flags = (SIGCHLD | CLONE_FS | CLONE_FILES | (share_sighand ? CLONE_SIGHAND : 0) | (clone_vm ? CLONE_VM : 0)); /* The stack arg should point to the stack's highest address (non-inclusive). */ - newpid = clone(fcn, (void *)((size_t)my_stack + THREAD_STACK_SIZE), flags, arg, - &p_tid, NULL, &c_tid); + stack_ptr = (void *)((size_t)my_stack + THREAD_STACK_SIZE); + newpid = make_clone_syscall(flags, stack_ptr, &p_tid, NULL, &c_tid, fcn); - if (newpid == -1) { + if (newpid < 0) { perror("Error calling clone\n"); stack_free(my_stack, THREAD_STACK_SIZE); return -1; @@ -222,7 +277,7 @@ create_thread(int (*fcn)(void *), void *arg, void **stack, bool share_sighand, return newpid; } -/* glibc does not provide a wrapper for clone3 yet. This makes it difficult +/* glibc,drlibc do not provide a wrapper for clone3 yet. This makes it difficult * to create new threads in C code using syscall(), as we have to deal with * complexities associated with the child thread having a fresh stack * without any return addresses or space for local variables. 
So, we @@ -245,13 +300,13 @@ make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void)) "mov %[fcn], %%rdx\n\t" "syscall\n\t" "test %%rax, %%rax\n\t" - "jnz parent\n\t" + "jnz 1f\n\t" "call *%%rdx\n\t" - "parent:\n\t" + "1:\n\t" "mov %%rax, %[result]\n\t" - : [ result ] "=m"(result) - : [ sys_clone3 ] "i"(CLONE3_SYSCALL_NUM), [ clone_args ] "m"(clone_args), - [ clone_args_size ] "m"(clone_args_size), [ fcn ] "m"(fcn) + : [result] "=m"(result) + : [sys_clone3] "i"(CLONE3_SYSCALL_NUM), [clone_args] "m"(clone_args), + [clone_args_size] "m"(clone_args_size), [fcn] "m"(fcn) /* syscall clobbers rcx and r11 */ : "rax", "rdi", "rsi", "rdx", "rcx", "r11", "memory"); # else @@ -261,13 +316,13 @@ make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void)) "mov %[fcn], %%edx\n\t" "int $0x80\n\t" "test %%eax, %%eax\n\t" - "jnz parent\n\t" + "jnz 1f\n\t" "call *%%edx\n\t" - "parent:\n\t" + "1:\n\t" "mov %%eax, %[result]\n\t" - : [ result ] "=m"(result) - : [ sys_clone3 ] "i"(CLONE3_SYSCALL_NUM), [ clone_args ] "m"(clone_args), - [ clone_args_size ] "m"(clone_args_size), [ fcn ] "m"(fcn) + : [result] "=m"(result) + : [sys_clone3] "i"(CLONE3_SYSCALL_NUM), [clone_args] "m"(clone_args), + [clone_args_size] "m"(clone_args_size), [fcn] "m"(fcn) : "eax", "ebx", "ecx", "edx", "memory"); # endif #elif defined(AARCH64) @@ -276,13 +331,13 @@ make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void)) "ldr x1, %[clone_args_size]\n\t" "ldr x2, %[fcn]\n\t" "svc #0\n\t" - "cbnz x0, parent\n\t" + "cbnz x0, 1f\n\t" "blr x2\n\t" - "parent:\n\t" + "1:\n\t" "str x0, %[result]\n\t" - : [ result ] "=m"(result) - : [ sys_clone3 ] "i"(CLONE3_SYSCALL_NUM), [ clone_args ] "m"(clone_args), - [ clone_args_size ] "m"(clone_args_size), [ fcn ] "m"(fcn) + : [result] "=m"(result) + : [sys_clone3] "i"(CLONE3_SYSCALL_NUM), [clone_args] "m"(clone_args), + [clone_args_size] "m"(clone_args_size), [fcn] "m"(fcn) : "x0", "x1", "x2", "x8", "memory"); #elif 
defined(ARM) /* XXX: Add asm wrapper for ARM. @@ -299,13 +354,12 @@ make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void)) return result; } -#ifdef SYS_clone3 static pid_t create_thread_clone3(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm) { /* !clone_vm && share_sighand is not supported. */ assert(clone_vm || !share_sighand); - struct clone_args cl_args = { 0 }; + clone3_syscall_args_t cl_args = { 0 }; void *my_stack; my_stack = stack_alloc(THREAD_STACK_SIZE); /* We're not doing CLONE_THREAD => child has its own pid @@ -318,17 +372,17 @@ create_thread_clone3(void (*fcn)(void), void **stack, bool share_sighand, bool c cl_args.exit_signal = SIGCHLD; cl_args.stack = (ptr_uint_t)my_stack; cl_args.stack_size = THREAD_STACK_SIZE; - int ret = make_clone3_syscall(NULL, sizeof(struct clone_args), fcn); + int ret = make_clone3_syscall(NULL, sizeof(clone3_syscall_args_t), fcn); assert(errno == EFAULT); ret = make_clone3_syscall((void *)0x123 /* bogus address */, - sizeof(struct clone_args), fcn); + sizeof(clone3_syscall_args_t), fcn); assert(errno == EFAULT); ret = make_clone3_syscall(&cl_args, CLONE_ARGS_SIZE_MIN_POSSIBLE - 1, fcn); assert(errno == EINVAL); - ret = make_clone3_syscall(&cl_args, sizeof(struct clone_args), fcn); + ret = make_clone3_syscall(&cl_args, sizeof(clone3_syscall_args_t), fcn); /* Child threads should already have been directed to fcn. */ assert(ret != 0); if (ret == -1) { @@ -337,25 +391,28 @@ create_thread_clone3(void (*fcn)(void), void **stack, bool share_sighand, bool c return -1; } else { assert(ret > 0); - /* Ensure that DR restores fields in clone_args after the syscall. */ + /* Ensure that DR restores fields in cl_args after the syscall. 
*/ assert(cl_args.stack == (ptr_uint_t)my_stack && cl_args.stack_size == THREAD_STACK_SIZE); } *stack = my_stack; return (pid_t)ret; } -#endif static void delete_thread(pid_t pid, void *stack) { pid_t result; /* do not print out pids to make diff easy */ - result = waitpid(pid, NULL, 0); + int wait_status; + result = waitpid(pid, &wait_status, 0); print("Child has exited\n"); if (result == -1 || result != pid) perror("delete_thread waitpid"); - stack_free(stack, THREAD_STACK_SIZE); + else if (!WIFEXITED(wait_status) || WEXITSTATUS(wait_status) != 0) + print("delete_thread bad wait_status: 0x%x\n", wait_status); + if (stack != NULL) + stack_free(stack, THREAD_STACK_SIZE); } /* Allocate stack storage on the app's heap. Returns the lowest address of the diff --git a/suite/tests/linux/clone.expect b/suite/tests/linux/clone.expect deleted file mode 100644 index 2976fcc17ae..00000000000 --- a/suite/tests/linux/clone.expect +++ /dev/null @@ -1,78 +0,0 @@ -Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited -Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited -Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited -Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited -Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited 
-Sideline thread started -i = 2500000 -i = 5000000 -i = 7500000 -i = 10000000 -i = 12500000 -i = 15000000 -i = 17500000 -i = 20000000 -i = 22500000 -i = 25000000 -Sideline thread finished -Child has exited diff --git a/suite/tests/linux/clone.template b/suite/tests/linux/clone.template new file mode 100644 index 00000000000..4396d962ed6 --- /dev/null +++ b/suite/tests/linux/clone.template @@ -0,0 +1,142 @@ +test_thread(share_sighand 0, clone_vm 0, use_clone3 0) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_thread(share_sighand 0, clone_vm 0, use_clone3 1) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_thread(share_sighand 0, clone_vm 1, use_clone3 0) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_thread(share_sighand 0, clone_vm 1, use_clone3 1) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_thread(share_sighand 1, clone_vm 1, use_clone3 0) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_thread(share_sighand 1, clone_vm 1, use_clone3 1) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +#if defined(X86) 
+test_with_null_stack_pointer(clone_vm 0, use_clone3 0) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_with_null_stack_pointer(clone_vm 0, use_clone3 1) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_with_null_stack_pointer(clone_vm 1, use_clone3 0) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +test_with_null_stack_pointer(clone_vm 1, use_clone3 1) +Sideline thread started +i = 2500000 +i = 5000000 +i = 7500000 +i = 10000000 +i = 12500000 +i = 15000000 +i = 17500000 +i = 20000000 +i = 22500000 +i = 25000000 +Sideline thread finished +Child has exited +#endif diff --git a/suite/tests/runall.cmake b/suite/tests/runall.cmake index 7c7ad7823ea..08eea9ed4d5 100644 --- a/suite/tests/runall.cmake +++ b/suite/tests/runall.cmake @@ -90,6 +90,12 @@ else (UNIX) set(nudge_cmd drconfig) endif (UNIX) +if (UNIX) + set(detach_cmd drconfig) +else () + set(detach_cmd drconfig.exe) +endif() + if (UNIX) set(MAX_ITERS 50000) else () @@ -268,17 +274,6 @@ if ("${orig_nudge}" MATCHES "-client") endif () endwhile() elseif ("${orig_nudge}" MATCHES "" OR "${orig_nudge}" MATCHES "") - # Wait until attached. - set(iters 0) - while (NOT "${output}" MATCHES "attach\n") - do_sleep(0.1) - file(READ "${out}" output) - math(EXPR iters "${iters}+1") - if (${iters} GREATER ${MAX_ITERS}) - kill_background_process(ON) - message(FATAL_ERROR "Timed out waiting for attach") - endif () - endwhile() # Wait until thread init. 
set(iters 0) while (NOT "${output}" MATCHES "thread init\n") @@ -287,7 +282,7 @@ elseif ("${orig_nudge}" MATCHES "<attach>" OR "${orig_nudge}" MATCHES "<detach>" math(EXPR iters "${iters}+1") if (${iters} GREATER ${MAX_ITERS}) kill_background_process(ON) - message(FATAL_ERROR "Timed out waiting for attach") + message(FATAL_ERROR "Timed out waiting for thread init") endif () endwhile() else () @@ -298,7 +293,7 @@ else () endif () if ("${orig_nudge}" MATCHES "<detach>") - execute_process(COMMAND "${toolbindir}/drconfig.exe" "-detach" ${pid} + execute_process(COMMAND "${toolbindir}/${detach_cmd}" "-detach" ${pid} RESULT_VARIABLE detach_result ERROR_VARIABLE detach_err OUTPUT_VARIABLE detach_out) @@ -314,10 +309,10 @@ if ("${orig_nudge}" MATCHES "<detach>") math(EXPR iters "${iters}+1") if (${iters} GREATER ${MAX_ITERS}) kill_background_process(ON) - message(FATAL_ERROR "Timed out waiting for attach") + message(FATAL_ERROR "Timed out waiting for detach") endif () endwhile() -endif() +endif () kill_background_process(OFF) diff --git a/suite/tests/runmulti.cmake b/suite/tests/runmulti.cmake index 0fb4588b806..d2b887eaf8e 100644 --- a/suite/tests/runmulti.cmake +++ b/suite/tests/runmulti.cmake @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2015-2023 Google, Inc. All rights reserved. +# Copyright (c) 2015-2024 Google, Inc. All rights reserved. # ********************************************************** # Redistribution and use in source and binary forms, with or without @@ -46,6 +46,9 @@ # glob-expansion is passed to the command. # If the expansion is empty for precmd, the precmd execution is skipped. +# Recognize literals in if statements. +cmake_policy(SET CMP0012 NEW) + # Intra-arg space=@@ and inter-arg space=@. # XXX i#1327: now that we have -c and other option passing improvements we # should be able to get rid of this @@ stuff.
@@ -115,7 +118,7 @@ macro(process_cmdline line skip_empty err_and_out) set(${err_and_out} "${${err_and_out}}${cmd_err}${cmd_out}") endmacro() -process_cmdline(precmd ON ignore) +process_cmdline(precmd ON tomatch) process_cmdline(cmd OFF tomatch) diff --git a/third_party/elfutils b/third_party/elfutils new file mode 160000 index 00000000000..c1058da5a45 --- /dev/null +++ b/third_party/elfutils @@ -0,0 +1 @@ +Subproject commit c1058da5a450e33e72b72abb53bc3ffd7f6b361b diff --git a/third_party/libgcc/README.dynamorio b/third_party/libgcc/README.dynamorio index 8c2bf27a237..993b50f58aa 100644 --- a/third_party/libgcc/README.dynamorio +++ b/third_party/libgcc/README.dynamorio @@ -50,11 +50,13 @@ as it breaks some old assemblers: For udivmoddi4.c, it contains the __udivmoddi4 and __moddi3 functions extracted from libgcc2.c with the following lines added: -> /* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */ -> typedef short Wtype; -> typedef int DWtype; -> typedef unsigned int UWtype; -> typedef unsigned long long UDWtype; +> /* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added. +> Note that for current targets (x86, aarch, riscv - 32 and 64 bit) +> LIBGCC2_UNITS_PER_WORD == 4. Thus a double word is 8 bytes. */ +> typedef int32_t Wtype; +> typedef int64_t DWtype; +> typedef uint32_t UWtype; +> typedef uint64_t UDWtype; > #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ > struct DWstruct {Wtype high, low;}; > #else diff --git a/third_party/libgcc/udivmoddi4.c b/third_party/libgcc/udivmoddi4.c index 2b20661f0d3..188f828d8f9 100644 --- a/third_party/libgcc/udivmoddi4.c +++ b/third_party/libgcc/udivmoddi4.c @@ -23,11 +23,15 @@ a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ -/* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */ -typedef short Wtype; -typedef int DWtype; -typedef unsigned int UWtype; -typedef unsigned long long UDWtype; +#include <stdint.h> + +/* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added. + Note that for current targets (x86, aarch, riscv - 32 and 64 bit) + LIBGCC2_UNITS_PER_WORD == 4. Thus a double word is 8 bytes. */ +typedef int32_t Wtype; +typedef int64_t DWtype; +typedef uint32_t UWtype; +typedef uint64_t UDWtype; #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ struct DWstruct {Wtype high, low;}; #else diff --git a/tools/drdeploy.c b/tools/drdeploy.c index 05175816564..a6b5c49a480 100644 --- a/tools/drdeploy.c +++ b/tools/drdeploy.c @@ -52,6 +52,10 @@ # include #endif +#ifdef LINUX +# include +#endif + #include #include #include @@ -64,6 +68,15 @@ #include "dr_inject.h" #include "dr_frontend.h" +#ifdef LINUX +/* XXX: It would be cleaner to have a header for this and have nudgesig.c be in its + * own static library instead of compiled separately for the core and drdeploy.
+ */ +extern bool +create_nudge_signal_payload(siginfo_t *info DR_PARAM_OUT, uint action_mask, + client_id_t client_id, uint flags, uint64 client_arg); +#endif + typedef enum _action_t { action_none, action_nudge, @@ -1211,7 +1224,7 @@ _tmain(int argc, TCHAR *targv[]) bool exit0 = false; #endif #if defined(DRCONFIG) -# ifdef WINDOWS +# if defined(WINDOWS) || defined(LINUX) process_id_t detach_pid = 0; # endif #endif @@ -1508,7 +1521,10 @@ _tmain(int argc, TCHAR *targv[]) nudge_all = true; nudge_id = strtoul(argv[++i], NULL, 16); nudge_arg = _strtoui64(argv[++i], NULL, 16); - } else if (strcmp(argv[i], "-detach") == 0) { + } +# endif +# if defined(WINDOWS) || defined(LINUX) + else if (strcmp(argv[i], "-detach") == 0) { if (i + 1 >= argc) usage(false, "detach requires a process id"); const char *pid_str = argv[++i]; @@ -1836,12 +1852,32 @@ _tmain(int argc, TCHAR *targv[]) if (!unregister_proc(process, 0, global, dr_platform)) die(); } +# if defined(WINDOWS) || defined(LINUX) + else if (detach_pid != 0) { +# ifdef WINDOWS + dr_config_status_t res = detach(detach_pid, TRUE, detach_timeout); + if (res != DR_SUCCESS) + error("unable to detach: check pid and system ptrace permissions"); +# else + siginfo_t info; + uint action_mask = NUDGE_FREE_ARG; + client_id_t client_id = 0; + uint64 client_arg = 0; + bool success = + create_nudge_signal_payload(&info, action_mask, client_id, /*flags=*/0, client_arg); + assert(success); /* failure means kernel's sigqueueinfo has changed */ + /* send the nudge */ + i = syscall(SYS_rt_sigqueueinfo, detach_pid, NUDGESIG_SIGNUM, &info); + if (i < 0) + fprintf(stderr, "nudge FAILED with error %d\n", i); +# endif + } +# endif # ifndef WINDOWS else { usage(false, "no action specified"); } # else /* WINDOWS */ - /* FIXME i#840: Nudge NYI on Linux.
*/ else if (action == action_nudge) { int count = 1; dr_config_status_t res = DR_SUCCESS; @@ -1878,12 +1914,6 @@ _tmain(int argc, TCHAR *targv[]) dr_registered_process_iterator_stop(iter); } } - /* FIXME i#95: Process detach NYI for UNIX. */ - else if (detach_pid != 0) { - dr_config_status_t res = detach(detach_pid, TRUE, detach_timeout); - if (res != DR_SUCCESS) - error("unable to detach: check pid and system ptrace permissions"); - } # endif else if (!syswide_on && !syswide_off) { usage(false, "no action specified");