Skip to content

Commit 426ee4e

Browse files
committed
Update on "[Executorch] make slice_copy parallel"
When doing large prefills in LLMs, slice_copy takes about 5-10% time. Mainly coming from slicing in the rope implementation. Differential Revision: [D85532081](https://our.internmc.facebook.com/intern/diff/D85532081/) [ghstack-poisoned]
2 parents 6f77646 + 6859abd commit 426ee4e

File tree

360 files changed

+13091
-4550
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

360 files changed

+13091
-4550
lines changed

.ci/docker/common/install_arm.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
# Copyright 2025 Arm Limited and/or its affiliates.
5+
#
6+
# This source code is licensed under the BSD-style license found in the
7+
# LICENSE file in the root directory of this source tree.
8+
9+
set -ex
10+
11+
install_arm_prerequiresites() {
12+
apt-get update -y
13+
apt-get install -y --no-install-recommends \
14+
mesa-vulkan-drivers libvulkan1
15+
rm -rf /var/lib/apt/lists/*
16+
}
17+
18+
install_arm_prerequiresites

.ci/docker/ubuntu/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ RUN if [ -n "${ANDROID_NDK_VERSION}" ]; then bash ./install_android.sh; fi
8383
RUN rm install_android.sh
8484

8585
ARG ARM_SDK
86+
COPY ./common/install_arm.sh install_arm.sh
87+
RUN if [ -n "${ARM_SDK}" ]; then bash ./install_arm.sh; fi
88+
RUN rm install_arm.sh
8689

8790
ARG ZEPHYR_SDK
8891
COPY ./common/install_zephyr.sh install_zephyr.sh

.ci/scripts/test_backend.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,25 @@ if [[ "$FLOW" == *vulkan* ]]; then
5757
fi
5858

5959
if [[ "$FLOW" == *arm* ]]; then
60+
6061
# Setup ARM deps.
61-
.ci/scripts/setup-arm-baremetal-tools.sh
62+
if [[ "$FLOW" == *vgf* ]]; then
63+
.ci/scripts/setup-arm-baremetal-tools.sh --enable-mlsdk-deps --install-mlsdk-deps-with-pip
64+
else
65+
.ci/scripts/setup-arm-baremetal-tools.sh
66+
fi
6267
source examples/arm/ethos-u-scratch/setup_path.sh
6368

6469
if [[ "$FLOW" == *ethos_u* ]]; then
6570
# Prepare a test runner binary that can run on the Corstone-3x0 FVPs
6671
backends/arm/scripts/build_executorch.sh
6772
backends/arm/test/setup_testing.sh
6873
fi
74+
75+
if [[ "$FLOW" == *vgf* ]]; then
76+
# Prepare a test runner binary for VKML runtime
77+
backends/arm/test/setup_testing_vkml.sh
78+
fi
6979
fi
7080

7181
if [[ $IS_MACOS -eq 1 ]]; then

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -171,15 +171,14 @@ cmake_build_llama_runner() {
171171
git submodule update --init
172172
popd
173173
dir="examples/models/llama"
174-
retry cmake \
175-
-DEXECUTORCH_BUILD_TESTS=ON \
176-
-DBUILD_TESTING=OFF \
177-
-DCMAKE_INSTALL_PREFIX=cmake-out \
178-
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
179-
-Bcmake-out/${dir} \
180-
${dir}
181-
cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"
182-
174+
if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
175+
PRESET="llama-debug"
176+
else
177+
PRESET="llama-release"
178+
fi
179+
pushd "${dir}"
180+
cmake --workflow --preset "${PRESET}"
181+
popd
183182
}
184183

185184
cleanup_files() {

.ci/scripts/test_llama_lora.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212
cmake_install_executorch_libraries() {
1313
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
1414
rm -rf cmake-out
15-
retry cmake --preset llm \
16-
-DCMAKE_INSTALL_PREFIX=cmake-out \
17-
-DCMAKE_BUILD_TYPE=Release
18-
cmake --build cmake-out -j9 --target install --config Release
15+
cmake --workflow llm-release
1916
}
2017

2118
cmake_build_llama_runner() {

.ci/scripts/test_model_e2e.sh

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -156,27 +156,13 @@ echo "::endgroup::"
156156

157157
echo "::group::Build $MODEL_NAME Runner"
158158

159-
if [ "$DEVICE" = "cuda" ]; then
160-
BUILD_BACKEND="EXECUTORCH_BUILD_CUDA"
161-
elif [ "$DEVICE" = "metal" ]; then
162-
BUILD_BACKEND="EXECUTORCH_BUILD_METAL"
163-
else
159+
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ]; then
164160
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda' or 'metal'."
165161
exit 1
166162
fi
167163

168-
cmake --preset llm \
169-
-D${BUILD_BACKEND}=ON \
170-
-DCMAKE_INSTALL_PREFIX=cmake-out \
171-
-DCMAKE_BUILD_TYPE=Release \
172-
-Bcmake-out -S.
173-
cmake --build cmake-out -j$(nproc) --target install --config Release
174-
175-
cmake -D${BUILD_BACKEND}=ON \
176-
-DCMAKE_BUILD_TYPE=Release \
177-
-Sexamples/models/$RUNNER_PATH \
178-
-Bcmake-out/examples/models/$RUNNER_PATH/
179-
cmake --build cmake-out/examples/models/$RUNNER_PATH --target $RUNNER_TARGET --config Release
164+
MAKE_TARGET="${RUNNER_PATH}-${DEVICE}"
165+
make "${MAKE_TARGET}"
180166
echo "::endgroup::"
181167

182168
echo "::group::Run $MODEL_NAME Runner"
@@ -195,7 +181,7 @@ case "$MODEL_NAME" in
195181
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
196182
;;
197183
whisper-*)
198-
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR --model_name ${MODEL_NAME}"
184+
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
199185
;;
200186
gemma3)
201187
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"

.ci/scripts/test_phi_3_mini.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,16 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi
2323

2424
cmake_install_executorch_libraries() {
2525
rm -rf cmake-out
26-
cmake --preset llm -DCMAKE_INSTALL_PREFIX=cmake-out -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
27-
cmake --build cmake-out -j16 --target install --config ${BUILD_TYPE}
26+
27+
# Select workflow preset based on BUILD_TYPE
28+
if [[ "${BUILD_TYPE}" == "Debug" ]]; then
29+
WORKFLOW_PRESET="llm-debug"
30+
else
31+
WORKFLOW_PRESET="llm-release"
32+
fi
33+
34+
echo "Using workflow preset: ${WORKFLOW_PRESET}"
35+
cmake --workflow --preset ${WORKFLOW_PRESET}
2836
}
2937

3038
cmake_build_phi_3_mini() {

.ci/scripts/utils.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ dedupe_macos_loader_path_rpaths() {
8484

8585
install_domains() {
8686
echo "Install torchvision and torchaudio"
87-
pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
88-
pip install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${TORCHVISION_VERSION}"
87+
pip install --no-build-isolation --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
88+
pip install --no-build-isolation --user "git+https://github.com/pytorch/vision.git@${TORCHVISION_VERSION}"
8989
}
9090

9191
install_pytorch_and_domains() {

.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
strategy:
7272
fail-fast: false
7373
matrix:
74-
model: [linear, add, add_mul, resnet18, conv1d]
74+
model: [linear, add, add_mul, resnet18, conv1d, sdpa]
7575
with:
7676
timeout: 90
7777
runner: linux.g5.4xlarge.nvidia.gpu

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ jobs:
340340
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
341341
timeout: 90
342342
script: |
343+
set -eux
343344
# The generic Linux job chooses to use base env, not the one setup by the image
344345
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
345346
conda activate "${CONDA_ENV}"

0 commit comments

Comments
 (0)