Commit da339e4

Merge branch 'habana_main' into Enc_dec_mss
jkaniecki authored Jan 13, 2025
2 parents f93b319 + c245ef0 commit da339e4
Showing 6 changed files with 139 additions and 13 deletions.
10 changes: 10 additions & 0 deletions .github/actionlint.yaml
@@ -0,0 +1,10 @@
self-hosted-runner:
  # Labels of self-hosted runner in array of strings.
  labels:
    - generic-runner
paths:
  .github/workflows/trigger_jenkins.yml:
    ignore:
      - shellcheck reported issue in this script: SC2116:.+
      - shellcheck reported issue in this script: SC2086:.+
      - shellcheck reported issue in this script: SC2001:.+
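The entries under ignore are regular expressions that actionlint matches against the findings its built-in shellcheck pass raises for the run: blocks of trigger_jenkins.yml (SC2116: useless echo, SC2086: unquoted expansion, SC2001: sed where parameter expansion would do). A minimal sketch of how such a pattern filters a finding — the sample message below is made up for illustration:

    import re

    # Hypothetical shellcheck finding, in the shape actionlint reports them.
    finding = ("shellcheck reported issue in this script: "
               "SC2086:info:3:14: Double quote to prevent globbing and word splitting")

    pattern = re.compile(r"shellcheck reported issue in this script: SC2086:.+")
    print(bool(pattern.match(finding)))  # True -> the finding is suppressed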
107 changes: 102 additions & 5 deletions .github/workflows/trigger_jenkins.yml
@@ -6,11 +6,108 @@ on:
permissions:
  pull-requests: write
jobs:
  TriggerJenkinsTests:
  DependencyReview:
    name: Dependency Review
    runs-on: ubuntu-latest
    steps:
      - name: Trigger Jenkins Tests
      - name: 'Checkout Repository'
        uses: actions/checkout@v4
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@v4
        with:
          fail-on-severity: high
  CodeQLScan:
    name: CodeQL Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: python
          build-mode: none
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:python"
          upload: "never"
  CalculateJobs:
    runs-on: generic-runner
    name: Calculate Tests To Trigger
    needs: [DependencyReview,CodeQLScan]
    outputs:
      tests_list: ${{ steps.tests.outputs.tests_list }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install YQ
        run: |
          wget https://github.com/mikefarah/yq/releases/download/v4.14.1/yq_linux_amd64.tar.gz -O - |\
          tar xz && sudo mv yq_linux_amd64 /usr/bin/yq
      - name: Calculate Tests
        id: tests
        run: |
          test_list=$(yq -oj e .jenkins/test_config.yaml | jq -c "[.stages[].steps[]]")
          echo "tests_list=${test_list}" >> "$GITHUB_OUTPUT"
  TestRun:
    name: Test / ${{matrix.tests.name}}
    needs: [CalculateJobs]
    runs-on: generic-runner
    strategy:
      fail-fast: false
      matrix:
        tests: ${{ fromJson(needs.CalculateJobs.outputs.tests_list) }}
    env:
      USERNAME: ${{ secrets.SWUSERNAME }}
      PASSWORD: ${{ secrets.SWPASSWORD }}
      POD_TEMPLATE: ${{ secrets.POD_TEMPLATE }}
      TEST_COMMAND: ${{ matrix.tests.command }}
    steps:
      - name: Download Hlctl
        run: |
          curl --show-error --silent ${{ secrets.HLCTL_ADDRESS }} | bash &> /dev/null
      - name: Config Hlctl
        run: |
          ${{ secrets.HLCTL_COMMAND }} &> /dev/null
      - name: Create Pod Template
        env:
          TARGET_BRANCH: ${{ github.base_ref }}
          RELEASED_SYNAPSE_VERSION: ${{ vars.RELEASED_SYNAPSE_VERSION }}
          BASE_BRANCH: ${{github.head_ref}}
        run: |
          if [[ $TARGET_BRANCH == "habana_main" ]]; then
            synapse_version=${RELEASED_SYNAPSE_VERSION#v}
          elif [[ $TARGET_BRANCH =~ v*.*.* ]]; then
            synapse_version=${TARGET_BRANCH#v}
          else
            echo "Cant Calculate Synapse Version, Failing The Test"
            exit 1
          fi
          synapse_build=$(curl "https://dms.habana-labs.com/api/v1.1/branch/info/v$synapse_version" | jq -r ".release_id")
          pt_version=${{ vars.PT_VERSION }}
          BUILD_TAG="Github-vLLM-Fork-${{ github.event.number }}-${{github.run_number}}"
          safe_cmd=${TEST_COMMAND//&/\\&}
          echo "Writing Pod Template To File"
          echo "${POD_TEMPLATE}" > pod.yml
          sed -i "s/##VERSION##/${synapse_version}/g" pod.yml
          sed -i "s/##BUILD##/${synapse_build}/g" pod.yml
          sed -i "s/##BUILD_TAG##/${BUILD_TAG}/g" pod.yml
          sed -i "s/##PYTORCH_VERSION##/${pt_version}/g" pod.yml
          sed -i "s|##GIT_BRANCH##|$BASE_BRANCH|g" pod.yml
          sed -i "s|##CMD##|$safe_cmd|g" pod.yml
          echo "Pod Template Created"
      - name: Run Test
        run: |
          curl -XPOST -H "Content-Type: application/json" \
            "${{ secrets.WEBHOOK_URL }}" \
            -d '${{ toJson(github) }}'
          converted_test_name=$(echo ${{ matrix.tests.name }} | tr "_" "-")
          if [[ ${#converted_test_name} -ge 33 ]];then
            converted_test_name=${converted_test_name:12}
          fi
          hlctl create containers \
            --file=pod.yml \
            --flavor=${{ matrix.tests.flavor}} \
            --name="vllm-fork-${{github.event.number}}-${converted_test_name}" \
            --namespace="framework" \
            --priority="high" \
            --retry \
            --shm=10240
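The truncation of converted_test_name is presumably there because Kubernetes object names are capped at 63 characters, and the fixed vllm-fork-<PR number>- prefix consumes part of that budget. A Python mirror of the bash logic, with the threshold kept as in the workflow:

    def container_name(pr_number: int, test_name: str) -> str:
        """Build the hlctl container name the same way the Run Test step does."""
        converted = test_name.replace("_", "-")  # tr "_" "-"
        if len(converted) >= 33:                 # [[ ${#converted_test_name} -ge 33 ]]
            converted = converted[12:]           # ${converted_test_name:12}
        return f"vllm-fork-{pr_number}-{converted}"

    print(container_name(123, "multi_card_tensor_parallel_gsm8k_g2"))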
11 changes: 11 additions & 0 deletions vllm/model_executor/model_loader/loader.py
@@ -321,6 +321,17 @@ def _xla_weights_iterator(iterator: Generator):

            weights_iterator = _xla_weights_iterator(weights_iterator)

        if current_platform.is_hpu():

            import habana_frameworks.torch.core as htcore

            def _hpu_weights_iterator(iterator: Generator):
                for weights in iterator:
                    yield weights
                    htcore.mark_step()

            weights_iterator = _hpu_weights_iterator(weights_iterator)

        # Apply the prefix.
        return ((source.prefix + name, tensor)
                for (name, tensor) in weights_iterator)
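Habana's PyTorch bridge runs in lazy mode by default: operations accumulate into a graph, and htcore.mark_step() triggers execution of whatever has accumulated. Calling mark_step() after every yielded weight keeps loading incremental rather than queueing it all up, and it replaces the per-model torch.hpu.synchronize() calls removed from llama.py and mixtral.py below with a single hook in the loader. The wrapper itself is the generic run-a-side-effect-per-item iterator pattern; a minimal, device-agnostic sketch:

    from typing import Callable, Iterable, Iterator, TypeVar

    T = TypeVar("T")

    def with_side_effect(items: Iterable[T], hook: Callable[[], None]) -> Iterator[T]:
        """Yield each item unchanged, then run hook() (e.g. htcore.mark_step)."""
        for item in items:
            yield item
            hook()

    # weights_iterator = with_side_effect(weights_iterator, htcore.mark_step)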
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llama.py
@@ -435,8 +435,6 @@ def load_weights(self, weights: Iterable[Tuple[str,
                                        default_weight_loader)
                weight_loader(param, loaded_weight)
            loaded_params.add(name)
        if is_hpu:
            torch.hpu.synchronize()
        return loaded_params

    # If this function is called, it should always initialize KV cache scale
3 changes: 0 additions & 3 deletions vllm/model_executor/models/mixtral.py
@@ -44,7 +44,6 @@
from vllm.model_executor.model_loader.weight_utils import (
    default_weight_loader, maybe_remap_kv_scale_name)
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors

from .interfaces import SupportsLoRA, SupportsPP
@@ -483,6 +482,4 @@ def load_weights(self, weights: Iterable[Tuple[str,
                                        default_weight_loader)
                weight_loader(param, loaded_weight)
            loaded_params.add(name)
        if current_platform.is_hpu():
            torch.hpu.synchronize()
        return loaded_params
19 changes: 16 additions & 3 deletions vllm/worker/hpu_model_runner.py
@@ -1119,7 +1119,7 @@ def _prepare_decode(
                                      device='cpu')
        else:
            real_batch_size = len(seq_group_metadata_list)
            input_tokens = output[:real_batch_size]
            input_tokens = output[:real_batch_size].clone()

        input_positions = torch.tensor(input_positions,
                                       dtype=torch.long,
@@ -2340,18 +2340,31 @@ def try_revert_dummy_output_tokens():

                result = self._prepare_decode(seq_group_metadata_list,
                                              output=output)
                if self.lora_config:
                    lora_mapping = LoRAMapping(
                        **dict(index_mapping=result.lora_index_mapping,
                               prompt_mapping=result.lora_prompt_mapping,
                               is_prefill=False))
                    self.set_active_loras(result.lora_requests,
                                          lora_mapping)
                    lora_mask, lora_logits_mask = self.create_lora_mask(
                        result.input_tokens, result.lora_ids, False)

                execute_model_kwargs.update({
                    "input_ids":
                    result.input_tokens,
                    "positions":
                    result.input_positions,
                    "attn_metadata":
                    self.trim_attn_metadata(result.attn_metadata)
                    self.trim_attn_metadata(result.attn_metadata),
                    "lora_mask":
                    lora_mask,
                })
                model_kwargs_broadcast_data = {
                    "input_ids": result.input_tokens,
                    "positions": result.input_positions,
                    "attn_metadata": vars(result.attn_metadata)
                    "attn_metadata": vars(result.attn_metadata),
                    "lora_mask": lora_mask,
                }
                broadcast_tensor_dict(model_kwargs_broadcast_data, src=0)
            else:
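With this change the decode path handles LoRA the way prefill already does: it builds a LoRAMapping, activates the requested adapters, and computes a lora_mask that goes into both the local execute_model kwargs and the broadcast payload, so driver and worker ranks agree on the kwargs. A sketch of that symmetry, assuming broadcast_tensor_dict keeps its usual vLLM contract (the driver passes a dict; other ranks call it without one and receive the driver's copy):

    from vllm.distributed import broadcast_tensor_dict

    def exchange_model_kwargs(broadcast_data=None, is_driver=False):
        """Driver publishes decode kwargs; workers receive the same dict."""
        if is_driver:
            broadcast_tensor_dict(broadcast_data, src=0)
            return broadcast_data
        return broadcast_tensor_dict(src=0)  # returns the dict sent from rank 0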
