Skip to content

Commit

Permalink
Merge pull request #6 from efeslab/fix-copy
Browse files: browse the repository at this point in the history
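Fix non-blocking transfer to CPU: replace the `.to("cpu", non_blocking=True)` calls with plain `.to("cpu")` calls for the expert module and for the inputs passed to `run_expert_at_cpu`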
  • Loading branch information
kamahori authored Apr 28, 2024
2 parents 83c16a3 + f2c111c commit 227715b
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions src/fiddler/mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,9 +543,7 @@ def mixtral_forward(self, input_ids, position_ids, is_decode):
)

if not is_cuda:
experts[i_expert] = experts[i_expert].to(
"cpu", non_blocking=True
)
experts[i_expert] = experts[i_expert].to("cpu")

# end of one expert

Expand Down Expand Up @@ -629,10 +627,8 @@ def mixtral_forward(self, input_ids, position_ids, is_decode):
current_state = self.run_expert_at_cpu(
i_layer,
i_expert,
current_state.to("cpu", non_blocking=True),
routing_weights[top_2_list, idx_list, None].to(
"cpu", non_blocking=True
),
current_state.to("cpu"),
routing_weights[top_2_list, idx_list, None].to("cpu"),
)
inps_after_experts.index_add_(
0,
Expand Down

0 comments on commit 227715b

Please sign in to comment.