Commit

Merge branch 'main' into ifuUpdate127
Cemberk authored Jan 31, 2025
2 parents 7e56643 + be867d5 commit 8105539
Showing 12 changed files with 21 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/self-push-amd-mi210-caller.yml
@@ -25,4 +25,4 @@ jobs:
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
-secrets: inherit
+secrets: inherit
2 changes: 1 addition & 1 deletion .github/workflows/self-push-amd-mi250-caller.yml
@@ -25,4 +25,4 @@ jobs:
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
-secrets: inherit
+secrets: inherit
2 changes: 1 addition & 1 deletion src/transformers/configuration_utils.py
@@ -187,7 +187,7 @@ class PretrainedConfig(PushToHubMixin):
Whether the model should use legacy TensorFlow losses. Legacy losses have variable output shapes and may
not be XLA-compatible. This option is here for backward compatibility and will be removed in Transformers
v5.
-loss_type (`str`, *optional*):
+loss_type (`str`, *optional*):
The type of loss that the model should use. It should be in `LOSS_MAPPING`'s keys, otherwise the loss will
be automatically inferred from the model architecture.
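
For context, loss_type is just an attribute on the config object. A minimal usage sketch (assuming "ForCausalLM" is one of LOSS_MAPPING's keys in the installed version; the exact key set lives in transformers' loss utilities):

    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("gpt2")
    # Must name a key in LOSS_MAPPING; left unset, the loss is inferred
    # from the model architecture as the docstring above describes.
    config.loss_type = "ForCausalLM"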
3 changes: 3 additions & 0 deletions src/transformers/trainer.py
@@ -2516,6 +2516,9 @@ def _inner_training_loop(
if (self.state.global_step == args.stable_train_warmup_steps):
    start_train_stable_time = time.time()

+if (self.state.global_step == args.stable_train_warmup_steps):
+    start_train_stable_time = time.time()
+
if self.args.include_num_input_tokens_seen:
main_input_name = getattr(self.model, "main_input_name", "input_ids")
if main_input_name not in inputs:
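
The three added lines stamp the wall-clock time once global_step reaches args.stable_train_warmup_steps, so steady-state throughput can be measured without the warmup iterations. A self-contained sketch of the pattern (train_one_step and the step counts are placeholders, not Trainer internals):

    import time

    warmup_steps = 5    # stands in for args.stable_train_warmup_steps
    total_steps = 100
    start_stable = None

    def train_one_step():
        pass  # stand-in for the real forward/backward/optimizer step

    for step in range(total_steps):
        if step == warmup_steps:
            start_stable = time.time()  # start timing only after warmup
        train_one_step()

    stable_time = time.time() - start_stable
    print(f"steady-state time for {total_steps - warmup_steps} steps: {stable_time:.3f}s")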
4 changes: 2 additions & 2 deletions tests/models/dbrx/test_modeling_dbrx.py
@@ -327,12 +327,12 @@ class DbrxModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
test_headmasking = False
test_pruning = False

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_generate_with_static_cache(self):
super().test_generate_with_static_cache()
pass

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_generate_from_inputs_embeds_with_static_cache(self):
super().test_generate_from_inputs_embeds_with_static_cache()
pass
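
Both tests now also skip on gfx942 (MI300-series). skipIfRocm is a helper from the ROCm fork's test utilities, not stock transformers; a hypothetical sketch of how such an arch-conditional skip decorator could look (the arch-detection helper is assumed, not the fork's actual implementation):

    import functools
    import unittest

    def _current_rocm_arch():
        # Placeholder: a real version might read
        # torch.cuda.get_device_properties(0).gcnArchName on ROCm builds.
        return "gfx942"

    def skipIfRocm(func=None, arch=None):
        # Supports both the bare @skipIfRocm form and @skipIfRocm(arch=[...]).
        archs = [arch] if isinstance(arch, str) else (arch or [])

        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                if not archs or _current_rocm_arch() in archs:
                    raise unittest.SkipTest(f"skipped on ROCm arch {archs or '(any)'}")
                return fn(*args, **kwargs)
            return wrapper

        return decorator(func) if callable(func) else decorator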
2 changes: 1 addition & 1 deletion tests/models/falcon_mamba/test_modeling_falcon_mamba.py
@@ -294,7 +294,7 @@ def test_config(self):
self.config_tester.run_common_tests()

@require_torch_multi_gpu
-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_multi_gpu_data_parallel_forward(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

2 changes: 1 addition & 1 deletion tests/models/gpt_neox/test_modeling_gpt_neox.py
@@ -287,7 +287,7 @@ class GPTNeoXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
def test_flex_attention_with_grads(self):
super().test_flex_attention_with_grads()

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_generate_with_static_cache(self):
super().test_generate_with_static_cache()
pass
4 changes: 2 additions & 2 deletions tests/models/idefics/test_modeling_idefics.py
@@ -597,12 +597,12 @@ class IdeficsForVisionText2TextTest(IdeficsModelTest, GenerationTesterMixin, uni
all_generative_model_classes = (IdeficsForVisionText2Text,) if is_torch_available() else ()


-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_generate_from_inputs_embeds_with_static_cache(self):
super().test_generate_from_inputs_embeds_with_static_cache()
pass

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_generate_with_static_cache(self):
super().test_generate_with_static_cache()
pass
4 changes: 2 additions & 2 deletions tests/models/mixtral/test_modeling_mixtral.py
@@ -320,12 +320,12 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
fx_compatible = False # Broken by attention refactor cc @Cyrilvallez

def test_generate_from_inputs_embeds_with_static_cache(self):
-if rocmUtils.is_rocm_skippable(arch='gfx90a'):
+if rocmUtils.is_rocm_skippable(arch=['gfx90a','gfx942']):
torch._dynamo.config.capture_dynamic_output_shape_ops = True
super().test_generate_from_inputs_embeds_with_static_cache()

def test_generate_with_static_cache(self):
-if rocmUtils.is_rocm_skippable(arch='gfx90a'):
+if rocmUtils.is_rocm_skippable(arch=['gfx90a','gfx942']):
torch._dynamo.config.capture_dynamic_output_shape_ops = True
super().test_generate_with_static_cache()
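
Rather than skipping outright, the Mixtral tests (and the OlmoE tests below) enable a torch._dynamo option on the listed ROCm archs and still run: capture_dynamic_output_shape_ops lets torch.compile trace ops whose output shapes are only known at runtime, which MoE token routing produces. The pattern in isolation (on_listed_arch stands in for rocmUtils.is_rocm_skippable, a fork-side helper):

    import torch

    def run_with_rocm_workaround(test_fn, on_listed_arch: bool):
        if on_listed_arch:
            # Let torch.compile graph-capture ops with data-dependent
            # output shapes instead of raising during tracing.
            torch._dynamo.config.capture_dynamic_output_shape_ops = True
        test_fn()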

4 changes: 2 additions & 2 deletions tests/models/olmoe/test_modeling_olmoe.py
@@ -306,13 +306,13 @@ class OlmoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
model_split_percents = [0.5, 0.7, 0.8]

def test_generate_with_static_cache(self):
-if rocmUtils.is_rocm_skippable(arch=['gfx1201','gfx90a']):
+if rocmUtils.is_rocm_skippable(arch=['gfx1201','gfx90a','gfx942']):
torch._dynamo.config.capture_dynamic_output_shape_ops = True
super().test_generate_with_static_cache()
pass

def test_generate_from_inputs_embeds_with_static_cache(self):
-if rocmUtils.is_rocm_skippable(arch=['gfx1201','gfx90a']):
+if rocmUtils.is_rocm_skippable(arch=['gfx1201','gfx90a','gfx942']):
torch._dynamo.config.capture_dynamic_output_shape_ops = True
super().test_generate_from_inputs_embeds_with_static_cache()
pass
6 changes: 3 additions & 3 deletions tests/models/roberta/test_modeling_roberta.py
@@ -397,17 +397,17 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
fx_compatible = True
model_split_percents = [0.5, 0.8, 0.9]

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_cpu_offload(self):
super().test_cpu_offload()
pass

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_disk_offload_bin(self):
super().test_disk_offload_bin()
pass

-@skipIfRocm(arch=['gfx1201','gfx90a'])
+@skipIfRocm(arch=['gfx1201','gfx90a','gfx942'])
def test_disk_offload_safetensors(self):
super().test_disk_offload_safetensors()
pass
2 changes: 2 additions & 0 deletions tests/test_modeling_common.py
@@ -3052,6 +3052,8 @@ def test_inputs_embeds_matches_input_ids(self):
)[0]
torch.testing.assert_close(out_embeds, out_ids)


@skipIfRocm
@require_non_xpu
@skipIfRocm
@require_torch_multi_gpu
