Merge pull request #12 from longtermrisk/bugfix/sft-trainer

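Fix sft_trainer bug (pass `trainer_kwargs` to `SFTTrainer` instead of the undefined-looking `trainer`); stream run logs line by line instead of buffering them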
longtermrisk · Feb 12, 2025 · 3b5acc1
2 parents 73d38e5 + 92596bc
commit 3b5acc1
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 6 deletions.
diff --git a/openweights/worker/main.py b/openweights/worker/main.py
@@ -342,14 +342,16 @@ def _execute_job(self, job):
                         stderr=subprocess.STDOUT,
                         cwd=tmp_dir, 
                         env=env,
-                        preexec_fn=os.setsid  # Allow us to send signals to the process group
+                        preexec_fn=os.setsid,  # Allow us to send signals to the process group
+                        bufsize=1,  # Line buffered
+                        universal_newlines=True  # Text mode
                     )
 
                     # Stream logs to both file and stdout
-                    for line in iter(self.current_process.stdout.readline, b''):
-                        decoded = line.decode().rstrip('\n')
-                        print(decoded)
-                        log_file.write(decoded + '\n')
+                    for line in iter(self.current_process.stdout.readline, ''):
+                        print(line.rstrip('\n'), flush=True)  # Immediate stdout flush
+                        log_file.write(line)
+                        log_file.flush()  # Force immediate write to file
 
                     self.current_process.wait()
 

diff --git a/openweights/worker/sft.py b/openweights/worker/sft.py
@@ -99,7 +99,7 @@ def apply_chat_template(examples):
         instruction_part, response_part = get_instruct_response_part(tokenizer)
         trainer_kwargs['data_collator'] = DataCollatorForSeq2Seq(tokenizer = tokenizer)
         trainer = train_on_responses_only(
-            SFTTrainer(**trainer),
+            SFTTrainer(**trainer_kwargs),
             instruction_part=instruction_part,
             response_part=response_part
         )