Enhance LTX video generation configuration and logging

- Added detailed documentation and examples to the LTXVideoSettings class in ltx_settings.py, improving clarity on configuration options and usage.
- Updated ltx_api.log to include new server start logs and an error message indicating a failure to start due to worker issues.
- Updated binary cache files for ltx_settings and other scripts to reflect recent changes.

These improvements aim to enhance user understanding and troubleshooting capabilities for the LTX video generation server.
VikramxD · Nov 28, 2024 · 0a81513 · 0a81513
1 parent 8967792
commit 0a81513
Show file tree

Hide file tree

Showing 9 changed files with 204 additions and 52 deletions.
diff --git a/api/logs/ltx_api.log b/api/logs/ltx_api.log
@@ -12,3 +12,14 @@
 2024-11-26 11:27:36.160 | INFO     | __main__:main:347 - Starting LTX video generation server on port 8000
 2024-11-26 11:31:41.717 | INFO     | __main__:main:343 - Starting LTX video generation server on port 8000
 2024-11-26 11:34:03.736 | INFO     | __main__:main:343 - Starting LTX video generation server on port 8000
+2024-11-28 18:16:26.552 | INFO     | __main__:main:343 - Starting LTX video generation server on port 8000
+2024-11-28 18:33:29.617 | ERROR    | __main__:main:347 - Server failed to start: One or more workers failed to start. Shutting down LitServe
+2024-11-28 18:35:14.855 | INFO     | __main__:main:343 - Starting LTX video generation server on port 8000
+2024-11-28 18:42:27.070 | INFO     | __main__:main:343 - Starting LTX video generation server on port 8000
+2024-11-28 18:56:53.323 | INFO     | __main__:main:344 - Starting LTX video generation server on port 8000
+2024-11-28 19:03:09.065 | INFO     | __main__:main:348 - Starting LTX video generation server on port 8000
+2024-11-28 19:03:54.590 | INFO     | __main__:main:348 - Starting LTX video generation server on port 8000
+2024-11-28 19:05:52.633 | INFO     | __main__:main:348 - Starting LTX video generation server on port 8000
+2024-11-28 19:09:10.161 | INFO     | __main__:main:348 - Starting LTX video generation server on port 8000
+2024-11-28 19:11:22.845 | INFO     | __main__:main:356 - Starting LTX video generation server on port 8000
+2024-11-28 19:13:03.394 | INFO     | __main__:main:356 - Starting LTX video generation server on port 8000
diff --git a/api/ltx_serve.py b/api/ltx_serve.py
@@ -20,7 +20,8 @@
 
 from configs.ltx_settings import LTXVideoSettings
 from scripts.ltx_inference import LTXInference
-from scripts import mp4_to_s3_json 
+from scripts.mp4_to_s3_json import mp4_to_s3_json 
+import torch
 
 # Set up prometheus multiprocess mode
 os.environ["PROMETHEUS_MULTIPROC_DIR"] = "/tmp/prometheus_multiproc_dir"
@@ -206,6 +207,20 @@ def predict(self, inputs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
                         logger.info(f"Starting generation for prompt: {generation_request.prompt}")
                         self.engine.generate()
 
+                        # Verify file exists and is readable before uploading
+                        if not temp_video_path.exists():
+                            raise FileNotFoundError(f"Generated video file not found at {temp_video_path}")
+
+                        if not os.access(temp_video_path, os.R_OK):
+                            raise PermissionError(f"Generated video file is not readable at {temp_video_path}")
+
+                        # Upload to S3 with explicit file opening
+                        with open(temp_video_path, 'rb') as video_file:
+                            s3_response = mp4_to_s3_json(
+                                video_file,
+                                f"ltx_{int(time.time())}.mp4"
+                            )
+
                         end_time = time.time()
                         generation_time = end_time - start_time
                         self.log("inference_time", generation_time)
@@ -216,12 +231,6 @@ def predict(self, inputs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
                             "gpu_reserved": torch.cuda.memory_reserved() if torch.cuda.is_available() else 0
                         }
 
-                        # Upload to S3
-                        s3_response = mp4_to_s3_json(
-                            temp_video_path,
-                            f"ltx_{int(time.time())}.mp4"
-                        )
-
                         result = {
                             "status": "success",
                             "video_id": s3_response["video_id"],
@@ -236,10 +245,12 @@ def predict(self, inputs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
                         logger.info(f"Generation completed successfully")
 
                 except Exception as e:
-                    logger.error(f"Error in generation: {e}")
+                    import traceback
+                    logger.error(f"Error in generation: {e}\n{traceback.format_exc()}")
                     results.append({
                         "status": "error",
-                        "error": str(e)
+                        "error": str(e),
+                        "traceback": traceback.format_exc()
                     })
 
                 finally:
@@ -248,10 +259,12 @@ def predict(self, inputs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
                         torch.cuda.empty_cache()
 
         except Exception as e:
-            logger.error(f"Error in predict method: {e}")
+            import traceback
+            logger.error(f"Error in predict method: {e}\n{traceback.format_exc()}")
             results.append({
                 "status": "error",
-                "error": str(e)
+                "error": str(e),
+                "traceback": traceback.format_exc()
             })
 
         return results if results else [{"status": "error", "error": "No results generated"}]

diff --git a/configs/__pycache__/ltx_settings.cpython-311.pyc b/configs/__pycache__/ltx_settings.cpython-311.pyc
diff --git a/configs/ltx_settings.py b/configs/ltx_settings.py
@@ -1,7 +1,33 @@
 """
 Configuration module for LTX video generation model with HuggingFace Hub integration.
+
+This module provides a comprehensive configuration system for the LTX video generation model,
+handling model downloads, parameter validation, and settings management. It uses Pydantic
+for robust configuration validation and type checking.
+
+Key Features:
+    - Automatic model download from HuggingFace Hub
+    - Configurable video generation parameters
+    - Input/output path management
+    - Model checkpoint verification
+    - Device and precision settings
+    - Prompt configuration for generation
+
+Example:
+    >>> settings = LTXVideoSettings(
+    ...     model_id="Lightricks/LTX-Video",
+    ...     prompt="A beautiful sunset over the ocean",
+    ...     num_frames=60
+    ... )
+    >>> settings.download_model()
+    >>> unet_path, vae_path, scheduler_path = settings.get_model_paths()
 """
 
+# Constants
+MAX_HEIGHT: int = 720
+MAX_WIDTH: int = 1280
+MAX_NUM_FRAMES: int = 257
+
 from typing import Optional, Union
 from pathlib import Path
 import os
@@ -14,6 +40,39 @@
 class LTXVideoSettings(BaseSettings):
     """
     Configuration settings for LTX video generation model.
+    
+    This class manages all configuration aspects of the LTX video generation pipeline,
+    including model paths, generation parameters, and output settings. It provides
+    validation and automatic type conversion for all settings.
+
+    Attributes:
+        model_id (str): HuggingFace model identifier (default: "Lightricks/LTX-Video")
+        ckpt_dir (Path): Directory for model checkpoints
+        use_auth_token (Optional[str]): HuggingFace authentication token
+        input_video_path (Optional[Path]): Path to input video file
+        input_image_path (Optional[Path]): Path to input image file
+        output_path (Optional[Path]): Directory for output files
+        seed (int): Random seed for reproducible generation
+        num_inference_steps (int): Number of denoising steps (range: 1-100)
+        guidance_scale (float): Classifier-free guidance scale (range: 1.0-20.0)
+        height (int): Output video height in pixels (range: 256-720)
+        width (int): Output video width in pixels (range: 256-1280)
+        num_frames (int): Number of frames to generate (range: 1-257)
+        frame_rate (int): Output video frame rate (range: 1-60)
+        num_images_per_prompt (int): Number of videos per prompt (range: 1-4)
+        bfloat16 (bool): Whether to use bfloat16 precision
+        device (str): Device for inference ('cuda' or 'cpu')
+        prompt (Optional[str]): Generation prompt text
+        negative_prompt (str): Negative prompt for undesired features
+
+    Example:
+        >>> settings = LTXVideoSettings(
+        ...     prompt="A serene mountain landscape",
+        ...     num_frames=60,
+        ...     height=480,
+        ...     width=704
+        ... )
+        >>> settings.download_model()
     """
 
     # Model Settings
@@ -33,27 +92,34 @@ class LTXVideoSettings(BaseSettings):
 
     input_video_path: Optional[Path] = Field(None, description="Path to input video file")
     input_image_path: Optional[Path] = Field(None, description="Path to input image file")
-    output_path: Optional[Path] = Field(
-        default_factory=lambda: Path("outputs"),
-        description="Path to save output files"
-    )
+    output_path: Optional[Path] = Field(None, description="Path to save output files")
 
     # Generation Settings
     seed: int = Field(171198, description="Random seed for generation")
     num_inference_steps: int = Field(40, ge=1, le=100, description="Number of inference steps")
     guidance_scale: float = Field(3.0, ge=1.0, le=20.0, description="Guidance scale")
 
     # Video Parameters
-    height: int = Field(480, ge=256, le=720, description="Height of output video frames")
-    width: int = Field(704, ge=256, le=1280, description="Width of output video frames")
-    num_frames: int = Field(121, ge=1, le=257, description="Number of frames to generate")
-    frame_rate: int = Field(25, ge=1, le=60, description="Frame rate of output video")
-    num_images_per_prompt: int = Field(
-        1,
+    height: int = Field(
+        480,
+        ge=256,
+        le=MAX_HEIGHT,
+        description="Height of output video frames"
+    )
+    width: int = Field(
+        704,
+        ge=256,
+        le=MAX_WIDTH,
+        description="Width of output video frames"
+    )
+    num_frames: int = Field(
+        121,
         ge=1,
-        le=4,
-        description="Number of videos to generate per prompt"
+        le=MAX_NUM_FRAMES,
+        description="Number of frames to generate"
     )
+    frame_rate: int = Field(25, ge=1, le=60, description="Frame rate of output video")
+    num_images_per_prompt: int = Field(1, ge=1, le=4, description="Number of videos per prompt")
 
     # Model Settings
     bfloat16: bool = Field(False, description="Use bfloat16 precision")
@@ -75,8 +141,21 @@ def download_model(self) -> Path:
         """
         Download model from HuggingFace Hub if not already present.
         
+        This method checks for existing model files, downloads missing components,
+        and verifies the integrity of the downloaded files. It handles authentication
+        for private models using the provided token.
+
         Returns:
             Path: Path to the model checkpoint directory
+
+        Raises:
+            ValueError: If model download is incomplete or verification fails
+            Exception: If download encounters network or permission errors
+
+        Example:
+            >>> settings = LTXVideoSettings()
+            >>> model_path = settings.download_model()
+            >>> print(f"Model downloaded to {model_path}")
         """
         try:
             logger.info(f"Checking for model in {self.ckpt_dir}")
@@ -112,10 +191,24 @@ def download_model(self) -> Path:
 
     def _verify_model_files(self) -> bool:
         """
-        Verify that all required model files are present.
+        Verify that all required model files are present in the checkpoint directory.
         
+        Checks for the existence of essential model components including the UNet,
+        VAE, and scheduler configurations and weights.
+
         Returns:
-            bool: True if all required files are present
+            bool: True if all required files are present and accessible
+
+        Note:
+            Required directory structure:
+            - unet/
+                - config.json
+                - unet_diffusion_pytorch_model.safetensors
+            - vae/
+                - config.json
+                - vae_diffusion_pytorch_model.safetensors
+            - scheduler/
+                - scheduler_config.json
         """
         required_dirs = ['unet', 'vae', 'scheduler']
         required_files = {
@@ -156,7 +249,20 @@ class Config:
         validate_assignment = True
 
     def get_model_paths(self) -> tuple[Path, Path, Path]:
-        """Get paths to model components after ensuring model is downloaded."""
+        """
+        Get paths to model components after ensuring model is downloaded.
+        
+        This method ensures the model is downloaded before returning paths to
+        the essential model components.
+
+        Returns:
+            tuple[Path, Path, Path]: Paths to (unet_dir, vae_dir, scheduler_dir)
+
+        Example:
+            >>> settings = LTXVideoSettings()
+            >>> unet, vae, scheduler = settings.get_model_paths()
+            >>> print(f"UNet path: {unet}")
+        """
         # Ensure model is downloaded
         self.download_model()
 
@@ -167,13 +273,33 @@ def get_model_paths(self) -> tuple[Path, Path, Path]:
         return unet_dir, vae_dir, scheduler_dir
 
     def get_output_resolution(self) -> tuple[int, int]:
-        """Get the output resolution as a tuple of (height, width)."""
+        """
+        Get the output resolution as a tuple of (height, width).
+        
+        Returns:
+            tuple[int, int]: Video dimensions as (height, width)
+
+        Example:
+            >>> settings = LTXVideoSettings(height=480, width=704)
+            >>> h, w = settings.get_output_resolution()
+            >>> print(f"Output resolution: {h}x{w}")
+        """
         return (self.height, self.width)
 
     def get_padded_num_frames(self) -> int:
         """
         Calculate the padded number of frames.
-        Ensures the number of frames is compatible with model requirements.
+        
+        Ensures the number of frames is compatible with model requirements by
+        padding to the nearest multiple of 8 frames if necessary.
+
+        Returns:
+            int: Padded frame count that's compatible with the model
+
+        Example:
+            >>> settings = LTXVideoSettings(num_frames=30)
+            >>> padded = settings.get_padded_num_frames()
+            >>> print(f"Padded frame count: {padded}")  # Will be 32
         """
         # Common video models often require frame counts to be multiples of 8
         FRAME_PADDING = 8

diff --git a/ltx b/ltx
diff --git a/scripts/__pycache__/ltx_inference.cpython-311.pyc b/scripts/__pycache__/ltx_inference.cpython-311.pyc
diff --git a/scripts/__pycache__/mp4_to_s3_json.cpython-311.pyc b/scripts/__pycache__/mp4_to_s3_json.cpython-311.pyc
diff --git a/scripts/__pycache__/s3_manager.cpython-311.pyc b/scripts/__pycache__/s3_manager.cpython-311.pyc