API

Modules:

Name           Description
config
container
execution
factories
interfaces
main           Main module for the Whisper TikTok application.
processors
repositories
services
strategies
utils
voice_manager

Classes:

Name                    Description
Application             Main application class responsible for orchestrating the video creation pipeline.
CommandExecutor         Executes external commands with error handling.
Container               IoC container for dependency injection.
FFmpegService           Service for FFmpeg operations.
TTSService              Text-to-Speech service built on the edge-tts engine.
TranscriptionService    Service for transcribing audio using Whisper.
VideoCreatorFactory     Factory for creating video processor instances.
VideoDownloaderService  YouTube video downloader using yt-dlp.
VoicesManager           Wrapper for edge_tts VoicesManager.

Functions:

Name          Description
create        Create videos from text content.
list_voices   List available TTS voices.
rgb_to_bgr    Convert RGB hex to BGR hex.
setup_logger  Configure and return a logger instance.

Application

Main application class responsible for orchestrating the video creation pipeline. This class loads video data from a JSON file, builds configuration from a container, and processes each video asynchronously using a factory-created processor.

Attributes:

Name       Type       Description
container  Container  Dependency injection container.
logger     Logger     Logger instance for logging operations.

Methods:

Name  Description
run   Main method to run the video creation pipeline.

Source code in whisper_tiktok/main.py
class Application:
    """Main application class responsible for orchestrating the video creation pipeline.
    This class loads video data from a JSON file, builds configuration from a container,
    and processes each video asynchronously using a factory-created processor.

    Attributes:
        container (Container): Dependency injection container.
        logger (logging.Logger): Logger instance for logging operations.

    Methods:
        run(): Main method to run the video creation pipeline.
    """

    def __init__(self, container: Container, logger: logging.Logger):
        self.container = container
        self.logger = logger
        self.factory = VideoCreatorFactory(container)

    def _load_video_data(self) -> list[dict]:
        """Load video data from video.json file.
        Returns:
            List of video data dictionaries.
        """
        video_json_path = Path.cwd() / "video.json"

        try:
            data: list[dict] = json.loads(video_json_path.read_text(encoding="utf-8"))
            self.logger.info(f"Loaded {len(data)} videos from video.json")
            return data
        except FileNotFoundError:
            self.logger.error(f"video.json not found at {video_json_path}")
            raise
        except json.JSONDecodeError as e:
            self.logger.error(f"Invalid JSON in video.json: {e}")
            raise

    def _build_config(self) -> dict:
        """Build configuration from container.

        Returns:
            Configuration dictionary.
        """
        return dict(self.container.config())

    async def run(self) -> None:
        """Run the video creation pipeline.

        Returns:
            None
        """

        # Load video data
        video_data = self._load_video_data()
        config = self._build_config()

        # Process each video
        for idx, video in enumerate(video_data, 1):
            self.logger.info(
                f"Processing video {idx}/{len(video_data)}: {video.get('series', 'Unknown')}"
            )
            await self._process_video(video, config)

    async def _process_video(self, video: dict, config: dict) -> None:
        """Process a single video.

        Args:
            video (dict): Video data dictionary.
            config (dict): Configuration dictionary.

        Returns:
            None
        """

        processor = self.factory.create_processor(video, config)

        try:
            result = await processor.process()
            self.logger.info(f"✓ Video created: {result.output_path}")
        except Exception:
            self.logger.exception(
                f"✗ Failed to process video: {video.get('series', 'Unknown')}"
            )
            raise

run() async

Run the video creation pipeline.

Returns:

None

Source code in whisper_tiktok/main.py
async def run(self) -> None:
    """Run the video creation pipeline.

    Returns:
        None
    """

    # Load video data
    video_data = self._load_video_data()
    config = self._build_config()

    # Process each video
    for idx, video in enumerate(video_data, 1):
        self.logger.info(
            f"Processing video {idx}/{len(video_data)}: {video.get('series', 'Unknown')}"
        )
        await self._process_video(video, config)
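
A minimal driver for the class, assuming a video.json in the working directory and a configuration dict shaped like the one built by the create command further down; the import paths follow the module layout shown in the source listings.

import asyncio
import logging

from whisper_tiktok.container import Container
from whisper_tiktok.main import Application

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("whisper_tiktok")

container = Container()
container.config.from_dict({"model": "base", "tts_voice": "en-US-ChristopherNeural"})

app = Application(container, logger)
asyncio.run(app.run())  # processes every entry in video.json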

CommandExecutor

Executes external commands with error handling.

Parameters:

Name    Type    Description                   Default
logger  Logger  Logger instance for logging.  required

Methods:

Name     Description
execute  Execute command and return result.

Source code in whisper_tiktok/execution/command_executor.py
class CommandExecutor:
    """Executes external commands with error handling.

    Args:
        logger: Logger instance for logging.

    """

    def __init__(self, logger: Logger):
        self.logger = logger

    def execute(
        self, command: str, cwd: Path | None = None, timeout: int | None = None
    ) -> ExecutionResult:
        """Execute command and return result.

        Args:
            command: Command to execute.
            cwd: Working directory for command execution.
            timeout: Timeout in seconds for command execution.

        Returns:
            ExecutionResult containing return code, stdout, and stderr.
        """

        self.logger.debug(f"Executing: {command}")
        try:
            with subprocess.Popen(
                command,
                cwd=cwd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            ) as process:
                stdout, stderr = process.communicate(timeout=timeout)
                result = ExecutionResult(
                    returncode=process.returncode, stdout=stdout, stderr=stderr
                )
            return result
        except subprocess.TimeoutExpired as exc:
            self.logger.error(f"Command timed out: {command}")
            raise CommandTimeoutError(f"Command timed out after {timeout}s") from exc
        except Exception as exc:
            self.logger.exception(f"Command execution failed: {command}")
            raise CommandExecutionError(str(exc)) from exc

execute(command, cwd=None, timeout=None)

Execute command and return result.

Parameters:

Name     Type         Description                                Default
command  str          Command to execute.                        required
cwd      Path | None  Working directory for command execution.   None
timeout  int | None   Timeout in seconds for command execution.  None

Returns:

Type             Description
ExecutionResult  ExecutionResult containing return code, stdout, and stderr.

Source code in whisper_tiktok/execution/command_executor.py
def execute(
    self, command: str, cwd: Path | None = None, timeout: int | None = None
) -> ExecutionResult:
    """Execute command and return result.

    Args:
        command: Command to execute.
        cwd: Working directory for command execution.
        timeout: Timeout in seconds for command execution.

    Returns:
        ExecutionResult containing return code, stdout, and stderr.
    """

    self.logger.debug(f"Executing: {command}")
    try:
        with subprocess.Popen(
            command,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        ) as process:
            stdout, stderr = process.communicate(timeout=timeout)
            result = ExecutionResult(
                returncode=process.returncode, stdout=stdout, stderr=stderr
            )
        return result
    except subprocess.TimeoutExpired as exc:
        self.logger.error(f"Command timed out: {command}")
        raise CommandTimeoutError(f"Command timed out after {timeout}s") from exc
    except Exception as exc:
        self.logger.exception(f"Command execution failed: {command}")
        raise CommandExecutionError(str(exc)) from exc
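
A short usage sketch; the command string and timeout are illustrative, and the executor hands the string straight to subprocess.Popen, exactly as the FFmpeg and yt-dlp services call it.

import logging

from whisper_tiktok.execution.command_executor import CommandExecutor

logger = logging.getLogger("whisper_tiktok")
executor = CommandExecutor(logger)

# ExecutionResult.success (documented below) is shorthand for returncode == 0.
result = executor.execute("ffmpeg -version", timeout=30)
if result.success:
    print(result.stdout.splitlines()[0])
else:
    print(f"command failed ({result.returncode}): {result.stderr}")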

Container

Bases: DeclarativeContainer

IoC container for dependency injection.

Source code in whisper_tiktok/container.py
class Container(containers.DeclarativeContainer):
    """IoC container for dependency injection."""

    config = providers.Configuration()

    # Path providers
    workspace_path = providers.Singleton(lambda: Path.cwd())

    media_path = providers.Factory(
        lambda workspace, uuid: workspace / "media" / uuid,
        workspace=workspace_path,
        uuid=providers.Dependency(),
    )

    output_path = providers.Factory(
        lambda workspace, uuid: workspace / "output" / uuid,
        workspace=workspace_path,
        uuid=providers.Dependency(),
    )

    # Service providers
    logger = providers.Singleton(lambda: logging.getLogger("whisper_tiktok"))

    command_executor = providers.Factory(CommandExecutor, logger=logger)

    ffmpeg_service = providers.Factory(
        FFmpegService, executor=command_executor, logger=logger
    )

    video_downloader = providers.Factory(
        VideoDownloaderService,
        executor=command_executor,
        logger=logger,
    )

    tts_service = providers.Factory(TTSService, logger=logger)

    transcription_service = providers.Factory(TranscriptionService, logger=logger)
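
A sketch of wiring the container, assuming the dependency_injector API the providers above come from; the config keys mirror the dict built by the create command.

from whisper_tiktok.container import Container

container = Container()
container.config.from_dict({"model": "base", "upload_tiktok": False})

# Factory providers resolve their dependencies on each call; the logger provider
# is a singleton, so every service shares the same "whisper_tiktok" logger.
logger = container.logger()
executor = container.command_executor()
ffmpeg = container.ffmpeg_service()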

FFmpegService

Service for FFmpeg operations.

Methods:

Name            Description
compose_video   Compose final video with background, audio, and subtitles.
get_media_info  Get media information using ffprobe.

Source code in whisper_tiktok/services/ffmpeg_service.py
class FFmpegService:
    """Service for FFmpeg operations."""

    def __init__(self, executor: CommandExecutor, logger: Logger):
        self.executor = executor
        self.logger = logger

    def _build_video_filters(self, subtitles: Path) -> str:
        return rf"crop=ih/16*9:ih,scale=w=1080:h=1920:flags=lanczos,gblur=sigma=2,ass={subtitles.as_posix()}"

    def _build_ffmpeg_command(
        self,
        background: Path,
        audio: Path,
        output: Path,
        start_time: int,
        duration: str,
        filters: str,
    ) -> str:
        return rf"ffmpeg -ss {start_time} -t {duration} -i {background.as_posix()} -i {audio.as_posix()} -map 0:v -map 1:a -filter:v {filters} -c:v libx264 -crf 23 -c:a aac -ac 2 -b:a 192K {output.as_posix()} -y -threads {os.cpu_count()}"

    def compose_video(
        self,
        background: Path,
        audio: Path,
        subtitles: Path,
        output: Path,
        start_time: int,
        duration: str,
    ) -> Path:
        """Compose final video with background, audio, and subtitles."""

        # Build filter complex
        filters = self._build_video_filters(subtitles)

        command = self._build_ffmpeg_command(
            background, audio, output, start_time, duration, filters
        )
        result = self.executor.execute(command)

        if result.returncode != 0:
            raise FFmpegError(f"Failed to compose video: {result.stderr}")

        return output

    def get_media_info(self, file_path: Path) -> MediaInfo:
        """Get media information using ffprobe."""
        command = f"ffprobe -v quiet -print_format json -show_format -show_streams {file_path.as_posix()}"
        result = self.executor.execute(command)

        if result.returncode != 0:
            raise FFmpegError(f"Failed to probe media: {result.stderr}")

        return MediaInfo.from_json(result)

compose_video(background, audio, subtitles, output, start_time, duration)

Compose final video with background, audio, and subtitles.

Source code in whisper_tiktok/services/ffmpeg_service.py
def compose_video(
    self,
    background: Path,
    audio: Path,
    subtitles: Path,
    output: Path,
    start_time: int,
    duration: str,
) -> Path:
    """Compose final video with background, audio, and subtitles."""

    # Build filter complex
    filters = self._build_video_filters(subtitles)

    command = self._build_ffmpeg_command(
        background, audio, output, start_time, duration, filters
    )
    result = self.executor.execute(command)

    if result.returncode != 0:
        raise FFmpegError(f"Failed to compose video: {result.stderr}")

    return output

get_media_info(file_path)

Get media information using ffprobe.

Source code in whisper_tiktok/services/ffmpeg_service.py
def get_media_info(self, file_path: Path) -> MediaInfo:
    """Get media information using ffprobe."""
    command = f"ffprobe -v quiet -print_format json -show_format -show_streams {file_path.as_posix()}"
    result = self.executor.execute(command)

    if result.returncode != 0:
        raise FFmpegError(f"Failed to probe media: {result.stderr}")

    return MediaInfo.from_json(result)
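
A usage sketch with illustrative paths; it assumes ffmpeg and ffprobe are on PATH and that the subtitle file is the ASS output of the transcription service.

import logging
from pathlib import Path

from whisper_tiktok.execution.command_executor import CommandExecutor
from whisper_tiktok.services.ffmpeg_service import FFmpegService

logger = logging.getLogger("whisper_tiktok")
ffmpeg = FFmpegService(executor=CommandExecutor(logger), logger=logger)

# Probe the downloaded background clip before composing.
info = ffmpeg.get_media_info(Path("media/background.mp4"))

# Crop/scale to 1080x1920, burn in the ASS subtitles, and mux the TTS audio.
final = ffmpeg.compose_video(
    background=Path("media/background.mp4"),
    audio=Path("media/narration.mp3"),
    subtitles=Path("media/subtitles.ass"),
    output=Path("output/final.mp4"),
    start_time=0,
    duration="60",
)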

TTSService

Bases: ITTSService

Text-to-Speech service built on the edge-tts engine.

Methods:

Name        Description
synthesize  Synthesize speech from text and save to output file.

Source code in whisper_tiktok/services/tts_service.py
class TTSService(ITTSService):
    """Text-to-Speech service using a hypothetical TTS engine."""

    def __init__(self, logger: Logger):
        self.logger = logger

    async def synthesize(
        self,
        text: str,
        output_file: Path,
        voice: str = "en-US-ChristopherNeural",
    ) -> None:
        """
        Synthesize speech from text and save to output file.

        Args:
            text (str): The text to be converted to speech.
            output_file (Path): The path to save the synthesized audio file.
            voice (str): The voice to be used for synthesis.
        """
        self.logger.debug(f"Synthesizing speech to {output_file} using voice {voice}")
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_file.as_posix())

synthesize(text, output_file, voice='en-US-ChristopherNeural') async

Synthesize speech from text and save to output file.

Parameters:

Name         Type  Description                                   Default
text         str   The text to be converted to speech.           required
output_file  Path  The path to save the synthesized audio file.  required
voice        str   The voice to be used for synthesis.           'en-US-ChristopherNeural'
Source code in whisper_tiktok/services/tts_service.py
async def synthesize(
    self,
    text: str,
    output_file: Path,
    voice: str = "en-US-ChristopherNeural",
) -> None:
    """
    Synthesize speech from text and save to output file.

    Args:
        text (str): The text to be converted to speech.
        output_file (Path): The path to save the synthesized audio file.
        voice (str): The voice to be used for synthesis.
    """
    self.logger.debug(f"Synthesizing speech to {output_file} using voice {voice}")
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file.as_posix())
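
A minimal sketch; the output path is illustrative and the default voice matches the create command.

import asyncio
import logging
from pathlib import Path

from whisper_tiktok.services.tts_service import TTSService

tts = TTSService(logging.getLogger("whisper_tiktok"))
asyncio.run(tts.synthesize("Hello from Whisper TikTok.", Path("media/narration.mp3")))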

TranscriptionService

Bases: ITranscriptionService

Service for transcribing audio using Whisper.

Source code in whisper_tiktok/services/transcription_service.py
class TranscriptionService(ITranscriptionService):
    """Service for transcribing audio using Whisper."""

    def __init__(self, logger):
        self.logger = logger

    def transcribe(
        self,
        audio_file: Path,
        srt_file: Path,
        ass_file: Path,
        model: str,
        options: dict,
    ) -> tuple[Path, Path]:
        self.logger.debug(
            f"Transcribing {audio_file} with model {model} and options {options}"
        )

        whisper_model = stable_whisper.load_model(
            model, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )
        self.logger.debug(f"Loaded Whisper model: {model}")

        transcription = whisper_model.transcribe(
            audio_file.as_posix(),
            regroup=True,
            fp16=False,
            word_timestamps=True,
        )
        transcription.to_srt_vtt(srt_file.as_posix(), word_level=True)
        transcription.to_ass(ass_file.as_posix(), word_level=True, **options)
        return (srt_file, ass_file)
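
A usage sketch; the paths are illustrative, the Whisper model is downloaded on first use, and the style keys in options are forwarded to stable_whisper's to_ass(), just like the config built by the create command.

import logging
from pathlib import Path

from whisper_tiktok.services.transcription_service import TranscriptionService

service = TranscriptionService(logging.getLogger("whisper_tiktok"))

srt, ass = service.transcribe(
    audio_file=Path("media/narration.mp3"),
    srt_file=Path("media/subtitles.srt"),
    ass_file=Path("media/subtitles.ass"),
    model="base",
    options={"Fontname": "Lexend Bold", "Fontsize": 21, "Alignment": 5},
)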

VideoCreatorFactory

Factory for creating video processor instances.

Methods:

Name              Description
create_processor  Create a configured video processor.

Source code in whisper_tiktok/factories/video_factory.py
class VideoCreatorFactory:
    """Factory for creating video processor instances."""

    def __init__(self, container: Container):
        self.container = container

    def create_processor(self, video_data: dict, config: dict) -> VideoProcessor:
        """Create a configured video processor."""
        uuid_str = str(uuid.uuid4())

        return VideoProcessor(
            uuid=uuid_str,
            video_data=video_data,
            config=config,
            strategies=self._build_strategies(config),
            logger=self.container.logger(),
        )

    def _build_strategies(self, config: dict) -> list[ProcessingStrategy]:
        """Build processing pipeline based on config."""
        strategies = [
            DownloadBackgroundStrategy(
                self.container.video_downloader(), self.container.logger()
            ),
            TTSGenerationStrategy(
                self.container.tts_service(), self.container.logger()
            ),
            TranscriptionStrategy(
                self.container.transcription_service(), self.container.logger()
            ),
            VideoCompositionStrategy(
                self.container.ffmpeg_service(), self.container.logger()
            ),
        ]

        if config.get("upload_tiktok"):
            strategies.append(
                TikTokUploadStrategy(self.container.uploader(), self.container.logger())
            )

        return strategies

create_processor(video_data, config)

Create a configured video processor.

Source code in whisper_tiktok/factories/video_factory.py
def create_processor(self, video_data: dict, config: dict) -> VideoProcessor:
    """Create a configured video processor."""
    uuid_str = str(uuid.uuid4())

    return VideoProcessor(
        uuid=uuid_str,
        video_data=video_data,
        config=config,
        strategies=self._build_strategies(config),
        logger=self.container.logger(),
    )
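
A sketch of driving the factory directly; the "text" key is a guess at the video.json schema (only "series" appears elsewhere in these docs), and the config dict mirrors what Application._build_config() returns.

import asyncio

from whisper_tiktok.container import Container
from whisper_tiktok.factories.video_factory import VideoCreatorFactory

container = Container()
container.config.from_dict({"model": "base", "upload_tiktok": False})

factory = VideoCreatorFactory(container)
video = {"series": "Example Series", "text": "Narration text..."}  # hypothetical entry

processor = factory.create_processor(video, dict(container.config()))
result = asyncio.run(processor.process())
print(result.output_path)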

VideoDownloaderService

Bases: IVideoDownloader

YouTube video downloader using yt-dlp.

Methods:

Name      Description
download  Download video from URL.

Source code in whisper_tiktok/services/video_downloader.py
class VideoDownloaderService(IVideoDownloader):
    """YouTube video downloader using yt-dlp."""

    def __init__(self, executor: CommandExecutor, logger: Logger):
        self.executor = executor
        self.logger = logger

    def download(self, url: str, output_dir: Path) -> Path:
        """Download video from URL."""
        output_dir.mkdir(parents=True, exist_ok=True)

        command = rf"yt-dlp -f bestvideo[ext=mp4] --restrict-filenames -o %(id)s.%(ext)s {url}"
        result = self.executor.execute(command, cwd=output_dir)

        if result.returncode != 0:
            raise VideoDownloadError(f"Failed to download: {result.stderr}")

        # Find downloaded file
        videos = list(output_dir.glob("*.mp4"))
        if not videos:
            raise VideoDownloadError("No video file found after download")

        return videos[-1]  # Most recent

download(url, output_dir)

Download video from URL.

Source code in whisper_tiktok/services/video_downloader.py
def download(self, url: str, output_dir: Path) -> Path:
    """Download video from URL."""
    output_dir.mkdir(parents=True, exist_ok=True)

    command = rf"yt-dlp -f bestvideo[ext=mp4] --restrict-filenames -o %(id)s.%(ext)s {url}"
    result = self.executor.execute(command, cwd=output_dir)

    if result.returncode != 0:
        raise VideoDownloadError(f"Failed to download: {result.stderr}")

    # Find downloaded file
    videos = list(output_dir.glob("*.mp4"))
    if not videos:
        raise VideoDownloadError("No video file found after download")

    return videos[-1]  # Most recent
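
A usage sketch; it requires yt-dlp on PATH and uses the default background URL from the create command, downloading into an illustrative directory.

import logging
from pathlib import Path

from whisper_tiktok.execution.command_executor import CommandExecutor
from whisper_tiktok.services.video_downloader import VideoDownloaderService

logger = logging.getLogger("whisper_tiktok")
downloader = VideoDownloaderService(executor=CommandExecutor(logger), logger=logger)

clip = downloader.download(
    "https://www.youtube.com/watch?v=intRX7BRA90",
    Path("media/background"),
)
print(f"Downloaded background to {clip}")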

VoicesManager

Wrapper for edge_tts VoicesManager.

Methods:

Name    Description
create  Create and return voices manager object.
find    Find a voice by gender and locale.

Source code in whisper_tiktok/voice_manager.py
class VoicesManager:
    """Wrapper for edge_tts VoicesManager."""

    @staticmethod
    async def create():
        """Create and return voices manager object."""
        return await edge_tts.VoicesManager.create()

    @staticmethod
    def find(voices, gender: str, locale: str) -> Any:
        """Find a voice by gender and locale.

        Args:
            voices: Voices manager object from create()
            gender: Gender filter (Male/Female)
            locale: Language locale filter (e.g., en-US)

        Returns:
            Dictionary with voice information

        Raises:
            ValueError: If no voice found
        """
        result = voices.find(Gender=gender, Locale=locale)
        if not result or len(result) == 0:
            raise ValueError(f"No voice found for {gender} - {locale}")
        # Return the first result as a dict-like object
        return result[0]

create() async staticmethod

Create and return voices manager object.

Source code in whisper_tiktok/voice_manager.py
@staticmethod
async def create():
    """Create and return voices manager object."""
    return await edge_tts.VoicesManager.create()

find(voices, gender, locale) staticmethod

Find a voice by gender and locale.

Parameters:

Name    Type  Description                            Default
voices        Voices manager object from create().   required
gender  str   Gender filter (Male/Female).           required
locale  str   Language locale filter (e.g., en-US).  required

Returns:

Type  Description
Any   Dictionary with voice information.

Raises:

Type        Description
ValueError  If no voice found.

Source code in whisper_tiktok/voice_manager.py
@staticmethod
def find(voices, gender: str, locale: str) -> Any:
    """Find a voice by gender and locale.

    Args:
        voices: Voices manager object from create()
        gender: Gender filter (Male/Female)
        locale: Language locale filter (e.g., en-US)

    Returns:
        Dictionary with voice information

    Raises:
        ValueError: If no voice found
    """
    result = voices.find(Gender=gender, Locale=locale)
    if not result or len(result) == 0:
        raise ValueError(f"No voice found for {gender} - {locale}")
    # Return the first result as a dict-like object
    return result[0]
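
A short sketch of the two-step lookup; the ShortName key comes from the edge_tts voice dictionaries also used by list_voices below.

import asyncio

from whisper_tiktok.voice_manager import VoicesManager

async def pick_voice() -> str:
    voices = await VoicesManager.create()
    voice = VoicesManager.find(voices, gender="Male", locale="en-US")
    return voice["ShortName"]

print(asyncio.run(pick_voice()))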

create(model=typer.Option('turbo', '--model', '-m', help='Whisper model size [tiny|base|small|medium|large|turbo]'), background_url=typer.Option('https://www.youtube.com/watch?v=intRX7BRA90', '--background-url', '-u', help='YouTube URL for background video'), tts_voice=typer.Option('en-US-ChristopherNeural', '--tts', '-v', help='TTS voice to use'), random_voice=typer.Option(False, '--random-voice', help='Use random TTS voice'), gender=typer.Option(None, '--gender', '-g', help='Gender for random voice (Male/Female)'), language=typer.Option(None, '--language', '-l', help='Language for random voice (e.g., en-US)'), font=typer.Option('Lexend Bold', '--font', '-f', help='Subtitle font'), font_size=typer.Option(21, '--font-size', help='Subtitle font size'), font_color=typer.Option('FFF000', '--font-color', '-c', help='Subtitle color (hex format)'), sub_position=typer.Option(5, '--sub-position', '-p', help='Subtitle position (1-9)', min=1, max=9), upload_tiktok=typer.Option(False, '--upload-tiktok', help='Upload to TikTok'), clean=typer.Option(False, '--clean', help='Clean media and output folders before processing'), verbose=typer.Option(False, '--verbose', help='Enable verbose logging'))

Create videos from text content.

Source code in whisper_tiktok/main.py
@app.command()
def create(
    model: str = typer.Option(
        "turbo",
        "--model",
        "-m",
        help="Whisper model size [tiny|base|small|medium|large|turbo]",
    ),
    background_url: str = typer.Option(
        "https://www.youtube.com/watch?v=intRX7BRA90",
        "--background-url",
        "-u",
        help="YouTube URL for background video",
    ),
    tts_voice: str = typer.Option(
        "en-US-ChristopherNeural",
        "--tts",
        "-v",
        help="TTS voice to use",
    ),
    random_voice: bool = typer.Option(
        False,
        "--random-voice",
        help="Use random TTS voice",
    ),
    gender: Optional[str] = typer.Option(
        None,
        "--gender",
        "-g",
        help="Gender for random voice (Male/Female)",
    ),
    language: Optional[str] = typer.Option(
        None,
        "--language",
        "-l",
        help="Language for random voice (e.g., en-US)",
    ),
    font: str = typer.Option(
        "Lexend Bold",
        "--font",
        "-f",
        help="Subtitle font",
    ),
    font_size: int = typer.Option(
        21,
        "--font-size",
        help="Subtitle font size",
    ),
    font_color: str = typer.Option(
        "FFF000",
        "--font-color",
        "-c",
        help="Subtitle color (hex format)",
    ),
    sub_position: int = typer.Option(
        5,
        "--sub-position",
        "-p",
        help="Subtitle position (1-9)",
        min=1,
        max=9,
    ),
    upload_tiktok: bool = typer.Option(
        False,
        "--upload-tiktok",
        help="Upload to TikTok",
    ),
    clean: bool = typer.Option(
        False,
        "--clean",
        help="Clean media and output folders before processing",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        help="Enable verbose logging",
    ),
):
    """Create videos from text content."""

    # Setup logging
    log_dir = Path.cwd() / "logs"
    log_level = "DEBUG" if verbose else "INFO"
    logger = setup_logger(log_dir, log_level)

    async def _create():
        nonlocal tts_voice, language

        logger.info("=" * 60)
        logger.info("Starting Whisper TikTok video creation pipeline")
        logger.info("=" * 60)

        # Validate model choice
        valid_models = ["tiny", "base", "small", "medium", "large", "turbo"]
        if model not in valid_models:
            logger.error("Invalid model. Choose from: %s", ", ".join(valid_models))
            raise typer.Exit(code=1)

        # Handle random voice selection
        if random_voice:
            if not gender or not language:
                logger.error("Both --gender and --language required for random voice")
                raise typer.Exit(code=1)

            try:
                voices_manager = VoicesManager()
                voices_obj = await voices_manager.create()
                voice_result = voices_manager.find(voices_obj, gender, language)
                tts_voice = voice_result.get("Name") or voice_result.get("ShortName")
                logger.info("Selected random voice: %s", tts_voice)
            except Exception as e:
                logger.error("Failed to select random voice: %s", e)
                raise typer.Exit(code=1) from e
        else:
            # Validate specified voice
            try:
                voices_manager = VoicesManager()
                voices_obj = await voices_manager.create()
                extracted_language = "-".join(tts_voice.split("-")[0:2])
                voice_result = voices_obj.find(Locale=extracted_language)

                if not voice_result:
                    logger.error(
                        "Voice not found. Run 'whisper-tiktok list-voices' to see available voices"
                    )
                    raise typer.Exit(code=1)

                language = extracted_language
                logger.info("Using voice: %s", tts_voice)

            except Exception as e:
                logger.error("Voice validation failed: %s", e)
                raise typer.Exit(code=1) from e

        # Process font color
        processed_font_color = font_color.lower()
        if processed_font_color.startswith("#"):
            processed_font_color = processed_font_color[1:]
        processed_font_color = rgb_to_bgr(processed_font_color)

        # Clean folders if requested
        if clean:
            logger.info("Cleaning media and output folders...")
            media_path = Path.cwd() / "media"
            output_path = Path.cwd() / "output"

            if media_path.exists():
                shutil.rmtree(media_path)
                logger.info("Removed %s", media_path)

            if output_path.exists():
                shutil.rmtree(output_path)
                logger.info("Removed %s", output_path)
        # Display startup info
        console.print("\n[bold green]🎬 Starting video creation pipeline…[/bold green]")
        console.print(f"  [cyan]Model:[/cyan] {model}")
        console.print(f"  [cyan]Voice:[/cyan] {tts_voice}")
        console.print(f"  [cyan]Language:[/cyan] {language}\n")

        # Setup DI container
        container = Container()
        config_dict = {
            "model": model,
            "background_url": background_url,
            "tts_voice": tts_voice,
            "upload_tiktok": upload_tiktok,
            "Fontname": font,
            "Fontsize": font_size,
            "highlight_color": processed_font_color,
            "Alignment": sub_position,
            "BorderStyle": "1",
            "Outline": "1",
            "Shadow": "2",
            "Blur": "21",
            "MarginL": "0",
            "MarginR": "0",
        }
        container.config.from_dict(config_dict)

        # Run application
        app_instance = Application(container, logger)

        try:
            await app_instance.run()
            console.print(
                "\n[bold green]✅ Pipeline completed successfully![/bold green]"
            )
        except Exception as e:
            logger.exception("Pipeline failed")
            console.print(f"\n[bold red]❌ Pipeline failed: {e}[/bold red]")
            raise typer.Exit(code=1) from e

    # Run the async function
    asyncio.run(_create())
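
The command is registered on a Typer app in whisper_tiktok/main.py; assuming that app object is importable as the decorators suggest, it can also be exercised programmatically through Typer's test runner (the run itself still needs a video.json and the external tools on PATH).

from typer.testing import CliRunner

from whisper_tiktok.main import app  # assumed location of the Typer app

runner = CliRunner()
result = runner.invoke(app, ["create", "--model", "base", "--verbose"])
print(result.exit_code)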

list_voices(language=typer.Option(None, '--language', '-l', help='Filter by language (e.g., en-US)'), gender=typer.Option(None, '--gender', '-g', help='Filter by gender (Male/Female)'))

List available TTS voices.

Source code in whisper_tiktok/main.py
@app.command()
def list_voices(
    language: Optional[str] = typer.Option(
        None,
        "--language",
        "-l",
        help="Filter by language (e.g., en-US)",
    ),
    gender: Optional[str] = typer.Option(
        None,
        "--gender",
        "-g",
        help="Filter by gender (Male/Female)",
    ),
):
    """List available TTS voices."""

    async def _list_voices():
        voices_manager = VoicesManager()
        voices_obj = await voices_manager.create()

        # Get all voices
        voices = voices_obj.voices

        # Apply filters
        if language:
            voices = [v for v in voices if v.get("Locale", "").startswith(language)]
        if gender:
            voices = [v for v in voices if v.get("Gender", "") == gender]

        # Display in a table
        table = Table(title="Available TTS Voices")
        table.add_column("Name", style="cyan")
        table.add_column("Locale", style="green")
        table.add_column("Gender", style="magenta")
        table.add_column("Voice Personalities", style="blue")
        table.add_column("Scenarios", style="yellow")

        for voice in sorted(voices, key=lambda x: x.get("Locale", "")):
            table.add_row(
                voice.get("ShortName", ""),
                voice.get("Locale", ""),
                voice.get("Gender", ""),
                ", ".join(voice["VoiceTag"].get("VoicePersonalities", [])),
                ", ".join(voice["VoiceTag"].get("TailoredScenarios", [])),
            )

        console.print(table)
        console.print(f"\n[dim]Total: {len(voices)} voices[/dim]")

    asyncio.run(_list_voices())

rgb_to_bgr(rgb)

Convert RGB hex to BGR hex.

Parameters:

Name  Type  Description                                    Default
rgb   str   RGB hex string (e.g., "#RRGGBB" or "RRGGBB").  required

Returns:

Type  Description
str   BGR hex string (e.g., "BBGGRR").

Source code in whisper_tiktok/utils/color_utils.py
def rgb_to_bgr(rgb: str) -> str:
    """Convert RGB hex to BGR hex.

    Args:
        rgb: RGB hex string (e.g., "#RRGGBB" or "RRGGBB")

    Returns:
        BGR hex string (e.g., "BBGGRR")
    """
    # Validate input length
    if len(rgb) != 6 and len(rgb) != 7:
        raise ValueError("RGB hex must be 6 or 7 characters long (including #).")

    # Validate hex characters
    match = validate_hex_color(rgb)

    if not match:
        raise ValueError("Invalid RGB hex format.")

    if rgb.startswith("#"):
        rgb = rgb[1:]
    r, g, b = rgb[0:2], rgb[2:4], rgb[4:6]
    return b + g + r
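
A worked example: the default subtitle color FFF000 from the create command swaps its red and blue bytes to give the BGR value that ASS styles expect.

from whisper_tiktok.utils.color_utils import rgb_to_bgr

assert rgb_to_bgr("FFF000") == "00F0FF"
assert rgb_to_bgr("#FFF000") == "00F0FF"  # a leading "#" is stripped first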

setup_logger(log_dir, log_level='INFO')

Configure and return a logger instance.

Parameters:

Name       Type  Description                                             Default
log_dir    Path  Directory to store log files.                           required
log_level  str   Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).  'INFO'

Returns:

Type    Description
Logger  Configured logger instance.

Source code in whisper_tiktok/config/logger_config.py
def setup_logger(log_dir: Path, log_level: str = "INFO") -> logging.Logger:
    """
    Configure and return a logger instance.

    Args:
        log_dir: Directory to store log files
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)

    Returns:
        Configured logger instance
    """
    log_dir.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger("whisper_tiktok")
    logger.setLevel(getattr(logging, log_level.upper()))

    # Clear existing handlers
    logger.handlers.clear()

    # Create formatters
    detailed_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")

    # File handler
    file_handler = logging.FileHandler(log_dir / "app.log", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(detailed_formatter)
    logger.addHandler(file_handler)

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(getattr(logging, log_level.upper()))
    console_handler.setFormatter(simple_formatter)
    logger.addHandler(console_handler)

    return logger
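
A minimal usage sketch; note that the logger's own level is set from log_level, so pass "DEBUG" if the DEBUG-level file handler should actually receive debug records.

from pathlib import Path

from whisper_tiktok.config.logger_config import setup_logger

logger = setup_logger(Path.cwd() / "logs", log_level="DEBUG")
logger.info("Pipeline starting")
logger.debug("Written to logs/app.log and echoed to the console at this level")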

config

Modules:

Name           Description
logger_config  Logger configuration module.

logger_config

Logger configuration module.

Functions:

Name          Description
setup_logger  Configure and return a logger instance.

setup_logger(log_dir, log_level='INFO')

Configure and return a logger instance.

Parameters:

Name       Type  Description                                             Default
log_dir    Path  Directory to store log files.                           required
log_level  str   Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).  'INFO'

Returns:

Type    Description
Logger  Configured logger instance.

Source code in whisper_tiktok/config/logger_config.py
def setup_logger(log_dir: Path, log_level: str = "INFO") -> logging.Logger:
    """
    Configure and return a logger instance.

    Args:
        log_dir: Directory to store log files
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)

    Returns:
        Configured logger instance
    """
    log_dir.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger("whisper_tiktok")
    logger.setLevel(getattr(logging, log_level.upper()))

    # Clear existing handlers
    logger.handlers.clear()

    # Create formatters
    detailed_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")

    # File handler
    file_handler = logging.FileHandler(log_dir / "app.log", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(detailed_formatter)
    logger.addHandler(file_handler)

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(getattr(logging, log_level.upper()))
    console_handler.setFormatter(simple_formatter)
    logger.addHandler(console_handler)

    return logger

container

Classes:

Name       Description
Container  IoC container for dependency injection.

Container

Bases: DeclarativeContainer

IoC container for dependency injection.

Source code in whisper_tiktok/container.py
class Container(containers.DeclarativeContainer):
    """IoC container for dependency injection."""

    config = providers.Configuration()

    # Path providers
    workspace_path = providers.Singleton(lambda: Path.cwd())

    media_path = providers.Factory(
        lambda workspace, uuid: workspace / "media" / uuid,
        workspace=workspace_path,
        uuid=providers.Dependency(),
    )

    output_path = providers.Factory(
        lambda workspace, uuid: workspace / "output" / uuid,
        workspace=workspace_path,
        uuid=providers.Dependency(),
    )

    # Service providers
    logger = providers.Singleton(lambda: logging.getLogger("whisper_tiktok"))

    command_executor = providers.Factory(CommandExecutor, logger=logger)

    ffmpeg_service = providers.Factory(
        FFmpegService, executor=command_executor, logger=logger
    )

    video_downloader = providers.Factory(
        VideoDownloaderService,
        executor=command_executor,
        logger=logger,
    )

    tts_service = providers.Factory(TTSService, logger=logger)

    transcription_service = providers.Factory(TranscriptionService, logger=logger)

execution

Modules:

Name Description
command_executor

command_executor

Classes:

Name                   Description
CommandExecutionError  Custom exception for command execution errors.
CommandExecutor        Executes external commands with error handling.
CommandTimeoutError    Custom exception for command timeouts.
ExecutionResult        Result of command execution.

CommandExecutionError

Bases: Exception

Custom exception for command execution errors.

Source code in whisper_tiktok/execution/command_executor.py
class CommandExecutionError(Exception):
    """Custom exception for command execution errors."""

CommandExecutor

Executes external commands with error handling.

Parameters:

Name    Type    Description                   Default
logger  Logger  Logger instance for logging.  required

Methods:

Name     Description
execute  Execute command and return result.

Source code in whisper_tiktok/execution/command_executor.py
class CommandExecutor:
    """Executes external commands with error handling.

    Args:
        logger: Logger instance for logging.

    """

    def __init__(self, logger: Logger):
        self.logger = logger

    def execute(
        self, command: str, cwd: Path | None = None, timeout: int | None = None
    ) -> ExecutionResult:
        """Execute command and return result.

        Args:
            command: Command to execute.
            cwd: Working directory for command execution.
            timeout: Timeout in seconds for command execution.

        Returns:
            ExecutionResult containing return code, stdout, and stderr.
        """

        self.logger.debug(f"Executing: {command}")
        try:
            with subprocess.Popen(
                command,
                cwd=cwd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            ) as process:
                stdout, stderr = process.communicate(timeout=timeout)
                result = ExecutionResult(
                    returncode=process.returncode, stdout=stdout, stderr=stderr
                )
            return result
        except subprocess.TimeoutExpired as exc:
            self.logger.error(f"Command timed out: {command}")
            raise CommandTimeoutError(f"Command timed out after {timeout}s") from exc
        except Exception as exc:
            self.logger.exception(f"Command execution failed: {command}")
            raise CommandExecutionError(str(exc)) from exc
execute(command, cwd=None, timeout=None)

Execute command and return result.

Parameters:

Name     Type         Description                                Default
command  str          Command to execute.                        required
cwd      Path | None  Working directory for command execution.   None
timeout  int | None   Timeout in seconds for command execution.  None

Returns:

Type             Description
ExecutionResult  ExecutionResult containing return code, stdout, and stderr.

Source code in whisper_tiktok/execution/command_executor.py
def execute(
    self, command: str, cwd: Path | None = None, timeout: int | None = None
) -> ExecutionResult:
    """Execute command and return result.

    Args:
        command: Command to execute.
        cwd: Working directory for command execution.
        timeout: Timeout in seconds for command execution.

    Returns:
        ExecutionResult containing return code, stdout, and stderr.
    """

    self.logger.debug(f"Executing: {command}")
    try:
        with subprocess.Popen(
            command,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        ) as process:
            stdout, stderr = process.communicate(timeout=timeout)
            result = ExecutionResult(
                returncode=process.returncode, stdout=stdout, stderr=stderr
            )
        return result
    except subprocess.TimeoutExpired as exc:
        self.logger.error(f"Command timed out: {command}")
        raise CommandTimeoutError(f"Command timed out after {timeout}s") from exc
    except Exception as exc:
        self.logger.exception(f"Command execution failed: {command}")
        raise CommandExecutionError(str(exc)) from exc

CommandTimeoutError

Bases: Exception

Custom exception for command timeouts.

Source code in whisper_tiktok/execution/command_executor.py
class CommandTimeoutError(Exception):
    """Custom exception for command timeouts."""

ExecutionResult dataclass

Result of command execution.

Attributes:

Name     Type  Description
success  bool  Indicates if the command executed successfully.

Source code in whisper_tiktok/execution/command_executor.py
@dataclass
class ExecutionResult:
    """Result of command execution."""

    returncode: int
    stdout: str
    stderr: str

    @property
    def success(self) -> bool:
        """Indicates if the command executed successfully."""
        return self.returncode == 0
success property

Indicates if the command executed successfully.

factories

Modules:

Name           Description
video_factory

video_factory

Classes:

Name                 Description
VideoCreatorFactory  Factory for creating video processor instances.

VideoCreatorFactory

Factory for creating video processor instances.

Methods:

Name              Description
create_processor  Create a configured video processor.

Source code in whisper_tiktok/factories/video_factory.py
class VideoCreatorFactory:
    """Factory for creating video processor instances."""

    def __init__(self, container: Container):
        self.container = container

    def create_processor(self, video_data: dict, config: dict) -> VideoProcessor:
        """Create a configured video processor."""
        uuid_str = str(uuid.uuid4())

        return VideoProcessor(
            uuid=uuid_str,
            video_data=video_data,
            config=config,
            strategies=self._build_strategies(config),
            logger=self.container.logger(),
        )

    def _build_strategies(self, config: dict) -> list[ProcessingStrategy]:
        """Build processing pipeline based on config."""
        strategies = [
            DownloadBackgroundStrategy(
                self.container.video_downloader(), self.container.logger()
            ),
            TTSGenerationStrategy(
                self.container.tts_service(), self.container.logger()
            ),
            TranscriptionStrategy(
                self.container.transcription_service(), self.container.logger()
            ),
            VideoCompositionStrategy(
                self.container.ffmpeg_service(), self.container.logger()
            ),
        ]

        if config.get("upload_tiktok"):
            strategies.append(
                TikTokUploadStrategy(self.container.uploader(), self.container.logger())
            )

        return strategies
create_processor(video_data, config)

Create a configured video processor.

Source code in whisper_tiktok/factories/video_factory.py
def create_processor(self, video_data: dict, config: dict) -> VideoProcessor:
    """Create a configured video processor."""
    uuid_str = str(uuid.uuid4())

    return VideoProcessor(
        uuid=uuid_str,
        video_data=video_data,
        config=config,
        strategies=self._build_strategies(config),
        logger=self.container.logger(),
    )

interfaces

Modules:

Name Description
transcription_service
tts_service
video_downloader

transcription_service

Classes:

Name                   Description
ITranscriptionService  Interface for transcription services.

ITranscriptionService

Bases: ABC

Interface for transcription services.

Methods:

Name        Description
transcribe  Transcribe audio and generate SRT/ASS files.

Source code in whisper_tiktok/interfaces/transcription_service.py
class ITranscriptionService(ABC):
    """Interface for transcription services."""

    @abstractmethod
    def transcribe(
        self,
        audio_file: Path,
        srt_file: Path,
        ass_file: Path,
        model: str,
        options: dict,
    ) -> tuple[Path, Path]:
        """Transcribe audio and generate SRT/ASS files."""
transcribe(audio_file, srt_file, ass_file, model, options) abstractmethod

Transcribe audio and generate SRT/ASS files.

Source code in whisper_tiktok/interfaces/transcription_service.py
@abstractmethod
def transcribe(
    self,
    audio_file: Path,
    srt_file: Path,
    ass_file: Path,
    model: str,
    options: dict,
) -> tuple[Path, Path]:
    """Transcribe audio and generate SRT/ASS files."""

tts_service

Classes:

Name         Description
ITTSService  Interface for text-to-speech services.

ITTSService

Bases: ABC

Interface for text-to-speech services.

Methods:

Name        Description
synthesize  Synthesize speech from text.

Source code in whisper_tiktok/interfaces/tts_service.py
class ITTSService(ABC):
    """Interface for text-to-speech services."""

    @abstractmethod
    async def synthesize(self, text: str, output_file: Path, voice: str) -> None:
        """Synthesize speech from text."""
synthesize(text, output_file, voice) abstractmethod async

Synthesize speech from text.

Source code in whisper_tiktok/interfaces/tts_service.py
@abstractmethod
async def synthesize(self, text: str, output_file: Path, voice: str) -> None:
    """Synthesize speech from text."""

video_downloader

Classes:

Name              Description
IVideoDownloader  Interface for video downloading services.

IVideoDownloader

Bases: ABC

Interface for video downloading services.

Methods:

Name      Description
download  Download video from URL to output directory.

Source code in whisper_tiktok/interfaces/video_downloader.py
class IVideoDownloader(ABC):
    """Interface for video downloading services."""

    @abstractmethod
    def download(self, url: str, output_dir: Path) -> Path:
        """Download video from URL to output directory."""
download(url, output_dir) abstractmethod

Download video from URL to output directory.

Source code in whisper_tiktok/interfaces/video_downloader.py
@abstractmethod
def download(self, url: str, output_dir: Path) -> Path:
    """Download video from URL to output directory."""

main

Main module for the Whisper TikTok application.

Classes:

Name         Description
Application  Main application class responsible for orchestrating the video creation pipeline.

Functions:

Name         Description
create       Create videos from text content.
list_voices  List available TTS voices.
main         Whisper TikTok - Create TikTok videos with AI-generated subtitles.

Application

Main application class responsible for orchestrating the video creation pipeline. This class loads video data from a JSON file, builds configuration from a container, and processes each video asynchronously using a factory-created processor.

Attributes:

Name       Type       Description
container  Container  Dependency injection container.
logger     Logger     Logger instance for logging operations.

Methods:

Name  Description
run   Main method to run the video creation pipeline.

Source code in whisper_tiktok/main.py
class Application:
    """Main application class responsible for orchestrating the video creation pipeline.
    This class loads video data from a JSON file, builds configuration from a container,
    and processes each video asynchronously using a factory-created processor.

    Attributes:
        container (Container): Dependency injection container.
        logger (logging.Logger): Logger instance for logging operations.

    Methods:
        run(): Main method to run the video creation pipeline.
    """

    def __init__(self, container: Container, logger: logging.Logger):
        self.container = container
        self.logger = logger
        self.factory = VideoCreatorFactory(container)

    def _load_video_data(self) -> list[dict]:
        """Load video data from video.json file.
        Returns:
            List of video data dictionaries.
        """
        video_json_path = Path.cwd() / "video.json"

        try:
            data: list[dict] = json.loads(video_json_path.read_text(encoding="utf-8"))
            self.logger.info(f"Loaded {len(data)} videos from video.json")
            return data
        except FileNotFoundError:
            self.logger.error(f"video.json not found at {video_json_path}")
            raise
        except json.JSONDecodeError as e:
            self.logger.error(f"Invalid JSON in video.json: {e}")
            raise

    def _build_config(self) -> dict:
        """Build configuration from container.

        Returns:
            Configuration dictionary.
        """
        return dict(self.container.config())

    async def run(self) -> None:
        """Run the video creation pipeline.

        Returns:
            None
        """

        # Load video data
        video_data = self._load_video_data()
        config = self._build_config()

        # Process each video
        for idx, video in enumerate(video_data, 1):
            self.logger.info(
                f"Processing video {idx}/{len(video_data)}: {video.get('series', 'Unknown')}"
            )
            await self._process_video(video, config)

    async def _process_video(self, video: dict, config: dict) -> None:
        """Process a single video.

        Args:
            video (dict): Video data dictionary.
            config (dict): Configuration dictionary.

        Returns:
            None
        """

        processor = self.factory.create_processor(video, config)

        try:
            result = await processor.process()
            self.logger.info(f"✓ Video created: {result.output_path}")
        except Exception:
            self.logger.exception(
                f"✗ Failed to process video: {video.get('series', 'Unknown')}"
            )
            raise

run() async

Run the video creation pipeline.

Returns:

None

Source code in whisper_tiktok/main.py
async def run(self) -> None:
    """Run the video creation pipeline.

    Returns:
        None
    """

    # Load video data
    video_data = self._load_video_data()
    config = self._build_config()

    # Process each video
    for idx, video in enumerate(video_data, 1):
        self.logger.info(
            f"Processing video {idx}/{len(video_data)}: {video.get('series', 'Unknown')}"
        )
        await self._process_video(video, config)

create(model=typer.Option('turbo', '--model', '-m', help='Whisper model size [tiny|base|small|medium|large|turbo]'), background_url=typer.Option('https://www.youtube.com/watch?v=intRX7BRA90', '--background-url', '-u', help='YouTube URL for background video'), tts_voice=typer.Option('en-US-ChristopherNeural', '--tts', '-v', help='TTS voice to use'), random_voice=typer.Option(False, '--random-voice', help='Use random TTS voice'), gender=typer.Option(None, '--gender', '-g', help='Gender for random voice (Male/Female)'), language=typer.Option(None, '--language', '-l', help='Language for random voice (e.g., en-US)'), font=typer.Option('Lexend Bold', '--font', '-f', help='Subtitle font'), font_size=typer.Option(21, '--font-size', help='Subtitle font size'), font_color=typer.Option('FFF000', '--font-color', '-c', help='Subtitle color (hex format)'), sub_position=typer.Option(5, '--sub-position', '-p', help='Subtitle position (1-9)', min=1, max=9), upload_tiktok=typer.Option(False, '--upload-tiktok', help='Upload to TikTok'), clean=typer.Option(False, '--clean', help='Clean media and output folders before processing'), verbose=typer.Option(False, '--verbose', help='Enable verbose logging'))

Create videos from text content.

Source code in whisper_tiktok/main.py
@app.command()
def create(
    model: str = typer.Option(
        "turbo",
        "--model",
        "-m",
        help="Whisper model size [tiny|base|small|medium|large|turbo]",
    ),
    background_url: str = typer.Option(
        "https://www.youtube.com/watch?v=intRX7BRA90",
        "--background-url",
        "-u",
        help="YouTube URL for background video",
    ),
    tts_voice: str = typer.Option(
        "en-US-ChristopherNeural",
        "--tts",
        "-v",
        help="TTS voice to use",
    ),
    random_voice: bool = typer.Option(
        False,
        "--random-voice",
        help="Use random TTS voice",
    ),
    gender: Optional[str] = typer.Option(
        None,
        "--gender",
        "-g",
        help="Gender for random voice (Male/Female)",
    ),
    language: Optional[str] = typer.Option(
        None,
        "--language",
        "-l",
        help="Language for random voice (e.g., en-US)",
    ),
    font: str = typer.Option(
        "Lexend Bold",
        "--font",
        "-f",
        help="Subtitle font",
    ),
    font_size: int = typer.Option(
        21,
        "--font-size",
        help="Subtitle font size",
    ),
    font_color: str = typer.Option(
        "FFF000",
        "--font-color",
        "-c",
        help="Subtitle color (hex format)",
    ),
    sub_position: int = typer.Option(
        5,
        "--sub-position",
        "-p",
        help="Subtitle position (1-9)",
        min=1,
        max=9,
    ),
    upload_tiktok: bool = typer.Option(
        False,
        "--upload-tiktok",
        help="Upload to TikTok",
    ),
    clean: bool = typer.Option(
        False,
        "--clean",
        help="Clean media and output folders before processing",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        help="Enable verbose logging",
    ),
):
    """Create videos from text content."""

    # Setup logging
    log_dir = Path.cwd() / "logs"
    log_level = "DEBUG" if verbose else "INFO"
    logger = setup_logger(log_dir, log_level)

    async def _create():
        nonlocal tts_voice, language

        logger.info("=" * 60)
        logger.info("Starting Whisper TikTok video creation pipeline")
        logger.info("=" * 60)

        # Validate model choice
        valid_models = ["tiny", "base", "small", "medium", "large", "turbo"]
        if model not in valid_models:
            logger.error("Invalid model. Choose from: %s", ", ".join(valid_models))
            raise typer.Exit(code=1)

        # Handle random voice selection
        if random_voice:
            if not gender or not language:
                logger.error("Both --gender and --language required for random voice")
                raise typer.Exit(code=1)

            try:
                voices_manager = VoicesManager()
                voices_obj = await voices_manager.create()
                voice_result = voices_manager.find(voices_obj, gender, language)
                tts_voice = voice_result.get("Name") or voice_result.get("ShortName")
                logger.info("Selected random voice: %s", tts_voice)
            except Exception as e:
                logger.error("Failed to select random voice: %s", e)
                raise typer.Exit(code=1) from e
        else:
            # Validate specified voice
            try:
                voices_manager = VoicesManager()
                voices_obj = await voices_manager.create()
                extracted_language = "-".join(tts_voice.split("-")[0:2])
                voice_result = voices_obj.find(Locale=extracted_language)

                if not voice_result:
                    logger.error(
                        "Voice not found. Run 'whisper-tiktok list-voices' to see available voices"
                    )
                    raise typer.Exit(code=1)

                language = extracted_language
                logger.info("Using voice: %s", tts_voice)

            except Exception as e:
                logger.error("Voice validation failed: %s", e)
                raise typer.Exit(code=1) from e

        # Process font color
        processed_font_color = font_color.lower()
        if processed_font_color.startswith("#"):
            processed_font_color = processed_font_color[1:]
        processed_font_color = rgb_to_bgr(processed_font_color)

        # Clean folders if requested
        if clean:
            logger.info("Cleaning media and output folders...")
            media_path = Path.cwd() / "media"
            output_path = Path.cwd() / "output"

            if media_path.exists():
                shutil.rmtree(media_path)
                logger.info("Removed %s", media_path)

            if output_path.exists():
                shutil.rmtree(output_path)
                logger.info("Removed %s", output_path)

        # Display startup info
        console.print("\n[bold green]🎬 Starting video creation pipeline…[/bold green]")
        console.print(f"  [cyan]Model:[/cyan] {model}")
        console.print(f"  [cyan]Voice:[/cyan] {tts_voice}")
        console.print(f"  [cyan]Language:[/cyan] {language}\n")

        # Setup DI container
        container = Container()
        config_dict = {
            "model": model,
            "background_url": background_url,
            "tts_voice": tts_voice,
            "upload_tiktok": upload_tiktok,
            "Fontname": font,
            "Fontsize": font_size,
            "highlight_color": processed_font_color,
            "Alignment": sub_position,
            "BorderStyle": "1",
            "Outline": "1",
            "Shadow": "2",
            "Blur": "21",
            "MarginL": "0",
            "MarginR": "0",
        }
        container.config.from_dict(config_dict)

        # Run application
        app_instance = Application(container, logger)

        try:
            await app_instance.run()
            console.print(
                "\n[bold green]✅ Pipeline completed successfully![/bold green]"
            )
        except Exception as e:
            logger.exception("Pipeline failed")
            console.print(f"\n[bold red]❌ Pipeline failed: {e}[/bold red]")
            raise typer.Exit(code=1) from e

    # Run the async function
    asyncio.run(_create())
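
Example usage (a minimal sketch): invoking the create command from a test or script via Typer's CliRunner. It assumes the Typer application object is exported as app from whisper_tiktok.main and that a video.json file exists in the working directory; running it executes the full pipeline.

from typer.testing import CliRunner

from whisper_tiktok.main import app  # assumption: the Typer instance is exposed as `app`

runner = CliRunner()
# Runs the real pipeline, so make sure video.json, ffmpeg and yt-dlp are available first.
result = runner.invoke(
    app,
    ["create", "--model", "base", "--tts", "en-US-ChristopherNeural", "--verbose"],
)
print(result.exit_code)
print(result.output)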

list_voices(language=typer.Option(None, '--language', '-l', help='Filter by language (e.g., en-US)'), gender=typer.Option(None, '--gender', '-g', help='Filter by gender (Male/Female)'))

List available TTS voices.

Source code in whisper_tiktok/main.py
@app.command()
def list_voices(
    language: Optional[str] = typer.Option(
        None,
        "--language",
        "-l",
        help="Filter by language (e.g., en-US)",
    ),
    gender: Optional[str] = typer.Option(
        None,
        "--gender",
        "-g",
        help="Filter by gender (Male/Female)",
    ),
):
    """List available TTS voices."""

    async def _list_voices():
        voices_manager = VoicesManager()
        voices_obj = await voices_manager.create()

        # Get all voices
        voices = voices_obj.voices

        # Apply filters
        if language:
            voices = [v for v in voices if v.get("Locale", "").startswith(language)]
        if gender:
            voices = [v for v in voices if v.get("Gender", "") == gender]

        # Display in a table
        table = Table(title="Available TTS Voices")
        table.add_column("Name", style="cyan")
        table.add_column("Locale", style="green")
        table.add_column("Gender", style="magenta")
        table.add_column("Voice Personalities", style="blue")
        table.add_column("Scenarios", style="yellow")

        for voice in sorted(voices, key=lambda x: x.get("Locale", "")):
            table.add_row(
                voice.get("ShortName", ""),
                voice.get("Locale", ""),
                voice.get("Gender", ""),
                ", ".join(voice["VoiceTag"].get("VoicePersonalities", [])),
                ", ".join(voice["VoiceTag"].get("TailoredScenarios", [])),
            )

        console.print(table)
        console.print(f"\n[dim]Total: {len(voices)} voices[/dim]")

    asyncio.run(_list_voices())

main()

Whisper TikTok - Create TikTok videos with AI-generated subtitles.

Source code in whisper_tiktok/main.py
@app.callback()
def main():
    """Whisper TikTok - Create TikTok videos with AI-generated subtitles."""

processors

Modules:

Name Description
video_processor

video_processor

Classes:

Name Description
ProcessingResult

Result of video processing.

VideoProcessor

Main orchestrator for video processing pipeline.

ProcessingResult dataclass

Result of video processing.

Source code in whisper_tiktok/processors/video_processor.py
@dataclass
class ProcessingResult:
    """Result of video processing."""

    uuid: str
    output_path: Path
    success: bool = True

VideoProcessor

Main orchestrator for video processing pipeline.

Methods:

Name Description
process

Execute the processing pipeline.

Source code in whisper_tiktok/processors/video_processor.py
class VideoProcessor:
    """Main orchestrator for video processing pipeline."""

    def __init__(
        self,
        uuid: str,
        video_data: dict,
        config: dict,
        strategies: list[ProcessingStrategy],
        logger: Logger,
    ):
        self.uuid = uuid
        self.video_data = video_data
        self.config = config
        self.strategies = strategies
        self.logger = logger

    async def process(self) -> ProcessingResult:
        """Execute the processing pipeline."""

        # Initialize context
        media_path = Path(self.config.get("workspace_path", ".")) / "media" / self.uuid
        output_path = (
            Path(self.config.get("workspace_path", ".")) / "output" / self.uuid
        )

        media_path.mkdir(parents=True, exist_ok=True)
        output_path.mkdir(parents=True, exist_ok=True)

        context = ProcessingContext(
            video_data=self.video_data,
            uuid=self.uuid,
            media_path=media_path,
            output_path=output_path,
            config=self.config,
        )

        # Execute each strategy
        try:
            for strategy in self.strategies:
                self.logger.info(f"Executing strategy: {strategy.__class__.__name__}")
                context = await strategy.execute(context)

            output_file = context.output_path / f"{self.uuid}.mp4"

            return ProcessingResult(
                uuid=self.uuid,
                output_path=output_file,
                success=True,
            )
        except Exception:
            self.logger.exception(f"Processing failed for video {self.uuid}")
            raise
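
Example (a minimal sketch, assuming the import path shown above): constructing a VideoProcessor directly with an empty strategy list. In the real application the strategies and configuration come from VideoCreatorFactory and the DI container; with no strategies, process() only creates the media/<uuid> and output/<uuid> directories and returns a ProcessingResult.

import asyncio
import logging
import uuid as uuid_lib

from whisper_tiktok.processors.video_processor import VideoProcessor

async def demo() -> None:
    processor = VideoProcessor(
        uuid=str(uuid_lib.uuid4()),
        video_data={"series": "Demo", "part": "Part 1", "text": "Hello", "outro": "Bye"},
        config={"workspace_path": "."},
        strategies=[],  # no steps executed; directories are still created
        logger=logging.getLogger("demo"),
    )
    result = await processor.process()
    print(result.uuid, result.output_path, result.success)

asyncio.run(demo())
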
process() async

Execute the processing pipeline.

Source code in whisper_tiktok/processors/video_processor.py
async def process(self) -> ProcessingResult:
    """Execute the processing pipeline."""

    # Initialize context
    media_path = Path(self.config.get("workspace_path", ".")) / "media" / self.uuid
    output_path = (
        Path(self.config.get("workspace_path", ".")) / "output" / self.uuid
    )

    media_path.mkdir(parents=True, exist_ok=True)
    output_path.mkdir(parents=True, exist_ok=True)

    context = ProcessingContext(
        video_data=self.video_data,
        uuid=self.uuid,
        media_path=media_path,
        output_path=output_path,
        config=self.config,
    )

    # Execute each strategy
    try:
        for strategy in self.strategies:
            self.logger.info(f"Executing strategy: {strategy.__class__.__name__}")
            context = await strategy.execute(context)

        output_file = context.output_path / f"{self.uuid}.mp4"

        return ProcessingResult(
            uuid=self.uuid,
            output_path=output_file,
            success=True,
        )
    except Exception:
        self.logger.exception(f"Processing failed for video {self.uuid}")
        raise

repositories

Modules:

Name Description
video_repository

video_repository

Classes:

Name Description
VideoRepository

Repository for video-related file operations.

VideoRepository

Repository for video-related file operations.

Methods:

Name Description
get_background_videos

Get list of available background videos.

random_background

Get random background video.

save_audio

Save audio file.

Source code in whisper_tiktok/repositories/video_repository.py
class VideoRepository:
    """Repository for video-related file operations."""

    def __init__(self, base_path: Path, logger: Logger):
        self.base_path = base_path
        self.logger = logger

    def save_audio(self, uuid: str, data: bytes) -> Path:
        """Save audio file."""
        path = self.base_path / uuid / f"{uuid}.mp3"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(data)
        return path

    def get_background_videos(self) -> list[Path]:
        """Get list of available background videos."""
        bg_path = self.base_path / "background"
        return list(bg_path.glob("*.mp4"))

    def random_background(self) -> Path:
        """Get random background video."""
        videos = self.get_background_videos()
        if not videos:
            raise ValueError("No background videos available")
        return random.choice(videos)
get_background_videos()

Get list of available background videos.

Source code in whisper_tiktok/repositories/video_repository.py
def get_background_videos(self) -> list[Path]:
    """Get list of available background videos."""
    bg_path = self.base_path / "background"
    return list(bg_path.glob("*.mp4"))
random_background()

Get random background video.

Source code in whisper_tiktok/repositories/video_repository.py
def random_background(self) -> Path:
    """Get random background video."""
    videos = self.get_background_videos()
    if not videos:
        raise ValueError("No background videos available")
    return random.choice(videos)
save_audio(uuid, data)

Save audio file.

Source code in whisper_tiktok/repositories/video_repository.py
def save_audio(self, uuid: str, data: bytes) -> Path:
    """Save audio file."""
    path = self.base_path / uuid / f"{uuid}.mp3"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(data)
    return path
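
Example (a minimal sketch, assuming the import path shown above): saving synthesized audio and picking a background clip. save_audio writes to <base_path>/<uuid>/<uuid>.mp3, and random_background raises ValueError when <base_path>/background contains no *.mp4 files.

import logging
from pathlib import Path

from whisper_tiktok.repositories.video_repository import VideoRepository

repo = VideoRepository(base_path=Path("media"), logger=logging.getLogger("repo"))

# Writes media/demo-uuid/demo-uuid.mp3, creating parent directories as needed.
audio_path = repo.save_audio("demo-uuid", b"\x00\x01")

# Picks a random clip from media/background/*.mp4, or raises ValueError if none exist.
try:
    background = repo.random_background()
except ValueError:
    background = None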

services

Modules:

Name Description
ffmpeg_service
transcription_service
tts_service
video_downloader

ffmpeg_service

Classes:

Name Description
FFmpegError

Custom exception for FFmpeg errors.

FFmpegService

Service for FFmpeg operations.

MediaInfo

Represents the result of running FFprobe.

FFmpegError

Bases: Exception

Custom exception for FFmpeg errors.

Source code in whisper_tiktok/services/ffmpeg_service.py
class FFmpegError(Exception):
    """Custom exception for FFmpeg errors."""

FFmpegService

Service for FFmpeg operations.

Methods:

Name Description
compose_video

Compose final video with background, audio, and subtitles.

get_media_info

Get media information using ffprobe.

Source code in whisper_tiktok/services/ffmpeg_service.py
class FFmpegService:
    """Service for FFmpeg operations."""

    def __init__(self, executor: CommandExecutor, logger: Logger):
        self.executor = executor
        self.logger = logger

    def _build_video_filters(self, subtitles: Path) -> str:
        return rf"crop=ih/16*9:ih,scale=w=1080:h=1920:flags=lanczos,gblur=sigma=2,ass={subtitles.as_posix()}"

    def _build_ffmpeg_command(
        self,
        background: Path,
        audio: Path,
        output: Path,
        start_time: int,
        duration: str,
        filters: str,
    ) -> str:
        return rf"ffmpeg -ss {start_time} -t {duration} -i {background.as_posix()} -i {audio.as_posix()} -map 0:v -map 1:a -filter:v {filters} -c:v libx264 -crf 23 -c:a aac -ac 2 -b:a 192K {output.as_posix()} -y -threads {os.cpu_count()}"

    def compose_video(
        self,
        background: Path,
        audio: Path,
        subtitles: Path,
        output: Path,
        start_time: int,
        duration: str,
    ) -> Path:
        """Compose final video with background, audio, and subtitles."""

        # Build filter complex
        filters = self._build_video_filters(subtitles)

        command = self._build_ffmpeg_command(
            background, audio, output, start_time, duration, filters
        )
        result = self.executor.execute(command)

        if result.returncode != 0:
            raise FFmpegError(f"Failed to compose video: {result.stderr}")

        return output

    def get_media_info(self, file_path: Path) -> MediaInfo:
        """Get media information using ffprobe."""
        command = f"ffprobe -v quiet -print_format json -show_format -show_streams {file_path.as_posix()}"
        result = self.executor.execute(command)

        if result.returncode != 0:
            raise FFmpegError(f"Failed to probe media: {result.stderr}")

        return MediaInfo.from_json(result)
compose_video(background, audio, subtitles, output, start_time, duration)

Compose final video with background, audio, and subtitles.

Source code in whisper_tiktok/services/ffmpeg_service.py
def compose_video(
    self,
    background: Path,
    audio: Path,
    subtitles: Path,
    output: Path,
    start_time: int,
    duration: str,
) -> Path:
    """Compose final video with background, audio, and subtitles."""

    # Build filter complex
    filters = self._build_video_filters(subtitles)

    command = self._build_ffmpeg_command(
        background, audio, output, start_time, duration, filters
    )
    result = self.executor.execute(command)

    if result.returncode != 0:
        raise FFmpegError(f"Failed to compose video: {result.stderr}")

    return output
get_media_info(file_path)

Get media information using ffprobe.

Source code in whisper_tiktok/services/ffmpeg_service.py
def get_media_info(self, file_path: Path) -> MediaInfo:
    """Get media information using ffprobe."""
    command = f"ffprobe -v quiet -print_format json -show_format -show_streams {file_path.as_posix()}"
    result = self.executor.execute(command)

    if result.returncode != 0:
        raise FFmpegError(f"Failed to probe media: {result.stderr}")

    return MediaInfo.from_json(result)
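
Example (a minimal sketch): composing a video from pre-existing artifacts. The import path follows the source location shown above; executor stands for an already-constructed CommandExecutor, whose own signature is documented in the execution module rather than here.

import logging
from pathlib import Path

from whisper_tiktok.services.ffmpeg_service import FFmpegService

def render(executor, logger: logging.Logger) -> Path:
    service = FFmpegService(executor, logger)
    # Probe the narration first to size the clip to the audio length.
    info = service.get_media_info(Path("media/demo/demo.mp3"))
    return service.compose_video(
        background=Path("background/clip.mp4"),
        audio=Path("media/demo/demo.mp3"),
        subtitles=Path("media/demo/demo.ass"),
        output=Path("output/demo/demo.mp4"),
        start_time=0,
        duration=info.convert_time(info.duration),
    )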

MediaInfo

Bases: NamedTuple

Represents the result of running FFprobe.

Attributes:

Name Type Description
return_code int

The return code of the FFprobe process.

json str

The JSON output from FFprobe.

error str

The error message from FFprobe, if any.

Methods:

Name Description
convert_time

Converts time in seconds to a string in the format "hh:mm:ss.mmm".

from_json

Creates a MediaInfo instance from FFprobe execution result.

Source code in whisper_tiktok/services/ffmpeg_service.py
class MediaInfo(NamedTuple):
    """Represents the result of running FFprobe.

    Attributes:
        return_code (int): The return code of the FFprobe process.
        json (str): The JSON output from FFprobe.
        error (str): The error message from FFprobe, if any.
    """

    return_code: int
    json: str
    error: str

    @staticmethod
    def from_json(result: ExecutionResult) -> "MediaInfo":
        """Creates a MediaInfo instance from FFprobe execution result."""
        return MediaInfo(
            return_code=result.returncode, json=result.stdout, error=result.stderr
        )

    @staticmethod
    def convert_time(time_in_seconds: float) -> str:
        """
        Converts time in seconds to a string in the format "hh:mm:ss.mmm".

        Args:
            time_in_seconds (float): The time in seconds to be converted.

        Returns:
            str: The time in the format "hh:mm:ss.mmm".
        """
        hours = int(time_in_seconds // 3600)
        minutes = int((time_in_seconds % 3600) // 60)
        seconds = int(time_in_seconds % 60)
        milliseconds = int((time_in_seconds - int(time_in_seconds)) * 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"

    @property
    def duration(self) -> float:
        """Extracts the duration of the audio stream from the FFprobe JSON output."""
        d = json.loads(self.json)

        streams = d.get("streams", [])
        audio_stream = None
        for stream in streams:
            if stream["codec_type"] == "audio":
                audio_stream = stream
                break

        if audio_stream is None:
            raise ValueError("No audio stream found")

        return float(audio_stream["duration"])
duration property

Extracts the duration of the audio stream from the FFprobe JSON output.

convert_time(time_in_seconds) staticmethod

Converts time in seconds to a string in the format "hh:mm:ss.mmm".

Parameters:

Name Type Description Default
time_in_seconds float

The time in seconds to be converted.

required

Returns:

Name Type Description
str str

The time in the format "hh:mm:ss.mmm".

Source code in whisper_tiktok/services/ffmpeg_service.py
@staticmethod
def convert_time(time_in_seconds: float) -> str:
    """
    Converts time in seconds to a string in the format "hh:mm:ss.mmm".

    Args:
        time_in_seconds (float): The time in seconds to be converted.

    Returns:
        str: The time in the format "hh:mm:ss.mmm".
    """
    hours = int(time_in_seconds // 3600)
    minutes = int((time_in_seconds % 3600) // 60)
    seconds = int(time_in_seconds % 60)
    milliseconds = int((time_in_seconds - int(time_in_seconds)) * 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
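
Worked example: 3723.5 seconds is 1 hour, 2 minutes, 3 seconds and 500 milliseconds, so the formatted string is "01:02:03.500" (import path assumed from the source location above).

from whisper_tiktok.services.ffmpeg_service import MediaInfo

assert MediaInfo.convert_time(3723.5) == "01:02:03.500"
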
from_json(result) staticmethod

Creates a MediaInfo instance from FFprobe execution result.

Source code in whisper_tiktok/services/ffmpeg_service.py
@staticmethod
def from_json(result: ExecutionResult) -> "MediaInfo":
    """Creates a MediaInfo instance from FFprobe execution result."""
    return MediaInfo(
        return_code=result.returncode, json=result.stdout, error=result.stderr
    )

transcription_service

Classes:

Name Description
TranscriptionService

Service for transcribing audio using Whisper.

TranscriptionService

Bases: ITranscriptionService

Service for transcribing audio using Whisper.

Source code in whisper_tiktok/services/transcription_service.py
class TranscriptionService(ITranscriptionService):
    """Service for transcribing audio using Whisper."""

    def __init__(self, logger):
        self.logger = logger

    def transcribe(
        self,
        audio_file: Path,
        srt_file: Path,
        ass_file: Path,
        model: str,
        options: dict,
    ) -> tuple[Path, Path]:
        self.logger.debug(
            f"Transcribing {audio_file} with model {model} and options {options}"
        )

        whisper_model = stable_whisper.load_model(
            model, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )
        self.logger.debug(f"Loaded Whisper model: {model}")

        transcription = whisper_model.transcribe(
            audio_file.as_posix(),
            regroup=True,
            fp16=False,
            word_timestamps=True,
        )
        transcription.to_srt_vtt(srt_file.as_posix(), word_level=True)
        transcription.to_ass(ass_file.as_posix(), word_level=True, **options)
        return (srt_file, ass_file)
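
Example (a minimal sketch, assuming the import path shown above): transcribing a narration file into word-level SRT and ASS subtitles. The options mapping is forwarded to stable_whisper's to_ass(), so ASS style keys such as Fontname and Fontsize can be passed there; the exact set of accepted keys is defined by stable_whisper, not by this service.

import logging
from pathlib import Path

from whisper_tiktok.services.transcription_service import TranscriptionService

service = TranscriptionService(logging.getLogger("whisper"))
srt, ass = service.transcribe(
    audio_file=Path("media/demo/demo.mp3"),
    srt_file=Path("media/demo/demo.srt"),
    ass_file=Path("media/demo/demo.ass"),
    model="base",
    options={"Fontname": "Lexend Bold", "Fontsize": 21},
)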

tts_service

Classes:

Name Description
TTSService

Text-to-Speech service using a hypothetical TTS engine.

TTSService

Bases: ITTSService

Text-to-Speech service using a hypothetical TTS engine.

Methods:

Name Description
synthesize

Synthesize speech from text and save to output file.

Source code in whisper_tiktok/services/tts_service.py
class TTSService(ITTSService):
    """Text-to-Speech service using a hypothetical TTS engine."""

    def __init__(self, logger: Logger):
        self.logger = logger

    async def synthesize(
        self,
        text: str,
        output_file: Path,
        voice: str = "en-US-ChristopherNeural",
    ) -> None:
        """
        Synthesize speech from text and save to output file.

        Args:
            text (str): The text to be converted to speech.
            output_file (Path): The path to save the synthesized audio file.
            voice (str): The voice to be used for synthesis.
        """
        self.logger.debug(f"Synthesizing speech to {output_file} using voice {voice}")
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_file.as_posix())
synthesize(text, output_file, voice='en-US-ChristopherNeural') async

Synthesize speech from text and save to output file.

Parameters:

Name Type Description Default
text str

The text to be converted to speech.

required
output_file Path

The path to save the synthesized audio file.

required
voice str

The voice to be used for synthesis.

'en-US-ChristopherNeural'
Source code in whisper_tiktok/services/tts_service.py
async def synthesize(
    self,
    text: str,
    output_file: Path,
    voice: str = "en-US-ChristopherNeural",
) -> None:
    """
    Synthesize speech from text and save to output file.

    Args:
        text (str): The text to be converted to speech.
        output_file (Path): The path to save the synthesized audio file.
        voice (str): The voice to be used for synthesis.
    """
    self.logger.debug(f"Synthesizing speech to {output_file} using voice {voice}")
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file.as_posix())
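
Example (a minimal sketch, assuming the import path shown above): synthesizing a short narration with the default voice. The parent directory of output_file must already exist, since the service writes directly to the given path.

import asyncio
import logging
from pathlib import Path

from whisper_tiktok.services.tts_service import TTSService

async def speak() -> None:
    service = TTSService(logging.getLogger("tts"))
    await service.synthesize(
        text="Hello from Whisper TikTok.",
        output_file=Path("demo.mp3"),
        voice="en-US-ChristopherNeural",
    )

asyncio.run(speak())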

video_downloader

Classes:

Name Description
VideoDownloadError

Custom exception for video download errors.

VideoDownloaderService

YouTube video downloader using yt-dlp.

VideoDownloadError

Bases: Exception

Custom exception for video download errors.

Source code in whisper_tiktok/services/video_downloader.py
class VideoDownloadError(Exception):
    """Custom exception for video download errors."""

VideoDownloaderService

Bases: IVideoDownloader

YouTube video downloader using yt-dlp.

Methods:

Name Description
download

Download video from URL.

Source code in whisper_tiktok/services/video_downloader.py
class VideoDownloaderService(IVideoDownloader):
    """YouTube video downloader using yt-dlp."""

    def __init__(self, executor: CommandExecutor, logger: Logger):
        self.executor = executor
        self.logger = logger

    def download(self, url: str, output_dir: Path) -> Path:
        """Download video from URL."""
        output_dir.mkdir(parents=True, exist_ok=True)

        command = rf"yt-dlp -f bestvideo[ext=mp4] --restrict-filenames -o %(id)s.%(ext)s {url}"
        result = self.executor.execute(command, cwd=output_dir)

        if result.returncode != 0:
            raise VideoDownloadError(f"Failed to download: {result.stderr}")

        # Find downloaded file
        videos = list(output_dir.glob("*.mp4"))
        if not videos:
            raise VideoDownloadError("No video file found after download")

        return videos[-1]  # last glob match (order is not guaranteed to be the newest file)
download(url, output_dir)

Download video from URL.

Source code in whisper_tiktok/services/video_downloader.py
def download(self, url: str, output_dir: Path) -> Path:
    """Download video from URL."""
    output_dir.mkdir(parents=True, exist_ok=True)

    command = rf"yt-dlp -f bestvideo[ext=mp4] --restrict-filenames -o %(id)s.%(ext)s {url}"
    result = self.executor.execute(command, cwd=output_dir)

    if result.returncode != 0:
        raise VideoDownloadError(f"Failed to download: {result.stderr}")

    # Find downloaded file
    videos = list(output_dir.glob("*.mp4"))
    if not videos:
        raise VideoDownloadError("No video file found after download")

    return videos[-1]  # last glob match (order is not guaranteed to be the newest file)
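
Example (a minimal sketch): downloading a background clip. The import path follows the source location above, and executor again stands for a pre-built CommandExecutor instance.

import logging
from pathlib import Path

from whisper_tiktok.services.video_downloader import VideoDownloaderService

def fetch_background(executor, logger: logging.Logger) -> Path:
    downloader = VideoDownloaderService(executor, logger)
    # yt-dlp writes <video id>.mp4 into output_dir; the service returns one of the matches.
    return downloader.download(
        url="https://www.youtube.com/watch?v=intRX7BRA90",
        output_dir=Path("background"),
    )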

strategies

Modules:

Name Description
processing_strategy

processing_strategy

Classes:

Name Description
DownloadBackgroundStrategy

Strategy for downloading background video.

ProcessingContext

Context object passed through processing pipeline.

ProcessingStrategy

Base strategy for video processing steps.

TTSGenerationStrategy

Strategy for generating TTS audio.

TikTokUploadStrategy

Strategy for uploading videos to TikTok.

TranscriptionStrategy

Strategy for transcribing audio to generate subtitles.

VideoCompositionStrategy

Strategy for composing the final video.

DownloadBackgroundStrategy

Bases: ProcessingStrategy

Strategy for downloading background video.

Source code in whisper_tiktok/strategies/processing_strategy.py
class DownloadBackgroundStrategy(ProcessingStrategy):
    """Strategy for downloading background video."""

    def __init__(self, downloader: IVideoDownloader, logger: Logger):
        self.downloader = downloader
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        url = context.config["background_url"]
        background_path = self.downloader.download(url, Path("background"))
        context.artifacts["background_video"] = background_path
        self.logger.info(f"Downloaded background: {background_path}")
        return context

ProcessingContext dataclass

Context object passed through processing pipeline.

Source code in whisper_tiktok/strategies/processing_strategy.py
@dataclass
class ProcessingContext:
    """Context object passed through processing pipeline."""

    video_data: dict
    uuid: str
    media_path: Path
    output_path: Path
    config: dict
    artifacts: dict = field(default_factory=dict)

ProcessingStrategy

Bases: ABC

Base strategy for video processing steps.

Methods:

Name Description
execute

Execute processing step and update context.

Source code in whisper_tiktok/strategies/processing_strategy.py
class ProcessingStrategy(ABC):
    """Base strategy for video processing steps."""

    @abstractmethod
    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        """Execute processing step and update context."""
execute(context) abstractmethod async

Execute processing step and update context.

Source code in whisper_tiktok/strategies/processing_strategy.py
@abstractmethod
async def execute(self, context: ProcessingContext) -> ProcessingContext:
    """Execute processing step and update context."""

TTSGenerationStrategy

Bases: ProcessingStrategy

Strategy for generating TTS audio.

Methods:

Name Description
execute

Integrates TTS into the pipeline

Source code in whisper_tiktok/strategies/processing_strategy.py
class TTSGenerationStrategy(ProcessingStrategy):
    """Strategy for generating TTS audio."""

    def __init__(self, tts_service: ITTSService, logger: Logger):
        self.tts_service = tts_service
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        """Integrates TTS into the pipeline"""
        text = f"{context.video_data['series']} - {context.video_data['part']}.\n"
        text += f"{context.video_data['text']}\n"
        text += f"{context.video_data['outro']}"

        output_file = context.media_path / f"{context.uuid}.mp3"
        voice = context.config.get("tts_voice", "en-US-ChristopherNeural")

        await self.tts_service.synthesize(text, output_file, voice)
        context.artifacts["audio_file"] = output_file

        self.logger.info(f"Generated TTS audio: {output_file}")
        return context
execute(context) async

Integrates TTS into the pipeline

Source code in whisper_tiktok/strategies/processing_strategy.py
async def execute(self, context: ProcessingContext) -> ProcessingContext:
    """Integrates TTS into the pipeline"""
    text = f"{context.video_data['series']} - {context.video_data['part']}.\n"
    text += f"{context.video_data['text']}\n"
    text += f"{context.video_data['outro']}"

    output_file = context.media_path / f"{context.uuid}.mp3"
    voice = context.config.get("tts_voice", "en-US-ChristopherNeural")

    await self.tts_service.synthesize(text, output_file, voice)
    context.artifacts["audio_file"] = output_file

    self.logger.info(f"Generated TTS audio: {output_file}")
    return context

TikTokUploadStrategy

Bases: ProcessingStrategy

Strategy for uploading videos to TikTok.

Source code in whisper_tiktok/strategies/processing_strategy.py
class TikTokUploadStrategy(ProcessingStrategy):
    """Strategy for uploading videos to TikTok."""

    def __init__(self, uploader, logger: Logger):
        self.uploader = uploader
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        raise NotImplementedError("TikTok upload not implemented yet.")

TranscriptionStrategy

Bases: ProcessingStrategy

Strategy for transcribing audio to generate subtitles.

Source code in whisper_tiktok/strategies/processing_strategy.py
class TranscriptionStrategy(ProcessingStrategy):
    """Strategy for transcribing audio to generate subtitles."""

    def __init__(self, transcription_service: ITranscriptionService, logger: Logger):
        self.transcription_service = transcription_service
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        audio_file = context.artifacts.get("audio_file")
        if not audio_file:
            raise ValueError("Audio file not found in context artifacts.")

        srt_file = context.media_path / f"{context.uuid}.srt"
        ass_file = context.media_path / f"{context.uuid}.ass"
        self.transcription_service.transcribe(
            audio_file,
            srt_file,
            ass_file,
            model=context.config["model"],
            options=context.config,
        )
        context.artifacts["srt_file"] = srt_file
        context.artifacts["ass_file"] = ass_file
        self.logger.info(f"Generated transcription SRT: {srt_file}")
        self.logger.info(f"Generated transcription ASS: {ass_file}")
        return context

VideoCompositionStrategy

Bases: ProcessingStrategy

Strategy for composing the final video.

Source code in whisper_tiktok/strategies/processing_strategy.py
class VideoCompositionStrategy(ProcessingStrategy):
    """Strategy for composing the final video."""

    def __init__(self, ffmpeg_service: FFmpegService, logger: Logger):
        self.ffmpeg_service = ffmpeg_service
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        background_video = context.artifacts["background_video"]
        audio_file = context.artifacts["audio_file"]
        ass_file = context.artifacts["ass_file"]

        # Get video duration and audio duration to calculate start time
        audio_info = self.ffmpeg_service.get_media_info(file_path=audio_file)
        duration = audio_info.duration
        str_duration = audio_info.convert_time(time_in_seconds=duration)

        # Then compose video
        output_file = context.output_path / f"{context.uuid}.mp4"

        # Implementation using FFmpegService
        self.ffmpeg_service.compose_video(
            background=background_video,
            audio=audio_file,
            subtitles=ass_file,
            output=output_file,
            start_time=0,
            duration=str_duration,
        )

        context.artifacts["final_video"] = output_file
        self.logger.info(f"Composed video: {output_file}")
        return context
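
To add a new pipeline step, implement ProcessingStrategy: read inputs from context.artifacts, perform one step, write results back, and return the context. A minimal, hypothetical example (import path assumed from the source location above):

from logging import Logger

from whisper_tiktok.strategies.processing_strategy import (
    ProcessingContext,
    ProcessingStrategy,
)

class LogArtifactsStrategy(ProcessingStrategy):
    """Hypothetical step that only reports which artifacts exist so far."""

    def __init__(self, logger: Logger):
        self.logger = logger

    async def execute(self, context: ProcessingContext) -> ProcessingContext:
        for name, value in context.artifacts.items():
            self.logger.info(f"artifact {name}: {value}")
        return context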

utils

Modules:

Name Description
color_utils

color_utils

Functions:

Name Description
rgb_to_bgr

Convert RGB hex to BGR hex.

validate_hex_color

Validate if the input string is a valid hex color.

rgb_to_bgr(rgb)

Convert RGB hex to BGR hex.

Parameters:

Name Type Description Default
rgb str

RGB hex string (e.g., "#RRGGBB" or "RRGGBB")

required

Returns:

Type Description
str

BGR hex string (e.g., "BBGGRR")

Source code in whisper_tiktok/utils/color_utils.py
def rgb_to_bgr(rgb: str) -> str:
    """Convert RGB hex to BGR hex.

    Args:
        rgb: RGB hex string (e.g., "#RRGGBB" or "RRGGBB")

    Returns:
        BGR hex string (e.g., "BBGGRR")
    """
    # Validate input length
    if len(rgb) != 6 and len(rgb) != 7:
        raise ValueError("RGB hex must be 6 or 7 characters long (including #).")

    # Validate hex characters
    match = validate_hex_color(rgb)

    if not match:
        raise ValueError("Invalid RGB hex format.")

    if rgb.startswith("#"):
        rgb = rgb[1:]
    r, g, b = rgb[0:2], rgb[2:4], rgb[4:6]
    return b + g + r
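
Worked example: the CLI's default subtitle color FFF000 (RGB yellow) becomes 00F0FF in BGR, the byte order used by ASS subtitle styles (import path assumed from the source location above).

from whisper_tiktok.utils.color_utils import rgb_to_bgr

assert rgb_to_bgr("#FFF000") == "00F0FF"
assert rgb_to_bgr("FFF000") == "00F0FF"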

validate_hex_color(color)

Validate if the input string is a valid hex color.

Source code in whisper_tiktok/utils/color_utils.py
def validate_hex_color(color: str) -> bool:
    """Validate if the input string is a valid hex color."""
    pattern = r"^#?([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})$"
    return bool(re.match(pattern, color))

voice_manager

Classes:

Name Description
VoicesManager

Wrapper for edge_tts VoicesManager.

VoicesManager

Wrapper for edge_tts VoicesManager.

Methods:

Name Description
create

Create and return voices manager object.

find

Find a voice by gender and locale.

Source code in whisper_tiktok/voice_manager.py
class VoicesManager:
    """Wrapper for edge_tts VoicesManager."""

    @staticmethod
    async def create():
        """Create and return voices manager object."""
        return await edge_tts.VoicesManager.create()

    @staticmethod
    def find(voices, gender: str, locale: str) -> Any:
        """Find a voice by gender and locale.

        Args:
            voices: Voices manager object from create()
            gender: Gender filter (Male/Female)
            locale: Language locale filter (e.g., en-US)

        Returns:
            Dictionary with voice information

        Raises:
            ValueError: If no voice found
        """
        result = voices.find(Gender=gender, Locale=locale)
        if not result or len(result) == 0:
            raise ValueError(f"No voice found for {gender} - {locale}")
        # Return the first result as a dict-like object
        return result[0]

create() async staticmethod

Create and return voices manager object.

Source code in whisper_tiktok/voice_manager.py
@staticmethod
async def create():
    """Create and return voices manager object."""
    return await edge_tts.VoicesManager.create()

find(voices, gender, locale) staticmethod

Find a voice by gender and locale.

Parameters:

Name Type Description Default
voices

Voices manager object from create()

required
gender str

Gender filter (Male/Female)

required
locale str

Language locale filter (e.g., en-US)

required

Returns:

Type Description
Any

Dictionary with voice information

Raises:

Type Description
ValueError

If no voice found

Source code in whisper_tiktok/voice_manager.py
@staticmethod
def find(voices, gender: str, locale: str) -> Any:
    """Find a voice by gender and locale.

    Args:
        voices: Voices manager object from create()
        gender: Gender filter (Male/Female)
        locale: Language locale filter (e.g., en-US)

    Returns:
        Dictionary with voice information

    Raises:
        ValueError: If no voice found
    """
    result = voices.find(Gender=gender, Locale=locale)
    if not result or len(result) == 0:
        raise ValueError(f"No voice found for {gender} - {locale}")
    # Return the first result as a dict-like object
    return result[0]
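
Example (a minimal sketch, assuming the import path shown above): picking a voice by gender and locale, similar to what the create command does with --random-voice.

import asyncio

from whisper_tiktok.voice_manager import VoicesManager

async def pick_voice() -> str:
    voices_obj = await VoicesManager.create()
    voice = VoicesManager.find(voices_obj, gender="Male", locale="en-US")
    return voice.get("ShortName") or voice.get("Name")

print(asyncio.run(pick_voice()))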