Skip to content

images

Image generation module for Direktor.

direktor.core.images

Image generation module for Direktor.

This module handles image prompt generation and image creation using FLUX.

generate_image_prompts(transcript, temp_dir)

Generate image prompts from a transcript using GPT.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| transcript | | Transcript dictionary with chunks and timestamps | required |
| temp_dir | | Temporary directory for output files | required |

Returns:

| Type | Description |
| --- | --- |
| | List of image prompts with timestamps |

Source code in direktor/core/images.py
def generate_image_prompts(transcript, temp_dir):
    """
    Generate image prompts from a transcript using GPT.

    Results are cached in ``<temp_dir>/image_prompts.json``. If that file
    already exists it is loaded and returned as-is, and the model is not
    called.

    Args:
        transcript: Transcript dictionary with chunks and timestamps. Its
            ``"chunks"`` entry is handed to ``aggregate_chunks``; each
            aggregated chunk is read for ``"text"`` and for the first two
            items of ``"timestamp"``.
        temp_dir: Temporary directory for output files

    Returns:
        List of image prompts with timestamps: one dict per aggregated chunk
        of the form ``{"time": chunk["timestamp"][0], "prompt": <str>}``.
    """
    prompts_file = os.path.join(temp_dir, "image_prompts.json")
    if os.path.exists(prompts_file):
        # Cache hit: reuse the prompts from a previous run.
        with open(prompts_file, "r", encoding="utf-8") as f:
            return json.load(f)

    client = OpenAI()
    all_prompts = []

    # Aggregate chunks to approximately 30-second segments
    aggregated_chunks = aggregate_chunks(transcript["chunks"], target_duration=30)

    for chunk in tqdm(aggregated_chunks, desc="Generating image prompts"):
        response = client.chat.completions.create(
            model=GPT4_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "You are an AI assistant that generates image prompts based on podcast transcripts. Generate a single, vivid image prompt that captures the main theme or most striking visual element from the given text.",
                },
                {
                    "role": "user",
                    "content": f"Generate an stable diffusion generation prompt for the following podcast transcript segment:\n\nText: {chunk['text']}\nTimestamp: {chunk['timestamp'][0]} - {chunk['timestamp'][1]}",
                },
            ],
        )

        # NOTE(review): `content` may be None for some responses (e.g. a
        # refusal), in which case `.strip()` raises — confirm whether that
        # needs explicit handling.
        prompt = response.choices[0].message.content.strip()
        all_prompts.append({"time": chunk["timestamp"][0], "prompt": prompt})

    # Write to a sibling temp file and rename it into place. The cache is
    # trusted purely on existence, so a partially written file left behind by
    # an interrupted run would otherwise break every subsequent call.
    tmp_file = prompts_file + ".tmp"
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(all_prompts, f)
    os.replace(tmp_file, prompts_file)

    return all_prompts

generate_images(prompts, temp_dir)

Generate images from prompts using the FLUX model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| prompts | | List of image prompts with timestamps | required |
| temp_dir | | Temporary directory for output files | required |

Returns:

| Type | Description |
| --- | --- |
| | List of paths to generated image files |

Source code in direktor/core/images.py
def generate_images(prompts, temp_dir):
    """
    Render one image per prompt with the FLUX model.

    Images are stored as ``<temp_dir>/images/image_<index>.webp``. An index
    whose file already exists is not regenerated; its existing path is
    reused in the result.

    Args:
        prompts: List of image prompts with timestamps; only the ``"prompt"``
            key of each entry is read here.
        temp_dir: Temporary directory for output files

    Returns:
        List of paths to generated image files, in prompt order. For newly
        generated images the entry is whatever ``download_file`` returns.
    """
    output_dir = os.path.join(temp_dir, "images")
    os.makedirs(output_dir, exist_ok=True)

    collected = []
    progress = tqdm(prompts, desc="Generating images")

    for index, entry in enumerate(progress):
        target_path = os.path.join(output_dir, f"image_{index}.webp")

        if not os.path.exists(target_path):
            # Nothing cached for this index: run the model and fetch the
            # first output it reports.
            request = dict(
                prompt=entry["prompt"],
                num_outputs=1,
                aspect_ratio="16:9",
                output_format="webp",
                output_quality=80,
                seed=0,
                disable_safety_checker=True,
            )
            result = run_replicate_model(FLUX_MODEL, request)
            target_path = download_file(result[0], target_path)

        collected.append(target_path)

    return collected