# linkedin-carousel-generator/linkedin_carousel_app/generator.py

import os
from google import genai
from google.genai import types
from styles import CarouselContent, STYLES, Slide
from PIL import Image
import io
import json

# Global Gemini API client shared by every generator function in this module.
# It stays None until configure_genai() is called with a non-empty API key
# (per the original note, that call is made from app.py); the generator
# functions raise ValueError while it is still unset.
client = None

def configure_genai(api_key: str):
    """Builds the module-wide Gemini client from the given API key.

    An empty or missing key is ignored, so whatever client was configured
    before (or None) is left in place.
    """
    global client
    if not api_key:
        return
    # Gemini 3 models often require v1alpha
    client = genai.Client(api_key=api_key, http_options={'api_version': 'v1alpha'})

def generate_carousel_content(source_text: str, style_name: str, language: str = "English") -> tuple[CarouselContent, dict]:
    """
    Generates the text content for the carousel using Gemini 3 Flash Preview.

    Args:
        source_text: Raw text the carousel should be derived from.
        style_name: Key into STYLES; an unknown name falls back to the first
            registered style.
        language: Language requested for headlines, bodies and the caption.

    Returns:
        (CarouselContent, usage) where usage is a dict with integer
        "input_tokens" and "output_tokens" (0 when the API reports no count).

    Raises:
        ValueError: If the client has not been configured, or the model
            returned neither a parsed object nor any text.
        Exception: Any error from the API call or from parsing the response
            is printed and re-raised unchanged.
    """
    if not client:
        raise ValueError("API Client not configured.")

    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_desc = style_info["description"]

    prompt = f"""
    You are a professional LinkedIn content creator.
    Create a structured carousel based on the following source text.

    Target Audience: Professionals on LinkedIn.
    Tone: Educational, Insightful, Professional.
    Style: {style_name} - {style_desc}
    Output Language: {language}

    Source Text:
    {source_text}

    Requirements:
    1.  Create 5 to 7 slides.
    2.  Each slide must have a 'headline' (short, punchy) and 'body' (clear, concise) in {language}.
    3.  **Code Snippets**: If the source text contains code or technical examples, extract relevant snippets (max 5-8 lines) and include them in the 'code_snippet' field. Ensure the code is properly formatted and adds educational value.
    4.  **Image Integration**: If the source text references specific images via URL (e.g., diagrams, screenshots), extract the URL into the 'source_image_url' field for the relevant slide. Ensure the 'image_prompt' describes how this image should be integrated (e.g., "Display the provided diagram prominently on a clean background").
    5.  'image_prompt': Describe a background image that fits the '{style_name}' style and the slide content. Use visual keywords. Do NOT request text in the image.
    6.  'post_text': Write the accompanying LinkedIn post caption in {language}.
    """

    try:
        response = client.models.generate_content(
            model="gemini-3-flash-preview",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=CarouselContent,
            )
        )

        # usage_metadata (and each count on it) is optional in the SDK
        # response; a missing value must not fail an otherwise good result.
        meta = response.usage_metadata
        usage = {
            "input_tokens": (meta.prompt_token_count or 0) if meta is not None else 0,
            "output_tokens": (meta.candidates_token_count or 0) if meta is not None else 0,
        }

        # Prefer the SDK's schema-parsed object when it is available.
        if response.parsed:
            return response.parsed, usage

        # Fallback: parse the raw JSON text ourselves. Guard against an empty
        # response so the failure is explicit instead of a TypeError from json.
        raw_text = response.text
        if not raw_text:
            raise ValueError("Model returned an empty response.")
        data = json.loads(raw_text)
        return CarouselContent(**data), usage

    except Exception as e:
        print(f"Content Generation Error: {e}")
        raise

def _image_to_png_part(image: Image.Image) -> types.Part:
    """Encodes a PIL image as PNG bytes wrapped in a Gemini content part."""
    # Pillow cannot write every mode as PNG (e.g. CMYK from a JPEG raises
    # OSError), so anything outside the writable set is converted to RGB first.
    png_writable_modes = ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA")
    if image.mode not in png_writable_modes:
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return types.Part.from_bytes(data=buffer.getvalue(), mime_type="image/png")


def generate_background_image(prompt: str, headline: str, body: str, code_snippet: str | None, style_name: str, reference_image: Image.Image | None = None, source_image: Image.Image | None = None) -> Image.Image:
    """
    Generates a slide image with text and optional code using Gemini 3 Image (Nano Banana Pro).

    Args:
        prompt: Visual description of the slide background.
        headline: Title text to render on the slide.
        body: Subtitle text to render on the slide.
        code_snippet: Optional code to render as a code block on the slide.
        style_name: Key into STYLES; an unknown name falls back to the first
            registered style.
        reference_image: Optional image whose look the slide should copy.
            When given, it replaces the textual style description.
        source_image: Optional image (diagram/photo) to integrate into the
            slide.

    Returns:
        The first inline image found in the model response, or a solid-colour
        placeholder when the request fails or returns no image.

    Raises:
        ValueError: If the client has not been configured.
    """
    if not client:
        raise ValueError("API Client not configured.")

    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_suffix = style_info.get("bg_prompt_suffix", "")

    # instructions for code block
    code_instr = ""
    if code_snippet:
        code_instr = (
            f"CODE BLOCK RENDERING: Include a syntax-highlighted code block containing this code: "
            f"\"\"\"{code_snippet}\"\"\" "
            f"Place it in a dark box or distinct section, ensure it is perfectly legible with a monospaced font style. "
        )

    # If reference image is provided, we tell it to prioritize that over text style descriptions
    style_instruction = f"in a {style_name} style" if not reference_image else "basing the style EXCLUSIVELY on the provided reference image"

    text_prompt = (
        f"Create a professional presentation slide {style_instruction}. "
        f"Visuals: {prompt}. {style_suffix if not reference_image else ''} "
        f"TEXT RENDERING: The image MUST include the following text clearly and legibly. "
        f"FONT CONSISTENCY: Use the 'Montserrat' font (or a clean, modern sans-serif like Roboto) for ALL text in this slide. "
        f"Title: \"{headline}\" "
        f"Subtitle: \"{body}\" "
        f"{code_instr}"
        f"Ensure the text is balanced, high contrast, and perfectly legible. "
        f"High quality, professional design, aspect ratio 4:5 (portrait)."
    )

    # contents[0] is always the text instruction; image parts follow it in
    # the order source image, then reference image.
    contents = [text_prompt]

    if source_image:
        # Add instruction to integrate source image
        contents[0] += " SOURCE IMAGE INTEGRATION: Prominently feature and integrate the provided source image (diagram/photo) into the slide layout. Ensure it fits harmoniously with the text and style."
        contents.append(_image_to_png_part(source_image))

    if reference_image:
        # Add instruction to use reference
        contents[0] += (
            " STYLE REFERENCE: Match the exact layout, font size, color palette, "
            "background texture, and overall composition of the provided reference image. "
            "Ignore any other style descriptions."
        )
        contents.append(_image_to_png_part(reference_image))

    # Best-effort by design: any failure below degrades to a placeholder
    # image instead of propagating to the caller.
    try:
        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents
        )

        # Search for image in parts
        for part in response.candidates[0].content.parts:
            if part.inline_data:
                return Image.open(io.BytesIO(part.inline_data.data))

        print("No inline_data found in response.")
        return create_placeholder_image(style_name)

    except Exception as e:
        print(f"Image generation failed: {e}. Using placeholder.")
        return create_placeholder_image(style_name)

def create_placeholder_image(style_name: str) -> Image.Image:
    """Builds a flat single-colour 1080x1350 RGB image used when generation fails.

    The colour comes from the style's "overlay_color" entry (dark grey when
    the style has none); an unknown style name falls back to the first
    registered style.
    """
    default_style = list(STYLES.values())[0]
    chosen_style = STYLES.get(style_name, default_style)
    fill = chosen_style.get("overlay_color", (50, 50, 50))
    # A 4-component colour carries alpha, which an RGB canvas cannot take.
    rgb_fill = fill[:3] if len(fill) == 4 else fill
    return Image.new('RGB', (1080, 1350), color=rgb_fill)