164 lines
7.0 KiB
Python
164 lines
7.0 KiB
Python
import os
|
|
from google import genai
|
|
from google.genai import types
|
|
from styles import CarouselContent, STYLES, Slide
|
|
from PIL import Image
|
|
import io
|
|
import json
|
|
|
|
# Global client - stays None until app.py initializes it by calling configure_genai()
|
|
client: genai.Client | None = None  # read by the generate_* functions, which raise ValueError while it is still None
|
|
|
|
def configure_genai(api_key: str):
    """Build the shared Gemini client from the provided API key.

    A falsy key (empty string or None) is ignored, which leaves the
    module-level ``client`` exactly as it was before the call.
    """
    global client
    if not api_key:
        return
    # Gemini 3 models often require v1alpha
    http_options = {'api_version': 'v1alpha'}
    client = genai.Client(api_key=api_key, http_options=http_options)
|
|
|
|
def generate_carousel_content(source_text: str, style_name: str, language: str = "English") -> tuple[CarouselContent, dict]:
    """
    Generates the text content for the carousel using Gemini 3 Flash Preview.

    Args:
        source_text: Raw text the carousel is derived from; it is embedded
            verbatim in the prompt.
        style_name: Key into STYLES. An unknown name falls back to the first
            entry of STYLES for the style description (the name itself is
            still sent to the model as given).
        language: Language requested for headlines, bodies and the caption.

    Returns:
        (CarouselContent, usage) where usage is
        {"input_tokens": int, "output_tokens": int}. A count is 0 when the
        response carries no usage metadata or the count itself is missing.

    Raises:
        ValueError: If the client is not configured, or the model returned
            no text that could be parsed.
        Exception: Any other API or parsing error is logged and re-raised
            unchanged.
    """
    if not client:
        raise ValueError("API Client not configured.")

    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_desc = style_info["description"]

    prompt = f"""
    You are a professional LinkedIn content creator.
    Create a structured carousel based on the following source text.

    Target Audience: Professionals on LinkedIn.
    Tone: Educational, Insightful, Professional.
    Style: {style_name} - {style_desc}
    Output Language: {language}

    Source Text:
    {source_text}

    Requirements:
    1. Create 5 to 7 slides.
    2. Each slide must have a 'headline' (short, punchy) and 'body' (clear, concise) in {language}.
    3. **Code Snippets**: If the source text contains code or technical examples, extract relevant snippets (max 5-8 lines) and include them in the 'code_snippet' field. Ensure the code is properly formatted and adds educational value.
    4. **Image Integration**: If the source text references specific images via URL (e.g., diagrams, screenshots), extract the URL into the 'source_image_url' field for the relevant slide. Ensure the 'image_prompt' describes how this image should be integrated (e.g., "Display the provided diagram prominently on a clean background").
    5. 'image_prompt': Describe a background image that fits the '{style_name}' style and the slide content. Use visual keywords. Do NOT request text in the image.
    6. 'post_text': Write the accompanying LinkedIn post caption in {language}.
    """

    try:
        response = client.models.generate_content(
            model="gemini-3-flash-preview",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=CarouselContent,
            ),
        )

        # Usage metadata (and each count on it) may be absent; a missing
        # count must not discard an otherwise successful generation.
        metadata = getattr(response, "usage_metadata", None)
        usage = {
            "input_tokens": getattr(metadata, "prompt_token_count", None) or 0,
            "output_tokens": getattr(metadata, "candidates_token_count", None) or 0,
        }

        # Prefer the SDK's schema-parsed object when it is available.
        if response.parsed:
            return response.parsed, usage

        # Fallback: parse the raw JSON text ourselves.
        raw_text = response.text
        if not raw_text:
            raise ValueError("Model returned an empty response.")
        data = json.loads(raw_text)
        return CarouselContent(**data), usage

    except Exception as e:
        print(f"Content Generation Error: {e}")
        # Bare raise re-raises the active exception without adding a frame.
        raise
|
|
|
|
def generate_background_image(prompt: str, headline: str, body: str, code_snippet: str | None, style_name: str, reference_image: Image.Image | None = None, source_image: Image.Image | None = None) -> Image.Image:
    """
    Generates a slide image with text and optional code using Gemini 3 Image (Nano Banana Pro).

    Args:
        prompt: Visual description of the slide background.
        headline: Text rendered as the slide title.
        body: Text rendered as the slide subtitle.
        code_snippet: Optional code to render as a code block on the slide.
        style_name: Key into STYLES; an unknown name falls back to the first
            entry of STYLES.
        reference_image: Optional image whose look the slide should copy.
            When given, the textual style description and the style's
            "bg_prompt_suffix" are left out of the prompt.
        source_image: Optional image (diagram/photo) to feature on the slide.

    Returns:
        The first inline image found in the model response. If the response
        contains no image, or the API call raises, a solid-color placeholder
        from create_placeholder_image() is returned instead.

    Raises:
        ValueError: If the client is not configured.
    """
    if not client:
        raise ValueError("API Client not configured.")

    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_suffix = style_info.get("bg_prompt_suffix", "")

    # instructions for code block
    code_instr = ""
    if code_snippet:
        code_instr = (
            f"CODE BLOCK RENDERING: Include a syntax-highlighted code block containing this code: "
            f"\"\"\"{code_snippet}\"\"\" "
            f"Place it in a dark box or distinct section, ensure it is perfectly legible with a monospaced font style. "
        )

    # If reference image is provided, we tell it to prioritize that over text style descriptions
    style_instruction = f"in a {style_name} style" if not reference_image else "basing the style EXCLUSIVELY on the provided reference image"

    text_prompt = (
        f"Create a professional presentation slide {style_instruction}. "
        f"Visuals: {prompt}. {style_suffix if not reference_image else ''} "
        f"TEXT RENDERING: The image MUST include the following text clearly and legibly. "
        f"FONT CONSISTENCY: Use the 'Montserrat' font (or a clean, modern sans-serif like Roboto) for ALL text in this slide. "
        f"Title: \"{headline}\" "
        f"Subtitle: \"{body}\" "
        f"{code_instr}"
        f"Ensure the text is balanced, high contrast, and perfectly legible. "
        f"High quality, professional design, aspect ratio 4:5 (portrait)."
    )

    # Image parts follow the text in a fixed order: source image first,
    # then the style reference.
    image_parts = []

    if source_image:
        text_prompt += " SOURCE IMAGE INTEGRATION: Prominently feature and integrate the provided source image (diagram/photo) into the slide layout. Ensure it fits harmoniously with the text and style."
        image_parts.append(_png_part(source_image))

    if reference_image:
        text_prompt += (
            " STYLE REFERENCE: Match the exact layout, font size, color palette, "
            "background texture, and overall composition of the provided reference image. "
            "Ignore any other style descriptions."
        )
        image_parts.append(_png_part(reference_image))

    contents = [text_prompt, *image_parts]

    try:
        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents,
        )

        # A response may come back without candidates, content or parts;
        # treat every such case as "no image" instead of relying on an
        # attribute/index error being swallowed below.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts or []) if content is not None else []

        # Search for image in parts
        for part in parts:
            if part.inline_data:
                return Image.open(io.BytesIO(part.inline_data.data))

        print("No inline_data found in response.")
        return create_placeholder_image(style_name)

    except Exception as e:
        # Deliberate best-effort: any API or decoding failure degrades to a
        # placeholder so the rest of the carousel can still be produced.
        print(f"Image generation failed: {e}. Using placeholder.")
        return create_placeholder_image(style_name)


def _png_part(image: Image.Image) -> types.Part:
    """Encode a PIL image as PNG bytes and wrap it as an inline request part."""
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return types.Part.from_bytes(data=buffer.getvalue(), mime_type="image/png")
|
|
|
|
def create_placeholder_image(style_name: str) -> Image.Image:
    """Return a 1080x1350 solid-color RGB image to use when generation fails.

    The fill is the style's "overlay_color" (or (50, 50, 50) when the style
    defines none); an unknown style name falls back to the first entry of
    STYLES. A four-component color has its last (alpha) component removed.
    """
    fallback_style = list(STYLES.values())[0]
    chosen_style = STYLES.get(style_name, fallback_style)
    fill = chosen_style.get("overlay_color", (50, 50, 50))
    # Drop alpha
    fill = fill[:3] if len(fill) == 4 else fill
    canvas_size = (1080, 1350)
    return Image.new('RGB', canvas_size, color=fill)
|