This commit is contained in:
Nicola Malizia
2026-02-24 15:30:25 +01:00
commit 2f2d5ed486
12 changed files with 1041 additions and 0 deletions

View File

@@ -0,0 +1,163 @@
import os
from google import genai
from google.genai import types
from styles import CarouselContent, STYLES, Slide
from PIL import Image
import io
import json
# Global client - will be initialized in app.py
# Module-level handle shared by every function in this file. It stays None
# until configure_genai() is called with a non-empty API key; the generation
# functions below raise ValueError while it is still unset.
client = None
def configure_genai(api_key: str):
    """Create the shared Gemini client from ``api_key``.

    The module-level ``client`` is replaced only when a non-empty key is
    supplied; a falsy key leaves the current value untouched and the
    function returns without doing anything.
    """
    global client
    if not api_key:
        return
    # Gemini 3 models often require v1alpha
    client = genai.Client(
        api_key=api_key,
        http_options={'api_version': 'v1alpha'},
    )
def generate_carousel_content(source_text: str, style_name: str, language: str = "English") -> tuple[CarouselContent, dict]:
    """
    Generate the carousel text content with the "gemini-3-flash-preview" model.

    Args:
        source_text: Text the carousel is derived from; embedded verbatim in the prompt.
        style_name: Key into STYLES. Unknown names fall back to the first entry of STYLES.
        language: Language requested for headlines, bodies and the post caption.

    Returns:
        (CarouselContent, usage) where usage is a dict with the keys
        "input_tokens" and "output_tokens". Counts default to 0 when the
        response carries no usage metadata.

    Raises:
        ValueError: If the module-level client is not configured, or if the
            model returned neither a parsed object nor any text.
        Exception: Any other error from the API call or from parsing is
            printed and re-raised unchanged.
    """
    if not client:
        raise ValueError("API Client not configured.")
    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_desc = style_info["description"]
    prompt = f"""
You are a professional LinkedIn content creator.
Create a structured carousel based on the following source text.
Target Audience: Professionals on LinkedIn.
Tone: Educational, Insightful, Professional.
Style: {style_name} - {style_desc}
Output Language: {language}
Source Text:
{source_text}
Requirements:
1. Create 5 to 7 slides.
2. Each slide must have a 'headline' (short, punchy) and 'body' (clear, concise) in {language}.
3. **Code Snippets**: If the source text contains code or technical examples, extract relevant snippets (max 5-8 lines) and include them in the 'code_snippet' field. Ensure the code is properly formatted and adds educational value.
4. **Image Integration**: If the source text references specific images via URL (e.g., diagrams, screenshots), extract the URL into the 'source_image_url' field for the relevant slide. Ensure the 'image_prompt' describes how this image should be integrated (e.g., "Display the provided diagram prominently on a clean background").
5. 'image_prompt': Describe a background image that fits the '{style_name}' style and the slide content. Use visual keywords. Do NOT request text in the image.
6. 'post_text': Write the accompanying LinkedIn post caption in {language}.
"""
    try:
        response = client.models.generate_content(
            model="gemini-3-flash-preview",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=CarouselContent,
            ),
        )
        # usage_metadata (and its counters) are optional on the response, so
        # read them defensively instead of failing a successful generation.
        usage_metadata = getattr(response, "usage_metadata", None)
        usage = {
            "input_tokens": getattr(usage_metadata, "prompt_token_count", None) or 0,
            "output_tokens": getattr(usage_metadata, "candidates_token_count", None) or 0,
        }
        # Prefer the SDK's schema-parsed object; fall back to decoding the raw
        # JSON text ourselves when it is absent.
        if response.parsed:
            return response.parsed, usage
        if not response.text:
            # Without this guard json.loads(None) fails with an opaque TypeError.
            raise ValueError("Model returned an empty response; no carousel content to parse.")
        data = json.loads(response.text)
        return CarouselContent(**data), usage
    except Exception as e:
        print(f"Content Generation Error: {e}")
        # Bare raise keeps the original traceback intact.
        raise
def _image_to_png_part(image: Image.Image):
    """Encode a PIL image as PNG bytes and wrap it in a content part for the API request."""
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return types.Part.from_bytes(data=buffer.getvalue(), mime_type="image/png")


def generate_background_image(prompt: str, headline: str, body: str, code_snippet: str | None, style_name: str, reference_image: Image.Image | None = None, source_image: Image.Image | None = None) -> Image.Image:
    """
    Generate a slide image with rendered text using the "gemini-3-pro-image-preview" model.

    Args:
        prompt: Description of the slide visuals.
        headline: Text the model is asked to render as the slide title.
        body: Text the model is asked to render as the subtitle.
        code_snippet: Optional code to render as a code block; skipped when falsy.
        style_name: Key into STYLES. Unknown names fall back to the first entry of STYLES.
        reference_image: Optional image whose look should be matched. When given,
            the textual style description and style suffix are left out of the prompt.
        source_image: Optional image to be integrated into the slide layout.

    Returns:
        The first inline image found in the response. A solid-colour placeholder
        from create_placeholder_image() is returned when the response holds no
        image or when the request fails.

    Raises:
        ValueError: If the module-level client is not configured.
    """
    if not client:
        raise ValueError("API Client not configured.")
    style_info = STYLES.get(style_name, list(STYLES.values())[0])
    style_suffix = style_info.get("bg_prompt_suffix", "")
    has_reference = reference_image is not None
    has_source = source_image is not None

    # Instructions for the optional code block.
    code_instr = ""
    if code_snippet:
        code_instr = (
            f"CODE BLOCK RENDERING: Include a syntax-highlighted code block containing this code: "
            f"\"\"\"{code_snippet}\"\"\" "
            f"Place it in a dark box or distinct section, ensure it is perfectly legible with a monospaced font style. "
        )

    # With a reference image, the image itself defines the style, so the
    # textual style description and suffix are dropped from the prompt.
    if has_reference:
        style_instruction = "basing the style EXCLUSIVELY on the provided reference image"
        visual_suffix = ""
    else:
        style_instruction = f"in a {style_name} style"
        visual_suffix = style_suffix

    text_prompt = (
        f"Create a professional presentation slide {style_instruction}. "
        f"Visuals: {prompt}. {visual_suffix} "
        f"TEXT RENDERING: The image MUST include the following text clearly and legibly. "
        f"FONT CONSISTENCY: Use the 'Montserrat' font (or a clean, modern sans-serif like Roboto) for ALL text in this slide. "
        f"Title: \"{headline}\" "
        f"Subtitle: \"{body}\" "
        f"{code_instr}"
        f"Ensure the text is balanced, high contrast, and perfectly legible. "
        f"High quality, professional design, aspect ratio 4:5 (portrait)."
    )

    # Image parts follow the text prompt in the request: source image first,
    # then the style reference.
    image_parts = []
    if has_source:
        text_prompt += " SOURCE IMAGE INTEGRATION: Prominently feature and integrate the provided source image (diagram/photo) into the slide layout. Ensure it fits harmoniously with the text and style."
        image_parts.append(_image_to_png_part(source_image))
    if has_reference:
        text_prompt += (
            " STYLE REFERENCE: Match the exact layout, font size, color palette, "
            "background texture, and overall composition of the provided reference image. "
            "Ignore any other style descriptions."
        )
        image_parts.append(_image_to_png_part(reference_image))
    contents = [text_prompt, *image_parts]

    try:
        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents
        )
        # A response may come back without candidates, content or parts;
        # treat each of those as "no image" rather than relying on the broad
        # except below to swallow a TypeError/IndexError.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content else None) or []
        for part in parts:
            if part.inline_data:
                return Image.open(io.BytesIO(part.inline_data.data))
        print("No inline_data found in response.")
        return create_placeholder_image(style_name)
    except Exception as e:
        # Best effort by design: any failure degrades to a placeholder slide.
        print(f"Image generation failed: {e}. Using placeholder.")
        return create_placeholder_image(style_name)
def create_placeholder_image(style_name: str) -> Image.Image:
    """Build a 1080x1350 solid-colour RGB image used when generation fails.

    The fill colour is the style's "overlay_color" (default (50, 50, 50)).
    A four-component colour has its last component dropped, because the
    placeholder is created in RGB mode. Unknown style names fall back to
    the first entry of STYLES.
    """
    fallback_style = list(STYLES.values())[0]
    fill = STYLES.get(style_name, fallback_style).get("overlay_color", (50, 50, 50))
    # Drop alpha
    fill = fill[:3] if len(fill) == 4 else fill
    return Image.new('RGB', (1080, 1350), color=fill)