commit 2f2d5ed486cf6cebac3cda95eca799ed16634796 Author: Nicola Malizia Date: Tue Feb 24 15:30:25 2026 +0100 FIRST diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b2f007b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +venv/ +linkedin_carousel_app/venv/ +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +env/ +.env +.venv +.git/ +.agents/ +linkedin_carousel_app/streamlit.log diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9f3d6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,158 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a PyInstaller installations, but they may +# be found in any project directory. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or even +# fail to install them. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock + +# PEP 582; used by e.g. github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +linkedin_carousel_app/venv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Project specific +linkedin_carousel_app/streamlit.log +.agents/ +.gemini/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..63ae72a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.10-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y + build-essential + curl + software-properties-common + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first to leverage Docker cache +COPY linkedin_carousel_app/requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application +COPY linkedin_carousel_app/ . 
+ +# Expose Streamlit port +EXPOSE 8501 + +# Add healthcheck +HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health + +# Run the application +ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..3e08657 --- /dev/null +++ b/README.md @@ -0,0 +1,104 @@ +# ✨ LinkedIn Carousel Generator + +A complete application that automatically generates professional, highly visual LinkedIn carousels from simple text or articles using Google's **Gemini 3 Flash Preview** (for content analysis and structuring) and **Gemini 3 Pro Image Preview** (for high-quality, style-consistent slide visuals). + +Built with **Python**, **Streamlit**, and the latest **Google GenAI SDK**. + +--- + +## 🚀 Features + +- **Automated Content Generation**: Extracts key points from any text or article and structures them into a compelling multi-slide narrative optimized for LinkedIn. +- **AI Image Generation**: Dynamically creates unique, high-quality slide images tailored to the content of each slide using Gemini's image models. +- **Consistent Styling**: Supports multiple visual styles or a custom user-uploaded reference image to maintain brand consistency across all slides. +- **Auto-Formatting**: Headlines, body text, and code snippets are rendered directly into each slide by the image model; Python Pillow then resizes every slide to 1080×1350 and adds a subtle navigation footer. +- **Cost Estimation**: Provides real-time token and cost estimation for transparency. +- **Multi-Language Support**: Generate carousels in over 10 different languages. +- **Export Ready**: Download all generated slides as a ZIP file, ready to be converted to PDF and posted directly to LinkedIn. 
+ +## 🛠️ Tech Stack + +- **Frontend**: [Streamlit](https://streamlit.io/) +- **AI Models**: Google Gemini (via `google-genai` SDK) +- **Image Processing**: [Pillow (PIL)](https://python-pillow.org/) +- **Deployment**: Docker & Docker Compose + +--- + +## 📦 Getting Started + +### Prerequisites + +- A **Google Gemini API Key**. You can get one from [Google AI Studio](https://aistudio.google.com/). +- **Docker** and **Docker Compose** (Recommended) or **Python 3.10+** (for local development). + +### Option 1: Running with Docker (Recommended) + +The easiest way to run the application is using Docker Compose. + +1. **Clone the repository** (if you haven't already) and navigate to the project directory: + ```bash + cd linkedin-carousel-generator + ``` + +2. **Run the application**: + You can either export your API key first or pass it inline: + ```bash + export GOOGLE_API_KEY="your_api_key_here" + docker compose up --build -d + ``` + *Alternatively, you can provide the API key later through the web UI.* + +3. **Access the app**: + Open your browser and navigate to [http://localhost:8501](http://localhost:8501). + +### Option 2: Local Python Setup + +1. **Navigate to the app directory**: + ```bash + cd linkedin-carousel-generator/linkedin_carousel_app + ``` + +2. **Create a virtual environment**: + ```bash + python -m venv venv + source venv/bin/activate # On Windows, use `venv\Scripts\activate` + ``` + +3. **Install the dependencies**: + ```bash + pip install -r requirements.txt + ``` + +4. 
**Run the application**: + ```bash + streamlit run app.py + ``` + +--- + +## 🏗️ Project Structure + +```text +linkedin-carousel-generator/ +├── Dockerfile # Container definition for the app +├── docker-compose.yml # Compose file for easy deployment +├── .dockerignore # Files to exclude from Docker builds +├── linkedin_carousel_app/ # Main application source code +│ ├── app.py # Main Streamlit UI and execution logic +│ ├── generator.py # Integration with Google Gemini for Text & Images +│ ├── styles.py # Configuration for visual styles and layouts +│ ├── utils.py # Helper functions (Image formatting, fonts, etc.) +│ └── requirements.txt # Python dependencies +└── README.md # Project documentation +``` + +## 💡 How to Use + +1. **Enter API Key**: Open the settings sidebar and paste your Gemini API Key if you haven't set it via the `GOOGLE_API_KEY` environment variable. +2. **Paste Text**: In the main area, paste the article, blog post, or ideas you want to convert into a carousel. +3. **Configure**: Select an output language and visual style (e.g., Minimalist Tech, Bold Corporate). +4. **Generate**: Click "Generate Carousel". The app first breaks your text down into slides, then generates the first slide as a style reference (unless you uploaded one) and renders the remaining slides concurrently. +5. **Download**: Once finished, preview the slides in the app and click the download button to get a ZIP archive of your completed carousel. + +*(Tip: To post on LinkedIn as a swipeable carousel, convert the downloaded images into a single PDF document first).* diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..69093f7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3.8' + +services: + carousel-generator: + build: . 
+ container_name: linkedin-carousel-app + ports: + - "8501:8501" + environment: + # You can pass your API key here or use an .env file + - GOOGLE_API_KEY=${GOOGLE_API_KEY} + restart: unless-stopped diff --git a/linkedin-carousel-generator.zip b/linkedin-carousel-generator.zip new file mode 100755 index 0000000..a253d8d Binary files /dev/null and b/linkedin-carousel-generator.zip differ diff --git a/linkedin_carousel_app/__init__.py b/linkedin_carousel_app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/linkedin_carousel_app/app.py b/linkedin_carousel_app/app.py new file mode 100644 index 0000000..f141406 --- /dev/null +++ b/linkedin_carousel_app/app.py @@ -0,0 +1,403 @@ +import streamlit as st +import os +import io +import zipfile +import logging +import requests +import concurrent.futures +from generator import configure_genai, generate_carousel_content, generate_background_image +from utils import format_slide, download_fonts +from styles import STYLES +from PIL import Image + +# Configure CLI Logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.StreamHandler() + ] +) +logger = logging.getLogger(__name__) + +# Configure page +st.set_page_config( + page_title="LinkedIn Carousel Generator", + page_icon="✨", + layout="wide" +) + +# Initialize Session State +if "generated_content" not in st.session_state: + st.session_state.generated_content = None +if "generated_slides" not in st.session_state: + st.session_state.generated_slides = [] +if "logs" not in st.session_state: + st.session_state.logs = "" + +# Constants for Cost Calculation (Gemini 3 Flash Preview + Pro Image 1K/2K) +TEXT_INPUT_COST_PER_1M = 0.50 +TEXT_OUTPUT_COST_PER_1M = 3.00 +IMAGE_COST_PER_UNIT = 0.134 # 1K/2K Image rate + +if "total_cost" not in st.session_state: + st.session_state.total_cost = 0.0 + +def calculate_text_cost(input_tokens, output_tokens): + cost = (input_tokens / 1_000_000 * 
TEXT_INPUT_COST_PER_1M) + \ + (output_tokens / 1_000_000 * TEXT_OUTPUT_COST_PER_1M) + return cost + +def add_log(message: str): + """Adds a message to the session state logs and prints to CLI.""" + st.session_state.logs += f"[LOG] {message}\n" + logger.info(message) + +def download_image_from_url(url: str) -> Image.Image: + """Downloads an image from a URL and returns a PIL Image object.""" + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + return Image.open(io.BytesIO(response.content)) + except Exception as e: + add_log(f"Failed to download source image from {url}: {e}") + return None + +def process_slide_task(idx, slide, style_name, total_slides, reference_image=None, source_image=None): + """Worker function to generate a single slide.""" + logs = [] + try: + logs.append(f"DEBUG: Starting task for Slide {idx+1}") + + # Generate Slide with Text and optional Code using Gemini 3 + slide_image = generate_background_image( + slide.image_prompt, + slide.headline, + slide.body, + slide.code_snippet, + style_name, + reference_image=reference_image, + source_image=source_image + ) + + # Format + final_slide = format_slide( + slide_image, + idx + 1, + total_slides, + STYLES[style_name] + ) + # Return image cost as well + return idx, final_slide, None, IMAGE_COST_PER_UNIT, logs + except Exception as e: + logs.append(f"ERROR in task: {str(e)}") + return idx, None, str(e), 0.0, logs + +# Sidebar for API Key +with st.sidebar: + st.header("Settings") + # Priority 1: User input + user_api_key = st.text_input("Gemini API Key", type="password", help="Get your key from ai.google.dev") + # Priority 2: Environment variable + env_api_key = os.environ.get("GOOGLE_API_KEY") + + active_key = user_api_key if user_api_key else env_api_key + + if active_key: + configure_genai(active_key) + if user_api_key: + st.success("API Key configured from input.") + else: + st.success("API Key configured from environment.") + else: + st.warning("Please enter your API Key 
to proceed.") + + # Cost Metric + st.divider() + st.metric(label="Estimated Cost", value=f"${st.session_state.total_cost:.4f}") + + # Style Reference Uploader + st.divider() + st.subheader("Advanced") + uploaded_ref_file = st.file_uploader("Upload Style Reference (Optional)", type=["png", "jpg", "jpeg"], help="Upload an image to define the exact style/layout for all slides.") + + if st.button("Clear Logs"): + st.session_state.logs = "" + st.session_state.total_cost = 0.0 + st.rerun() + +st.title("✨ LinkedIn Carousel Generator") +st.markdown("Create professional carousels from text using **Gemini 3** models and the latest **Google GenAI SDK**.") + +# Input Section +col1, col2 = st.columns([2, 1]) + +with col1: + source_text = st.text_area("Paste your article or text here:", height=300, placeholder="Once upon a time in the world of AI...") + +with col2: + st.subheader("Configuration") + + # Logic: If reference is uploaded, style selector is locked to Reference-Based + if uploaded_ref_file: + style_name = st.selectbox("Style (Locked to Reference)", ["Reference-Based"], disabled=True) + st.info("Style is being derived from your uploaded image.") + else: + # Filter out Reference-Based from the manual list to avoid confusion + manual_styles = [s for s in STYLES.keys() if s != "Reference-Based"] + style_name = st.selectbox("Select Style", manual_styles) + style_desc = STYLES[style_name]["description"] + st.caption(f"**Description:** {style_desc}") + + # Language Selection + language = st.selectbox( + "Output Language", + ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Dutch", "Russian", "Chinese", "Japanese", "Korean"], + index=0 + ) + + generate_btn = st.button("Generate Carousel", type="primary", disabled=not active_key) + +# Logs Expander (Persistent) +log_container = st.empty() +if st.session_state.logs: + with st.expander("🛠️ Execution Logs", expanded=True): + st.code(st.session_state.logs) + +# Main Logic +if generate_btn and source_text: + 
st.session_state.logs = "" + st.session_state.total_cost = 0.0 # Reset cost + add_log(f"Starting generation with style: {style_name}") + + # Pre-check fonts to avoid race conditions in threads + try: + download_fonts() + add_log("Fonts verified/downloaded.") + except Exception as e: + add_log(f"Warning: Font download failed: {e}") + + # Check for user uploaded reference + user_reference_img = None + if uploaded_ref_file: + try: + user_reference_img = Image.open(uploaded_ref_file) + add_log("User provided a custom style reference image. Using it for all slides.") + except Exception as e: + add_log(f"Error loading uploaded reference image: {e}") + + # Create containers for progress feedback + status_text = st.empty() + progress_bar = st.progress(0) + + try: + # Step 1: Text Generation + status_text.markdown("### 📝 Analyzing text and generating structure...") + add_log("Calling Gemini 3 Flash Preview for text analysis...") + progress_bar.progress(10) + + # Updated to receive usage metadata + content, usage = generate_carousel_content(source_text, style_name, language) + + # Calculate text cost + text_cost = calculate_text_cost(usage.get("input_tokens", 0), usage.get("output_tokens", 0)) + st.session_state.total_cost += text_cost + add_log(f"Text Gen Cost: ${text_cost:.4f} ({usage['input_tokens']} in, {usage['output_tokens']} out)") + + st.session_state.generated_content = content + + add_log(f"Content generated successfully. {len(content.slides)} slides planned.") + status_text.markdown("### ✅ Structure generated! 
Preparing image generation...") + progress_bar.progress(20) + + except Exception as e: + add_log(f"ERROR during text generation: {str(e)}") + st.error(f"Failed to generate content: {e}") + status_text.empty() + progress_bar.empty() + + if st.session_state.generated_content: + content = st.session_state.generated_content + slides_data = content.slides + total_slides = len(slides_data) + + generated_images_map = {} + reference_slide_img = None + + progress_per_slide = 80 / total_slides + current_progress = 20 + + # Logic Branch: User Reference vs Auto-Generated Reference + slides_to_process_parallel = [] + + if user_reference_img: + # Case A: User provided reference -> All slides run in parallel immediately + reference_slide_img = user_reference_img + slides_to_process_parallel = list(range(total_slides)) # 0 to N + add_log("Using uploaded image as style reference for ALL slides.") + else: + # Case B: No user reference -> Generate Slide 0 first, then use as reference + if total_slides > 0: + add_log("No custom reference provided. 
Generating Slide 1 as the seed reference...") + status_text.markdown(f"### 🎨 Generating Reference Slide (1/{total_slides})...") + + try: + # Check for source image in first slide + first_slide_source_img = None + if slides_data[0].source_image_url: + add_log(f"Downloading source image for Slide 1: {slides_data[0].source_image_url}") + first_slide_source_img = download_image_from_url(slides_data[0].source_image_url) + + # Generate Slide 1 synchronously + r_idx, img, error, img_cost, task_logs = process_slide_task( + 0, + slides_data[0], + style_name, + total_slides, + reference_image=None, + source_image=first_slide_source_img + ) + + # Process logs from worker + for log_msg in task_logs: + add_log(log_msg) + + if error: + add_log(f"Error generating reference slide: {error}") + status_text.error("Failed to generate reference slide.") + else: + generated_images_map[0] = img + reference_slide_img = img + + st.session_state.total_cost += img_cost + add_log(f"Reference Slide 1 completed. Cost: +${img_cost:.3f}") + + # Update progress + current_progress += progress_per_slide + progress_bar.progress(min(int(current_progress), 99)) + + # Set remaining slides for parallel processing + slides_to_process_parallel = list(range(1, total_slides)) # 1 to N + except Exception as e: + add_log(f"Critical error in reference slide gen: {e}") + + # Step 3: Parallel Execution + if slides_to_process_parallel: + if not reference_slide_img and not user_reference_img: + add_log("WARNING: Reference slide generation failed. 
Proceeding with independent generation (consistency mode disabled).") + else: + add_log(f"Starting parallel generation for {len(slides_to_process_parallel)} slides using reference...") + + status_text.markdown(f"### 🚀 Parallel Generating {len(slides_to_process_parallel)} Slides...") + + # Download source images + slide_source_images = {} + for idx in slides_to_process_parallel: + if slides_data[idx].source_image_url: + add_log(f"Downloading source image for Slide {idx+1}...") + slide_source_images[idx] = download_image_from_url(slides_data[idx].source_image_url) + else: + slide_source_images[idx] = None + + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + future_to_slide = { + executor.submit( + process_slide_task, + idx, + slides_data[idx], + style_name, + total_slides, + reference_image=reference_slide_img, # Uses user uploaded or generated ref + source_image=slide_source_images[idx] + ): idx + for idx in slides_to_process_parallel + } + + for future in concurrent.futures.as_completed(future_to_slide): + idx = future_to_slide[future] + try: + r_idx, img, error, img_cost, task_logs = future.result() + + # Process logs from worker + for log_msg in task_logs: + add_log(log_msg) + + if error: + add_log(f"Error generating slide {r_idx+1}: {error}") + status_text.warning(f"Failed to generate Slide {r_idx+1}") + else: + generated_images_map[r_idx] = img + headline = slides_data[r_idx].headline + st.session_state.total_cost += img_cost + add_log(f"Slide {r_idx+1} completed: {headline} (Cost: +${img_cost:.3f})") + status_text.markdown(f"### 🎨 Finished Slide {r_idx+1}/{total_slides}") + + current_progress += progress_per_slide + progress_bar.progress(min(int(current_progress), 99)) + + with log_container: + with st.expander("🛠️ Execution Logs", expanded=False): + st.code(st.session_state.logs) + + except Exception as exc: + add_log(f"Unexpected exception for slide {idx+1}: {exc}") + + # Sort images by index to maintain order + generated_images = [] 
+ for i in range(total_slides): + if i in generated_images_map and generated_images_map[i] is not None: + generated_images.append(generated_images_map[i]) + else: + add_log(f"Warning: Slide {i+1} missing or invalid in final set.") + + st.session_state.generated_slides = generated_images + + # Finalize + progress_bar.progress(100) + add_log(f"Generation completed. Total Estimated Cost: ${st.session_state.total_cost:.4f}") + status_text.success(f"### 🎉 Carousel Generated! (Est. Cost: ${st.session_state.total_cost:.4f})") + st.balloons() + + # Final update to log container + with log_container: + with st.expander("🛠️ Execution Logs", expanded=False): + st.code(st.session_state.logs) + +# Display Results +if st.session_state.generated_slides: + st.divider() + st.subheader("Preview & Download") + + # Display slides in a grid + cols = st.columns(3) + for idx, img in enumerate(st.session_state.generated_slides): + with cols[idx % 3]: + st.image(img, caption=f"Slide {idx+1}", use_column_width=True) + + # Post Text + with st.expander("LinkedIn Post Text", expanded=True): + st.text_area("Copy this for your post:", value=st.session_state.generated_content.post_text, height=200) + + # Download ZIP + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w") as zf: + # Add slides + for idx, img in enumerate(st.session_state.generated_slides): + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format='PNG') + zf.writestr(f"slide_{idx+1}.png", img_byte_arr.getvalue()) + + # Add post text + if st.session_state.generated_content and st.session_state.generated_content.post_text: + zf.writestr("post_text.md", st.session_state.generated_content.post_text) + + st.download_button( + label="Download All Slides (ZIP)", + data=zip_buffer.getvalue(), + file_name="linkedin_carousel.zip", + mime="application/zip", + type="primary" + ) + +elif not generate_btn: + st.info("Enter text and click Generate to start.") diff --git a/linkedin_carousel_app/generator.py 
b/linkedin_carousel_app/generator.py new file mode 100644 index 0000000..e2a6419 --- /dev/null +++ b/linkedin_carousel_app/generator.py @@ -0,0 +1,163 @@ +import os +from google import genai +from google.genai import types +from styles import CarouselContent, STYLES, Slide +from PIL import Image +import io +import json + +# Global client - will be initialized in app.py +client = None + +def configure_genai(api_key: str): + """Configures the Gemini API with the provided key.""" + global client + if api_key: + client = genai.Client(api_key=api_key, http_options={'api_version': 'v1alpha'}) # Gemini 3 models often require v1alpha + +def generate_carousel_content(source_text: str, style_name: str, language: str = "English") -> tuple[CarouselContent, dict]: + """ + Generates the text content for the carousel using Gemini 3 Flash Preview. + Returns: (CarouselContent, usage_metadata) + """ + if not client: + raise ValueError("API Client not configured.") + + style_info = STYLES.get(style_name, list(STYLES.values())[0]) + style_desc = style_info["description"] + + prompt = f""" + You are a professional LinkedIn content creator. + Create a structured carousel based on the following source text. + + Target Audience: Professionals on LinkedIn. + Tone: Educational, Insightful, Professional. + Style: {style_name} - {style_desc} + Output Language: {language} + + Source Text: + {source_text} + + Requirements: + 1. Create 5 to 7 slides. + 2. Each slide must have a 'headline' (short, punchy) and 'body' (clear, concise) in {language}. + 3. **Code Snippets**: If the source text contains code or technical examples, extract relevant snippets (max 5-8 lines) and include them in the 'code_snippet' field. Ensure the code is properly formatted and adds educational value. + 4. **Image Integration**: If the source text references specific images via URL (e.g., diagrams, screenshots), extract the URL into the 'source_image_url' field for the relevant slide. 
Ensure the 'image_prompt' describes how this image should be integrated (e.g., "Display the provided diagram prominently on a clean background"). + 5. 'image_prompt': Describe a background image that fits the '{style_name}' style and the slide content. Use visual keywords. Do NOT request text in the image. + 6. 'post_text': Write the accompanying LinkedIn post caption in {language}. + """ + + try: + response = client.models.generate_content( + model="gemini-3-flash-preview", + contents=prompt, + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_schema=CarouselContent, + ) + ) + + usage = { + "input_tokens": response.usage_metadata.prompt_token_count, + "output_tokens": response.usage_metadata.candidates_token_count + } + + if response.parsed: + return response.parsed, usage + + data = json.loads(response.text) + return CarouselContent(**data), usage + + except Exception as e: + print(f"Content Generation Error: {e}") + raise e + +def generate_background_image(prompt: str, headline: str, body: str, code_snippet: str | None, style_name: str, reference_image: Image.Image = None, source_image: Image.Image | None = None) -> Image.Image: + """ + Generates a slide image with text and optional code using Gemini 3 Image (Nano Banana Pro). + Can use a reference_image to maintain consistency. + Can integrate a source_image referenced in the text. + """ + if not client: + raise ValueError("API Client not configured.") + + style_info = STYLES.get(style_name, list(STYLES.values())[0]) + style_suffix = style_info.get("bg_prompt_suffix", "") + + # instructions for code block + code_instr = "" + if code_snippet: + code_instr = ( + f"CODE BLOCK RENDERING: Include a syntax-highlighted code block containing this code: " + f"\"\"\"{code_snippet}\"\"\" " + f"Place it in a dark box or distinct section, ensure it is perfectly legible with a monospaced font style. 
" + ) + + # improved prompt for text rendering + # If reference image is provided, we tell it to prioritize that over text style descriptions + style_instruction = f"in a {style_name} style" if not reference_image else "basing the style EXCLUSIVELY on the provided reference image" + + text_prompt = ( + f"Create a professional presentation slide {style_instruction}. " + f"Visuals: {prompt}. {style_suffix if not reference_image else ''} " + f"TEXT RENDERING: The image MUST include the following text clearly and legibly. " + f"FONT CONSISTENCY: Use the 'Montserrat' font (or a clean, modern sans-serif like Roboto) for ALL text in this slide. " + f"Title: \"{headline}\" " + f"Subtitle: \"{body}\" " + f"{code_instr}" + f"Ensure the text is balanced, high contrast, and perfectly legible. " + f"High quality, professional design, aspect ratio 4:5 (portrait)." + ) + + contents = [text_prompt] + + if source_image: + # Add instruction to integrate source image + contents[0] += " SOURCE IMAGE INTEGRATION: Prominently feature and integrate the provided source image (diagram/photo) into the slide layout. Ensure it fits harmoniously with the text and style." + # Convert source image to bytes + src_byte_arr = io.BytesIO() + source_image.save(src_byte_arr, format='PNG') + src_bytes = src_byte_arr.getvalue() + # Add source image part + contents.append(types.Part.from_bytes(data=src_bytes, mime_type="image/png")) + + if reference_image: + # Add instruction to use reference + contents[0] += ( + " STYLE REFERENCE: Match the exact layout, font size, color palette, " + "background texture, and overall composition of the provided reference image. " + "Ignore any other style descriptions." 
+ ) + # Convert ref image to bytes + ref_byte_arr = io.BytesIO() + reference_image.save(ref_byte_arr, format='PNG') + ref_bytes = ref_byte_arr.getvalue() + + # Add ref image part + contents.append(types.Part.from_bytes(data=ref_bytes, mime_type="image/png")) + + try: + response = client.models.generate_content( + model="gemini-3-pro-image-preview", + contents=contents + ) + + # Search for image in parts + for part in response.candidates[0].content.parts: + if part.inline_data: + return Image.open(io.BytesIO(part.inline_data.data)) + + print("No inline_data found in response.") + return create_placeholder_image(style_name) + + except Exception as e: + print(f"Image generation failed: {e}. Using placeholder.") + return create_placeholder_image(style_name) + +def create_placeholder_image(style_name: str) -> Image.Image: + """Creates a solid color placeholder if generation fails.""" + style_info = STYLES.get(style_name, list(STYLES.values())[0]) + color = style_info.get("overlay_color", (50, 50, 50)) + if len(color) == 4: + color = color[:3] # Drop alpha + return Image.new('RGB', (1080, 1350), color=color) diff --git a/linkedin_carousel_app/requirements.txt b/linkedin_carousel_app/requirements.txt new file mode 100644 index 0000000..761e879 --- /dev/null +++ b/linkedin_carousel_app/requirements.txt @@ -0,0 +1,4 @@ +streamlit +google-genai +pydantic +Pillow diff --git a/linkedin_carousel_app/styles.py b/linkedin_carousel_app/styles.py new file mode 100644 index 0000000..4a50fe5 --- /dev/null +++ b/linkedin_carousel_app/styles.py @@ -0,0 +1,80 @@ +from pydantic import BaseModel, Field +from typing import List + +# --- Data Models (Structured Output from Gemini) --- + +class Slide(BaseModel): + headline: str = Field(description="Catchy headline for the slide (max 10 words).") + body: str = Field(description="Concise body text for the slide (max 30 words).") + code_snippet: str | None = Field(description="Optional: A very short, relevant code snippet (max 5-8 lines). 
Use only if it adds value to the slide.") + source_image_url: str | None = Field(description="Optional: A URL to an image mentioned in the source text that should be integrated into this slide's visual.") + image_prompt: str = Field(description="Detailed prompt for generating the background image based on the style.") + +class CarouselContent(BaseModel): + slides: List[Slide] = Field(description="List of 5-7 slides for the carousel.") + post_text: str = Field(description="Engaging LinkedIn post text to accompany the carousel, including hashtags.") + +# --- Visual Styles --- + +STYLES = { + "Minimalist Tech": { + "description": "Clean, white background, dark text, geometric shapes.", + "bg_prompt_suffix": "minimalist, clean white background, subtle geometric tech patterns, high key lighting, 8k resolution, uncluttered.", + "text_color": (30, 30, 30), # Dark Grey + "overlay_color": (255, 255, 255, 220), # White with transparency + }, + "Bold Corporate": { + "description": "Strong blue background, white text, professional look.", + "bg_prompt_suffix": "corporate professional background, deep blue gradients, abstract business concepts, subtle network connections, 8k resolution.", + "text_color": (255, 255, 255), # White + "overlay_color": (0, 50, 100, 200), # Blue with transparency + }, + "Creative Vibrant": { + "description": "Colorful gradients, artistic, modern.", + "bg_prompt_suffix": "vibrant artistic background, fluid color gradients, abstract art, creative energy, 8k resolution, soft lighting.", + "text_color": (255, 255, 255), # White + "overlay_color": (0, 0, 0, 150), # Black with transparency + }, + "Nature Serene": { + "description": "Calm, nature-inspired, green tones.", + "bg_prompt_suffix": "serene nature background, soft green leaves, organic shapes, calm atmosphere, natural light, 8k resolution.", + "text_color": (20, 50, 20), # Dark Green + "overlay_color": (240, 255, 240, 200), # Light Green/White with transparency + }, + "Hand-Drawn Doodle": { + 
"description": "Playful, sketch-style, white background with black ink drawings.", + "bg_prompt_suffix": "hand-drawn doodle style, sketch, pencil on white paper, playful, creative, simple line art, black ink on white background.", + "text_color": (0, 0, 0), # Black + "overlay_color": (255, 255, 255, 240), # White with transparency + }, + "Playful Color Doodle": { + "description": "Fun, hand-drawn sketches with vibrant pops of color.", + "bg_prompt_suffix": "colorful hand-drawn doodle style, marker drawings, watercolor splashes, vibrant accents, creative, white background, playful illustrations.", + "text_color": (30, 30, 30), # Dark Grey + "overlay_color": (255, 255, 255, 230), # White with transparency + }, + "Reference-Based": { + "description": "Uses your uploaded image to define the style. Predefined settings are ignored.", + "bg_prompt_suffix": "match the provided reference image style perfectly.", + "text_color": (255, 255, 255), # Default white (will be used for footer) + "overlay_color": (0, 0, 0, 0), # No overlay + }, + "Tech Color Doodle": { + "description": "Hand-drawn technical sketches with neon tech accents.", + "bg_prompt_suffix": "tech doodle style, hand-drawn circuits, network nodes, code symbols, cloud infrastructure sketches, neon blue and cyan marker accents, clean white background, modern tech aesthetic.", + "text_color": (30, 30, 30), # Dark Grey + "overlay_color": (255, 255, 255, 240), # White with transparency + }, + "Excalidraw Light": { + "description": "Clean, hand-drawn diagram style on a white background, like Excalidraw.", + "bg_prompt_suffix": "excalidraw style, hand-drawn diagram, sketchy black lines on clean white background, rough strokes, architectural sketch, minimalist, technical drawing aesthetic, high contrast.", + "text_color": (20, 20, 20), # Almost Black + "overlay_color": (255, 255, 255, 230), # White with transparency + }, + "Excalidraw Dark": { + "description": "Dark mode hand-drawn diagram style, white lines on dark 
def download_fonts():
    """Download the Roboto font files into ``FONT_DIR`` if they are missing.

    This is best-effort: any network or filesystem failure is reported on
    stdout and swallowed, so callers can fall back to a default font instead
    of crashing.
    """
    # exist_ok avoids the race between a separate exists() check and makedirs().
    os.makedirs(FONT_DIR, exist_ok=True)

    fonts = {
        "Roboto-Bold.ttf": "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Bold.ttf",
        "Roboto-Regular.ttf": "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf",
    }

    for name, url in fonts.items():
        path = os.path.join(FONT_DIR, name)
        if os.path.exists(path):
            continue

        partial_path = path + ".part"
        try:
            print(f"Downloading {name}...")
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=15)
            if response.status_code != 200:
                print(f"Failed to download font {name}: HTTP {response.status_code}")
                continue

            # Write under a temporary name and rename afterwards, so an
            # interrupted write never leaves a truncated file at the final
            # path (which the exists() check above would treat as complete).
            with open(partial_path, "wb") as f:
                f.write(response.content)
            os.replace(partial_path, path)
        except Exception as e:
            # Deliberately broad: font download must never abort slide rendering.
            print(f"Failed to download font {name}: {e}")


def format_slide(slide_image: Image.Image, slide_number: int, total_slides: int, style_config: dict) -> Image.Image:
    """Resize an AI-generated slide and stamp a "n / total" footer on it.

    The generated image already contains the slide text and design, so no
    overlay is applied; only a small page indicator with a one-pixel-offset
    shadow is drawn in the bottom-right corner.

    Args:
        slide_image: The generated slide image.
        slide_number: Number shown before the slash in the footer.
        total_slides: Number shown after the slash in the footer.
        style_config: Style dictionary; its ``"text_color"`` entry is used as
            the footer colour.

    Returns:
        The slide resized to 1080x1350 with the footer drawn on it.

    Raises:
        KeyError: If ``style_config`` has no ``"text_color"`` entry.
    """
    # The footer is rendered with the regular-weight font, so its presence
    # matters as much as the bold one; download_fonts() skips existing files.
    if not (os.path.exists(FONT_PATH) and os.path.exists(BODY_FONT_PATH)):
        download_fonts()

    target_size = (1080, 1350)
    img = slide_image.resize(target_size, Image.Resampling.LANCZOS)
    draw = ImageDraw.Draw(img)

    # Fall back to Pillow's built-in font when the TTF is missing or unreadable.
    try:
        footer_font = ImageFont.truetype(BODY_FONT_PATH, 30)
    except OSError:
        footer_font = ImageFont.load_default()

    text_color = style_config["text_color"]
    margin = 80

    footer_text = f"{slide_number} / {total_slides}"
    bbox = draw.textbbox((0, 0), footer_text, font=footer_font)

    # Right-align the footer inside the margin, 80 px above the bottom edge.
    x = target_size[0] - bbox[2] - margin
    y = target_size[1] - 80

    # Pick a contrasting shadow: dark behind light text, light behind dark
    # text. Only the RGB channels count, so an alpha value cannot skew it.
    shadow_color = (0, 0, 0) if sum(text_color[:3]) > 300 else (255, 255, 255)
    draw.text((x + 2, y + 2), footer_text, font=footer_font, fill=shadow_color)
    draw.text((x, y), footer_text, font=footer_font, fill=text_color)

    return img