add voice support

2025-10-27 21:03:28 +01:00
parent 58a3509bf8
commit c98734c322
2 changed files with 101 additions and 16 deletions
--- a/conversations/4762864.json
+++ b/conversations/4762864.json
@@ -0,0 +1,33 @@
 {
  "user_id": 4762864,
  "username": "unnikked",
  "last_updated": 1761595387.27931,
  "last_updated_str": "2025-10-27 21:03:07",
  "message_count": 4,
  "history": [
    {
      "role": "user",
      "content": "Me lo spegni il gelato",
      "timestamp": 1761595234.0849943,
      "timestamp_str": "2025-10-27 21:00:34"
    },
    {
      "role": "assistant",
      "content": "Fatto! Canale \"Gelato\" spento. ✅",
      "timestamp": 1761595239.1608367,
      "timestamp_str": "2025-10-27 21:00:39"
    },
    {
      "role": "user",
      "content": "Mi accendi il gelato.",
      "timestamp": 1761595383.8880353,
      "timestamp_str": "2025-10-27 21:03:03"
    },
    {
      "role": "assistant",
      "content": "Certo! Gelato acceso! 🍦✅",
      "timestamp": 1761595387.27931,
      "timestamp_str": "2025-10-27 21:03:07"
    }
  ]
 }
--- a/telegram_bot.py
+++ b/telegram_bot.py
@@ -321,6 +321,37 @@ Ricorda: chi ti parla è spesso sul palco, con le mani occupate da uno strumento
 Devi essere veloce, chiaro e capire anche richieste approssimative.
 """
    def _get_file_path(self, file_id: str) -> str:
        """Ask Telegram for the server-side path of a file.

        Issues a ``getFile`` request through ``self.bot._make_request``.

        Args:
            file_id: Telegram identifier of the file to look up.

        Returns:
            The ``file_path`` value found in the ``getFile`` result.

        Raises:
            RuntimeError: If the response is not flagged ``ok`` or carries no
                ``file_path``. ``RuntimeError`` is a subclass of
                ``Exception``, so existing ``except Exception`` handlers
                keep working.
        """
        result = self.bot._make_request("getFile", {"file_id": file_id})
        if result.get("ok"):
            # ``file_path`` is documented as optional in the Bot API ``File``
            # object, so look it up defensively instead of risking a bare
            # KeyError.
            file_path = (result.get("result") or {}).get("file_path")
            if file_path:
                return file_path
        # Telegram error responses normally explain the failure in a
        # ``description`` field; include it when present to ease debugging.
        reason = result.get("description")
        message = "Impossibile ottenere il file da Telegram"
        if reason:
            message = f"{message}: {reason}"
        raise RuntimeError(message)
    def _download_file(self, url: str) -> bytes:
        """Fetch the resource at *url* and return its raw content.

        The URL is opened with a 30-second timeout and the whole response
        body is read into memory before being returned.
        """
        with urlopen(Request(url), timeout=30) as remote:
            payload = remote.read()
        return payload
    def _transcribe_audio(
        self, audio_bytes: bytes, mime_type: str = "audio/ogg"
    ) -> Optional[str]:
        """Transcribe an audio recording to text using Gemini.

        Args:
            audio_bytes: Raw bytes of the recording.
            mime_type: MIME type declared for ``audio_bytes``. Defaults to
                ``"audio/ogg"``, the format used for Telegram voice notes
                (OGG/Opus); pass a different value for recordings in
                another format.

        Returns:
            The transcript with surrounding whitespace stripped, or ``None``
            when the request fails or the response contains no text.
        """
        try:
            response = self.client.models.generate_content(
                model="gemini-2.5-flash",
                contents=[
                    "Trascrivi in testo chiaro ciò che dice questa registrazione:",
                    types.Part.from_bytes(data=audio_bytes, mime_type=mime_type),
                ],
            )
            transcript = response.text
            if transcript is None:
                # No text came back; report that explicitly instead of
                # tripping an AttributeError on ``.strip()``.
                print("❌ Errore nella trascrizione: risposta senza testo")
                return None
            return transcript.strip()
        except Exception as e:
            # Deliberate best-effort: any failure is logged and reported to
            # the caller as ``None`` rather than propagated.
            print(f"❌ Errore nella trascrizione: {e}")
            return None
    def get_conversation_history(self, user_id: int) -> List[Dict]:
        """Ottiene la storia della conversazione per un utente."""
        if user_id not in self.conversations:
@@ -488,27 +519,48 @@ Scrivi semplicemente cosa vuoi fare e ci penso io! 🎵
        message = update["message"]
        # Ignora messaggi non testuali
        if "text" not in message:
            return
        chat_id = message["chat"]["id"]
        user_id = message["from"]["id"]
        username = message["from"].get("username", "Unknown")
        # 🎤 Se è un messaggio vocale o audio
        if "voice" in message or "audio" in message:
            self.bot.send_chat_action(chat_id, "typing")
            file_info = message.get("voice") or message.get("audio")
            file_id = file_info["file_id"]
            # Ottieni l'URL del file Telegram
            file_path = self._get_file_path(file_id)
            file_url = f"https://api.telegram.org/file/bot{self.bot.token}/{file_path}"
            # Scarica il file
            audio_bytes = self._download_file(file_url)
            # Trascrivi con Gemini
            transcript = self._transcribe_audio(audio_bytes)
            if not transcript:
                self.bot.send_message(chat_id, "❌ Non sono riuscito a capire l’audio.")
                return
            # Conferma trascrizione
            self.bot.send_message(chat_id, f"🗣️ Hai detto:\n> {transcript}")
            # Processa il testo come se fosse un messaggio
            response = self.process_message(user_id, username, transcript)
            self.bot.send_message(chat_id, response, parse_mode='MarkdownV2')
            return
        # 🎯 Messaggi testuali classici
        if "text" not in message:
            return
        text = message["text"]
        print(f"📨 Messaggio da @{username}: {text}")
        print(f"📨 Messaggio da @{username} (ID: {user_id}): {text}")
        # Mostra "typing..."
        self.bot.send_chat_action(chat_id, "typing")
        # Elabora il messaggio
        response = self.process_message(user_id, username, text)
        # Invia la risposta
        self.bot.send_message(chat_id, response, parse_mode='MarkdownV2')
        print(f"✅ Risposta inviata a @{username}")
    def run(self):
        """Avvia il bot con long polling."""