From 67b4d949fdee156a0529ae717d8d105b869e99b2 Mon Sep 17 00:00:00 2001
From: Moon <moon@shipoclu.com>
Date: Thu, 12 Jun 2025 12:53:37 +0900
Subject: [PATCH] use a library for content type detection instead of
 hand-rolled header checks

---
 bot/pleroma_service.py | 33 ++++++++++++++-------------------
 requirements.txt       |  1 +
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/bot/pleroma_service.py b/bot/pleroma_service.py
index d7c52b6..21320d1 100644
--- a/bot/pleroma_service.py
+++ b/bot/pleroma_service.py
@@ -1,7 +1,7 @@
 from mastodon import Mastodon
 from typing import List, Optional, Dict, Any, Union, BinaryIO
-import mimetypes
 import io
+import filetype
 from fediverse_service import FediverseService
 from fediverse_types import (
     FediverseNotification, FediversePost, FediverseUser, FediverseFile,
@@ -159,33 +159,28 @@ class PleromaService(FediverseService):
     def upload_file(self, file_data: Union[BinaryIO, bytes], filename: Optional[str] = None) -> FediverseFile:
         """Upload a file to Pleroma instance"""
         try:
-            # Convert file_data to bytes if it's a stream for MIME detection
+            # Convert file_data to bytes for MIME detection
             if hasattr(file_data, 'read'):
                 # Check if we can seek back
                 try:
                     current_pos = file_data.tell()
-                    header = file_data.read(8)
+                    file_bytes = file_data.read()
                     file_data.seek(current_pos)
-                except (io.UnsupportedOperation, OSError):
-                    # Non-seekable stream, read all data
-                    remaining_data = file_data.read()
-                    file_bytes = header + remaining_data
                     file_data = io.BytesIO(file_bytes)
-                    header = file_bytes[:8]
+                except (io.UnsupportedOperation, OSError):
+                    # tell()/read()/seek() failed; NOTE: file_bytes is unbound here unless read() succeeded
+                    file_data = io.BytesIO(file_bytes)
             else:
-                header = file_data[:8] if len(file_data) >= 8 else file_data
+                file_bytes = file_data
+                file_data = io.BytesIO(file_bytes)
             
-            # Determine mime type from file header
-            if header.startswith(b'\xff\xd8\xff'):
-                mime_type = 'image/jpeg'
-            elif header.startswith(b'\x89PNG\r\n\x1a\n'):
-                mime_type = 'image/png'
-            elif header.startswith(b'GIF8'):
-                mime_type = 'image/gif'
-            elif header.startswith(b'RIFF') and len(header) >= 8 and b'WEBP' in header:
-                mime_type = 'image/webp'
+            # Use filetype library for robust MIME detection
+            kind = filetype.guess(file_bytes)
+            if kind is not None:
+                mime_type = kind.mime
             else:
-                mime_type = 'image/jpeg'  # Default fallback
+                # Fallback to image/jpeg if detection fails
+                mime_type = 'image/jpeg'
             
             media = self.client.media_post(file_data, mime_type=mime_type, description=filename)
             return self._convert_mastodon_file(media)
diff --git a/requirements.txt b/requirements.txt
index 3d237b4..4a749b2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ MarkupSafe==3.0.2
 Werkzeug==3.1.3
 Misskey.py==4.1.0
 Mastodon.py==1.8.1
+filetype==1.2.0