import os
import tempfile
from pathlib import Path
from manolo_bot.storage.documents.base import BaseDocumentStorage
# (Sphinx "[docs]" source-link marker from the HTML rendering; not part of the code.)
class FileDocumentStorage(BaseDocumentStorage):
    """
    File-based implementation of document storage.

    Documents are written as ``<filename>.txt`` files inside a per-chat
    directory located at ``<base_path>/<bot_uuid>/<chat_id>/``.

    NOTE: the ``async`` methods perform ordinary synchronous file I/O.
    """

    def __init__(self, bot_uuid: str, base_path: str | None = None) -> None:
        """
        Initializes the file document storage.

        :param bot_uuid: The UUID of the bot.
        :param base_path: The base path for storage. Defaults to
            ``<system temp dir>/manolo_bot/documents``.
        """
        super().__init__(bot_uuid)
        if base_path is None:
            base_path = os.path.join(tempfile.gettempdir(), "manolo_bot", "documents")
        # Every bot gets its own sub-directory below the base path.
        self.base_path = Path(base_path) / bot_uuid

    def _get_chat_path(self, chat_id: int) -> Path:
        """
        Gets the path for a specific chat, creating the directory if needed.

        :param chat_id: The ID of the chat.
        :return: The resolved Path object for the chat directory.
        """
        path = (self.base_path / str(chat_id)).resolve()
        path.mkdir(parents=True, exist_ok=True)
        return path

    def _get_safe_path(self, chat_path: Path, filename: str) -> Path:
        """
        Gets a safe path for a file, preventing directory traversal.

        :param chat_path: The base path for the chat.
        :param filename: The name of the file (without the ``.txt`` suffix).
        :return: The safe, resolved Path object.
        :raises ValueError: If the resolved path is outside ``chat_path``.
        """
        # Resolve both sides so that the comparison is made on normalized paths.
        base = chat_path.resolve()
        file_path = (base / f"{filename}.txt").resolve()
        # Compare path components, not raw strings: a plain string-prefix test
        # would accept sibling locations such as ".../10/x.txt" or ".../1.txt"
        # for the chat directory ".../1".
        if not file_path.is_relative_to(base):
            raise ValueError(f"Insecure filename or path: {filename}")
        return file_path

    async def store(self, chat_id: int, filename: str, text: str) -> None:
        """
        Stores the extracted text of a document in the filesystem.

        :param chat_id: The ID of the chat.
        :param filename: The name of the document.
        :param text: The extracted text.
        :raises ValueError: If ``filename`` would escape the chat directory.
        """
        chat_path = self._get_chat_path(chat_id)
        file_path = self._get_safe_path(chat_path, filename)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)

    async def retrieve(self, chat_id: int, filename: str) -> str | None:
        """
        Retrieves the extracted text of a document from the filesystem.

        :param chat_id: The ID of the chat.
        :param filename: The name of the document.
        :return: The extracted text or None if not found or path is insecure.
        """
        chat_path = self._get_chat_path(chat_id)
        try:
            file_path = self._get_safe_path(chat_path, filename)
        except ValueError:
            # An insecure name is reported the same way as a missing document.
            return None
        if file_path.exists():
            with open(file_path, encoding="utf-8") as f:
                return f.read()
        return None

    async def clear(self, chat_id: int) -> None:
        """
        Clears all stored documents for a specific chat from the filesystem.

        Only ``*.txt`` entries whose resolved location is directly inside the
        chat directory are removed.

        :param chat_id: The ID of the chat.
        """
        chat_path = self._get_chat_path(chat_id)
        if chat_path.exists():
            for file in chat_path.glob("*.txt"):
                # Skip entries (e.g. symlinks) that resolve outside the chat dir.
                if file.resolve().parent == chat_path:
                    file.unlink()

    async def list_documents(self, chat_id: int) -> list[str]:
        """
        Lists all stored documents for a specific chat in the filesystem.

        :param chat_id: The ID of the chat.
        :return: A list of filenames, without the ``.txt`` suffix.
        """
        chat_path = self._get_chat_path(chat_id)
        if not chat_path.exists():
            return []
        # Skip entries (e.g. symlinks) that resolve outside the chat dir.
        return [f.stem for f in chat_path.glob("*.txt") if f.resolve().parent == chat_path]