// filecache.go
package assist
import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
	"unicode/utf8"

	"github.com/readysite/readysite/website/models"
)
// Limits applied when caching file content.
const (
	// MaxCachedFileSize is the largest file.Size accepted for caching
	// (100 * 1024, reported as bytes in the "file too large" error).
	MaxCachedFileSize = 100 << 10

	// MaxCachedTextLength caps the cached text. It is compared against
	// len() of the extracted text, so the unit is bytes rather than runes.
	MaxCachedTextLength = 10 * 1000

	// TokenEstimateRatio is the divisor used to turn a text length into an
	// estimated token count (roughly four characters per token).
	TokenEstimateRatio = 4
)
// GetFileContent returns the text content of the file identified by fileID,
// serving it from the content cache when the cached hash still matches the
// file's current data.
//
// It returns an error when the file lookup fails, when file.IsText() reports
// false, or when file.Size exceeds MaxCachedFileSize. Extracted text longer
// than MaxCachedTextLength bytes is shortened (never splitting a UTF-8
// sequence) and suffixed with "\n...[truncated]". The outcome of the cache
// lookup, update and insert is not checked, so cache problems never fail the
// call.
func GetFileContent(fileID string) (string, error) {
	file, err := models.Files.Get(fileID)
	if err != nil {
		return "", fmt.Errorf("file not found: %w", err)
	}

	if !file.IsText() {
		return "", fmt.Errorf("file is not text-based (type: %s)", file.MimeType)
	}

	if file.Size > MaxCachedFileSize {
		return "", fmt.Errorf("file too large (%d bytes, max %d)", file.Size, MaxCachedFileSize)
	}

	// The hash of the raw data tells us whether a cached entry is stale.
	hash := computeHash(file.Data)

	// The lookup error is discarded; a nil result is handled as a cache miss.
	cache, _ := models.FileContentCaches.First("WHERE FileID = ?", fileID)
	if cache != nil && cache.ContentHash == hash {
		return cache.TextContent, nil
	}

	textContent := extractTextContent(file)

	// Cut on a rune boundary: a plain byte slice could end in the middle of a
	// multi-byte character and leave invalid UTF-8 in the cache.
	if len(textContent) > MaxCachedTextLength {
		textContent = truncateAtRuneBoundary(textContent, MaxCachedTextLength) + "\n...[truncated]"
	}

	// The estimate is based on the final text, truncation marker included.
	tokenCount := len(textContent) / TokenEstimateRatio

	// Refresh the stale entry in place, or create one if none exists.
	if cache != nil {
		cache.ContentHash = hash
		cache.TextContent = textContent
		cache.TokenCount = tokenCount
		models.FileContentCaches.Update(cache)
	} else {
		cache = &models.FileContentCache{
			FileID:      fileID,
			ContentHash: hash,
			TextContent: textContent,
			TokenCount:  tokenCount,
		}
		models.FileContentCaches.Insert(cache)
	}

	return textContent, nil
}

// truncateAtRuneBoundary returns s shortened to at most limit bytes without
// splitting a multi-byte UTF-8 sequence. If the bytes around the limit are not
// valid UTF-8 it falls back to a plain cut at limit.
func truncateAtRuneBoundary(s string, limit int) string {
	if limit <= 0 {
		return ""
	}
	if len(s) <= limit {
		return s
	}

	// s[cut] is the first byte that is dropped; the cut is clean when that
	// byte starts a rune. A sequence has at most utf8.UTFMax bytes, so at most
	// utf8.UTFMax-1 steps back are ever needed for valid input.
	cut := limit
	for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
		cut--
	}
	if !utf8.RuneStart(s[cut]) {
		cut = limit
	}
	return s[:cut]
}
// InvalidateFileCache deletes the cached content entry for fileID, if one is
// found. Intended to be called whenever the file is modified or deleted.
// The lookup error is discarded; a nil result means there is nothing to do.
func InvalidateFileCache(fileID string) {
	entry, _ := models.FileContentCaches.First("WHERE FileID = ?", fileID)
	if entry == nil {
		return
	}
	models.FileContentCaches.Delete(entry)
}
// computeHash returns the lowercase hex encoding of the SHA-256 digest of data.
func computeHash(data []byte) string {
	hasher := sha256.New()
	hasher.Write(data) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// extractTextContent converts the file's data to a string.
//
// A nil Data yields "". HTML ("text/html") and JSON ("application/json")
// content is returned unchanged; no tag stripping or pretty-printing is
// performed. For every other MIME type, "\r\n" and lone "\r" line endings
// are normalized to "\n".
func extractTextContent(file *models.File) string {
	if file.Data == nil {
		return ""
	}

	raw := string(file.Data)

	// Structured formats are passed through untouched.
	if file.MimeType == "text/html" || file.MimeType == "application/json" {
		return raw
	}

	// Replace CRLF first so its "\r" is not converted a second time.
	return strings.ReplaceAll(strings.ReplaceAll(raw, "\r\n", "\n"), "\r", "\n")
}
// GetCachedTokenCount reports the token estimate stored in the cache entry
// for fileID. It returns 0 when no entry is found; the lookup error is
// discarded.
func GetCachedTokenCount(fileID string) int {
	entry, _ := models.FileContentCaches.First("WHERE FileID = ?", fileID)
	if entry == nil {
		return 0
	}
	return entry.TokenCount
}
// IsFileCacheable reports whether file is eligible for content caching:
// it must be non-nil, report IsText(), and have a Size no greater than
// MaxCachedFileSize.
func IsFileCacheable(file *models.File) bool {
	return file != nil && file.IsText() && file.Size <= MaxCachedFileSize
}