readysite / website / internal / content / validate / document.go
16.8 KB
document.go
// Package validate provides document and input validation.
package validate

import (
	"encoding/json"
	"fmt"
	"net/mail"
	"net/url"
	"regexp"
	"time"

	"github.com/readysite/readysite/website/internal/content/schema"
	"github.com/readysite/readysite/website/models"
)

// ValidationError describes a single failed check on one document field.
type ValidationError struct {
	// Field is the name of the field that failed validation.
	Field string `json:"field"`
	// Message explains why the field was rejected.
	Message string `json:"message"`
}

// Error implements the error interface, rendering the failure as
// "<field>: <message>".
func (e ValidationError) Error() string {
	name, reason := e.Field, e.Message
	return fmt.Sprintf("%s: %s", name, reason)
}

// ValidationErrors groups the ValidationError values produced by one
// validation pass.
type ValidationErrors []ValidationError

// Error implements the error interface. An empty collection yields "", a
// single entry yields that entry's own message, and a larger collection
// yields the total count followed by the first entry's message only.
func (e ValidationErrors) Error() string {
	switch count := len(e); count {
	case 0:
		return ""
	case 1:
		return e[0].Error()
	default:
		return fmt.Sprintf("%d validation errors: %s", count, e[0].Error())
	}
}

// Document checks data against the field definitions of collection.
//
// It returns an error wrapping the schema failure when the collection's
// fields cannot be loaded, nil when the collection declares no fields or
// every declared field passes, and a ValidationErrors value listing each
// failing field otherwise. Keys in data that match no declared field are not
// inspected.
func Document(collection *models.Collection, data map[string]any) error {
	fields, err := schema.GetFields(collection)
	if err != nil {
		return fmt.Errorf("invalid schema: %w", err)
	}

	// A collection without declared fields accepts any payload.
	if len(fields) == 0 {
		return nil
	}

	var failures ValidationErrors
	report := func(name, msg string) {
		failures = append(failures, ValidationError{Field: name, Message: msg})
	}

	for _, field := range fields {
		value, present := data[field.Name]

		switch {
		case field.Required && (!present || isEmpty(value)):
			// A required field must be present and non-empty; no further
			// checks are run on it once this fails.
			report(field.Name, "field is required")
		case !present || value == nil:
			// Optional field that is absent or nil: nothing to check.
		default:
			if typeErr := validateFieldType(field, value); typeErr != nil {
				report(field.Name, typeErr.Error())
			}
		}
	}

	if len(failures) == 0 {
		return nil
	}
	return failures
}

// DocumentJSON decodes dataJSON into a map and validates it with Document.
// An empty string is treated as an empty document; text that cannot be
// decoded yields an "invalid JSON" error wrapping the decoder's error.
func DocumentJSON(collection *models.Collection, dataJSON string) error {
	if dataJSON == "" {
		return Document(collection, make(map[string]any))
	}

	var payload map[string]any
	if err := json.Unmarshal([]byte(dataJSON), &payload); err != nil {
		return fmt.Errorf("invalid JSON: %w", err)
	}
	return Document(collection, payload)
}

// isEmpty reports whether v counts as "no value": nil, the empty string, a
// zero-length []any, or a zero-length map[string]any. Every other value,
// including false and 0, is treated as non-empty.
func isEmpty(v any) bool {
	switch typed := v.(type) {
	case nil:
		return true
	case string:
		return len(typed) == 0
	case []any:
		return len(typed) == 0
	case map[string]any:
		return len(typed) == 0
	default:
		return false
	}
}

// validateFieldType routes value to the validator that matches field.Type
// and returns that validator's result. Autodate fields and field types with
// no validator listed here are accepted without inspection.
func validateFieldType(field schema.Field, value any) error {
	var err error

	switch field.Type {
	case schema.Text:
		err = validateText(field, value)
	case schema.Number:
		err = validateNumber(field, value)
	case schema.Bool:
		err = validateBool(value)
	case schema.Date:
		err = validateDate(value)
	case schema.Email:
		err = validateEmailField(value)
	case schema.URL:
		err = validateURL(value)
	case schema.Select:
		err = validateSelect(field, value)
	case schema.Relation:
		err = validateRelation(field, value)
	case schema.File:
		err = validateFile(field, value)
	case schema.JSON:
		err = validateJSON(value)
	case schema.GeoPoint:
		err = validateGeoPoint(value)
	case schema.Editor:
		err = validateEditor(field, value)
	case schema.Autodate:
		// Autodate values are overwritten by ProcessAutodate, so whatever
		// the caller supplied is not checked.
	default:
		// Unknown type: accept any value.
	}

	return err
}

// validateText checks that value is a string and applies the optional text
// constraints found in field.Options:
//
//   - "maxLength" (number greater than 0): maximum number of characters,
//     counted as Unicode code points rather than bytes so that multi-byte
//     text is not rejected early;
//   - "pattern" (non-empty string): regular expression the value must match.
//
// It returns an error describing the first violated constraint. A pattern
// that does not compile is reported as a schema problem, wrapping the
// compiler's error.
func validateText(field schema.Field, value any) error {
	str, ok := value.(string)
	if !ok {
		return fmt.Errorf("expected string, got %T", value)
	}

	// Check options
	if field.Options != nil {
		if maxLen, ok := field.Options["maxLength"].(float64); ok && maxLen > 0 {
			// len(str) would count bytes; convert to runes to count characters.
			if len([]rune(str)) > int(maxLen) {
				return fmt.Errorf("exceeds maximum length of %d", int(maxLen))
			}
		}
		if pattern, ok := field.Options["pattern"].(string); ok && pattern != "" {
			re, err := regexp.Compile(pattern)
			if err != nil {
				return fmt.Errorf("invalid pattern in schema: %w", err)
			}
			if !re.MatchString(str) {
				return fmt.Errorf("does not match required pattern")
			}
		}
	}

	return nil
}

// validateNumber checks that value is a float64, int, or int64 and applies
// the optional numeric constraints found in field.Options:
//
//   - "min" (number): smallest accepted value, inclusive;
//   - "max" (number): largest accepted value, inclusive;
//   - "onlyInt" (bool): when true, the value must have no fractional part.
//
// It returns an error describing the first violated constraint.
func validateNumber(field schema.Field, value any) error {
	var num float64

	switch v := value.(type) {
	case string:
		// Strings are rejected outright; no numeric parsing is attempted.
		return fmt.Errorf("expected number, got string")
	case float64:
		num = v
	case int64:
		num = float64(v)
	case int:
		num = float64(v)
	default:
		return fmt.Errorf("expected number, got %T", value)
	}

	opts := field.Options
	if opts == nil {
		return nil
	}

	if lower, ok := opts["min"].(float64); ok && num < lower {
		return fmt.Errorf("must be at least %v", lower)
	}
	if upper, ok := opts["max"].(float64); ok && num > upper {
		return fmt.Errorf("must be at most %v", upper)
	}
	// NOTE(review): the whole-number test round-trips through int64, so
	// values outside the int64 range may be misjudged — confirm whether such
	// magnitudes can reach this code.
	if intOnly, _ := opts["onlyInt"].(bool); intOnly && num != float64(int64(num)) {
		return fmt.Errorf("must be an integer")
	}

	return nil
}

// validateBool accepts value only when its dynamic type is bool; any other
// type (including nil) produces an error naming the type that was received.
func validateBool(value any) error {
	switch value.(type) {
	case bool:
		return nil
	default:
		return fmt.Errorf("expected boolean, got %T", value)
	}
}

// validateDate accepts value when it is a string that parses as either an
// RFC 3339 timestamp or a date-only "YYYY-MM-DD" value. Non-string input and
// strings matching neither layout produce an error.
func validateDate(value any) error {
	text, isString := value.(string)
	if !isString {
		return fmt.Errorf("expected date string, got %T", value)
	}

	// Layouts are tried in order; the first one that parses wins.
	for _, layout := range []string{time.RFC3339, "2006-01-02"} {
		if _, err := time.Parse(layout, text); err == nil {
			return nil
		}
	}

	return fmt.Errorf("invalid date format (expected RFC3339 or YYYY-MM-DD)")
}

// validateEmailField checks that value is a string holding a bare email
// address.
//
// The string must parse as an address and the parsed address must equal the
// input exactly. The equality check rejects inputs that carry a display name
// or angle brackets (for example "Bob <bob@example.com>"), which the parser
// alone would accept; this matches the check performed by Email.
func validateEmailField(value any) error {
	str, ok := value.(string)
	if !ok {
		return fmt.Errorf("expected email string, got %T", value)
	}

	addr, err := mail.ParseAddress(str)
	if err != nil || addr.Address != str {
		return fmt.Errorf("invalid email address")
	}
	return nil
}

// validateURL accepts value when it is a string that parses as a URL and
// carries both a scheme and a host. Non-string input, unparseable text, and
// URLs lacking either part produce an error.
func validateURL(value any) error {
	raw, isString := value.(string)
	if !isString {
		return fmt.Errorf("expected URL string, got %T", value)
	}

	parsed, err := url.Parse(raw)
	switch {
	case err != nil:
		return fmt.Errorf("invalid URL")
	case parsed.Scheme == "", parsed.Host == "":
		return fmt.Errorf("URL must have scheme and host")
	}
	return nil
}

// validateSelect validates a select field.
//
// Options read from field.Options:
//
//   - "values" ([]any): the allowed options; entries that are not strings
//     are skipped;
//   - "multiple" (bool): when true the value must be an array of strings,
//     otherwise it must be a single string.
//
// When no allowed options are configured, any string is accepted. This rule
// is applied to single and multiple selects alike, so a multiple select
// without configured values no longer rejects every option.
func validateSelect(field schema.Field, value any) error {
	var allowed []string
	multiple := false

	if field.Options != nil {
		if raw, ok := field.Options["values"].([]any); ok {
			for _, item := range raw {
				if s, ok := item.(string); ok {
					allowed = append(allowed, s)
				}
			}
		}
		if m, ok := field.Options["multiple"].(bool); ok {
			multiple = m
		}
	}

	// checkOption rejects a choice that is missing from a non-empty allowed list.
	checkOption := func(choice string) error {
		if len(allowed) > 0 && !contains(allowed, choice) {
			return fmt.Errorf("invalid option: %s", choice)
		}
		return nil
	}

	if !multiple {
		str, ok := value.(string)
		if !ok {
			return fmt.Errorf("expected string, got %T", value)
		}
		return checkOption(str)
	}

	arr, ok := value.([]any)
	if !ok {
		return fmt.Errorf("expected array for multiple select, got %T", value)
	}
	for _, item := range arr {
		str, ok := item.(string)
		if !ok {
			return fmt.Errorf("expected string values in select array")
		}
		if err := checkOption(str); err != nil {
			return err
		}
	}

	return nil
}

// validateRelation validates a relation field, whose value is a document ID
// or, when the "multiple" option is true, an array of document IDs.
//
// When the "collection" option names a target collection, every ID is looked
// up through models.Documents.Get and must resolve to a document whose
// CollectionID equals that target. The error returned by the lookup is
// ignored; only the returned document is inspected. Without a target
// collection only the value's shape is checked.
func validateRelation(field schema.Field, value any) error {
	var target string
	many := false
	if field.Options != nil {
		if flag, ok := field.Options["multiple"].(bool); ok {
			many = flag
		}
		if name, ok := field.Options["collection"].(string); ok {
			target = name
		}
	}

	// checkID confirms that id refers to a document in the target collection.
	checkID := func(id string) error {
		if target == "" {
			return nil
		}
		doc, _ := models.Documents.Get(id)
		if doc == nil || doc.CollectionID != target {
			return fmt.Errorf("invalid relation: document %s not found in collection %s", id, target)
		}
		return nil
	}

	if !many {
		id, ok := value.(string)
		if !ok {
			return fmt.Errorf("expected string ID, got %T", value)
		}
		return checkID(id)
	}

	ids, ok := value.([]any)
	if !ok {
		return fmt.Errorf("expected array for multiple relation, got %T", value)
	}
	for _, item := range ids {
		id, isString := item.(string)
		if !isString {
			return fmt.Errorf("expected string IDs in relation array")
		}
		if err := checkID(id); err != nil {
			return err
		}
	}

	return nil
}

// validateFile validates a file field. The value must be a string, or an
// array of strings when the "multiple" option is true. Only the shape is
// checked; the strings themselves are not interpreted.
func validateFile(field schema.Field, value any) error {
	many := false
	if opts := field.Options; opts != nil {
		// A missing or non-bool "multiple" leaves the field single-valued.
		many, _ = opts["multiple"].(bool)
	}

	if !many {
		if _, isString := value.(string); isString {
			return nil
		}
		return fmt.Errorf("expected string file reference, got %T", value)
	}

	refs, isArray := value.([]any)
	if !isArray {
		return fmt.Errorf("expected array for multiple files, got %T", value)
	}
	for _, ref := range refs {
		if _, isString := ref.(string); !isString {
			return fmt.Errorf("expected string file references")
		}
	}

	return nil
}

// validateJSON accepts value when its dynamic type is one of nil, bool,
// float64, string, []any, or map[string]any. Any other type produces an
// error naming it. Only the top-level type is examined; nested elements are
// not inspected.
func validateJSON(value any) error {
	if value == nil {
		return nil
	}

	switch value.(type) {
	case bool, float64, string, []any, map[string]any:
		return nil
	}
	return fmt.Errorf("invalid JSON value type: %T", value)
}

// contains reports whether s is an element of slice. A nil or empty slice
// contains nothing.
func contains(slice []string, s string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == s {
			return true
		}
	}
	return false
}

// validateGeoPoint validates a geopoint field. The value must be an object
// carrying numeric "lon" and "lat" entries, with lon in [-180, 180] and lat
// in [-90, 90]. "lon" is checked completely before "lat", so the first
// problem found on "lon" is the one reported.
func validateGeoPoint(value any) error {
	point, isObject := value.(map[string]any)
	if !isObject {
		return fmt.Errorf("expected object with lon and lat, got %T", value)
	}

	// checkAxis verifies that key is present, numeric, and within ±limit.
	checkAxis := func(key string, limit float64) error {
		raw, present := point[key]
		if !present {
			return fmt.Errorf("missing '%s' field", key)
		}
		degrees, numeric := toFloat64(raw)
		if !numeric {
			return fmt.Errorf("'%s' must be a number", key)
		}
		if degrees < -limit || degrees > limit {
			return fmt.Errorf("'%s' must be between %v and %v", key, -limit, limit)
		}
		return nil
	}

	if err := checkAxis("lon", 180); err != nil {
		return err
	}
	return checkAxis("lat", 90)
}

// validateEditor validates an editor (rich HTML) field. The value must be a
// string; no sanitisation is performed here.
//
// When field.Options carries a "maxLength" number greater than 0, the limit
// is applied to the text that remains after stripHTMLTags removes the tags,
// not to the raw HTML. The length is counted in characters (Unicode code
// points) rather than bytes, in line with the wording of the error message.
func validateEditor(field schema.Field, value any) error {
	str, ok := value.(string)
	if !ok {
		return fmt.Errorf("expected string (HTML content), got %T", value)
	}

	if field.Options != nil {
		if maxLen, ok := field.Options["maxLength"].(float64); ok && maxLen > 0 {
			// len on the string would count bytes; convert to runes to
			// count characters.
			textLen := len([]rune(stripHTMLTags(str)))
			if textLen > int(maxLen) {
				return fmt.Errorf("content exceeds maximum length of %d characters", int(maxLen))
			}
		}
	}

	return nil
}

// htmlTagPattern matches anything shaped like an HTML tag: a "<", any run of
// characters other than ">", and the closing ">". It is compiled once at
// package initialisation rather than on every call to stripHTMLTags.
var htmlTagPattern = regexp.MustCompile("<[^>]*>")

// stripHTMLTags returns s with every tag-shaped span removed, for use when
// measuring text length. This is plain pattern matching, not HTML parsing:
// entities such as "&amp;" are left as written, and a "<" with no later ">"
// is kept.
func stripHTMLTags(s string) string {
	return htmlTagPattern.ReplaceAllString(s, "")
}

// toFloat64 converts v to a float64 when its dynamic type is float64,
// float32, int, or int64. The second result reports whether the conversion
// applied; for every other type it returns (0, false).
func toFloat64(v any) (float64, bool) {
	if f, ok := v.(float64); ok {
		return f, true
	}
	if i, ok := v.(int); ok {
		return float64(i), true
	}
	if i, ok := v.(int64); ok {
		return float64(i), true
	}
	if f, ok := v.(float32); ok {
		return float64(f), true
	}
	return 0, false
}

// ProcessFieldModifiers resolves field modifiers in data and returns a new
// map keyed by the plain field names. The data and existingData maps are
// never written to.
//
// A key of at least two bytes ending in "+" appends to (or increments) the
// field named by the rest of the key, via applyAppend; one ending in "-"
// removes from (or decrements) it, via applyRemove. Every other key is
// copied through unchanged.
//
// Keys are applied in a fixed order so the result does not depend on map
// iteration order: plain keys first, then "+" keys, then "-" keys. A
// modifier starts from the value already produced for its field in this
// call when there is one, and from existingData otherwise. For a request
// that touches each field through a single key, this is the same as
// applying that key directly against existingData.
func ProcessFieldModifiers(data map[string]any, existingData map[string]any) map[string]any {
	result := make(map[string]any, len(data))

	// Plain keys first, so modifiers on the same field build on the value
	// supplied in this request.
	for key, value := range data {
		if _, modifier := splitFieldModifier(key); modifier == 0 {
			result[key] = value
		}
	}

	// current returns the value a modifier should start from.
	current := func(base string) any {
		if v, ok := result[base]; ok {
			return v
		}
		return existingData[base]
	}

	// Appends run before removes. Within one pass every key targets a
	// different field, so iteration order inside a pass cannot change the
	// outcome.
	for _, pass := range []byte{'+', '-'} {
		for key, value := range data {
			base, modifier := splitFieldModifier(key)
			if modifier != pass {
				continue
			}
			if modifier == '+' {
				result[base] = applyAppend(current(base), value)
			} else {
				result[base] = applyRemove(current(base), value)
			}
		}
	}

	return result
}

// splitFieldModifier separates a trailing "+" or "-" from key. It returns the
// field name and the modifier byte, or key itself and 0 when key is shorter
// than two bytes or carries no modifier.
func splitFieldModifier(key string) (string, byte) {
	if len(key) > 1 {
		switch last := key[len(key)-1]; last {
		case '+', '-':
			return key[:len(key)-1], last
		}
	}
	return key, 0
}

// applyAppend adds toAdd to existing.
//
// When both arguments are numbers (as recognised by toFloat64) the result is
// their sum as a float64. Otherwise the result is a list: existing is used
// as-is when it is a []any, treated as empty when nil, and wrapped in a
// one-element list for any other type; toAdd contributes all of its elements
// when it is a []any and a single element otherwise.
//
// The returned list is always freshly allocated and non-nil. It never shares
// storage with existing, so appending must not write into spare capacity of
// the caller's slice.
func applyAppend(existing, toAdd any) any {
	// Numeric case: increment.
	if existingNum, ok := toFloat64(existing); ok {
		if addNum, ok := toFloat64(toAdd); ok {
			return existingNum + addNum
		}
	}

	var base []any
	switch v := existing.(type) {
	case []any:
		base = v
	case nil:
		// Nothing stored yet: start from an empty list.
	default:
		// Existing is a single non-list value: wrap it.
		base = []any{existing}
	}

	extra, isList := toAdd.([]any)
	if !isList {
		extra = []any{toAdd}
	}

	merged := make([]any, 0, len(base)+len(extra))
	merged = append(merged, base...)
	merged = append(merged, extra...)
	return merged
}

// applyRemove takes toRemove away from existing.
//
// When both arguments are numbers (as recognised by toFloat64) the result is
// their difference as a float64. Otherwise the result is a list: existing is
// used as-is when it is a []any, treated as empty when nil, and wrapped in a
// one-element list for any other type; every element matching toRemove (or
// any element of toRemove when it is a []any) is dropped.
//
// Elements are matched by their "%v" formatting, so values that print
// identically — for example the number 1 and the string "1" — are treated as
// equal.
//
// The returned list is always non-nil, even when every element is removed,
// so it encodes as a JSON array rather than null.
func applyRemove(existing, toRemove any) any {
	// Numeric case: decrement.
	if existingNum, ok := toFloat64(existing); ok {
		if removeNum, ok := toFloat64(toRemove); ok {
			return existingNum - removeNum
		}
	}

	var current []any
	switch v := existing.(type) {
	case []any:
		current = v
	case nil:
		return []any{}
	default:
		current = []any{existing}
	}

	targets, isList := toRemove.([]any)
	if !isList {
		targets = []any{toRemove}
	}
	removeSet := make(map[string]struct{}, len(targets))
	for _, item := range targets {
		removeSet[fmt.Sprintf("%v", item)] = struct{}{}
	}

	kept := make([]any, 0, len(current))
	for _, item := range current {
		if _, drop := removeSet[fmt.Sprintf("%v", item)]; !drop {
			kept = append(kept, item)
		}
	}

	return kept
}

// ProcessAutodate stamps the collection's autodate fields in data with the
// current time, formatted as RFC 3339. It is meant to run before a document
// is saved.
//
// isCreate selects which fields are stamped: fields whose "onCreate" option
// is true when isCreate is true, fields whose "onUpdate" option is true
// otherwise. A field with neither option enabled behaves as onCreate. Any
// value already present in data for a stamped field is overwritten.
//
// It returns the error from loading the collection's fields, if any.
func ProcessAutodate(collection *models.Collection, data map[string]any, isCreate bool) error {
	fields, err := schema.GetFields(collection)
	if err != nil {
		return err
	}

	// One timestamp is shared by every field stamped in this call.
	stamp := time.Now().Format(time.RFC3339)

	for _, field := range fields {
		if field.Type != schema.Autodate {
			continue
		}

		var onCreate, onUpdate bool
		if opts := field.Options; opts != nil {
			// A missing or non-bool option reads as false.
			onCreate, _ = opts["onCreate"].(bool)
			onUpdate, _ = opts["onUpdate"].(bool)
		}
		if !onCreate && !onUpdate {
			onCreate = true
		}

		if (isCreate && onCreate) || (!isCreate && onUpdate) {
			data[field.Name] = stamp
		}
	}

	return nil
}

// MaxEmailLength is the maximum allowed length, in bytes, for email addresses.
const MaxEmailLength = 254

// Email validates an email address format.
// Returns nil if valid, error if invalid.
// Checks, in order:
// - Not empty
// - Not longer than MaxEmailLength bytes
// - Contains none of: ; ' " \ newline, carriage return, tab, NUL
// - Parses as an address with net/mail
// - The parsed address equals the input exactly, which rejects display-name
// and angle-bracket forms such as "Bob <bob@example.com>"
func Email(email string) error {
	if email == "" {
		return fmt.Errorf("email cannot be empty")
	}

	if len(email) > MaxEmailLength {
		return fmt.Errorf("email exceeds maximum length of %d characters", MaxEmailLength)
	}

	// Reject characters that could be used for injection. All of them are
	// single ASCII bytes, so a byte-wise scan is enough and no regular
	// expression needs to be compiled.
	for i := 0; i < len(email); i++ {
		switch email[i] {
		case ';', '\'', '"', '\\', '\n', '\r', '\t', 0x00:
			return fmt.Errorf("email contains invalid characters")
		}
	}

	// Validate the format with Go's mail parser, then make sure the parser
	// did not accept anything beyond the bare address.
	addr, err := mail.ParseAddress(email)
	if err != nil || addr.Address != email {
		return fmt.Errorf("invalid email format")
	}

	return nil
}
← Back