readysite / website / internal / search / strip.go
1.2 KB
strip.go
package search

import (
	"encoding/json"
	"sort"
	"strings"

	"golang.org/x/net/html"
)

// StripHTML removes HTML tags from s and returns the remaining text.
//
// Each text node is trimmed of leading and trailing whitespace, and the
// non-empty nodes are joined with a single space. Whitespace inside a text
// node is left as-is. The contents of <script> and <style> elements are
// dropped: the tokenizer reports them as ordinary text tokens, but they are
// source code rather than readable text.
//
// Tokenization stops at the first error token (normally end of input) and
// whatever text was collected up to that point is returned.
func StripHTML(s string) string {
	tokenizer := html.NewTokenizer(strings.NewReader(s))
	var buf strings.Builder

	// skipText is true while the tokenizer is inside a <script> or <style>
	// element, whose raw body must not end up in the output.
	skipText := false

	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			return strings.TrimSpace(buf.String())
		case html.StartTagToken, html.SelfClosingTagToken:
			// TagName reports the lower-cased element name.
			name, _ := tokenizer.TagName()
			skipText = string(name) == "script" || string(name) == "style"
		case html.EndTagToken:
			skipText = false
		case html.TextToken:
			if skipText {
				continue
			}
			text := strings.TrimSpace(string(tokenizer.Text()))
			if text == "" {
				continue
			}
			if buf.Len() > 0 {
				buf.WriteByte(' ')
			}
			buf.WriteString(text)
		}
	}
}

// FlattenJSON decodes jsonStr and concatenates every string value found in
// it, at any nesting depth, into one space-separated string.
//
// Any JSON value is accepted, not only an object. Input that fails to decode
// yields the empty string. The result has leading and trailing whitespace
// trimmed.
func FlattenJSON(jsonStr string) string {
	var decoded any
	err := json.Unmarshal([]byte(jsonStr), &decoded)
	if err != nil {
		return ""
	}

	out := new(strings.Builder)
	flattenValue(out, decoded)
	return strings.TrimSpace(out.String())
}

// flattenValue appends every non-empty string contained in v to buf,
// separating entries with a single space.
//
// v is walked recursively: strings are written, map[string]any and []any
// values are descended into, and every other type (numbers, booleans, nil)
// is ignored. Map keys themselves are never written, only their values.
//
// Map entries are visited in sorted key order. Go randomizes map iteration,
// so ranging over the map directly would make the output differ between
// calls on identical input.
func flattenValue(buf *strings.Builder, v any) {
	switch val := v.(type) {
	case string:
		if val == "" {
			return
		}
		if buf.Len() > 0 {
			buf.WriteByte(' ')
		}
		buf.WriteString(val)
	case map[string]any:
		keys := make([]string, 0, len(val))
		for k := range val {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		for _, k := range keys {
			flattenValue(buf, val[k])
		}
	case []any:
		for _, item := range val {
			flattenValue(buf, item)
		}
	}
}
← Back