strip.go
package search
import (
	"encoding/json"
	"sort"
	"strings"

	"golang.org/x/net/html"
)
// StripHTML removes HTML markup from s and returns the remaining plain text.
//
// Every text node is trimmed of surrounding whitespace, and the non-empty
// pieces are joined with single spaces. The contents of <script> and <style>
// elements are dropped: the tokenizer reports them as ordinary text tokens,
// but they are code, not readable text. Malformed or truncated input is not
// treated as an error; the text collected before the tokenizer stopped is
// returned.
func StripHTML(s string) string {
	tokenizer := html.NewTokenizer(strings.NewReader(s))
	var buf strings.Builder
	// inRawCode is true from a <script>/<style> start tag until the next end
	// tag. The tokenizer delivers the body of such an element as at most one
	// raw text token followed directly by its end tag, so any end tag closes it.
	inRawCode := false
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// Reached at end of input as well as on genuine tokenizer errors.
			return strings.TrimSpace(buf.String())
		case html.StartTagToken, html.SelfClosingTagToken:
			// The tokenizer switches to raw-text mode even for "<script/>",
			// so self-closing tags are checked too. TagName is lower-cased.
			name, _ := tokenizer.TagName()
			if tag := string(name); tag == "script" || tag == "style" {
				inRawCode = true
			}
		case html.EndTagToken:
			inRawCode = false
		case html.TextToken:
			if inRawCode {
				continue
			}
			// Text avoids building a full Token just to read its Data field.
			text := strings.TrimSpace(string(tokenizer.Text()))
			if text == "" {
				continue
			}
			if buf.Len() > 0 {
				buf.WriteByte(' ')
			}
			buf.WriteString(text)
		}
	}
}
// FlattenJSON parses jsonStr and returns all string values it contains,
// joined by single spaces.
//
// The document may be any JSON value (object, array, or a bare string);
// nested containers are walked by flattenValue. Object keys, numbers,
// booleans, nulls, and empty strings contribute nothing to the result.
// If jsonStr is not valid JSON, the empty string is returned.
func FlattenJSON(jsonStr string) string {
	var decoded any
	err := json.Unmarshal([]byte(jsonStr), &decoded)
	if err != nil {
		return ""
	}

	out := new(strings.Builder)
	flattenValue(out, decoded)
	return strings.TrimSpace(out.String())
}
// flattenValue appends every non-empty string found in v to buf, separating
// consecutive strings with a single space.
//
// v is expected to be a value produced by decoding JSON into an any: strings
// are written, map[string]any and []any are walked recursively, and every
// other type (numbers, booleans, nil) is ignored. Object keys themselves are
// never written. Map entries are visited in sorted key order so that the same
// input always yields the same output; Go's native map iteration order is
// randomized.
func flattenValue(buf *strings.Builder, v any) {
	switch val := v.(type) {
	case string:
		if val == "" {
			return
		}
		if buf.Len() > 0 {
			buf.WriteByte(' ')
		}
		buf.WriteString(val)
	case map[string]any:
		keys := make([]string, 0, len(val))
		for k := range val {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		for _, k := range keys {
			flattenValue(buf, val[k])
		}
	case []any:
		for _, item := range val {
			flattenValue(buf, item)
		}
	}
}