// personotes/internal/indexer/indexer.go

package indexer
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"unicode"
yaml "gopkg.in/yaml.v3"
)
// Indexer maintains an in-memory index of the tags associated with Markdown files.
type Indexer struct {
mu sync.RWMutex
tags map[string][]string
docs map[string]*Document
}
// Document represents an indexed note used for search.
type Document struct {
Path string
Title string
Tags []string
Date string
LastModified string
Body string
Summary string
lowerTitle string
lowerBody string
lowerTags []string
}
// SearchResult represents an enriched search result.
type SearchResult struct {
Path string
Title string
Tags []string
Snippet string
Score float64
Date string
LastModified string
}
// New creates a new Indexer instance.
func New() *Indexer {
return &Indexer{
tags: make(map[string][]string),
docs: make(map[string]*Document),
}
}
// Load rebuilds the index from the provided root directory.
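//
// Typical usage, as a minimal sketch (the notes directory path and the
// import alias "indexer" are assumptions, not shown in this file):
//
//     idx := indexer.New()
//     if err := idx.Load("/path/to/notes"); err != nil {
//         log.Fatal(err)
//     }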
func (i *Indexer) Load(root string) error {
entries := make(map[string]map[string]struct{})
documents := make(map[string]*Document)
err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
if walkErr != nil {
return walkErr
}
if d.IsDir() {
return nil
}
if !strings.EqualFold(filepath.Ext(path), ".md") {
return nil
}
rel, err := filepath.Rel(root, path)
if err != nil {
rel = path
}
fm, body, err := ExtractFrontMatterAndBody(path)
if err != nil {
return fmt.Errorf("analyse du front matter pour %s: %w", path, err)
}
tags := normalizeTags([]string(fm.Tags))
if len(tags) > 0 {
for _, tag := range tags {
key := strings.ToLower(tag)
if _, ok := entries[key]; !ok {
entries[key] = make(map[string]struct{})
}
entries[key][rel] = struct{}{}
}
}
doc := buildDocument(rel, fm, body, tags)
documents[rel] = doc
return nil
})
if err != nil {
return err
}
indexed := make(map[string][]string, len(entries))
for tag, files := range entries {
list := make([]string, 0, len(files))
for file := range files {
list = append(list, file)
}
sort.Strings(list)
indexed[tag] = list
}
i.mu.Lock()
i.tags = indexed
i.docs = documents
i.mu.Unlock()
return nil
}
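// normalizeTags trims whitespace, drops empty entries, and de-duplicates
// tags case-insensitively while preserving the first-seen casing and order.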
func normalizeTags(tags []string) []string {
if len(tags) == 0 {
return nil
}
seen := make(map[string]struct{}, len(tags))
result := make([]string, 0, len(tags))
for _, tag := range tags {
trimmed := strings.TrimSpace(tag)
if trimmed == "" {
continue
}
lower := strings.ToLower(trimmed)
if _, ok := seen[lower]; ok {
continue
}
seen[lower] = struct{}{}
result = append(result, trimmed)
}
return result
}
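// buildDocument assembles a Document from the parsed front matter and body,
// precomputing lowercased copies of the title, body, and tags so searches
// can match case-insensitively without re-lowering on every query.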
func buildDocument(path string, fm FullFrontMatter, body string, tags []string) *Document {
title := strings.TrimSpace(fm.Title)
if title == "" {
title = deriveTitleFromPath(path)
}
summary := buildSummary(body)
lowerTags := make([]string, len(tags))
for idx, tag := range tags {
lowerTags[idx] = strings.ToLower(tag)
}
doc := &Document{
Path: path,
Title: title,
Tags: tags,
Date: strings.TrimSpace(fm.Date),
LastModified: strings.TrimSpace(fm.LastModified),
Body: body,
Summary: summary,
lowerTitle: strings.ToLower(title),
lowerBody: strings.ToLower(body),
lowerTags: lowerTags,
}
return doc
}
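// deriveTitleFromPath builds a fallback title from the file name: the
// extension is stripped, dashes and underscores become spaces, and
// strings.Title (deprecated since Go 1.18, but adequate for simple file
// names) capitalizes each word; an empty result yields a default title.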
func deriveTitleFromPath(path string) string {
base := filepath.Base(path)
base = strings.TrimSuffix(base, filepath.Ext(base))
base = strings.ReplaceAll(base, "-", " ")
base = strings.ReplaceAll(base, "_", " ")
base = strings.TrimSpace(base)
if base == "" {
return "Sans titre"
}
return strings.Title(base)
}
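// buildSummary returns a whitespace-normalized excerpt of the body capped at
// 240 runes, appending an ellipsis when the body is longer.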
func buildSummary(body string) string {
const maxRunes = 240
trimmed := strings.TrimSpace(body)
if trimmed == "" {
return ""
}
// Collapse runs of whitespace (including newlines) into single spaces.
normalized := strings.Join(strings.Fields(trimmed), " ")
runes := []rune(normalized)
if len(runes) <= maxRunes {
return normalized
}
return string(runes[:maxRunes]) + "…"
}
// SearchByTag returns a copy of the list of files indexed for the given tag.
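//
// Example, as a minimal sketch (the tag value is hypothetical):
//
//     for _, path := range idx.SearchByTag("golang") {
//         fmt.Println(path)
//     }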
func (i *Indexer) SearchByTag(tag string) []string {
i.mu.RLock()
defer i.mu.RUnlock()
tag = strings.TrimSpace(tag)
if tag == "" {
return nil
}
lowerTag := strings.ToLower(tag)
files, ok := i.tags[lowerTag]
if !ok {
return nil
}
copyFiles := make([]string, len(files))
copy(copyFiles, files)
return copyFiles
}
// SearchDocuments performs a rich search over the indexed documents.
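//
// Queries combine free terms, double-quoted phrases, and tag:/title:/path:
// filters with implicit AND semantics. A minimal sketch (the query values
// are hypothetical):
//
//     for _, r := range idx.SearchDocuments(`tag:go "front matter" path:notes`) {
//         fmt.Printf("%.1f  %s\n", r.Score, r.Title)
//     }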
func (i *Indexer) SearchDocuments(query string) []SearchResult {
parsed := parseSearchQuery(query)
i.mu.RLock()
defer i.mu.RUnlock()
if len(parsed.terms) == 0 && len(parsed.tagFilters) == 0 && len(parsed.titleFilters) == 0 && len(parsed.pathFilters) == 0 {
return nil
}
results := make([]SearchResult, 0, len(i.docs))
for _, doc := range i.docs {
match, score := matchDocument(doc, parsed)
if !match {
continue
}
snippet := buildSnippet(doc, parsed.terms)
if snippet == "" {
snippet = doc.Summary
}
results = append(results, SearchResult{
Path: doc.Path,
Title: doc.Title,
Tags: doc.Tags,
Snippet: snippet,
Score: score,
Date: doc.Date,
LastModified: doc.LastModified,
})
}
sort.SliceStable(results, func(a, b int) bool {
if results[a].Score == results[b].Score {
return strings.ToLower(results[a].Title) < strings.ToLower(results[b].Title)
}
return results[a].Score > results[b].Score
})
return results
}
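// matchDocument applies every filter and free term with AND semantics and
// returns a relevance score: each term adds 6 for a title hit, 4 for a tag
// hit, 2 for a path hit, and 1.5 for a body hit; satisfied title, tag, and
// path filters add 4, 2, and 1.5 respectively, plus 0.5 for a non-empty title.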
func matchDocument(doc *Document, q parsedQuery) (bool, float64) {
score := 0.0
// Tag filters must all match
for _, filter := range q.tagFilters {
if !containsString(doc.lowerTags, filter) {
return false, 0
}
score += 2 // small bonus for each satisfied tag filter
}
// Title filters must all match
for _, filter := range q.titleFilters {
if !strings.Contains(doc.lowerTitle, filter) {
return false, 0
}
score += 4
}
// Path filters must all match
lowerPath := strings.ToLower(doc.Path)
for _, filter := range q.pathFilters {
if !strings.Contains(lowerPath, filter) {
return false, 0
}
score += 1.5
}
// General terms (AND logic)
for _, term := range q.terms {
if term == "" {
continue
}
termScore := 0.0
if strings.Contains(doc.lowerTitle, term) {
termScore += 6
}
if containsString(doc.lowerTags, term) {
termScore += 4
}
if strings.Contains(lowerPath, term) {
termScore += 2
}
if strings.Contains(doc.lowerBody, term) {
termScore += 1.5
}
if termScore == 0 {
return false, 0 // term must match somewhere
}
score += termScore
}
if len(q.terms) == 0 && len(q.tagFilters) == 0 && len(q.titleFilters) == 0 && len(q.pathFilters) == 0 {
return false, 0
}
// Small bonus for documents with a non-empty title
if doc.Title != "" {
score += 0.5
}
return true, score
}
func containsString(list []string, target string) bool {
for _, item := range list {
if item == target {
return true
}
}
return false
}
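// buildSnippet returns a short excerpt of the body centered on the first
// matching term, falling back to the precomputed summary when there are no
// terms or no match.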
func buildSnippet(doc *Document, terms []string) string {
if doc.Body == "" || len(terms) == 0 {
return doc.Summary
}
pos, termLen := findTermPosition(doc.Body, terms)
if pos == -1 {
return doc.Summary
}
return extractSnippetFromRunes([]rune(doc.Body), pos, termLen)
}
func findTermPosition(body string, terms []string) (int, int) {
if len(terms) == 0 {
return -1, 0
}
bodyRunes := []rune(body)
lowerRunes := make([]rune, len(bodyRunes))
for idx, r := range bodyRunes {
lowerRunes[idx] = unicode.ToLower(r)
}
for _, term := range terms {
term = strings.TrimSpace(term)
if term == "" {
continue
}
termRunes := []rune(term)
for idx, r := range termRunes {
termRunes[idx] = unicode.ToLower(r)
}
pos := indexRunes(lowerRunes, termRunes)
if pos != -1 {
return pos, len(termRunes)
}
}
return -1, 0
}
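// indexRunes returns the index of the first occurrence of needle in haystack
// (both rune slices), or -1 when needle is empty or absent.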
func indexRunes(haystack, needle []rune) int {
if len(needle) == 0 || len(needle) > len(haystack) {
return -1
}
for i := 0; i <= len(haystack)-len(needle); i++ {
match := true
for j := 0; j < len(needle); j++ {
if haystack[i+j] != needle[j] {
match = false
break
}
}
if match {
return i
}
}
return -1
}
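// extractSnippetFromRunes extracts roughly 120 runes around the matched term,
// normalizes whitespace, and adds ellipses when the excerpt is truncated.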
func extractSnippetFromRunes(body []rune, pos, termLen int) string {
if len(body) == 0 {
return ""
}
const window = 120
start := pos - window/2
if start < 0 {
start = 0
}
end := pos + termLen + window/2
if end > len(body) {
end = len(body)
}
snippet := strings.TrimSpace(string(body[start:end]))
snippet = strings.Join(strings.Fields(snippet), " ")
if start > 0 {
snippet = "…" + snippet
}
if end < len(body) {
snippet += "…"
}
return snippet
}
type parsedQuery struct {
terms []string
tagFilters []string
titleFilters []string
pathFilters []string
}
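// parseSearchQuery splits a raw query into lowercased free terms and
// tag:/title:/path: filters. Double-quoted phrases stay together as single
// tokens, so the query
//
//     tag:go title:index "front matter" path:2024
//
// yields one free term ("front matter") and one filter of each kind.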
func parseSearchQuery(query string) parsedQuery {
trimmed := strings.TrimSpace(query)
if trimmed == "" {
return parsedQuery{}
}
tokens := splitQuery(trimmed)
result := parsedQuery{
terms: make([]string, 0, len(tokens)),
tagFilters: []string{},
titleFilters: []string{},
pathFilters: []string{},
}
for _, token := range tokens {
if token == "" {
continue
}
lower := strings.ToLower(token)
switch {
case strings.HasPrefix(lower, "tag:"):
value := strings.TrimSpace(token[4:])
if value != "" {
result.tagFilters = append(result.tagFilters, strings.ToLower(value))
}
case strings.HasPrefix(lower, "title:"):
value := strings.TrimSpace(token[6:])
if value != "" {
result.titleFilters = append(result.titleFilters, strings.ToLower(value))
}
case strings.HasPrefix(lower, "path:"):
value := strings.TrimSpace(token[5:])
if value != "" {
result.pathFilters = append(result.pathFilters, strings.ToLower(value))
}
default:
result.terms = append(result.terms, strings.ToLower(token))
}
}
return result
}
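// splitQuery tokenizes the query, keeping double-quoted runs as single tokens
// and splitting the rest on unquoted spaces, tabs, and newlines.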
func splitQuery(input string) []string {
var tokens []string
var current strings.Builder
inQuotes := false
for _, r := range input {
switch r {
case '"':
if inQuotes {
tokens = appendToken(tokens, current.String())
current.Reset()
inQuotes = false
} else {
if current.Len() > 0 {
tokens = appendToken(tokens, current.String())
current.Reset()
}
inQuotes = true
}
case ' ', '\t', '\n':
if inQuotes {
current.WriteRune(r)
} else {
if current.Len() > 0 {
tokens = appendToken(tokens, current.String())
current.Reset()
}
}
default:
current.WriteRune(r)
}
}
if current.Len() > 0 {
tokens = appendToken(tokens, current.String())
}
return tokens
}
func appendToken(tokens []string, token string) []string {
token = strings.TrimSpace(token)
if token != "" {
tokens = append(tokens, token)
}
return tokens
}
// FullFrontMatter represents the complete structure of the YAML front matter.
type FullFrontMatter struct {
Title string `yaml:"title,omitempty"`
Date string `yaml:"date,omitempty"`
LastModified string `yaml:"last_modified,omitempty"`
Tags tagList `yaml:"tags,omitempty"`
}
// frontMatter is a simplified version kept for compatibility with Load.
type frontMatter struct {
Tags tagList `yaml:"tags"`
}
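// tagList accepts tags written either as a single YAML scalar ("tags: go")
// or as a sequence ("tags: [go, cli]"), resolving aliased nodes as well.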
type tagList []string
func (t *tagList) UnmarshalYAML(value *yaml.Node) error {
switch value.Kind {
case yaml.ScalarNode:
var tag string
if err := value.Decode(&tag); err != nil {
return err
}
*t = tagList{tag}
return nil
case yaml.SequenceNode:
var tags []string
if err := value.Decode(&tags); err != nil {
return err
}
*t = tagList(tags)
return nil
case yaml.AliasNode:
return t.UnmarshalYAML(value.Alias)
default:
return fmt.Errorf("format de tags non supporte")
}
}
// ExtractFrontMatterAndBody extracts the front matter and the body from a Markdown file.
func ExtractFrontMatterAndBody(path string) (FullFrontMatter, string, error) {
file, err := os.Open(path)
if err != nil {
return FullFrontMatter{}, "", err
}
defer file.Close()
return ExtractFrontMatterAndBodyFromReader(file)
}
// ExtractFrontMatterAndBodyFromReader extracts the front matter and the body from an io.Reader.
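//
// The expected input layout is, as a minimal sketch:
//
//     ---
//     title: My note
//     tags: [go, indexing]
//     ---
//     Body of the note…
//
// When the first line is not "---", the whole input is returned as the body.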
func ExtractFrontMatterAndBodyFromReader(reader io.Reader) (FullFrontMatter, string, error) {
bufReader := bufio.NewReader(reader)
var fm FullFrontMatter
var bodyBuilder strings.Builder
var fmBuilder strings.Builder
line, err := bufReader.ReadString('\n')
if err != nil && !errors.Is(err, io.EOF) {
return FullFrontMatter{}, "", err
}
if strings.TrimSpace(line) != "---" {
bodyBuilder.WriteString(line) // If no front matter, this is part of the body
_, err := io.Copy(&bodyBuilder, bufReader)
return FullFrontMatter{}, bodyBuilder.String(), err
}
// Found first '---', now read front matter
fmFound := false
for {
line, err = bufReader.ReadString('\n')
// Check the closing delimiter before handling the error so that a final
// "---" without a trailing newline still terminates the front matter.
if strings.TrimSpace(line) == "---" {
fmFound = true
break
}
fmBuilder.WriteString(line)
if err != nil {
if errors.Is(err, io.EOF) {
// Front matter never terminated: treat everything read so far,
// including the partial last line, as body.
return FullFrontMatter{}, "---\n" + fmBuilder.String(), nil
}
return FullFrontMatter{}, "", err
}
}
if fmFound {
if err := yaml.Unmarshal([]byte(fmBuilder.String()), &fm); err != nil {
return FullFrontMatter{}, "", fmt.Errorf("erreur d'analyse YAML du front matter: %w", err)
}
}
// Read the rest of the body
_, err = io.Copy(&bodyBuilder, bufReader)
if err != nil {
return FullFrontMatter{}, "", err
}
return fm, bodyBuilder.String(), nil
}
// extractFrontMatter is a simplified version kept for compatibility with Load.
func extractFrontMatter(path string) (frontMatter, error) {
fm, _, err := ExtractFrontMatterAndBody(path)
return frontMatter{Tags: fm.Tags}, err
}