Premier commit déjà bien avancé
This commit is contained in:
640
internal/indexer/indexer.go
Normal file
640
internal/indexer/indexer.go
Normal file
@ -0,0 +1,640 @@
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Indexer maintains an in-memory index of tags associated with Markdown files.
// All map access is guarded by mu, so an Indexer is safe for concurrent use
// once constructed with New.
type Indexer struct {
	mu   sync.RWMutex
	tags map[string][]string  // lowercased tag -> sorted relative file paths
	docs map[string]*Document // relative file path -> indexed document
}
|
||||
|
||||
// Document represents an indexed note used for search.
type Document struct {
	Path         string   // path relative to the indexed root
	Title        string   // front matter title, or a title derived from the file name
	Tags         []string // normalized tags: trimmed, deduplicated, original casing kept
	Date         string   // raw "date" front matter value, trimmed
	LastModified string   // raw "last_modified" front matter value, trimmed
	Body         string   // Markdown body without the front matter
	Summary      string   // whitespace-normalized excerpt of the body

	// Precomputed lowercase forms so searches do not re-lowercase per query.
	lowerTitle string
	lowerBody  string
	lowerTags  []string
}
|
||||
|
||||
// SearchResult represents an enriched search hit.
type SearchResult struct {
	Path         string   // relative path of the matching note
	Title        string   // note title
	Tags         []string // note tags (shared with the indexed document)
	Snippet      string   // contextual excerpt around the first matched term
	Score        float64  // relevance score; higher is better
	Date         string   // raw front matter date
	LastModified string   // raw front matter last-modified value
}
|
||||
|
||||
// New cree une nouvelle instance d Indexer.
|
||||
func New() *Indexer {
|
||||
return &Indexer{
|
||||
tags: make(map[string][]string),
|
||||
docs: make(map[string]*Document),
|
||||
}
|
||||
}
|
||||
|
||||
// Load rebuilds the whole index from the given root directory.
//
// It walks root recursively, parses the YAML front matter of every *.md file,
// then atomically swaps the freshly built tag and document maps into the
// Indexer under the write lock. If any file fails to parse, the walk aborts
// and the existing index is left untouched.
func (i *Indexer) Load(root string) error {
	entries := make(map[string]map[string]struct{}) // lowercased tag -> set of relative paths
	documents := make(map[string]*Document)

	err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil
		}
		// Only Markdown files are indexed; the extension match is case-insensitive.
		if !strings.EqualFold(filepath.Ext(path), ".md") {
			return nil
		}

		rel, err := filepath.Rel(root, path)
		if err != nil {
			// Fall back to the walker-provided path rather than failing the walk.
			rel = path
		}

		fm, body, err := ExtractFrontMatterAndBody(path)
		if err != nil {
			return fmt.Errorf("analyse du front matter pour %s: %w", path, err)
		}

		tags := normalizeTags([]string(fm.Tags))
		if len(tags) > 0 {
			for _, tag := range tags {
				// Tag lookup keys are lowercased; a set deduplicates paths per tag.
				key := strings.ToLower(tag)
				if _, ok := entries[key]; !ok {
					entries[key] = make(map[string]struct{})
				}
				entries[key][rel] = struct{}{}
			}
		}

		doc := buildDocument(rel, fm, body, tags)
		documents[rel] = doc

		return nil
	})
	if err != nil {
		return err
	}

	// Flatten each path set into a sorted slice for deterministic results.
	indexed := make(map[string][]string, len(entries))
	for tag, files := range entries {
		list := make([]string, 0, len(files))
		for file := range files {
			list = append(list, file)
		}
		sort.Strings(list)
		indexed[tag] = list
	}

	// Swap both maps in a single critical section so readers never observe
	// a half-updated index.
	i.mu.Lock()
	i.tags = indexed
	i.docs = documents
	i.mu.Unlock()

	return nil
}
|
||||
|
||||
// normalizeTags trims surrounding whitespace from each tag, drops empty
// entries, and removes case-insensitive duplicates while keeping the first
// occurrence's original casing. A nil or empty input yields nil.
func normalizeTags(tags []string) []string {
	if len(tags) == 0 {
		return nil
	}

	out := make([]string, 0, len(tags))
	seen := make(map[string]struct{}, len(tags))

	for _, raw := range tags {
		tag := strings.TrimSpace(raw)
		if tag == "" {
			continue
		}
		key := strings.ToLower(tag)
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, tag)
	}

	return out
}
|
||||
|
||||
func buildDocument(path string, fm FullFrontMatter, body string, tags []string) *Document {
|
||||
title := strings.TrimSpace(fm.Title)
|
||||
if title == "" {
|
||||
title = deriveTitleFromPath(path)
|
||||
}
|
||||
|
||||
summary := buildSummary(body)
|
||||
|
||||
lowerTags := make([]string, len(tags))
|
||||
for idx, tag := range tags {
|
||||
lowerTags[idx] = strings.ToLower(tag)
|
||||
}
|
||||
|
||||
doc := &Document{
|
||||
Path: path,
|
||||
Title: title,
|
||||
Tags: tags,
|
||||
Date: strings.TrimSpace(fm.Date),
|
||||
LastModified: strings.TrimSpace(fm.LastModified),
|
||||
Body: body,
|
||||
Summary: summary,
|
||||
lowerTitle: strings.ToLower(title),
|
||||
lowerBody: strings.ToLower(body),
|
||||
lowerTags: lowerTags,
|
||||
}
|
||||
|
||||
return doc
|
||||
}
|
||||
|
||||
// deriveTitleFromPath builds a human-readable title from a file path when the
// front matter provides none: the base name without its extension, with dashes
// and underscores turned into spaces and the first letter of each word
// capitalized. Returns "Sans titre" when nothing usable remains.
func deriveTitleFromPath(path string) string {
	base := filepath.Base(path)
	base = strings.TrimSuffix(base, filepath.Ext(base))
	base = strings.ReplaceAll(base, "-", " ")
	base = strings.ReplaceAll(base, "_", " ")
	base = strings.TrimSpace(base)
	if base == "" {
		return "Sans titre"
	}

	// strings.Title is deprecated (it mishandles Unicode word boundaries,
	// e.g. "don't" -> "Don'T"); capitalize the first rune of each
	// space-separated word explicitly instead.
	words := strings.Fields(base)
	for idx, word := range words {
		runes := []rune(word)
		runes[0] = unicode.ToUpper(runes[0])
		words[idx] = string(runes)
	}
	return strings.Join(words, " ")
}
|
||||
|
||||
// buildSummary collapses all runs of whitespace in body into single spaces
// and truncates the result to at most 240 runes, appending an ellipsis when
// the text was cut. An empty or whitespace-only body yields "".
func buildSummary(body string) string {
	const maxRunes = 240

	if strings.TrimSpace(body) == "" {
		return ""
	}

	// strings.Fields splits on any whitespace, so newlines collapse too.
	normalized := strings.Join(strings.Fields(body), " ")

	runes := []rune(normalized)
	if len(runes) > maxRunes {
		return string(runes[:maxRunes]) + "…"
	}
	return normalized
}
|
||||
|
||||
// SearchByTag renvoie une copie de la liste des fichiers indexés pour un tag donné.
|
||||
func (i *Indexer) SearchByTag(tag string) []string {
|
||||
i.mu.RLock()
|
||||
defer i.mu.RUnlock()
|
||||
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
lowerTag := strings.ToLower(tag)
|
||||
files, ok := i.tags[lowerTag]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
copyFiles := make([]string, len(files))
|
||||
copy(copyFiles, files)
|
||||
return copyFiles
|
||||
}
|
||||
|
||||
// SearchDocuments runs a rich search over the indexed documents.
//
// The query supports quoted phrases and tag:/title:/path: filters (see
// parseSearchQuery); all filters and free-text terms must match (AND logic).
// Results are sorted by descending score, ties broken by case-insensitive
// title order. An empty query returns nil.
func (i *Indexer) SearchDocuments(query string) []SearchResult {
	parsed := parseSearchQuery(query)

	i.mu.RLock()
	defer i.mu.RUnlock()

	// An empty query matches nothing rather than everything.
	if len(parsed.terms) == 0 && len(parsed.tagFilters) == 0 && len(parsed.titleFilters) == 0 && len(parsed.pathFilters) == 0 {
		return nil
	}

	results := make([]SearchResult, 0, len(i.docs))

	for _, doc := range i.docs {
		match, score := matchDocument(doc, parsed)
		if !match {
			continue
		}

		// Prefer a contextual snippet around a matched term; fall back to
		// the precomputed summary when no term appears in the body.
		snippet := buildSnippet(doc, parsed.terms)
		if snippet == "" {
			snippet = doc.Summary
		}

		results = append(results, SearchResult{
			Path:         doc.Path,
			Title:        doc.Title,
			Tags:         doc.Tags,
			Snippet:      snippet,
			Score:        score,
			Date:         doc.Date,
			LastModified: doc.LastModified,
		})
	}

	// Stable sort keeps equal-score, equal-title results in insertion order.
	sort.SliceStable(results, func(a, b int) bool {
		if results[a].Score == results[b].Score {
			return strings.ToLower(results[a].Title) < strings.ToLower(results[b].Title)
		}
		return results[a].Score > results[b].Score
	})

	return results
}
|
||||
|
||||
func matchDocument(doc *Document, q parsedQuery) (bool, float64) {
|
||||
score := 0.0
|
||||
|
||||
// Tag filters must all match
|
||||
for _, filter := range q.tagFilters {
|
||||
if !containsString(doc.lowerTags, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 2 // léger bonus pour les filtres respectés
|
||||
}
|
||||
|
||||
// Title filters must all match
|
||||
for _, filter := range q.titleFilters {
|
||||
if !strings.Contains(doc.lowerTitle, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 4
|
||||
}
|
||||
|
||||
// Path filters must all match
|
||||
lowerPath := strings.ToLower(doc.Path)
|
||||
for _, filter := range q.pathFilters {
|
||||
if !strings.Contains(lowerPath, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 1.5
|
||||
}
|
||||
|
||||
// General terms (AND logic)
|
||||
for _, term := range q.terms {
|
||||
if term == "" {
|
||||
continue
|
||||
}
|
||||
termScore := 0.0
|
||||
if strings.Contains(doc.lowerTitle, term) {
|
||||
termScore += 6
|
||||
}
|
||||
if containsString(doc.lowerTags, term) {
|
||||
termScore += 4
|
||||
}
|
||||
if strings.Contains(lowerPath, term) {
|
||||
termScore += 2
|
||||
}
|
||||
if strings.Contains(doc.lowerBody, term) {
|
||||
termScore += 1.5
|
||||
}
|
||||
if termScore == 0 {
|
||||
return false, 0 // term must match somewhere
|
||||
}
|
||||
score += termScore
|
||||
}
|
||||
|
||||
if len(q.terms) == 0 && len(q.tagFilters) == 0 && len(q.titleFilters) == 0 && len(q.pathFilters) == 0 {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
// Bonus léger pour documents avec titre défini
|
||||
if doc.Title != "" {
|
||||
score += 0.5
|
||||
}
|
||||
|
||||
return true, score
|
||||
}
|
||||
|
||||
// containsString reports whether target occurs in list.
func containsString(list []string, target string) bool {
	for idx := range list {
		if list[idx] == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
func buildSnippet(doc *Document, terms []string) string {
|
||||
if doc.Body == "" || len(terms) == 0 {
|
||||
return doc.Summary
|
||||
}
|
||||
|
||||
pos, termLen := findTermPosition(doc.Body, terms)
|
||||
if pos == -1 {
|
||||
return doc.Summary
|
||||
}
|
||||
|
||||
return extractSnippetFromRunes([]rune(doc.Body), pos, termLen)
|
||||
}
|
||||
|
||||
func findTermPosition(body string, terms []string) (int, int) {
|
||||
if len(terms) == 0 {
|
||||
return -1, 0
|
||||
}
|
||||
|
||||
bodyRunes := []rune(body)
|
||||
lowerRunes := make([]rune, len(bodyRunes))
|
||||
for idx, r := range bodyRunes {
|
||||
lowerRunes[idx] = unicode.ToLower(r)
|
||||
}
|
||||
|
||||
for _, term := range terms {
|
||||
term = strings.TrimSpace(term)
|
||||
if term == "" {
|
||||
continue
|
||||
}
|
||||
termRunes := []rune(term)
|
||||
for idx, r := range termRunes {
|
||||
termRunes[idx] = unicode.ToLower(r)
|
||||
}
|
||||
pos := indexRunes(lowerRunes, termRunes)
|
||||
if pos != -1 {
|
||||
return pos, len(termRunes)
|
||||
}
|
||||
}
|
||||
|
||||
return -1, 0
|
||||
}
|
||||
|
||||
// indexRunes returns the index of the first occurrence of needle within
// haystack, comparing rune by rune, or -1 when needle is empty, longer than
// haystack, or absent.
func indexRunes(haystack, needle []rune) int {
	if len(needle) == 0 || len(needle) > len(haystack) {
		return -1
	}

	last := len(haystack) - len(needle)
outer:
	for start := 0; start <= last; start++ {
		for off, r := range needle {
			if haystack[start+off] != r {
				continue outer
			}
		}
		return start
	}
	return -1
}
|
||||
|
||||
// extractSnippetFromRunes cuts a window of roughly 120 runes around the
// match at rune offset pos (termLen runes long), collapses its whitespace
// into single spaces, and prepends/appends an ellipsis on each side that
// was truncated.
func extractSnippetFromRunes(body []rune, pos, termLen int) string {
	if len(body) == 0 {
		return ""
	}

	const window = 120

	start := pos - window/2
	if start < 0 {
		start = 0
	}
	end := pos + termLen + window/2
	if end > len(body) {
		end = len(body)
	}

	text := strings.Join(strings.Fields(strings.TrimSpace(string(body[start:end]))), " ")

	prefix, suffix := "", ""
	if start > 0 {
		prefix = "…"
	}
	if end < len(body) {
		suffix = "…"
	}
	return prefix + text + suffix
}
|
||||
|
||||
// parsedQuery holds the decomposed parts of a search query: free-text terms
// plus the values of the tag:/title:/path: filters, all stored lowercased.
type parsedQuery struct {
	terms        []string
	tagFilters   []string
	titleFilters []string
	pathFilters  []string
}
|
||||
|
||||
func parseSearchQuery(query string) parsedQuery {
|
||||
trimmed := strings.TrimSpace(query)
|
||||
if trimmed == "" {
|
||||
return parsedQuery{}
|
||||
}
|
||||
|
||||
tokens := splitQuery(trimmed)
|
||||
result := parsedQuery{
|
||||
terms: make([]string, 0, len(tokens)),
|
||||
tagFilters: []string{},
|
||||
titleFilters: []string{},
|
||||
pathFilters: []string{},
|
||||
}
|
||||
|
||||
for _, token := range tokens {
|
||||
if token == "" {
|
||||
continue
|
||||
}
|
||||
lower := strings.ToLower(token)
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(lower, "tag:"):
|
||||
value := strings.TrimSpace(token[4:])
|
||||
if value != "" {
|
||||
result.tagFilters = append(result.tagFilters, strings.ToLower(value))
|
||||
}
|
||||
case strings.HasPrefix(lower, "title:"):
|
||||
value := strings.TrimSpace(token[6:])
|
||||
if value != "" {
|
||||
result.titleFilters = append(result.titleFilters, strings.ToLower(value))
|
||||
}
|
||||
case strings.HasPrefix(lower, "path:"):
|
||||
value := strings.TrimSpace(token[5:])
|
||||
if value != "" {
|
||||
result.pathFilters = append(result.pathFilters, strings.ToLower(value))
|
||||
}
|
||||
default:
|
||||
result.terms = append(result.terms, strings.ToLower(token))
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// splitQuery splits a query into tokens on whitespace while keeping
// double-quoted phrases together. Tokens are trimmed of surrounding
// whitespace and blank tokens are dropped.
func splitQuery(input string) []string {
	var (
		tokens   []string
		pending  strings.Builder
		inQuotes bool
	)

	// flush appends the pending token (trimmed, if non-blank) and resets it.
	flush := func() {
		if token := strings.TrimSpace(pending.String()); token != "" {
			tokens = append(tokens, token)
		}
		pending.Reset()
	}

	for _, r := range input {
		switch {
		case r == '"':
			// A quote always terminates the current token and toggles
			// phrase mode.
			flush()
			inQuotes = !inQuotes
		case (r == ' ' || r == '\t' || r == '\n') && !inQuotes:
			flush()
		default:
			pending.WriteRune(r)
		}
	}
	flush()

	return tokens
}
|
||||
|
||||
// appendToken appends token to tokens after trimming surrounding
// whitespace; blank tokens are dropped and tokens is returned unchanged.
func appendToken(tokens []string, token string) []string {
	trimmed := strings.TrimSpace(token)
	if trimmed == "" {
		return tokens
	}
	return append(tokens, trimmed)
}
|
||||
|
||||
// FullFrontMatter represents the complete YAML front matter of a note.
type FullFrontMatter struct {
	Title        string  `yaml:"title,omitempty"`
	Date         string  `yaml:"date,omitempty"`
	LastModified string  `yaml:"last_modified,omitempty"`
	Tags         tagList `yaml:"tags,omitempty"`
}
|
||||
|
||||
// frontMatter is a reduced, tags-only front matter kept for compatibility
// with Load.
type frontMatter struct {
	Tags tagList `yaml:"tags"`
}
|
||||
|
||||
// tagList is a []string that also accepts a single YAML scalar, so both
// "tags: foo" and "tags: [foo, bar]" decode correctly.
type tagList []string

// UnmarshalYAML implements the yaml.v3 Unmarshaler interface, accepting a
// scalar, a sequence, or an alias to either; any other node kind is an error.
func (t *tagList) UnmarshalYAML(value *yaml.Node) error {
	switch value.Kind {
	case yaml.ScalarNode:
		// A single tag written as a plain string becomes a one-element list.
		var tag string
		if err := value.Decode(&tag); err != nil {
			return err
		}
		*t = tagList{tag}
		return nil
	case yaml.SequenceNode:
		var tags []string
		if err := value.Decode(&tags); err != nil {
			return err
		}
		*t = tagList(tags)
		return nil
	case yaml.AliasNode:
		// Resolve the alias and decode its target node recursively.
		return t.UnmarshalYAML(value.Alias)
	default:
		return fmt.Errorf("format de tags non supporte")
	}
}
|
||||
|
||||
// ExtractFrontMatterAndBody extrait le front matter et le corps d'un fichier Markdown.
|
||||
func ExtractFrontMatterAndBody(path string) (FullFrontMatter, string, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return ExtractFrontMatterAndBodyFromReader(file)
|
||||
}
|
||||
|
||||
// extractFrontMatterAndBodyFromReader extrait le front matter et le corps d'un io.Reader.
|
||||
func ExtractFrontMatterAndBodyFromReader(reader io.Reader) (FullFrontMatter, string, error) {
|
||||
bufReader := bufio.NewReader(reader)
|
||||
var fm FullFrontMatter
|
||||
var bodyBuilder strings.Builder
|
||||
var fmBuilder strings.Builder
|
||||
|
||||
line, err := bufReader.ReadString('\n')
|
||||
if err != nil && !errors.Is(err, io.EOF) {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
|
||||
if strings.TrimSpace(line) != "---" {
|
||||
bodyBuilder.WriteString(line) // If no front matter, this is part of the body
|
||||
_, err := io.Copy(&bodyBuilder, bufReader)
|
||||
return FullFrontMatter{}, bodyBuilder.String(), err
|
||||
}
|
||||
|
||||
// Found first '---', now read front matter
|
||||
fmFound := false
|
||||
for {
|
||||
line, err = bufReader.ReadString('\n')
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Front matter not terminated, treat entire content as body
|
||||
return FullFrontMatter{}, "---" + fmBuilder.String() + bodyBuilder.String(), nil
|
||||
}
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
if strings.TrimSpace(line) == "---" {
|
||||
fmFound = true
|
||||
break
|
||||
}
|
||||
fmBuilder.WriteString(line)
|
||||
}
|
||||
|
||||
if fmFound {
|
||||
if err := yaml.Unmarshal([]byte(fmBuilder.String()), &fm); err != nil {
|
||||
return FullFrontMatter{}, "", fmt.Errorf("erreur d'analyse YAML du front matter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Read the rest of the body
|
||||
_, err = io.Copy(&bodyBuilder, bufReader)
|
||||
if err != nil {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
|
||||
return fm, bodyBuilder.String(), nil
|
||||
}
|
||||
|
||||
// extractFrontMatter est une version simplifiee pour la compatibilite avec Load.
|
||||
func extractFrontMatter(path string) (frontMatter, error) {
|
||||
fm, _, err := ExtractFrontMatterAndBody(path)
|
||||
return frontMatter{Tags: fm.Tags}, err
|
||||
}
|
||||
Reference in New Issue
Block a user