// Package indexer builds and serves an in-memory search index over a
// directory of Markdown notes (tags, full-text search, backlinks).
package indexer

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"unicode"

	yaml "gopkg.in/yaml.v3"
)

// Indexer maintains an in-memory index of the tags associated with Markdown
// files, plus per-document search data and a backlinks graph.
// All maps are guarded by mu; Load swaps them wholesale under the write lock.
type Indexer struct {
	mu        sync.RWMutex
	tags      map[string][]string  // lowercase tag -> sorted relative note paths
	docs      map[string]*Document // relative note path -> indexed document
	backlinks map[string][]string  // note path -> list of notes that reference it
}

// Document represents a note indexed for search. The unexported lower*
// fields are precomputed lowercase copies used for case-insensitive matching.
type Document struct {
	Path         string
	Title        string
	Tags         []string
	Date         string
	LastModified string
	Body         string
	Summary      string
	lowerTitle   string
	lowerBody    string
	lowerTags    []string
}

// SearchResult represents an enriched search hit returned by SearchDocuments.
type SearchResult struct {
	Path         string
	Title        string
	Tags         []string
	Snippet      string
	Score        float64
	Date         string
	LastModified string
}

// New creates a new, empty Indexer ready for Load.
func New() *Indexer {
	return &Indexer{
		tags:      make(map[string][]string),
		docs:      make(map[string]*Document),
		backlinks: make(map[string][]string),
	}
}

// Load rebuilds the index from the provided root directory.
func (i *Indexer) Load(root string) error { entries := make(map[string]map[string]struct{}) documents := make(map[string]*Document) err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } if !strings.EqualFold(filepath.Ext(path), ".md") { return nil } rel, err := filepath.Rel(root, path) if err != nil { rel = path } fm, body, err := ExtractFrontMatterAndBody(path) if err != nil { return fmt.Errorf("analyse du front matter pour %s: %w", path, err) } tags := normalizeTags([]string(fm.Tags)) if len(tags) > 0 { for _, tag := range tags { key := strings.ToLower(tag) if _, ok := entries[key]; !ok { entries[key] = make(map[string]struct{}) } entries[key][rel] = struct{}{} } } doc := buildDocument(rel, fm, body, tags) documents[rel] = doc return nil }) if err != nil { return err } indexed := make(map[string][]string, len(entries)) for tag, files := range entries { list := make([]string, 0, len(files)) for file := range files { list = append(list, file) } sort.Strings(list) indexed[tag] = list } // Build backlinks index backlinksMap := make(map[string][]string) for sourcePath, doc := range documents { links := extractInternalLinks(doc.Body) for _, targetPath := range links { // Add sourcePath to the backlinks of targetPath if _, ok := backlinksMap[targetPath]; !ok { backlinksMap[targetPath] = make([]string, 0) } // Avoid duplicates if !containsString(backlinksMap[targetPath], sourcePath) { backlinksMap[targetPath] = append(backlinksMap[targetPath], sourcePath) } } } // Sort backlinks for consistency for _, links := range backlinksMap { sort.Strings(links) } i.mu.Lock() i.tags = indexed i.docs = documents i.backlinks = backlinksMap i.mu.Unlock() return nil } func normalizeTags(tags []string) []string { if len(tags) == 0 { return nil } seen := make(map[string]struct{}, len(tags)) result := make([]string, 0, len(tags)) for _, tag := range tags { trimmed := strings.TrimSpace(tag) if 
trimmed == "" { continue } lower := strings.ToLower(trimmed) if _, ok := seen[lower]; ok { continue } seen[lower] = struct{}{} result = append(result, trimmed) } return result } func buildDocument(path string, fm FullFrontMatter, body string, tags []string) *Document { title := strings.TrimSpace(fm.Title) if title == "" { title = deriveTitleFromPath(path) } summary := buildSummary(body) lowerTags := make([]string, len(tags)) for idx, tag := range tags { lowerTags[idx] = strings.ToLower(tag) } doc := &Document{ Path: path, Title: title, Tags: tags, Date: strings.TrimSpace(fm.Date), LastModified: strings.TrimSpace(fm.LastModified), Body: body, Summary: summary, lowerTitle: strings.ToLower(title), lowerBody: strings.ToLower(body), lowerTags: lowerTags, } return doc } func deriveTitleFromPath(path string) string { base := filepath.Base(path) base = strings.TrimSuffix(base, filepath.Ext(base)) base = strings.ReplaceAll(base, "-", " ") base = strings.ReplaceAll(base, "_", " ") base = strings.TrimSpace(base) if base == "" { return "Sans titre" } return strings.Title(base) } func buildSummary(body string) string { const maxRunes = 240 trimmed := strings.TrimSpace(body) if trimmed == "" { return "" } // Remplacer les retours à la ligne multiples par un espace simple normalized := strings.Join(strings.Fields(trimmed), " ") runes := []rune(normalized) if len(runes) <= maxRunes { return normalized } return string(runes[:maxRunes]) + "…" } // SearchByTag renvoie une copie de la liste des fichiers indexés pour un tag donné. func (i *Indexer) SearchByTag(tag string) []string { i.mu.RLock() defer i.mu.RUnlock() tag = strings.TrimSpace(tag) if tag == "" { return nil } lowerTag := strings.ToLower(tag) files, ok := i.tags[lowerTag] if !ok { return nil } copyFiles := make([]string, len(files)) copy(copyFiles, files) return copyFiles } // SearchDocuments effectue une recherche riche sur les documents indexés. 
func (i *Indexer) SearchDocuments(query string) []SearchResult { parsed := parseSearchQuery(query) i.mu.RLock() defer i.mu.RUnlock() if len(parsed.terms) == 0 && len(parsed.tagFilters) == 0 && len(parsed.titleFilters) == 0 && len(parsed.pathFilters) == 0 { return nil } results := make([]SearchResult, 0, len(i.docs)) for _, doc := range i.docs { match, score := matchDocument(doc, parsed) if !match { continue } snippet := buildSnippet(doc, parsed.terms) if snippet == "" { snippet = doc.Summary } results = append(results, SearchResult{ Path: doc.Path, Title: doc.Title, Tags: doc.Tags, Snippet: snippet, Score: score, Date: doc.Date, LastModified: doc.LastModified, }) } sort.SliceStable(results, func(a, b int) bool { if results[a].Score == results[b].Score { return strings.ToLower(results[a].Title) < strings.ToLower(results[b].Title) } return results[a].Score > results[b].Score }) return results } func matchDocument(doc *Document, q parsedQuery) (bool, float64) { score := 0.0 // Tag filters must all match for _, filter := range q.tagFilters { if !containsString(doc.lowerTags, filter) { return false, 0 } score += 2 // léger bonus pour les filtres respectés } // Title filters must all match for _, filter := range q.titleFilters { if !strings.Contains(doc.lowerTitle, filter) { return false, 0 } score += 4 } // Path filters must all match lowerPath := strings.ToLower(doc.Path) for _, filter := range q.pathFilters { if !strings.Contains(lowerPath, filter) { return false, 0 } score += 1.5 } // General terms (AND logic) for _, term := range q.terms { if term == "" { continue } termScore := 0.0 if strings.Contains(doc.lowerTitle, term) { termScore += 6 } if containsString(doc.lowerTags, term) { termScore += 4 } if strings.Contains(lowerPath, term) { termScore += 2 } if strings.Contains(doc.lowerBody, term) { termScore += 1.5 } if termScore == 0 { return false, 0 // term must match somewhere } score += termScore } if len(q.terms) == 0 && len(q.tagFilters) == 0 && 
len(q.titleFilters) == 0 && len(q.pathFilters) == 0 { return false, 0 } // Bonus léger pour documents avec titre défini if doc.Title != "" { score += 0.5 } return true, score } func containsString(list []string, target string) bool { for _, item := range list { if item == target { return true } } return false } func buildSnippet(doc *Document, terms []string) string { if doc.Body == "" || len(terms) == 0 { return doc.Summary } pos, termLen := findTermPosition(doc.Body, terms) if pos == -1 { return doc.Summary } return extractSnippetFromRunes([]rune(doc.Body), pos, termLen) } func findTermPosition(body string, terms []string) (int, int) { if len(terms) == 0 { return -1, 0 } bodyRunes := []rune(body) lowerRunes := make([]rune, len(bodyRunes)) for idx, r := range bodyRunes { lowerRunes[idx] = unicode.ToLower(r) } for _, term := range terms { term = strings.TrimSpace(term) if term == "" { continue } termRunes := []rune(term) for idx, r := range termRunes { termRunes[idx] = unicode.ToLower(r) } pos := indexRunes(lowerRunes, termRunes) if pos != -1 { return pos, len(termRunes) } } return -1, 0 } func indexRunes(haystack, needle []rune) int { if len(needle) == 0 || len(needle) > len(haystack) { return -1 } for i := 0; i <= len(haystack)-len(needle); i++ { match := true for j := 0; j < len(needle); j++ { if haystack[i+j] != needle[j] { match = false break } } if match { return i } } return -1 } func extractSnippetFromRunes(body []rune, pos, termLen int) string { if len(body) == 0 { return "" } const window = 120 start := pos - window/2 if start < 0 { start = 0 } end := pos + termLen + window/2 if end > len(body) { end = len(body) } snippet := strings.TrimSpace(string(body[start:end])) snippet = strings.Join(strings.Fields(snippet), " ") if start > 0 { snippet = "…" + snippet } if end < len(body) { snippet += "…" } return snippet } type parsedQuery struct { terms []string tagFilters []string titleFilters []string pathFilters []string } func parseSearchQuery(query string) 
parsedQuery { trimmed := strings.TrimSpace(query) if trimmed == "" { return parsedQuery{} } tokens := splitQuery(trimmed) result := parsedQuery{ terms: make([]string, 0, len(tokens)), tagFilters: []string{}, titleFilters: []string{}, pathFilters: []string{}, } for _, token := range tokens { if token == "" { continue } lower := strings.ToLower(token) switch { case strings.HasPrefix(lower, "tag:"): value := strings.TrimSpace(token[4:]) if value != "" { result.tagFilters = append(result.tagFilters, strings.ToLower(value)) } case strings.HasPrefix(lower, "title:"): value := strings.TrimSpace(token[6:]) if value != "" { result.titleFilters = append(result.titleFilters, strings.ToLower(value)) } case strings.HasPrefix(lower, "path:"): value := strings.TrimSpace(token[5:]) if value != "" { result.pathFilters = append(result.pathFilters, strings.ToLower(value)) } default: result.terms = append(result.terms, strings.ToLower(token)) } } return result } func splitQuery(input string) []string { var tokens []string var current strings.Builder inQuotes := false for _, r := range input { switch r { case '"': if inQuotes { tokens = appendToken(tokens, current.String()) current.Reset() inQuotes = false } else { if current.Len() > 0 { tokens = appendToken(tokens, current.String()) current.Reset() } inQuotes = true } case ' ', '\t', '\n': if inQuotes { current.WriteRune(r) } else { if current.Len() > 0 { tokens = appendToken(tokens, current.String()) current.Reset() } } default: current.WriteRune(r) } } if current.Len() > 0 { tokens = appendToken(tokens, current.String()) } return tokens } func appendToken(tokens []string, token string) []string { token = strings.TrimSpace(token) if token != "" { tokens = append(tokens, token) } return tokens } // FullFrontMatter represente la structure complete du front matter YAML. 
type FullFrontMatter struct { Title string `yaml:"title,omitempty"` Date string `yaml:"date,omitempty"` LastModified string `yaml:"last_modified,omitempty"` Tags tagList `yaml:"tags,omitempty"` } // frontMatter est une version simplifiee pour la compatibilite avec Load. type frontMatter struct { Tags tagList `yaml:"tags"` } type tagList []string func (t *tagList) UnmarshalYAML(value *yaml.Node) error { switch value.Kind { case yaml.ScalarNode: var tag string if err := value.Decode(&tag); err != nil { return err } *t = tagList{tag} return nil case yaml.SequenceNode: var tags []string if err := value.Decode(&tags); err != nil { return err } *t = tagList(tags) return nil case yaml.AliasNode: return t.UnmarshalYAML(value.Alias) default: return fmt.Errorf("format de tags non supporte") } } // ExtractFrontMatterAndBody extrait le front matter et le corps d'un fichier Markdown. func ExtractFrontMatterAndBody(path string) (FullFrontMatter, string, error) { file, err := os.Open(path) if err != nil { return FullFrontMatter{}, "", err } defer file.Close() return ExtractFrontMatterAndBodyFromReader(file) } // extractFrontMatterAndBodyFromReader extrait le front matter et le corps d'un io.Reader. 
func ExtractFrontMatterAndBodyFromReader(reader io.Reader) (FullFrontMatter, string, error) { bufReader := bufio.NewReader(reader) var fm FullFrontMatter var bodyBuilder strings.Builder var fmBuilder strings.Builder line, err := bufReader.ReadString('\n') if err != nil && !errors.Is(err, io.EOF) { return FullFrontMatter{}, "", err } if strings.TrimSpace(line) != "---" { bodyBuilder.WriteString(line) // If no front matter, this is part of the body _, err := io.Copy(&bodyBuilder, bufReader) return FullFrontMatter{}, bodyBuilder.String(), err } // Found first '---', now read front matter fmFound := false for { line, err = bufReader.ReadString('\n') if err != nil { if errors.Is(err, io.EOF) { // Front matter not terminated, treat entire content as body return FullFrontMatter{}, "---" + fmBuilder.String() + bodyBuilder.String(), nil } return FullFrontMatter{}, "", err } if strings.TrimSpace(line) == "---" { fmFound = true break } fmBuilder.WriteString(line) } if fmFound { if err := yaml.Unmarshal([]byte(fmBuilder.String()), &fm); err != nil { return FullFrontMatter{}, "", fmt.Errorf("erreur d'analyse YAML du front matter: %w", err) } } // Read the rest of the body _, err = io.Copy(&bodyBuilder, bufReader) if err != nil { return FullFrontMatter{}, "", err } return fm, bodyBuilder.String(), nil } // extractFrontMatter est une version simplifiee pour la compatibilite avec Load. 
func extractFrontMatter(path string) (frontMatter, error) { fm, _, err := ExtractFrontMatterAndBody(path) return frontMatter{Tags: fm.Tags}, err } // TagCount représente un tag avec son nombre d'utilisations type TagCount struct { Tag string Count int } // GetAllTagsWithCount retourne tous les tags avec leur nombre d'utilisations, triés par popularité func (i *Indexer) GetAllTagsWithCount() []TagCount { i.mu.RLock() defer i.mu.RUnlock() result := make([]TagCount, 0, len(i.tags)) for tag, files := range i.tags { result = append(result, TagCount{ Tag: tag, Count: len(files), }) } // Trier par popularité (nombre décroissant), puis par nom alphabétique sort.Slice(result, func(a, b int) bool { if result[a].Count == result[b].Count { return result[a].Tag < result[b].Tag } return result[a].Count > result[b].Count }) return result } // GetBacklinks retourne la liste des notes qui référencent la note spécifiée func (i *Indexer) GetBacklinks(path string) []string { i.mu.RLock() defer i.mu.RUnlock() links, ok := i.backlinks[path] if !ok || len(links) == 0 { return nil } // Retourner une copie pour éviter les modifications externes result := make([]string, len(links)) copy(result, links) return result } // extractInternalLinks extrait tous les liens internes d'un texte Markdown/HTML // Format: func extractInternalLinks(body string) []string { // Pattern pour capturer le chemin dans hx-get="/api/notes/..." 
// On cherche: hx-get="/api/notes/ suivi de n'importe quoi jusqu'au prochain guillemet pattern := `hx-get="/api/notes/([^"]+)"` // Compiler la regex re, err := regexp.Compile(pattern) if err != nil { return nil } // Trouver tous les matches matches := re.FindAllStringSubmatch(body, -1) if len(matches) == 0 { return nil } // Extraire les chemins (groupe de capture 1) links := make([]string, 0, len(matches)) seen := make(map[string]struct{}) for _, match := range matches { if len(match) > 1 { path := match[1] // Éviter les doublons if _, ok := seen[path]; !ok { seen[path] = struct{}{} links = append(links, path) } } } return links }