package indexer
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"unicode"
|
|
|
|
yaml "gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// Indexer maintains an in-memory index of the tags, documents and backlinks
// extracted from a tree of Markdown files. All three maps are rebuilt and
// swapped wholesale by Load; every access is guarded by mu.
type Indexer struct {
	mu        sync.RWMutex
	tags      map[string][]string  // lower-cased tag -> sorted relative file paths
	docs      map[string]*Document // relative file path -> indexed document
	backlinks map[string][]string  // note path -> list of notes that reference it
}
|
|
|
|
// Document represents a single indexed note, ready for search.
type Document struct {
	Path         string   // path relative to the index root
	Title        string   // front-matter title, or one derived from the filename
	Tags         []string // normalized (trimmed, case-insensitively deduplicated) tags
	Date         string   // trimmed front-matter "date" value, format verbatim
	LastModified string   // trimmed front-matter "last_modified" value
	Body         string   // Markdown body without the front matter
	Summary      string   // whitespace-collapsed body truncated to 240 runes

	// Pre-lowered copies computed once at index time so that
	// case-insensitive matching does not re-lowercase per query.
	lowerTitle string
	lowerBody  string
	lowerTags  []string
}
|
|
|
|
// SearchResult represents one enriched hit returned by SearchDocuments.
type SearchResult struct {
	Path         string
	Title        string
	Tags         []string
	Snippet      string  // body excerpt around the first matched term, or the summary
	Score        float64 // relevance score; higher sorts first
	Date         string
	LastModified string
}
|
|
|
|
// New cree une nouvelle instance d Indexer.
|
|
func New() *Indexer {
|
|
return &Indexer{
|
|
tags: make(map[string][]string),
|
|
docs: make(map[string]*Document),
|
|
backlinks: make(map[string][]string),
|
|
}
|
|
}
|
|
|
|
// Load rebuilds the whole index (tags, documents, backlinks) from the
// Markdown files found under root, then installs it atomically under the
// write lock. If the walk or any front-matter parse fails, the error is
// returned and the previous index is left untouched.
func (i *Indexer) Load(root string) error {
	entries := make(map[string]map[string]struct{}) // tag -> set of relative paths
	documents := make(map[string]*Document)

	err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil
		}
		// Only Markdown files are indexed; the extension check is case-insensitive.
		if !strings.EqualFold(filepath.Ext(path), ".md") {
			return nil
		}

		rel, err := filepath.Rel(root, path)
		if err != nil {
			// Fall back to the absolute path rather than aborting the walk.
			rel = path
		}

		fm, body, err := ExtractFrontMatterAndBody(path)
		if err != nil {
			return fmt.Errorf("analyse du front matter pour %s: %w", path, err)
		}

		tags := normalizeTags([]string(fm.Tags))
		if len(tags) > 0 {
			// Record the file under each of its tags (keys are lower-cased).
			for _, tag := range tags {
				key := strings.ToLower(tag)
				if _, ok := entries[key]; !ok {
					entries[key] = make(map[string]struct{})
				}
				entries[key][rel] = struct{}{}
			}
		}

		doc := buildDocument(rel, fm, body, tags)
		documents[rel] = doc

		return nil
	})
	if err != nil {
		return err
	}

	// Flatten the per-tag path sets into sorted slices for deterministic output.
	indexed := make(map[string][]string, len(entries))
	for tag, files := range entries {
		list := make([]string, 0, len(files))
		for file := range files {
			list = append(list, file)
		}
		sort.Strings(list)
		indexed[tag] = list
	}

	// Build backlinks index: for every document, record it as a referrer of
	// each note it links to.
	backlinksMap := make(map[string][]string)
	for sourcePath, doc := range documents {
		links := extractInternalLinks(doc.Body)
		for _, targetPath := range links {
			// Add sourcePath to the backlinks of targetPath.
			if _, ok := backlinksMap[targetPath]; !ok {
				backlinksMap[targetPath] = make([]string, 0)
			}
			// Avoid duplicates (the same source may link to a target several times).
			if !containsString(backlinksMap[targetPath], sourcePath) {
				backlinksMap[targetPath] = append(backlinksMap[targetPath], sourcePath)
			}
		}
	}

	// Sort backlinks for consistency (sorts in place through the shared backing array).
	for _, links := range backlinksMap {
		sort.Strings(links)
	}

	// Swap in the freshly built index atomically.
	i.mu.Lock()
	i.tags = indexed
	i.docs = documents
	i.backlinks = backlinksMap
	i.mu.Unlock()

	return nil
}
|
|
|
|
// normalizeTags trims whitespace from each tag and drops blanks and
// case-insensitive duplicates, keeping the first spelling encountered.
// A nil or empty input yields nil.
func normalizeTags(tags []string) []string {
	if len(tags) == 0 {
		return nil
	}

	var (
		out  = make([]string, 0, len(tags))
		seen = make(map[string]struct{}, len(tags))
	)

	for _, raw := range tags {
		cleaned := strings.TrimSpace(raw)
		if cleaned == "" {
			continue
		}
		key := strings.ToLower(cleaned)
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, cleaned)
	}

	return out
}
|
|
|
|
func buildDocument(path string, fm FullFrontMatter, body string, tags []string) *Document {
|
|
title := strings.TrimSpace(fm.Title)
|
|
if title == "" {
|
|
title = deriveTitleFromPath(path)
|
|
}
|
|
|
|
summary := buildSummary(body)
|
|
|
|
lowerTags := make([]string, len(tags))
|
|
for idx, tag := range tags {
|
|
lowerTags[idx] = strings.ToLower(tag)
|
|
}
|
|
|
|
doc := &Document{
|
|
Path: path,
|
|
Title: title,
|
|
Tags: tags,
|
|
Date: strings.TrimSpace(fm.Date),
|
|
LastModified: strings.TrimSpace(fm.LastModified),
|
|
Body: body,
|
|
Summary: summary,
|
|
lowerTitle: strings.ToLower(title),
|
|
lowerBody: strings.ToLower(body),
|
|
lowerTags: lowerTags,
|
|
}
|
|
|
|
return doc
|
|
}
|
|
|
|
// deriveTitleFromPath builds a human-readable title from a note's file path:
// the extension is stripped, dashes and underscores become spaces, and each
// word is title-cased. Returns "Sans titre" when nothing usable remains.
func deriveTitleFromPath(path string) string {
	base := filepath.Base(path)
	base = strings.TrimSuffix(base, filepath.Ext(base))
	base = strings.ReplaceAll(base, "-", " ")
	base = strings.ReplaceAll(base, "_", " ")
	base = strings.TrimSpace(base)
	if base == "" {
		return "Sans titre"
	}
	return titleCaseWords(base)
}

// titleCaseWords upper-cases the first letter of each word, where a word
// starts after any rune that is neither a letter nor a digit. This replaces
// the deprecated strings.Title, whose word-boundary rule it mirrors for the
// space-separated input produced by deriveTitleFromPath.
func titleCaseWords(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	prevIsWord := false
	for _, r := range s {
		if prevIsWord {
			b.WriteRune(r)
		} else {
			b.WriteRune(unicode.ToTitle(r))
		}
		prevIsWord = unicode.IsLetter(r) || unicode.IsDigit(r)
	}
	return b.String()
}
|
|
|
|
// buildSummary produces a short one-line summary of a note body: all
// whitespace runs (including newlines) collapse to single spaces, and the
// result is truncated to 240 runes with a trailing ellipsis when longer.
func buildSummary(body string) string {
	const limit = 240

	text := strings.TrimSpace(body)
	if text == "" {
		return ""
	}

	// Collapse every run of whitespace into a single space.
	text = strings.Join(strings.Fields(text), " ")

	chars := []rune(text)
	if len(chars) > limit {
		return string(chars[:limit]) + "…"
	}
	return text
}
|
|
|
|
// SearchByTag renvoie une copie de la liste des fichiers indexés pour un tag donné.
|
|
func (i *Indexer) SearchByTag(tag string) []string {
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
|
|
tag = strings.TrimSpace(tag)
|
|
if tag == "" {
|
|
return nil
|
|
}
|
|
|
|
lowerTag := strings.ToLower(tag)
|
|
files, ok := i.tags[lowerTag]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
copyFiles := make([]string, len(files))
|
|
copy(copyFiles, files)
|
|
return copyFiles
|
|
}
|
|
|
|
// SearchDocuments effectue une recherche riche sur les documents indexés.
|
|
func (i *Indexer) SearchDocuments(query string) []SearchResult {
|
|
parsed := parseSearchQuery(query)
|
|
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
|
|
if len(parsed.terms) == 0 && len(parsed.tagFilters) == 0 && len(parsed.titleFilters) == 0 && len(parsed.pathFilters) == 0 {
|
|
return nil
|
|
}
|
|
|
|
results := make([]SearchResult, 0, len(i.docs))
|
|
|
|
for _, doc := range i.docs {
|
|
match, score := matchDocument(doc, parsed)
|
|
if !match {
|
|
continue
|
|
}
|
|
|
|
snippet := buildSnippet(doc, parsed.terms)
|
|
if snippet == "" {
|
|
snippet = doc.Summary
|
|
}
|
|
|
|
results = append(results, SearchResult{
|
|
Path: doc.Path,
|
|
Title: doc.Title,
|
|
Tags: doc.Tags,
|
|
Snippet: snippet,
|
|
Score: score,
|
|
Date: doc.Date,
|
|
LastModified: doc.LastModified,
|
|
})
|
|
}
|
|
|
|
sort.SliceStable(results, func(a, b int) bool {
|
|
if results[a].Score == results[b].Score {
|
|
return strings.ToLower(results[a].Title) < strings.ToLower(results[b].Title)
|
|
}
|
|
return results[a].Score > results[b].Score
|
|
})
|
|
|
|
return results
|
|
}
|
|
|
|
func matchDocument(doc *Document, q parsedQuery) (bool, float64) {
|
|
score := 0.0
|
|
|
|
// Tag filters must all match
|
|
for _, filter := range q.tagFilters {
|
|
if !containsString(doc.lowerTags, filter) {
|
|
return false, 0
|
|
}
|
|
score += 2 // léger bonus pour les filtres respectés
|
|
}
|
|
|
|
// Title filters must all match
|
|
for _, filter := range q.titleFilters {
|
|
if !strings.Contains(doc.lowerTitle, filter) {
|
|
return false, 0
|
|
}
|
|
score += 4
|
|
}
|
|
|
|
// Path filters must all match
|
|
lowerPath := strings.ToLower(doc.Path)
|
|
for _, filter := range q.pathFilters {
|
|
if !strings.Contains(lowerPath, filter) {
|
|
return false, 0
|
|
}
|
|
score += 1.5
|
|
}
|
|
|
|
// General terms (AND logic)
|
|
for _, term := range q.terms {
|
|
if term == "" {
|
|
continue
|
|
}
|
|
termScore := 0.0
|
|
if strings.Contains(doc.lowerTitle, term) {
|
|
termScore += 6
|
|
}
|
|
if containsString(doc.lowerTags, term) {
|
|
termScore += 4
|
|
}
|
|
if strings.Contains(lowerPath, term) {
|
|
termScore += 2
|
|
}
|
|
if strings.Contains(doc.lowerBody, term) {
|
|
termScore += 1.5
|
|
}
|
|
if termScore == 0 {
|
|
return false, 0 // term must match somewhere
|
|
}
|
|
score += termScore
|
|
}
|
|
|
|
if len(q.terms) == 0 && len(q.tagFilters) == 0 && len(q.titleFilters) == 0 && len(q.pathFilters) == 0 {
|
|
return false, 0
|
|
}
|
|
|
|
// Bonus léger pour documents avec titre défini
|
|
if doc.Title != "" {
|
|
score += 0.5
|
|
}
|
|
|
|
return true, score
|
|
}
|
|
|
|
// containsString reports whether target occurs in list (exact match).
func containsString(list []string, target string) bool {
	for idx := range list {
		if list[idx] == target {
			return true
		}
	}
	return false
}
|
|
|
|
func buildSnippet(doc *Document, terms []string) string {
|
|
if doc.Body == "" || len(terms) == 0 {
|
|
return doc.Summary
|
|
}
|
|
|
|
pos, termLen := findTermPosition(doc.Body, terms)
|
|
if pos == -1 {
|
|
return doc.Summary
|
|
}
|
|
|
|
return extractSnippetFromRunes([]rune(doc.Body), pos, termLen)
|
|
}
|
|
|
|
func findTermPosition(body string, terms []string) (int, int) {
|
|
if len(terms) == 0 {
|
|
return -1, 0
|
|
}
|
|
|
|
bodyRunes := []rune(body)
|
|
lowerRunes := make([]rune, len(bodyRunes))
|
|
for idx, r := range bodyRunes {
|
|
lowerRunes[idx] = unicode.ToLower(r)
|
|
}
|
|
|
|
for _, term := range terms {
|
|
term = strings.TrimSpace(term)
|
|
if term == "" {
|
|
continue
|
|
}
|
|
termRunes := []rune(term)
|
|
for idx, r := range termRunes {
|
|
termRunes[idx] = unicode.ToLower(r)
|
|
}
|
|
pos := indexRunes(lowerRunes, termRunes)
|
|
if pos != -1 {
|
|
return pos, len(termRunes)
|
|
}
|
|
}
|
|
|
|
return -1, 0
|
|
}
|
|
|
|
// indexRunes returns the index of the first occurrence of needle in
// haystack, or -1 if absent. An empty needle also yields -1 (by design,
// since callers treat "no needle" as "no match").
func indexRunes(haystack, needle []rune) int {
	n := len(needle)
	if n == 0 || n > len(haystack) {
		return -1
	}

outer:
	for start := 0; start+n <= len(haystack); start++ {
		for off := 0; off < n; off++ {
			if haystack[start+off] != needle[off] {
				continue outer
			}
		}
		return start
	}
	return -1
}
|
|
|
|
// extractSnippetFromRunes cuts a ~120-rune window around the match at pos
// (of termLen runes), collapses internal whitespace, and marks truncated
// edges with an ellipsis.
func extractSnippetFromRunes(body []rune, pos, termLen int) string {
	if len(body) == 0 {
		return ""
	}

	const window = 120

	lo := pos - window/2
	if lo < 0 {
		lo = 0
	}
	hi := pos + termLen + window/2
	if hi > len(body) {
		hi = len(body)
	}

	text := strings.TrimSpace(string(body[lo:hi]))
	text = strings.Join(strings.Fields(text), " ")

	if lo > 0 {
		text = "…" + text
	}
	if hi < len(body) {
		text = text + "…"
	}

	return text
}
|
|
|
|
// parsedQuery holds the decomposed parts of a search query: free-text
// terms plus tag:/title:/path: filter values, all lower-cased.
type parsedQuery struct {
	terms        []string // free-text terms, AND-combined by matchDocument
	tagFilters   []string // values of "tag:" tokens
	titleFilters []string // values of "title:" tokens
	pathFilters  []string // values of "path:" tokens
}
|
|
|
|
func parseSearchQuery(query string) parsedQuery {
|
|
trimmed := strings.TrimSpace(query)
|
|
if trimmed == "" {
|
|
return parsedQuery{}
|
|
}
|
|
|
|
tokens := splitQuery(trimmed)
|
|
result := parsedQuery{
|
|
terms: make([]string, 0, len(tokens)),
|
|
tagFilters: []string{},
|
|
titleFilters: []string{},
|
|
pathFilters: []string{},
|
|
}
|
|
|
|
for _, token := range tokens {
|
|
if token == "" {
|
|
continue
|
|
}
|
|
lower := strings.ToLower(token)
|
|
|
|
switch {
|
|
case strings.HasPrefix(lower, "tag:"):
|
|
value := strings.TrimSpace(token[4:])
|
|
if value != "" {
|
|
result.tagFilters = append(result.tagFilters, strings.ToLower(value))
|
|
}
|
|
case strings.HasPrefix(lower, "title:"):
|
|
value := strings.TrimSpace(token[6:])
|
|
if value != "" {
|
|
result.titleFilters = append(result.titleFilters, strings.ToLower(value))
|
|
}
|
|
case strings.HasPrefix(lower, "path:"):
|
|
value := strings.TrimSpace(token[5:])
|
|
if value != "" {
|
|
result.pathFilters = append(result.pathFilters, strings.ToLower(value))
|
|
}
|
|
default:
|
|
result.terms = append(result.terms, strings.ToLower(token))
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// splitQuery breaks a query string into tokens. Unquoted whitespace
// (space, tab, newline) separates tokens; double quotes group a phrase,
// including its internal whitespace, into a single token. Tokens are
// trimmed, and blank tokens are discarded.
func splitQuery(input string) []string {
	var (
		tokens  []string
		current strings.Builder
		quoted  bool
	)

	// flush stores the pending token if it is non-blank, then resets the buffer.
	flush := func() {
		if tok := strings.TrimSpace(current.String()); tok != "" {
			tokens = append(tokens, tok)
		}
		current.Reset()
	}

	for _, r := range input {
		switch {
		case r == '"':
			if quoted {
				flush()
				quoted = false
			} else {
				if current.Len() > 0 {
					flush()
				}
				quoted = true
			}
		case (r == ' ' || r == '\t' || r == '\n') && !quoted:
			if current.Len() > 0 {
				flush()
			}
		default:
			current.WriteRune(r)
		}
	}
	flush()

	return tokens
}
|
|
|
|
// appendToken appends the trimmed token to tokens unless it is blank,
// returning the (possibly extended) slice.
func appendToken(tokens []string, token string) []string {
	trimmed := strings.TrimSpace(token)
	if trimmed == "" {
		return tokens
	}
	return append(tokens, trimmed)
}
|
|
|
|
// FullFrontMatter represents the complete YAML front matter of a note.
type FullFrontMatter struct {
	Title        string  `yaml:"title,omitempty"`
	Date         string  `yaml:"date,omitempty"`
	LastModified string  `yaml:"last_modified,omitempty"`
	Tags         tagList `yaml:"tags,omitempty"` // accepts a scalar or a sequence (see tagList)
}
|
|
|
|
// frontMatter is a reduced front-matter view (tags only), kept for
// compatibility with Load-era callers via extractFrontMatter.
type frontMatter struct {
	Tags tagList `yaml:"tags"`
}
|
|
|
|
// tagList is a []string that also accepts a single scalar in YAML, so a
// note may declare either `tags: foo` or `tags: [foo, bar]`.
type tagList []string

// UnmarshalYAML implements yaml.Unmarshaler for tagList: a scalar becomes
// a one-element list, a sequence decodes as-is, and an alias node is
// resolved by recursing on the anchored node. Any other node kind is an
// error.
func (t *tagList) UnmarshalYAML(value *yaml.Node) error {
	switch value.Kind {
	case yaml.ScalarNode:
		// Single tag written as a plain string.
		var tag string
		if err := value.Decode(&tag); err != nil {
			return err
		}
		*t = tagList{tag}
		return nil
	case yaml.SequenceNode:
		// Regular list of tags.
		var tags []string
		if err := value.Decode(&tags); err != nil {
			return err
		}
		*t = tagList(tags)
		return nil
	case yaml.AliasNode:
		// Follow the alias to the node it anchors.
		return t.UnmarshalYAML(value.Alias)
	default:
		return fmt.Errorf("format de tags non supporte")
	}
}
|
|
|
|
// ExtractFrontMatterAndBody extrait le front matter et le corps d'un fichier Markdown.
|
|
func ExtractFrontMatterAndBody(path string) (FullFrontMatter, string, error) {
|
|
file, err := os.Open(path)
|
|
if err != nil {
|
|
return FullFrontMatter{}, "", err
|
|
}
|
|
defer file.Close()
|
|
|
|
return ExtractFrontMatterAndBodyFromReader(file)
|
|
}
|
|
|
|
// extractFrontMatterAndBodyFromReader extrait le front matter et le corps d'un io.Reader.
|
|
func ExtractFrontMatterAndBodyFromReader(reader io.Reader) (FullFrontMatter, string, error) {
|
|
bufReader := bufio.NewReader(reader)
|
|
var fm FullFrontMatter
|
|
var bodyBuilder strings.Builder
|
|
var fmBuilder strings.Builder
|
|
|
|
line, err := bufReader.ReadString('\n')
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
|
return FullFrontMatter{}, "", err
|
|
}
|
|
|
|
if strings.TrimSpace(line) != "---" {
|
|
bodyBuilder.WriteString(line) // If no front matter, this is part of the body
|
|
_, err := io.Copy(&bodyBuilder, bufReader)
|
|
return FullFrontMatter{}, bodyBuilder.String(), err
|
|
}
|
|
|
|
// Found first '---', now read front matter
|
|
fmFound := false
|
|
for {
|
|
line, err = bufReader.ReadString('\n')
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
// Front matter not terminated, treat entire content as body
|
|
return FullFrontMatter{}, "---" + fmBuilder.String() + bodyBuilder.String(), nil
|
|
}
|
|
return FullFrontMatter{}, "", err
|
|
}
|
|
if strings.TrimSpace(line) == "---" {
|
|
fmFound = true
|
|
break
|
|
}
|
|
fmBuilder.WriteString(line)
|
|
}
|
|
|
|
if fmFound {
|
|
if err := yaml.Unmarshal([]byte(fmBuilder.String()), &fm); err != nil {
|
|
return FullFrontMatter{}, "", fmt.Errorf("erreur d'analyse YAML du front matter: %w", err)
|
|
}
|
|
}
|
|
|
|
// Read the rest of the body
|
|
_, err = io.Copy(&bodyBuilder, bufReader)
|
|
if err != nil {
|
|
return FullFrontMatter{}, "", err
|
|
}
|
|
|
|
return fm, bodyBuilder.String(), nil
|
|
}
|
|
|
|
// extractFrontMatter est une version simplifiee pour la compatibilite avec Load.
|
|
func extractFrontMatter(path string) (frontMatter, error) {
|
|
fm, _, err := ExtractFrontMatterAndBody(path)
|
|
return frontMatter{Tags: fm.Tags}, err
|
|
}
|
|
|
|
// TagCount pairs a tag with the number of indexed files that use it.
type TagCount struct {
	Tag   string
	Count int
}
|
|
|
|
// GetAllTagsWithCount retourne tous les tags avec leur nombre d'utilisations, triés par popularité
|
|
func (i *Indexer) GetAllTagsWithCount() []TagCount {
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
|
|
result := make([]TagCount, 0, len(i.tags))
|
|
for tag, files := range i.tags {
|
|
result = append(result, TagCount{
|
|
Tag: tag,
|
|
Count: len(files),
|
|
})
|
|
}
|
|
|
|
// Trier par popularité (nombre décroissant), puis par nom alphabétique
|
|
sort.Slice(result, func(a, b int) bool {
|
|
if result[a].Count == result[b].Count {
|
|
return result[a].Tag < result[b].Tag
|
|
}
|
|
return result[a].Count > result[b].Count
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
// GetBacklinks retourne la liste des notes qui référencent la note spécifiée
|
|
func (i *Indexer) GetBacklinks(path string) []string {
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
|
|
links, ok := i.backlinks[path]
|
|
if !ok || len(links) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Retourner une copie pour éviter les modifications externes
|
|
result := make([]string, len(links))
|
|
copy(result, links)
|
|
return result
|
|
}
|
|
|
|
// internalLinkPattern matches internal note links of the form
// <a ... hx-get="/api/notes/path/to/note.md" ...> and captures the note
// path. Compiled once at package init (MustCompile) instead of on every
// call, which also removes the former silently-swallowed compile error.
var internalLinkPattern = regexp.MustCompile(`hx-get="/api/notes/([^"]+)"`)

// extractInternalLinks extracts the deduplicated note paths referenced by
// internal links in a rendered Markdown/HTML body, in order of first
// appearance. Returns nil when the body contains no internal links.
func extractInternalLinks(body string) []string {
	matches := internalLinkPattern.FindAllStringSubmatch(body, -1)
	if len(matches) == 0 {
		return nil
	}

	links := make([]string, 0, len(matches))
	seen := make(map[string]struct{}, len(matches))

	for _, m := range matches {
		if len(m) < 2 {
			continue
		}
		p := m[1] // capture group 1: the note path
		if _, dup := seen[p]; dup {
			continue
		}
		seen[p] = struct{}{}
		links = append(links, p)
	}

	return links
}
|