Premier commit déjà bien avancé
This commit is contained in:
640
internal/indexer/indexer.go
Normal file
640
internal/indexer/indexer.go
Normal file
@ -0,0 +1,640 @@
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Indexer maintains an in-memory index of tags associated with Markdown files.
// All map access is guarded by mu, so an Indexer is safe for concurrent use
// once constructed with New.
type Indexer struct {
	mu   sync.RWMutex
	tags map[string][]string  // lowercased tag -> sorted relative file paths
	docs map[string]*Document // relative file path -> indexed document
}
|
||||
|
||||
// Document represents an indexed note used for search.
type Document struct {
	Path         string   // path relative to the indexed root
	Title        string   // front matter title, or a title derived from the file name
	Tags         []string // normalized tags: trimmed, deduplicated, original casing kept
	Date         string   // raw "date" front matter value, trimmed
	LastModified string   // raw "last_modified" front matter value, trimmed
	Body         string   // Markdown body without the front matter
	Summary      string   // whitespace-normalized excerpt of the body

	// Precomputed lowercase forms so searches do not re-lowercase per query.
	lowerTitle string
	lowerBody  string
	lowerTags  []string
}
|
||||
|
||||
// SearchResult represents an enriched search hit.
type SearchResult struct {
	Path         string   // relative path of the matching note
	Title        string   // note title
	Tags         []string // note tags (shared with the indexed document)
	Snippet      string   // contextual excerpt around the first matched term
	Score        float64  // relevance score; higher is better
	Date         string   // raw front matter date
	LastModified string   // raw front matter last-modified value
}
|
||||
|
||||
// New cree une nouvelle instance d Indexer.
|
||||
func New() *Indexer {
|
||||
return &Indexer{
|
||||
tags: make(map[string][]string),
|
||||
docs: make(map[string]*Document),
|
||||
}
|
||||
}
|
||||
|
||||
// Load rebuilds the whole index from the given root directory.
//
// It walks root recursively, parses the YAML front matter of every *.md file,
// then atomically swaps the freshly built tag and document maps into the
// Indexer under the write lock. If any file fails to parse, the walk aborts
// and the existing index is left untouched.
func (i *Indexer) Load(root string) error {
	entries := make(map[string]map[string]struct{}) // lowercased tag -> set of relative paths
	documents := make(map[string]*Document)

	err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil
		}
		// Only Markdown files are indexed; the extension match is case-insensitive.
		if !strings.EqualFold(filepath.Ext(path), ".md") {
			return nil
		}

		rel, err := filepath.Rel(root, path)
		if err != nil {
			// Fall back to the walker-provided path rather than failing the walk.
			rel = path
		}

		fm, body, err := ExtractFrontMatterAndBody(path)
		if err != nil {
			return fmt.Errorf("analyse du front matter pour %s: %w", path, err)
		}

		tags := normalizeTags([]string(fm.Tags))
		if len(tags) > 0 {
			for _, tag := range tags {
				// Tag lookup keys are lowercased; a set deduplicates paths per tag.
				key := strings.ToLower(tag)
				if _, ok := entries[key]; !ok {
					entries[key] = make(map[string]struct{})
				}
				entries[key][rel] = struct{}{}
			}
		}

		doc := buildDocument(rel, fm, body, tags)
		documents[rel] = doc

		return nil
	})
	if err != nil {
		return err
	}

	// Flatten each path set into a sorted slice for deterministic results.
	indexed := make(map[string][]string, len(entries))
	for tag, files := range entries {
		list := make([]string, 0, len(files))
		for file := range files {
			list = append(list, file)
		}
		sort.Strings(list)
		indexed[tag] = list
	}

	// Swap both maps in a single critical section so readers never observe
	// a half-updated index.
	i.mu.Lock()
	i.tags = indexed
	i.docs = documents
	i.mu.Unlock()

	return nil
}
|
||||
|
||||
// normalizeTags trims surrounding whitespace from each tag, drops empty
// entries, and removes case-insensitive duplicates while keeping the first
// occurrence's original casing. A nil or empty input yields nil.
func normalizeTags(tags []string) []string {
	if len(tags) == 0 {
		return nil
	}

	out := make([]string, 0, len(tags))
	seen := make(map[string]struct{}, len(tags))

	for _, raw := range tags {
		tag := strings.TrimSpace(raw)
		if tag == "" {
			continue
		}
		key := strings.ToLower(tag)
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, tag)
	}

	return out
}
|
||||
|
||||
func buildDocument(path string, fm FullFrontMatter, body string, tags []string) *Document {
|
||||
title := strings.TrimSpace(fm.Title)
|
||||
if title == "" {
|
||||
title = deriveTitleFromPath(path)
|
||||
}
|
||||
|
||||
summary := buildSummary(body)
|
||||
|
||||
lowerTags := make([]string, len(tags))
|
||||
for idx, tag := range tags {
|
||||
lowerTags[idx] = strings.ToLower(tag)
|
||||
}
|
||||
|
||||
doc := &Document{
|
||||
Path: path,
|
||||
Title: title,
|
||||
Tags: tags,
|
||||
Date: strings.TrimSpace(fm.Date),
|
||||
LastModified: strings.TrimSpace(fm.LastModified),
|
||||
Body: body,
|
||||
Summary: summary,
|
||||
lowerTitle: strings.ToLower(title),
|
||||
lowerBody: strings.ToLower(body),
|
||||
lowerTags: lowerTags,
|
||||
}
|
||||
|
||||
return doc
|
||||
}
|
||||
|
||||
// deriveTitleFromPath builds a human-readable title from a file path when the
// front matter provides none: the base name without its extension, with dashes
// and underscores turned into spaces and the first letter of each word
// capitalized. Returns "Sans titre" when nothing usable remains.
func deriveTitleFromPath(path string) string {
	base := filepath.Base(path)
	base = strings.TrimSuffix(base, filepath.Ext(base))
	base = strings.ReplaceAll(base, "-", " ")
	base = strings.ReplaceAll(base, "_", " ")
	base = strings.TrimSpace(base)
	if base == "" {
		return "Sans titre"
	}

	// strings.Title is deprecated (it mishandles Unicode word boundaries,
	// e.g. "don't" -> "Don'T"); capitalize the first rune of each
	// space-separated word explicitly instead.
	words := strings.Fields(base)
	for idx, word := range words {
		runes := []rune(word)
		runes[0] = unicode.ToUpper(runes[0])
		words[idx] = string(runes)
	}
	return strings.Join(words, " ")
}
|
||||
|
||||
// buildSummary collapses all runs of whitespace in body into single spaces
// and truncates the result to at most 240 runes, appending an ellipsis when
// the text was cut. An empty or whitespace-only body yields "".
func buildSummary(body string) string {
	const maxRunes = 240

	if strings.TrimSpace(body) == "" {
		return ""
	}

	// strings.Fields splits on any whitespace, so newlines collapse too.
	normalized := strings.Join(strings.Fields(body), " ")

	runes := []rune(normalized)
	if len(runes) > maxRunes {
		return string(runes[:maxRunes]) + "…"
	}
	return normalized
}
|
||||
|
||||
// SearchByTag renvoie une copie de la liste des fichiers indexés pour un tag donné.
|
||||
func (i *Indexer) SearchByTag(tag string) []string {
|
||||
i.mu.RLock()
|
||||
defer i.mu.RUnlock()
|
||||
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
lowerTag := strings.ToLower(tag)
|
||||
files, ok := i.tags[lowerTag]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
copyFiles := make([]string, len(files))
|
||||
copy(copyFiles, files)
|
||||
return copyFiles
|
||||
}
|
||||
|
||||
// SearchDocuments runs a rich search over the indexed documents.
//
// The query supports quoted phrases and tag:/title:/path: filters (see
// parseSearchQuery); all filters and free-text terms must match (AND logic).
// Results are sorted by descending score, ties broken by case-insensitive
// title order. An empty query returns nil.
func (i *Indexer) SearchDocuments(query string) []SearchResult {
	parsed := parseSearchQuery(query)

	i.mu.RLock()
	defer i.mu.RUnlock()

	// An empty query matches nothing rather than everything.
	if len(parsed.terms) == 0 && len(parsed.tagFilters) == 0 && len(parsed.titleFilters) == 0 && len(parsed.pathFilters) == 0 {
		return nil
	}

	results := make([]SearchResult, 0, len(i.docs))

	for _, doc := range i.docs {
		match, score := matchDocument(doc, parsed)
		if !match {
			continue
		}

		// Prefer a contextual snippet around a matched term; fall back to
		// the precomputed summary when no term appears in the body.
		snippet := buildSnippet(doc, parsed.terms)
		if snippet == "" {
			snippet = doc.Summary
		}

		results = append(results, SearchResult{
			Path:         doc.Path,
			Title:        doc.Title,
			Tags:         doc.Tags,
			Snippet:      snippet,
			Score:        score,
			Date:         doc.Date,
			LastModified: doc.LastModified,
		})
	}

	// Stable sort keeps equal-score, equal-title results in insertion order.
	sort.SliceStable(results, func(a, b int) bool {
		if results[a].Score == results[b].Score {
			return strings.ToLower(results[a].Title) < strings.ToLower(results[b].Title)
		}
		return results[a].Score > results[b].Score
	})

	return results
}
|
||||
|
||||
func matchDocument(doc *Document, q parsedQuery) (bool, float64) {
|
||||
score := 0.0
|
||||
|
||||
// Tag filters must all match
|
||||
for _, filter := range q.tagFilters {
|
||||
if !containsString(doc.lowerTags, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 2 // léger bonus pour les filtres respectés
|
||||
}
|
||||
|
||||
// Title filters must all match
|
||||
for _, filter := range q.titleFilters {
|
||||
if !strings.Contains(doc.lowerTitle, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 4
|
||||
}
|
||||
|
||||
// Path filters must all match
|
||||
lowerPath := strings.ToLower(doc.Path)
|
||||
for _, filter := range q.pathFilters {
|
||||
if !strings.Contains(lowerPath, filter) {
|
||||
return false, 0
|
||||
}
|
||||
score += 1.5
|
||||
}
|
||||
|
||||
// General terms (AND logic)
|
||||
for _, term := range q.terms {
|
||||
if term == "" {
|
||||
continue
|
||||
}
|
||||
termScore := 0.0
|
||||
if strings.Contains(doc.lowerTitle, term) {
|
||||
termScore += 6
|
||||
}
|
||||
if containsString(doc.lowerTags, term) {
|
||||
termScore += 4
|
||||
}
|
||||
if strings.Contains(lowerPath, term) {
|
||||
termScore += 2
|
||||
}
|
||||
if strings.Contains(doc.lowerBody, term) {
|
||||
termScore += 1.5
|
||||
}
|
||||
if termScore == 0 {
|
||||
return false, 0 // term must match somewhere
|
||||
}
|
||||
score += termScore
|
||||
}
|
||||
|
||||
if len(q.terms) == 0 && len(q.tagFilters) == 0 && len(q.titleFilters) == 0 && len(q.pathFilters) == 0 {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
// Bonus léger pour documents avec titre défini
|
||||
if doc.Title != "" {
|
||||
score += 0.5
|
||||
}
|
||||
|
||||
return true, score
|
||||
}
|
||||
|
||||
// containsString reports whether target occurs in list.
func containsString(list []string, target string) bool {
	for idx := range list {
		if list[idx] == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
func buildSnippet(doc *Document, terms []string) string {
|
||||
if doc.Body == "" || len(terms) == 0 {
|
||||
return doc.Summary
|
||||
}
|
||||
|
||||
pos, termLen := findTermPosition(doc.Body, terms)
|
||||
if pos == -1 {
|
||||
return doc.Summary
|
||||
}
|
||||
|
||||
return extractSnippetFromRunes([]rune(doc.Body), pos, termLen)
|
||||
}
|
||||
|
||||
func findTermPosition(body string, terms []string) (int, int) {
|
||||
if len(terms) == 0 {
|
||||
return -1, 0
|
||||
}
|
||||
|
||||
bodyRunes := []rune(body)
|
||||
lowerRunes := make([]rune, len(bodyRunes))
|
||||
for idx, r := range bodyRunes {
|
||||
lowerRunes[idx] = unicode.ToLower(r)
|
||||
}
|
||||
|
||||
for _, term := range terms {
|
||||
term = strings.TrimSpace(term)
|
||||
if term == "" {
|
||||
continue
|
||||
}
|
||||
termRunes := []rune(term)
|
||||
for idx, r := range termRunes {
|
||||
termRunes[idx] = unicode.ToLower(r)
|
||||
}
|
||||
pos := indexRunes(lowerRunes, termRunes)
|
||||
if pos != -1 {
|
||||
return pos, len(termRunes)
|
||||
}
|
||||
}
|
||||
|
||||
return -1, 0
|
||||
}
|
||||
|
||||
// indexRunes returns the index of the first occurrence of needle within
// haystack, comparing rune by rune, or -1 when needle is empty, longer than
// haystack, or absent.
func indexRunes(haystack, needle []rune) int {
	if len(needle) == 0 || len(needle) > len(haystack) {
		return -1
	}

	last := len(haystack) - len(needle)
outer:
	for start := 0; start <= last; start++ {
		for off, r := range needle {
			if haystack[start+off] != r {
				continue outer
			}
		}
		return start
	}
	return -1
}
|
||||
|
||||
// extractSnippetFromRunes cuts a window of roughly 120 runes around the
// match at rune offset pos (termLen runes long), collapses its whitespace
// into single spaces, and prepends/appends an ellipsis on each side that
// was truncated.
func extractSnippetFromRunes(body []rune, pos, termLen int) string {
	if len(body) == 0 {
		return ""
	}

	const window = 120

	start := pos - window/2
	if start < 0 {
		start = 0
	}
	end := pos + termLen + window/2
	if end > len(body) {
		end = len(body)
	}

	text := strings.Join(strings.Fields(strings.TrimSpace(string(body[start:end]))), " ")

	prefix, suffix := "", ""
	if start > 0 {
		prefix = "…"
	}
	if end < len(body) {
		suffix = "…"
	}
	return prefix + text + suffix
}
|
||||
|
||||
// parsedQuery holds the decomposed parts of a search query: free-text terms
// plus the values of the tag:/title:/path: filters, all stored lowercased.
type parsedQuery struct {
	terms        []string
	tagFilters   []string
	titleFilters []string
	pathFilters  []string
}
|
||||
|
||||
func parseSearchQuery(query string) parsedQuery {
|
||||
trimmed := strings.TrimSpace(query)
|
||||
if trimmed == "" {
|
||||
return parsedQuery{}
|
||||
}
|
||||
|
||||
tokens := splitQuery(trimmed)
|
||||
result := parsedQuery{
|
||||
terms: make([]string, 0, len(tokens)),
|
||||
tagFilters: []string{},
|
||||
titleFilters: []string{},
|
||||
pathFilters: []string{},
|
||||
}
|
||||
|
||||
for _, token := range tokens {
|
||||
if token == "" {
|
||||
continue
|
||||
}
|
||||
lower := strings.ToLower(token)
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(lower, "tag:"):
|
||||
value := strings.TrimSpace(token[4:])
|
||||
if value != "" {
|
||||
result.tagFilters = append(result.tagFilters, strings.ToLower(value))
|
||||
}
|
||||
case strings.HasPrefix(lower, "title:"):
|
||||
value := strings.TrimSpace(token[6:])
|
||||
if value != "" {
|
||||
result.titleFilters = append(result.titleFilters, strings.ToLower(value))
|
||||
}
|
||||
case strings.HasPrefix(lower, "path:"):
|
||||
value := strings.TrimSpace(token[5:])
|
||||
if value != "" {
|
||||
result.pathFilters = append(result.pathFilters, strings.ToLower(value))
|
||||
}
|
||||
default:
|
||||
result.terms = append(result.terms, strings.ToLower(token))
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// splitQuery splits a query into tokens on whitespace while keeping
// double-quoted phrases together. Tokens are trimmed of surrounding
// whitespace and blank tokens are dropped.
func splitQuery(input string) []string {
	var (
		tokens   []string
		pending  strings.Builder
		inQuotes bool
	)

	// flush appends the pending token (trimmed, if non-blank) and resets it.
	flush := func() {
		if token := strings.TrimSpace(pending.String()); token != "" {
			tokens = append(tokens, token)
		}
		pending.Reset()
	}

	for _, r := range input {
		switch {
		case r == '"':
			// A quote always terminates the current token and toggles
			// phrase mode.
			flush()
			inQuotes = !inQuotes
		case (r == ' ' || r == '\t' || r == '\n') && !inQuotes:
			flush()
		default:
			pending.WriteRune(r)
		}
	}
	flush()

	return tokens
}
|
||||
|
||||
// appendToken appends token to tokens after trimming surrounding
// whitespace; blank tokens are dropped and tokens is returned unchanged.
func appendToken(tokens []string, token string) []string {
	trimmed := strings.TrimSpace(token)
	if trimmed == "" {
		return tokens
	}
	return append(tokens, trimmed)
}
|
||||
|
||||
// FullFrontMatter represents the complete YAML front matter of a note.
type FullFrontMatter struct {
	Title        string  `yaml:"title,omitempty"`
	Date         string  `yaml:"date,omitempty"`
	LastModified string  `yaml:"last_modified,omitempty"`
	Tags         tagList `yaml:"tags,omitempty"`
}
|
||||
|
||||
// frontMatter is a reduced, tags-only front matter kept for compatibility
// with Load.
type frontMatter struct {
	Tags tagList `yaml:"tags"`
}
|
||||
|
||||
// tagList is a []string that also accepts a single YAML scalar, so both
// "tags: foo" and "tags: [foo, bar]" decode correctly.
type tagList []string

// UnmarshalYAML implements the yaml.v3 Unmarshaler interface, accepting a
// scalar, a sequence, or an alias to either; any other node kind is an error.
func (t *tagList) UnmarshalYAML(value *yaml.Node) error {
	switch value.Kind {
	case yaml.ScalarNode:
		// A single tag written as a plain string becomes a one-element list.
		var tag string
		if err := value.Decode(&tag); err != nil {
			return err
		}
		*t = tagList{tag}
		return nil
	case yaml.SequenceNode:
		var tags []string
		if err := value.Decode(&tags); err != nil {
			return err
		}
		*t = tagList(tags)
		return nil
	case yaml.AliasNode:
		// Resolve the alias and decode its target node recursively.
		return t.UnmarshalYAML(value.Alias)
	default:
		return fmt.Errorf("format de tags non supporte")
	}
}
|
||||
|
||||
// ExtractFrontMatterAndBody extrait le front matter et le corps d'un fichier Markdown.
|
||||
func ExtractFrontMatterAndBody(path string) (FullFrontMatter, string, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return ExtractFrontMatterAndBodyFromReader(file)
|
||||
}
|
||||
|
||||
// extractFrontMatterAndBodyFromReader extrait le front matter et le corps d'un io.Reader.
|
||||
func ExtractFrontMatterAndBodyFromReader(reader io.Reader) (FullFrontMatter, string, error) {
|
||||
bufReader := bufio.NewReader(reader)
|
||||
var fm FullFrontMatter
|
||||
var bodyBuilder strings.Builder
|
||||
var fmBuilder strings.Builder
|
||||
|
||||
line, err := bufReader.ReadString('\n')
|
||||
if err != nil && !errors.Is(err, io.EOF) {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
|
||||
if strings.TrimSpace(line) != "---" {
|
||||
bodyBuilder.WriteString(line) // If no front matter, this is part of the body
|
||||
_, err := io.Copy(&bodyBuilder, bufReader)
|
||||
return FullFrontMatter{}, bodyBuilder.String(), err
|
||||
}
|
||||
|
||||
// Found first '---', now read front matter
|
||||
fmFound := false
|
||||
for {
|
||||
line, err = bufReader.ReadString('\n')
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Front matter not terminated, treat entire content as body
|
||||
return FullFrontMatter{}, "---" + fmBuilder.String() + bodyBuilder.String(), nil
|
||||
}
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
if strings.TrimSpace(line) == "---" {
|
||||
fmFound = true
|
||||
break
|
||||
}
|
||||
fmBuilder.WriteString(line)
|
||||
}
|
||||
|
||||
if fmFound {
|
||||
if err := yaml.Unmarshal([]byte(fmBuilder.String()), &fm); err != nil {
|
||||
return FullFrontMatter{}, "", fmt.Errorf("erreur d'analyse YAML du front matter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Read the rest of the body
|
||||
_, err = io.Copy(&bodyBuilder, bufReader)
|
||||
if err != nil {
|
||||
return FullFrontMatter{}, "", err
|
||||
}
|
||||
|
||||
return fm, bodyBuilder.String(), nil
|
||||
}
|
||||
|
||||
// extractFrontMatter est une version simplifiee pour la compatibilite avec Load.
|
||||
func extractFrontMatter(path string) (frontMatter, error) {
|
||||
fm, _, err := ExtractFrontMatterAndBody(path)
|
||||
return frontMatter{Tags: fm.Tags}, err
|
||||
}
|
||||
Reference in New Issue
Block a user