-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrobots.go
128 lines (105 loc) · 3.04 KB
/
robots.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
)
// Generator handles the robots.txt generation
type Generator struct {
	DocRoot     string   // document root that is scanned for sitemap files
	GitIgnore   string   // path to the .gitignore whose patterns become Disallow rules
	SitemapURLs []string // root-relative URL paths of discovered sitemaps, filled by FindSitemaps
}
// NewGenerator creates a new robots.txt generator instance rooted at
// docRoot; the .gitignore path is derived from the same root.
func NewGenerator(docRoot string) *Generator {
	g := &Generator{}
	g.DocRoot = docRoot
	g.GitIgnore = filepath.Join(docRoot, ".gitignore")
	g.SitemapURLs = []string{}
	return g
}
// FindSitemaps searches the document root for XML sitemaps (files
// matching *sitemap*.xml) and records their root-relative URL paths
// in g.SitemapURLs.
//
// The list is rebuilt from scratch on every call so that invoking
// Generate (which calls this method) repeatedly does not accumulate
// duplicate Sitemap entries.
func (g *Generator) FindSitemaps() error {
	pattern := filepath.Join(g.DocRoot, "*sitemap*.xml")
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return fmt.Errorf("error finding sitemaps: %w", err)
	}
	// Reset instead of append-only: keeps the method idempotent.
	g.SitemapURLs = g.SitemapURLs[:0]
	for _, match := range matches {
		// Convert absolute path to URL path
		relPath, err := filepath.Rel(g.DocRoot, match)
		if err != nil {
			return fmt.Errorf("error converting path %s: %w", match, err)
		}
		g.SitemapURLs = append(g.SitemapURLs, "/"+filepath.ToSlash(relPath))
	}
	return nil
}
// ConvertGitIgnorePatterns reads .gitignore and converts its patterns
// to robots.txt Disallow paths.
//
// Blank lines, comments ("#..."), and negation patterns ("!...") are
// skipped; leading and trailing glob stars are stripped and each rule
// is anchored with a single leading "/". Returns (nil, nil) when no
// .gitignore exists.
func (g *Generator) ConvertGitIgnorePatterns() ([]string, error) {
	// Open directly and inspect the error rather than Stat-then-Open,
	// which is racy if the file disappears between the two calls.
	file, err := os.Open(g.GitIgnore)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil // Return empty if .gitignore doesn't exist
		}
		return nil, fmt.Errorf("error opening .gitignore: %w", err)
	}
	defer file.Close()
	var disallowRules []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Negation patterns re-include files in git; they must not
		// become Disallow rules.
		if strings.HasPrefix(line, "!") {
			continue
		}
		// Convert .gitignore pattern to robots.txt format: strip ALL
		// leading/trailing stars (handles "**/dir" and "dir/**", which
		// a single TrimPrefix/TrimSuffix would leave half-stripped).
		rule := strings.TrimPrefix(line, "/")
		rule = strings.TrimLeft(rule, "*")
		rule = strings.TrimRight(rule, "*")
		if rule != "" {
			disallowRules = append(disallowRules, "/"+strings.TrimPrefix(rule, "/"))
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading .gitignore: %w", err)
	}
	return disallowRules, nil
}
// Generate creates the robots.txt content: a wildcard User-agent
// header, Disallow rules derived from .gitignore, Allow rules for the
// common public asset directories, and one Sitemap line per sitemap
// file found under the document root.
func (g *Generator) Generate() (string, error) {
	if err := g.FindSitemaps(); err != nil {
		return "", err
	}
	disallowRules, err := g.ConvertGitIgnorePatterns()
	if err != nil {
		return "", err
	}

	var out strings.Builder

	// Default header.
	out.WriteString("User-agent: *\n")

	// Disallow rules from .gitignore.
	for _, rule := range disallowRules {
		out.WriteString("Disallow: ")
		out.WriteString(rule)
		out.WriteString("\n")
	}

	// Allow rules for common public directories.
	for _, dir := range []string{"/assets/", "/images/", "/css/", "/js/"} {
		out.WriteString("Allow: ")
		out.WriteString(dir)
		out.WriteString("\n")
	}

	// Sitemap references.
	for _, sitemap := range g.SitemapURLs {
		out.WriteString("Sitemap: ")
		out.WriteString(sitemap)
		out.WriteString("\n")
	}

	return out.String(), nil
}
// robotsmain generates robots.txt for the given document root and
// writes it into that root, where crawlers expect to find it.
func robotsmain(docroot string) error {
	generator := NewGenerator(docroot)
	content, err := generator.Generate()
	if err != nil {
		return err
	}
	// Write into the docroot, not the process working directory: the
	// original "robots.txt" relative path only served the site when
	// the process happened to be started from the docroot.
	return os.WriteFile(filepath.Join(docroot, "robots.txt"), []byte(content), 0o644)
}