diff --git a/core/controllers/base_file.go b/core/controllers/base_file.go index 5018caeae..403fe89d1 100644 --- a/core/controllers/base_file.go +++ b/core/controllers/base_file.go @@ -4,19 +4,16 @@ import ( "errors" "fmt" "github.com/crawlab-team/crawlab/core/fs" - "github.com/crawlab-team/crawlab/core/interfaces" - "github.com/crawlab-team/crawlab/core/utils" "github.com/gin-gonic/gin" "io" "os" - "path/filepath" "sync" ) func GetBaseFileListDir(rootPath string, c *gin.Context) { path := c.Query("path") - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -36,7 +33,7 @@ func GetBaseFileListDir(rootPath string, c *gin.Context) { func GetBaseFileFile(rootPath string, c *gin.Context) { path := c.Query("path") - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -54,7 +51,7 @@ func GetBaseFileFile(rootPath string, c *gin.Context) { func GetBaseFileFileInfo(rootPath string, c *gin.Context) { path := c.Query("path") - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -70,7 +67,7 @@ func GetBaseFileFileInfo(rootPath string, c *gin.Context) { } func PostBaseFileSaveFile(rootPath string, c *gin.Context) { - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorInternalServerError(c, err) return @@ -120,7 +117,7 @@ func PostBaseFileSaveFile(rootPath string, c *gin.Context) { } func PostBaseFileSaveFiles(rootPath string, c *gin.Context) { - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorInternalServerError(c, err) return @@ -181,7 +178,7 @@ func PostBaseFileSaveDir(rootPath string, c *gin.Context) { return } - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := 
fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -205,7 +202,7 @@ func PostBaseFileRenameFile(rootPath string, c *gin.Context) { return } - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -229,7 +226,7 @@ func DeleteBaseFileFile(rootPath string, c *gin.Context) { payload.Path = "." } - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -257,7 +254,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) { return } - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -272,7 +269,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) { } func PostBaseFileExport(rootPath string, c *gin.Context) { - fsSvc, err := getBaseFileFsSvc(rootPath) + fsSvc, err := fs.GetBaseFileFsSvc(rootPath) if err != nil { HandleErrorBadRequest(c, err) return @@ -289,14 +286,3 @@ func PostBaseFileExport(rootPath string, c *gin.Context) { c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", zipFilePath)) c.File(zipFilePath) } - -func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) { - return getBaseFileFsSvc(rootPath) -} - -func getBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) { - workspacePath := utils.GetWorkspace() - fsSvc := fs.NewFsService(filepath.Join(workspacePath, rootPath)) - - return fsSvc, nil -} diff --git a/core/controllers/spider.go b/core/controllers/spider.go index bb924dcae..7392a47e8 100644 --- a/core/controllers/spider.go +++ b/core/controllers/spider.go @@ -5,6 +5,7 @@ import ( "github.com/crawlab-team/crawlab/core/constants" "github.com/crawlab-team/crawlab/core/models/models" mongo2 "github.com/crawlab-team/crawlab/core/mongo" + "github.com/crawlab-team/crawlab/core/spider" "math" 
"os" "path/filepath" @@ -293,6 +294,17 @@ func PostSpider(c *gin.Context) { return } + // create template if available + if utils.IsPro() && s.Template != "" { + if templateSvc := spider.GetSpiderTemplateRegistryService(); templateSvc != nil { + err = templateSvc.CreateTemplate(s.Id) + if err != nil { + HandleErrorInternalServerError(c, err) + return + } + } + } + HandleSuccessWithData(c, s) } diff --git a/core/fs/utils.go b/core/fs/utils.go new file mode 100644 index 000000000..9a658b302 --- /dev/null +++ b/core/fs/utils.go @@ -0,0 +1,14 @@ +package fs + +import ( + "github.com/crawlab-team/crawlab/core/interfaces" + "github.com/crawlab-team/crawlab/core/utils" + "path/filepath" +) + +func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) { + workspacePath := utils.GetWorkspace() + fsSvc := NewFsService(filepath.Join(workspacePath, rootPath)) + + return fsSvc, nil +} diff --git a/core/interfaces/spider_template_service.go b/core/interfaces/spider_template_service.go new file mode 100644 index 000000000..a788b59fc --- /dev/null +++ b/core/interfaces/spider_template_service.go @@ -0,0 +1,7 @@ +package interfaces + +import "go.mongodb.org/mongo-driver/bson/primitive" + +type SpiderTemplateService interface { + CreateTemplate(id primitive.ObjectID) (err error) +} diff --git a/core/models/models/spider.go b/core/models/models/spider.go index c1052466b..684e34c0d 100644 --- a/core/models/models/spider.go +++ b/core/models/models/spider.go @@ -20,6 +20,12 @@ type Spider struct { GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id GitRootPath string `json:"git_root_path" bson:"git_root_path"` Git *Git `json:"git,omitempty" bson:"-"` + Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template + TemplateParams *struct { + SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"` + StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"` + Domains string 
`json:"domains,omitempty" bson:"domains,omitempty"` + } `json:"template_params,omitempty" bson:"template_params,omitempty"` // stats Stat *SpiderStat `json:"stat,omitempty" bson:"-"` diff --git a/core/spider/registry_service.go b/core/spider/registry_service.go new file mode 100644 index 000000000..6e9122260 --- /dev/null +++ b/core/spider/registry_service.go @@ -0,0 +1,13 @@ +package spider + +import "github.com/crawlab-team/crawlab/core/interfaces" + +var templateSvcInstance interfaces.SpiderTemplateService + +func SetSpiderTemplateRegistryService(svc interfaces.SpiderTemplateService) { + templateSvcInstance = svc +} + +func GetSpiderTemplateRegistryService() interfaces.SpiderTemplateService { + return templateSvcInstance +} diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index 13611c1f3..ef61a2e0b 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -340,6 +340,19 @@ func (r *Runner) startHealthCheck() { } } +// configurePythonPath sets up the Python environment paths, handling both pyenv and default installations +func (r *Runner) configurePythonPath() { + // Configure global node_modules path + pyenvRoot := utils.GetPyenvPath() + pyenvShimsPath := pyenvRoot + "/shims" + pyenvBinPath := pyenvRoot + "/bin" + + // Configure global pyenv path + _ = os.Setenv("PYENV_ROOT", pyenvRoot) + _ = os.Setenv("PATH", pyenvShimsPath+":"+os.Getenv("PATH")) + _ = os.Setenv("PATH", pyenvBinPath+":"+os.Getenv("PATH")) +} + // configureNodePath sets up the Node.js environment paths, handling both nvm and default installations func (r *Runner) configureNodePath() { // Configure nvm-based Node.js paths @@ -366,7 +379,10 @@ func (r *Runner) configureGoPath() { // - Crawlab-specific variables // - Global environment variables from the system func (r *Runner) configureEnv() { - // Configure Node.js paths + // Configure Python path + r.configurePythonPath() + + // Configure Node.js path r.configureNodePath() // Configure Go path @@ -375,8 
// ToSnakeCase lower-cases s and joins its words with single underscores.
//
// Words are separated by any run of whitespace (spaces, tabs, newlines) and/or
// hyphens. Runs collapse to one underscore and separators at either end are
// dropped, so "  My - Spider " becomes "my_spider". Underscores already present
// in s are ordinary characters and are kept as they are. Letter-case boundaries
// are not treated as word breaks: "MySpider" becomes "myspider".
//
// An empty or separator-only input yields "".
func ToSnakeCase(s string) string {
	// Turn hyphens into whitespace so that strings.Fields handles both kinds
	// of separator, including mixed runs, in a single split.
	words := strings.Fields(strings.ReplaceAll(strings.ToLower(s), "-", " "))
	return strings.Join(words, "_")
}