Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use worker pool for smartctl #204

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func (i *SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) {
func (i *SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
info := NewSMARTctlInfo(ch)
i.mutex.Lock()
refreshAllDevices(i.logger, i.Devices)
for _, device := range i.Devices {
json := readData(i.logger, device)
if json.Exists() {
Expand Down
61 changes: 48 additions & 13 deletions readjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ type JSONCache struct {
LastCollect time.Time
}

type SMARTresult struct {
device Device
JSON gjson.Result
ok bool
}

type SMARTctlWorkerPool struct {
results chan SMARTresult
expected int
}

var (
jsonCache sync.Map
)
Expand All @@ -40,6 +51,10 @@ func init() {
jsonCache.Store("", JSONCache{})
}

func createPool() SMARTctlWorkerPool {
return SMARTctlWorkerPool{make(chan SMARTresult), 0}
}

// Parse json to gjson object
func parseJSON(data string) gjson.Result {
if !gjson.Valid(data) {
Expand All @@ -62,7 +77,7 @@ func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
}

// Get json from smartctl and parse it
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
func readSMARTctl(logger log.Logger, device Device, results chan<- SMARTresult) {
start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", "--device="+device.Type, device.Name).Output()
if err != nil {
Expand All @@ -72,7 +87,7 @@ func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk
results <- SMARTresult{device, json, rcOk && jsonOk}
}

func readSMARTctlDevices(logger log.Logger) gjson.Result {
Expand All @@ -89,23 +104,43 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
return parseJSON(string(out))
}

// Refresh all devices' json
func refreshAllDevices(logger log.Logger, devices []Device) {
if *smartctlFakeData {
return
}

pool := createPool()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be implementing a sync.WaitGroup. Or maybe better, use errgroup.Group.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Will try to refactor it later, after the fate of #205 is decided.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Will try to refactor it later, after the fate of #205 is decided.

Merged. Can you fix the conflicts for merge?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pokab Wondering if are still planning to work on this MR.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also experienced troubles with timeouts and I'd love to see this PR merged. Thank you!

for _, device := range devices {
refreshData(logger, device, &pool)
}
for pool.expected > 0 {
result := <-pool.results
if result.ok {
jsonCache.Store(result.device, JSONCache{JSON: result.JSON, LastCollect: time.Now()})
}
pool.expected--
}
close(pool.results)
}

// Select json source and parse
func refreshData(logger log.Logger, device Device, pool *SMARTctlWorkerPool) {
cacheValue, cacheOk := jsonCache.Load(device)
if !cacheOk || time.Now().After(cacheValue.(JSONCache).LastCollect.Add(*smartctlInterval)) {
go readSMARTctl(logger, device, pool.results)
pool.expected++
}
}

func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData {
return readFakeSMARTctl(logger, device)
}

cacheValue, cacheOk := jsonCache.Load(device)
if !cacheOk || time.Now().After(cacheValue.(JSONCache).LastCollect.Add(*smartctlInterval)) {
json, ok := readSMARTctl(logger, device)
if ok {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
}
return j.(JSONCache).JSON
}
cacheValue, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
return gjson.Result{}
}
return cacheValue.(JSONCache).JSON
Expand Down