diff --git a/pkg/dataflatten/examples/flatten/main.go b/pkg/dataflatten/examples/flatten/main.go
new file mode 100644
index 000000000..98b0586eb
--- /dev/null
+++ b/pkg/dataflatten/examples/flatten/main.go
@@ -0,0 +1,77 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+	"text/tabwriter"
+
+	"github.com/kolide/kit/logutil"
+	"github.com/kolide/launcher/pkg/dataflatten"
+	"github.com/peterbourgon/ff"
+	"github.com/pkg/errors"
+)
+
+func checkError(err error) {
+	if err != nil {
+		fmt.Printf("Got Error: %v\nStack:\n%+v\n", err, err)
+		os.Exit(1)
+	}
+}
+
+func main() {
+
+	flagset := flag.NewFlagSet("plist", flag.ExitOnError)
+
+	var (
+		flPlist = flagset.String("plist", "", "Path to plist")
+		flJson  = flagset.String("json", "", "Path to json file")
+		flQuery = flagset.String("q", "", "query")
+
+		flDebug = flagset.Bool("debug", false, "use a debug logger")
+	)
+
+	if err := ff.Parse(flagset, os.Args[1:],
+		ff.WithConfigFileFlag("config"),
+		ff.WithConfigFileParser(ff.PlainParser),
+	); err != nil {
+		checkError(errors.Wrap(err, "parsing flags"))
+	}
+
+	logger := logutil.NewCLILogger(*flDebug)
+
+	opts := []dataflatten.FlattenOpts{
+		dataflatten.WithLogger(logger),
+	}
+
+	if *flQuery != "" {
+		opts = append(opts, dataflatten.WithQuery(strings.Split(*flQuery, `/`)))
+	}
+
+	rows := []dataflatten.Row{}
+
+	if *flPlist != "" {
+		data, err := dataflatten.PlistFile(*flPlist, opts...)
+		checkError(errors.Wrap(err, "flattening plist file"))
+		rows = append(rows, data...)
+	}
+
+	if *flJson != "" {
+		data, err := dataflatten.JsonFile(*flJson, opts...)
+		checkError(errors.Wrap(err, "flattening json file"))
+		rows = append(rows, data...)
+	}
+
+	w := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0)
+	fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", "path", "parent key", "key", "value")
+	fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", "----", "----------", "---", "-----")
+
+	for _, row := range rows {
+		p, k := row.ParentKey("/")
+		fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", row.StringPath("/"), p, k, row.Value)
+	}
+	w.Flush()
+
+	return
+}
diff --git a/pkg/dataflatten/flatten.go b/pkg/dataflatten/flatten.go
new file mode 100644
index 000000000..434afee0a
--- /dev/null
+++ b/pkg/dataflatten/flatten.go
@@ -0,0 +1,376 @@
+// Package dataflatten contains tools to flatten complex data
+// structures.
+//
+// On macOS, many plists use an array of maps; these can be tricky to
+// filter. This package knows how to flatten that structure, as well
+// as rewrite it as a nested structure, or filter it. It is akin to
+// xpath, though simpler.
+//
+// This tool works primarily through string interfaces, so type
+// information may be lost.
+//
+// Query Syntax
+//
+// The query syntax handles both filtering and basic rewriting. It is
+// not perfect. The idea behind it is that we descend through a data
+// structure, specifying what matches at each level.
+//
+// Each level of the query can:
+// * Specify a filter. This is a simple string match with wildcard support (prefix and/or suffix, but not infix).
+// * If the data is an array, specify an index.
+// * For an array-of-maps, specify a key to rewrite it as a nested map.
+//
+// Each query term has 3 parts: [#]string[=>kvmatch]
+// 1. An optional `#`. This denotes a key to rewrite an array-of-maps with.
+// 2. A search term. If this is an integer, it is interpreted as an array index.
+// 3. A key/value match string. For a map, this matches against the value of a key.
+//
+// Some examples:
+// * data/users            Return everything under { data: { users: { ... } } }
+// * data/users/0          Return the first item in the users array
+// * data/users/name=>A*   Return users whose name starts with "A"
+// * data/users/#id        Return the users, and rewrite the users array to be a map with the id as the key
+//
+// See the test suite for extensive examples.
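+//
+// As a rough usage sketch (illustrative only; the data literal below is
+// made up for this comment and is not part of the test data):
+//
+//	rows, err := dataflatten.Flatten(
+//		map[string]interface{}{
+//			"users": []interface{}{
+//				map[string]interface{}{"id": 1, "name": "Alex"},
+//			},
+//		},
+//		dataflatten.WithQuery([]string{"users", "#name"}),
+//	)
+//	// With a nil err, rows contains paths such as users/Alex/id
+//	// (value "1") and users/Alex/name (value "Alex").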
+package dataflatten
+
+import (
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/go-kit/kit/log"
+	"github.com/go-kit/kit/log/level"
+	"github.com/pkg/errors"
+)
+
+// Flattener flattens complex, nested data structures. It recurses
+// through them, and returns a simplified form. At the simplest level,
+// this rewrites:
+//
+// { foo: { bar: { baz: 1 } } }
+//
+// To:
+//
+// [ { path: foo/bar/baz, value: 1 } ]
+//
+// It can optionally filter and rewrite along the way.
+type Flattener struct {
+	includeNils     bool
+	rows            []Row
+	logger          log.Logger
+	query           []string
+	queryWildcard   string
+	queryKeyDenoter string
+}
+
+type FlattenOpts func(*Flattener)
+
+// IncludeNulls indicates that Flatten should return null values,
+// instead of skipping over them.
+func IncludeNulls() FlattenOpts {
+	return func(fl *Flattener) {
+		fl.includeNils = true
+	}
+}
+
+// WithLogger sets the logger to use.
+func WithLogger(logger log.Logger) FlattenOpts {
+	return func(fl *Flattener) {
+		fl.logger = logger
+	}
+}
+
+// WithQuery specifies a query to flatten with. This is used both for
+// re-writing arrays into maps, and for filtering. See "Query Syntax"
+// in the package documentation.
+func WithQuery(q []string) FlattenOpts {
+	return func(fl *Flattener) {
+		fl.query = q
+	}
+}
+
+// Flatten is the entry point to the Flattener functionality.
+func Flatten(data interface{}, opts ...FlattenOpts) ([]Row, error) {
+	fl := &Flattener{
+		rows:            []Row{},
+		logger:          log.NewNopLogger(),
+		queryWildcard:   `*`,
+		queryKeyDenoter: `#`,
+	}
+
+	for _, opt := range opts {
+		opt(fl)
+	}
+
+	if err := fl.descend([]string{}, data, 0); err != nil {
+		return nil, err
+	}
+
+	return fl.rows, nil
+}
+
+// descend recurses through a given data structure, flattening along the way.
+func (fl *Flattener) descend(path []string, data interface{}, depth int) error {
+	queryTerm, isQueryMatched := fl.queryAtDepth(depth)
+
+	logger := log.With(fl.logger,
+		"caller", "descend",
+		"depth", depth,
+		"rows-so-far", len(fl.rows),
+		"query", queryTerm,
+		"path", strings.Join(path, "/"),
+	)
+
+	switch v := data.(type) {
+	case []interface{}:
+		for i, e := range v {
+			pathKey := strconv.Itoa(i)
+			level.Debug(logger).Log("msg", "checking an array", "indexStr", pathKey)
+
+			// If the queryTerm starts with queryKeyDenoter, then we
+			// want to rewrite the path based on it. Note that this
+			// does no sanity checking: multiple values will re-write,
+			// and if the value isn't there, you get nothing.
+			//
+			// keyName  == "name"
+			// keyValue == "alex" (needs to be tested against queryTerm)
+			// pathKey  == what we descend with
+			if strings.HasPrefix(queryTerm, fl.queryKeyDenoter) {
+				keyQuery := strings.SplitN(strings.TrimPrefix(queryTerm, fl.queryKeyDenoter), "=>", 2)
+				keyName := keyQuery[0]
+
+				innerlogger := log.With(logger, "arraykeyname", keyName)
+				level.Debug(logger).Log("msg", "attempting to coerce array into map")
+
+				e, ok := e.(map[string]interface{})
+				if !ok {
+					level.Debug(innerlogger).Log("msg", "can't coerce into map")
+					continue
+				}
+
+				// Is keyName in this map?
+				val, ok := e[keyName]
+				if !ok {
+					level.Debug(innerlogger).Log("msg", "keyName not in map")
+					continue
+				}
+
+				pathKey, ok = val.(string)
+				if !ok {
+					level.Debug(innerlogger).Log("msg", "can't coerce pathKey val into string")
+					continue
+				}
+
+				// Looks good to descend; we've overwritten both e and pathKey. Exit this conditional.
+			}
+
+			if !(isQueryMatched || fl.queryMatchArrayElement(e, i, queryTerm)) {
+				level.Debug(logger).Log("msg", "query not matched")
+				continue
+			}
+
+			if err := fl.descend(append(path, pathKey), e, depth+1); err != nil {
+				return errors.Wrap(err, "flattening array")
+			}
+
+		}
+	case map[string]interface{}:
+		level.Debug(logger).Log("msg", "checking a map", "path", strings.Join(path, "/"))
+		for k, e := range v {
+
+			// Check that the key name matches. If not, skip this
+			// entire branch of the map.
+			if !(isQueryMatched || fl.queryMatchString(k, queryTerm)) {
+				continue
+			}
+
+			if err := fl.descend(append(path, k), e, depth+1); err != nil {
+				return errors.Wrap(err, "flattening map")
+			}
+		}
+	case nil:
+		// Because we want to filter nils out, we do _not_ examine isQueryMatched here.
+		if !(fl.queryMatchNil(queryTerm)) {
+			level.Debug(logger).Log("msg", "query not matched")
+			return nil
+		}
+		fl.rows = append(fl.rows, Row{Path: path, Value: ""})
+	default:
+		// Non-iterable. Stringify and be done.
+		stringValue, err := stringify(v)
+		if err != nil {
+			return errors.Wrapf(err, "flattening at path %v", path)
+		}
+
+		if !(isQueryMatched || fl.queryMatchString(stringValue, queryTerm)) {
+			level.Debug(logger).Log("msg", "query not matched")
+			return nil
+		}
+		fl.rows = append(fl.rows, Row{Path: path, Value: stringValue})
+
+	}
+	return nil
+}
+
+func (fl *Flattener) queryMatchNil(queryTerm string) bool {
+	// TODO: If needed, we could use queryTerm for optional nil filtering.
+	return fl.includeNils
+}
+
+// queryMatchArrayElement matches array elements. This one is magic.
+//
+// Syntax:
+//   #i           -- Match index i. For example `#0`
+//   k=>queryTerm -- If this is a map, it should have key k whose value matches queryTerm
+//
+// We use `=>` as something that is reasonably intuitive, and not very
+// likely to occur on its own. Unfortunately, `==` shows up in base64.
+func (fl *Flattener) queryMatchArrayElement(data interface{}, arrIndex int, queryTerm string) bool {
+	logger := log.With(fl.logger,
+		"caller", "queryMatchArrayElement",
+		"rows-so-far", len(fl.rows),
+		"query", queryTerm,
+		"arrIndex", arrIndex,
+	)
+
+	// Strip off the key re-write denotation before trying to match.
+	queryTerm = strings.TrimPrefix(queryTerm, fl.queryKeyDenoter)
+
+	if queryTerm == fl.queryWildcard {
+		return true
+	}
+
+	// If the queryTerm is an int, then we expect to match the index.
+	if queryIndex, err := strconv.Atoi(queryTerm); err == nil {
+		level.Debug(logger).Log("msg", "using numeric index comparison")
+		return queryIndex == arrIndex
+	}
+
+	level.Debug(logger).Log("msg", "checking data type")
+
+	switch dataCasted := data.(type) {
+	case []interface{}:
+		// We can't match an array that has arrays as elements. Use a wildcard instead.
+		return false
+	case map[string]interface{}:
+		kvQuery := strings.SplitN(queryTerm, "=>", 2)
+
+		// If this is only one element long, then we're testing for whether or not there's a key with this name.
+		if len(kvQuery) == 1 {
+			_, ok := dataCasted[kvQuery[0]]
+			return ok
+		}
+
+		// Else see if the value matches.
+		for k, v := range dataCasted {
+			// Since this needs to check against _every_ member,
+			// return true on the first match, or fall through to
+			// the false below.
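+			// For example, the term `name=>A*` splits into
+			// kvQuery == ["name", "A*"]: match if some key equals
+			// "name" and its value matches "A*".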
+			if fl.queryMatchString(k, kvQuery[0]) && fl.queryMatchStringify(v, kvQuery[1]) {
+				return true
+			}
+		}
+		return false
+	default:
+		// Non-iterable. Stringify and be done.
+		return fl.queryMatchStringify(dataCasted, queryTerm)
+	}
+}
+
+func (fl *Flattener) queryMatchStringify(data interface{}, queryTerm string) bool {
+	// Strip off the key re-write denotation before trying to match.
+	queryTerm = strings.TrimPrefix(queryTerm, fl.queryKeyDenoter)
+
+	if queryTerm == fl.queryWildcard {
+		return true
+	}
+
+	if data == nil {
+		return fl.queryMatchNil(queryTerm)
+	}
+
+	stringValue, err := stringify(data)
+	if err != nil {
+		return false
+	}
+
+	return fl.queryMatchString(stringValue, queryTerm)
+
+}
+
+func (fl *Flattener) queryMatchString(v, queryTerm string) bool {
+	if queryTerm == fl.queryWildcard {
+		return true
+	}
+
+	// Some basic string manipulations to handle prefix and suffix operations.
+	switch {
+	case strings.HasPrefix(queryTerm, fl.queryWildcard) && strings.HasSuffix(queryTerm, fl.queryWildcard):
+		queryTerm = strings.TrimPrefix(queryTerm, fl.queryWildcard)
+		queryTerm = strings.TrimSuffix(queryTerm, fl.queryWildcard)
+		return strings.Contains(v, queryTerm)
+
+	case strings.HasPrefix(queryTerm, fl.queryWildcard):
+		queryTerm = strings.TrimPrefix(queryTerm, fl.queryWildcard)
+		return strings.HasSuffix(v, queryTerm)
+
+	case strings.HasSuffix(queryTerm, fl.queryWildcard):
+		queryTerm = strings.TrimSuffix(queryTerm, fl.queryWildcard)
+		return strings.HasPrefix(v, queryTerm)
+	}
+
+	return v == queryTerm
+}
+
+// queryAtDepth returns the query term for a given depth, and a boolean
+// indicating whether that depth is automatically matched. Once we've
+// run out of query terms, everything is a match.
+func (fl *Flattener) queryAtDepth(depth int) (string, bool) {
+	// If we're nil, there's an implied wildcard.
+	//
+	// This works because:
+	//   []string   is len 0, and nil
+	//   []string{} is len 0, but not nil
+	if fl.query == nil {
+		return fl.queryWildcard, true
+	}
+
+	// If there's no query for this depth, then there's an implied
+	// wildcard. This allows the query to specify prefixes.
+	if depth+1 > len(fl.query) {
+		return fl.queryWildcard, true
+	}
+
+	q := fl.query[depth]
+
+	return q, q == fl.queryWildcard
+}
+
+// stringify takes an arbitrary piece of data, and attempts to coerce
+// it into a string.
+func stringify(data interface{}) (string, error) {
+	switch v := data.(type) {
+	case nil:
+		return "", nil
+	case string:
+		return v, nil
+	case []byte:
+		return string(v), nil
+	case uint64:
+		return strconv.FormatUint(v, 10), nil
+	case float64:
+		return strconv.FormatFloat(v, 'f', -1, 64), nil
+	case int:
+		return strconv.Itoa(v), nil
+	case bool:
+		return strconv.FormatBool(v), nil
+	case time.Time:
+		return strconv.FormatInt(v.Unix(), 10), nil
+	default:
+		return "", errors.Errorf("unknown type on %v", data)
+	}
+}
diff --git a/pkg/dataflatten/flatten_test.go b/pkg/dataflatten/flatten_test.go
new file mode 100644
index 000000000..de11d9686
--- /dev/null
+++ b/pkg/dataflatten/flatten_test.go
@@ -0,0 +1,259 @@
+package dataflatten
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+type flattenTestCase struct {
+	in      string
+	out     []Row
+	options []FlattenOpts
+	comment string
+	err     bool
+}
+
+func TestFlatten_Complex(t *testing.T) {
+	t.Parallel()
+
+	// Do the unmarshaling here, so we don't keep doing it again and again.
+	dataRaw, err := ioutil.ReadFile(filepath.Join("testdata", "animals.json"))
+	require.NoError(t, err, "reading file")
+	var dataIn interface{}
+	require.NoError(t, json.Unmarshal(dataRaw, &dataIn), "unmarshalling json")
+
+	// We do a bunch of tests to select this user. So we'll pull
+	// this out here and make the testcases more DRY.
+	testdataUser0 := []Row{
+		Row{Path: []string{"users", "0", "favorites", "0"}, Value: "ants"},
+		Row{Path: []string{"users", "0", "id"}, Value: "1"},
+		Row{Path: []string{"users", "0", "name"}, Value: "Alex Aardvark"},
+		Row{Path: []string{"users", "0", "uuid"}, Value: "abc123"},
+	}
+
+	var tests = []flattenTestCase{
+		{
+			out: []Row{
+				Row{Path: []string{"metadata", "testing"}, Value: "true"},
+				Row{Path: []string{"metadata", "version"}, Value: "1.0.1"},
+				Row{Path: []string{"system"}, Value: "users demo"},
+				Row{Path: []string{"users", "0", "favorites", "0"}, Value: "ants"},
+				Row{Path: []string{"users", "0", "id"}, Value: "1"},
+				Row{Path: []string{"users", "0", "name"}, Value: "Alex Aardvark"},
+				Row{Path: []string{"users", "0", "uuid"}, Value: "abc123"},
+				Row{Path: []string{"users", "1", "favorites", "0"}, Value: "mice"},
+				Row{Path: []string{"users", "1", "favorites", "1"}, Value: "birds"},
+				Row{Path: []string{"users", "1", "id"}, Value: "2"},
+				Row{Path: []string{"users", "1", "name"}, Value: "Bailey Bobcat"},
+				Row{Path: []string{"users", "1", "uuid"}, Value: "def456"},
+				Row{Path: []string{"users", "2", "favorites", "0"}, Value: "seeds"},
+				Row{Path: []string{"users", "2", "id"}, Value: "3"},
+				Row{Path: []string{"users", "2", "name"}, Value: "Cam Chipmunk"},
+				Row{Path: []string{"users", "2", "uuid"}, Value: "ghi789"},
+			},
+		},
+		{
+			options: []FlattenOpts{WithQuery([]string{"metadata"})},
+			out: []Row{
+				Row{Path: []string{"metadata", "testing"}, Value: "true"},
+				Row{Path: []string{"metadata", "version"}, Value: "1.0.1"},
+			},
+		},
+		{
+			comment: "array by index",
+			options: []FlattenOpts{WithQuery([]string{"users", "0"})},
+			out:     testdataUser0,
+		},
+		{
+			comment: "array by id value",
+			options: []FlattenOpts{WithQuery([]string{"users", "id=>1"})},
+			out:     testdataUser0,
+		},
+		{
+			comment: "array by uuid",
+			options: []FlattenOpts{WithQuery([]string{"users", "uuid=>abc123"})},
+			out:     testdataUser0,
+		},
+		{
+			comment: "array by name with suffix wildcard",
+			options: []FlattenOpts{WithQuery([]string{"users", "name=>Al*"})},
out: testdataUser0, + }, + { + comment: "array by name with prefix wildcard", + options: []FlattenOpts{WithQuery([]string{"users", "name=>*Aardvark"})}, + out: testdataUser0, + }, + + { + comment: "array by name with suffix and prefix", + options: []FlattenOpts{WithQuery([]string{"users", "name=>*Aardv*"})}, + out: testdataUser0, + }, + { + comment: "who likes ants, array re-written", + options: []FlattenOpts{WithQuery([]string{"users", "#name", "favorites", "ants"})}, + out: []Row{ + Row{Path: []string{"users", "Alex Aardvark", "favorites", "0"}, Value: "ants"}, + }, + }, + { + comment: "rewritten and filtered", + options: []FlattenOpts{WithQuery([]string{"users", "#name=>Al*", "id"})}, + out: []Row{ + Row{Path: []string{"users", "Alex Aardvark", "id"}, Value: "1"}, + }, + }, + { + comment: "bad key name", + options: []FlattenOpts{WithQuery([]string{"users", "#nokey"})}, + out: []Row{}, + }, + { + comment: "rewrite array to map", + options: []FlattenOpts{WithQuery([]string{"users", "#name", "id"})}, + out: []Row{ + Row{Path: []string{"users", "Alex Aardvark", "id"}, Value: "1"}, + Row{Path: []string{"users", "Bailey Bobcat", "id"}, Value: "2"}, + Row{Path: []string{"users", "Cam Chipmunk", "id"}, Value: "3"}, + }, + }, + } + + for _, tt := range tests { + actual, err := Flatten(dataIn, tt.options...) + testFlattenCase(t, tt, actual, err) + } +} + +func TestFlatten_ArrayMaps(t *testing.T) { + t.Parallel() + + var tests = []flattenTestCase{ + { + in: `{"data": [{"v":1,"id":"a"},{"v":2,"id":"b"},{"v":3,"id":"c"}]}`, + out: []Row{ + Row{Path: []string{"data", "0", "id"}, Value: "a"}, + Row{Path: []string{"data", "0", "v"}, Value: "1"}, + + Row{Path: []string{"data", "1", "id"}, Value: "b"}, + Row{Path: []string{"data", "1", "v"}, Value: "2"}, + + Row{Path: []string{"data", "2", "id"}, Value: "c"}, + Row{Path: []string{"data", "2", "v"}, Value: "3"}, + }, + comment: "nested array as array", + }, + { + in: `{"data": [{"v":1,"id":"a"},{"v":2,"id":"b"},{"v":3,"id":"c"}]}`, + out: []Row{ + Row{Path: []string{"data", "a", "id"}, Value: "a"}, + Row{Path: []string{"data", "a", "v"}, Value: "1"}, + + Row{Path: []string{"data", "b", "id"}, Value: "b"}, + Row{Path: []string{"data", "b", "v"}, Value: "2"}, + + Row{Path: []string{"data", "c", "id"}, Value: "c"}, + Row{Path: []string{"data", "c", "v"}, Value: "3"}, + }, + options: []FlattenOpts{WithQuery([]string{"data", "#id"})}, + comment: "nested array as map", + }, + } + + for _, tt := range tests { + actual, err := Json([]byte(tt.in), tt.options...) 
+ testFlattenCase(t, tt, actual, err) + } + +} + +func TestFlatten(t *testing.T) { + t.Parallel() + + var tests = []flattenTestCase{ + { + in: "a", + err: true, + }, + { + in: `["a", null]`, + out: []Row{ + Row{Path: []string{"0"}, Value: "a"}, + }, + comment: "skip null", + }, + + { + in: `["a", "b", null]`, + out: []Row{ + Row{Path: []string{"0"}, Value: "a"}, + Row{Path: []string{"1"}, Value: "b"}, + Row{Path: []string{"2"}, Value: ""}, + }, + options: []FlattenOpts{IncludeNulls()}, + comment: "includes null", + }, + + { + in: `["1"]`, + out: []Row{ + Row{Path: []string{"0"}, Value: "1"}, + }, + }, + { + in: `["a", true, false, "1", 2, 3.3]`, + out: []Row{ + Row{Path: []string{"0"}, Value: "a"}, + Row{Path: []string{"1"}, Value: "true"}, + Row{Path: []string{"2"}, Value: "false"}, + Row{Path: []string{"3"}, Value: "1"}, + Row{Path: []string{"4"}, Value: "2"}, + Row{Path: []string{"5"}, Value: "3.3"}, + }, + comment: "mixed types", + }, + { + in: `{"a": 1, "b": "2.2", "c": [1,2,3]}`, + out: []Row{ + Row{Path: []string{"a"}, Value: "1"}, + Row{Path: []string{"b"}, Value: "2.2"}, + Row{Path: []string{"c", "0"}, Value: "1"}, + Row{Path: []string{"c", "1"}, Value: "2"}, + Row{Path: []string{"c", "2"}, Value: "3"}, + }, + comment: "nested types", + }, + } + + for _, tt := range tests { + actual, err := Json([]byte(tt.in), tt.options...) + testFlattenCase(t, tt, actual, err) + } +} + +// testFlattenCase runs tests for a single test case. Normally this +// would be in a for loop, instead it's abstracted here to make it +// simpler to split up a giant array of test cases. +func testFlattenCase(t *testing.T, tt flattenTestCase, actual []Row, actualErr error) { + if tt.err { + require.Error(t, actualErr, "test %s %s", tt.in, tt.comment) + return + } + + require.NoError(t, actualErr, "test %s %s", tt.in, tt.comment) + + // Despite being an array. data is returned + // unordered. This greatly complicates our testing. We + // can either sort it, or use an unordered comparison + // operator. The `require.ElementsMatch` produces much + // harder to read diffs, so instead we'll sort things. + sort.SliceStable(tt.out, func(i, j int) bool { return tt.out[i].StringPath("/") < tt.out[j].StringPath("/") }) + sort.SliceStable(actual, func(i, j int) bool { return actual[i].StringPath("/") < actual[j].StringPath("/") }) + require.EqualValues(t, tt.out, actual, "test %s %s", tt.in, tt.comment) +} diff --git a/pkg/dataflatten/json.go b/pkg/dataflatten/json.go new file mode 100644 index 000000000..65a218e51 --- /dev/null +++ b/pkg/dataflatten/json.go @@ -0,0 +1,26 @@ +package dataflatten + +import ( + "encoding/json" + "io/ioutil" + + "github.com/pkg/errors" +) + +func JsonFile(file string, opts ...FlattenOpts) ([]Row, error) { + rawdata, err := ioutil.ReadFile(file) + if err != nil { + return nil, err + } + return Json(rawdata, opts...) +} + +func Json(rawdata []byte, opts ...FlattenOpts) ([]Row, error) { + var data interface{} + + if err := json.Unmarshal(rawdata, &data); err != nil { + return nil, errors.Wrap(err, "unmarshalling json") + } + + return Flatten(data, opts...) 
+} diff --git a/pkg/dataflatten/plist.go b/pkg/dataflatten/plist.go new file mode 100644 index 000000000..8c908a4c9 --- /dev/null +++ b/pkg/dataflatten/plist.go @@ -0,0 +1,26 @@ +package dataflatten + +import ( + "io/ioutil" + + "github.com/groob/plist" + "github.com/pkg/errors" +) + +func PlistFile(file string, opts ...FlattenOpts) ([]Row, error) { + rawdata, err := ioutil.ReadFile(file) + if err != nil { + return nil, err + } + return Plist(rawdata, opts...) +} + +func Plist(rawdata []byte, opts ...FlattenOpts) ([]Row, error) { + var data interface{} + + if err := plist.Unmarshal(rawdata, &data); err != nil { + return nil, errors.Wrap(err, "unmarshalling plist") + } + + return Flatten(data, opts...) +} diff --git a/pkg/dataflatten/plist_test.go b/pkg/dataflatten/plist_test.go new file mode 100644 index 000000000..fdd4704ca --- /dev/null +++ b/pkg/dataflatten/plist_test.go @@ -0,0 +1,31 @@ +package dataflatten + +import ( + "testing" +) + +// TestPlist is testing a very simple plist case. Most of the more complex testing is in the spec files. +func TestPlist(t *testing.T) { + t.Parallel() + + var tests = []flattenTestCase{ + { + in: ` + +ab`, + out: []Row{ + Row{Path: []string{"0"}, Value: "a"}, + Row{Path: []string{"1"}, Value: "b"}, + }, + }, + { + in: ``, + err: true, + }, + } + + for _, tt := range tests { + actual, err := Plist([]byte(tt.in)) + testFlattenCase(t, tt, actual, err) + } +} diff --git a/pkg/dataflatten/row.go b/pkg/dataflatten/row.go new file mode 100644 index 000000000..8fdaf47b6 --- /dev/null +++ b/pkg/dataflatten/row.go @@ -0,0 +1,27 @@ +package dataflatten + +import "strings" + +// Row is the record type we return. +type Row struct { + Path []string + Value string +} + +func (r Row) StringPath(sep string) string { + return strings.Join(r.Path, sep) +} + +func (r Row) ParentKey(sep string) (string, string) { + switch len(r.Path) { + case 0: + return "", "" + case 1: + return "", r.Path[0] + } + + parent := strings.Join(r.Path[:len(r.Path)-1], sep) + key := r.Path[len(r.Path)-1] + + return parent, key +} diff --git a/pkg/dataflatten/row_test.go b/pkg/dataflatten/row_test.go new file mode 100644 index 000000000..72d8e190d --- /dev/null +++ b/pkg/dataflatten/row_test.go @@ -0,0 +1,46 @@ +package dataflatten + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRowParentFunctions(t *testing.T) { + t.Parallel() + + var tests = []struct { + in Row + parent string + key string + }{ + { + in: Row{}, + }, + + { + in: Row{Path: []string{}}, + }, + { + in: Row{Path: []string{"a"}}, + parent: "", + key: "a", + }, + { + in: Row{Path: []string{"a", "b"}}, + parent: "a", + key: "b", + }, + { + in: Row{Path: []string{"a", "b", "c"}}, + parent: "a/b", + key: "c", + }, + } + + for _, tt := range tests { + parent, key := tt.in.ParentKey("/") + require.Equal(t, tt.parent, parent) + require.Equal(t, tt.key, key) + } +} diff --git a/pkg/dataflatten/testdata/animals.json b/pkg/dataflatten/testdata/animals.json new file mode 100644 index 000000000..d0455552b --- /dev/null +++ b/pkg/dataflatten/testdata/animals.json @@ -0,0 +1,34 @@ +{ + "metadata": { + "testing": true, + "version": "1.0.1" + }, + "system": "users demo", + "users": [ + { + "favorites": [ + "ants" + ], + "uuid": "abc123", + "name": "Alex Aardvark", + "id": 1 + }, + { + "favorites": [ + "mice", + "birds" + ], + "uuid": "def456", + "name": "Bailey Bobcat", + "id": 2 + }, + { + "favorites": [ + "seeds" + ], + "uuid": "ghi789", + "name": "Cam Chipmunk", + "id": 3 + } + ] +} diff --git 
a/pkg/dataflatten/testdata/animals.plist b/pkg/dataflatten/testdata/animals.plist new file mode 100644 index 000000000..a73113417 Binary files /dev/null and b/pkg/dataflatten/testdata/animals.plist differ diff --git a/pkg/dataflatten/testdata/animals.xml b/pkg/dataflatten/testdata/animals.xml new file mode 100644 index 000000000..94248befd --- /dev/null +++ b/pkg/dataflatten/testdata/animals.xml @@ -0,0 +1,55 @@ + + + + + metadata + + testing + + version + 1.0.1 + + system + users demo + users + + + favorites + + ants + + id + 1 + name + Alex Aardvark + uuid + abc123 + + + favorites + + mice + birds + + id + 2 + name + Bailey Bobcat + uuid + def456 + + + favorites + + seeds + + id + 3 + name + Cam Chipmunk + uuid + ghi789 + + + + diff --git a/pkg/osquery/table/platform_tables_darwin.go b/pkg/osquery/table/platform_tables_darwin.go index c56026afe..4279d68e8 100644 --- a/pkg/osquery/table/platform_tables_darwin.go +++ b/pkg/osquery/table/platform_tables_darwin.go @@ -6,6 +6,7 @@ import ( "github.com/go-kit/kit/log" "github.com/knightsc/system_policy/osquery/table/kextpolicy" "github.com/knightsc/system_policy/osquery/table/legacyexec" + "github.com/kolide/launcher/pkg/osquery/tables/plist" osquery "github.com/kolide/osquery-go" "github.com/kolide/osquery-go/plugin/table" _ "github.com/mattn/go-sqlite3" @@ -30,6 +31,7 @@ func platformTables(client *osquery.ExtensionManagerClient, logger log.Logger) [ UserAvatar(logger), kextpolicy.TablePlugin(), legacyexec.TablePlugin(), + plist.TablePlugin(client, logger), munki.ManagedInstalls(client, logger), munki.MunkiReport(client, logger), } diff --git a/pkg/osquery/tables/plist/plist.go b/pkg/osquery/tables/plist/plist.go new file mode 100644 index 000000000..7e1dfd733 --- /dev/null +++ b/pkg/osquery/tables/plist/plist.go @@ -0,0 +1,104 @@ +package plist + +import ( + "context" + "fmt" + "strings" + + "github.com/go-kit/kit/log" + "github.com/kolide/launcher/pkg/dataflatten" + "github.com/kolide/osquery-go" + "github.com/kolide/osquery-go/plugin/table" + "github.com/pkg/errors" +) + +type Table struct { + client *osquery.ExtensionManagerClient + logger log.Logger +} + +func TablePlugin(client *osquery.ExtensionManagerClient, logger log.Logger) *table.Plugin { + + columns := []table.ColumnDefinition{ + table.TextColumn("path"), + table.TextColumn("fullkey"), + table.TextColumn("parent"), + table.TextColumn("key"), + table.TextColumn("value"), + table.TextColumn("query"), + } + + t := &Table{ + client: client, + logger: logger, + } + + return table.NewPlugin("kolide_plist", columns, t.generate) +} + +func (t *Table) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { + flattenOpts := []dataflatten.FlattenOpts{} + + if t.logger != nil { + flattenOpts = append(flattenOpts, dataflatten.WithLogger(t.logger)) + } + + var results []map[string]string + + pathQ, ok := queryContext.Constraints["path"] + if !ok || len(pathQ.Constraints) == 0 { + return results, errors.New("The kolide_plist table requires that you specify a single constraint for path") + } + for _, pathConstraint := range pathQ.Constraints { + + filePath := pathConstraint.Expression + + if q, ok := queryContext.Constraints["query"]; ok && len(q.Constraints) != 0 { + + for _, constraint := range q.Constraints { + plistQuery := constraint.Expression + + data, err := dataflatten.PlistFile(filePath, + append(flattenOpts, dataflatten.WithQuery(strings.Split(plistQuery, "/")))...) 
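+				// The query constraint is split on "/" and handed to the
+				// flattener; for example, a query like
+				// "Interfaces/#BSD Name/SCNetworkInterfaceType" (see the
+				// tests below) re-keys the Interfaces array by BSD name
+				// and selects each interface's SCNetworkInterfaceType.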
+				if err != nil {
+					fmt.Println("parse failure")
+					return results, errors.Wrap(err, "parsing data")
+				}
+
+				for _, row := range data {
+					p, k := row.ParentKey("/")
+
+					res := map[string]string{
+						"path":    filePath,
+						"fullkey": row.StringPath("/"),
+						"parent":  p,
+						"key":     k,
+						"value":   row.Value,
+						"query":   plistQuery,
+					}
+					results = append(results, res)
+				}
+			}
+		} else {
+			data, err := dataflatten.PlistFile(filePath, flattenOpts...)
+			if err != nil {
+				return results, errors.Wrap(err, "parsing data")
+			}
+
+			for _, row := range data {
+				p, k := row.ParentKey("/")
+
+				res := map[string]string{
+					"path":    filePath,
+					"fullkey": row.StringPath("/"),
+					"parent":  p,
+					"key":     k,
+					"value":   row.Value,
+				}
+				results = append(results, res)
+			}
+		}
+	}
+
+	return results, nil
+}
diff --git a/pkg/osquery/tables/plist/plist_test.go b/pkg/osquery/tables/plist/plist_test.go
new file mode 100644
index 000000000..f616af6ad
--- /dev/null
+++ b/pkg/osquery/tables/plist/plist_test.go
@@ -0,0 +1,88 @@
+package plist
+
+import (
+	"context"
+	"path/filepath"
+	"testing"
+
+	"github.com/kolide/osquery-go/plugin/table"
+	"github.com/stretchr/testify/require"
+)
+
+func TestPlist(t *testing.T) {
+	t.Parallel()
+	plistTable := Table{}
+
+	var tests = []struct {
+		paths    []string
+		queries  []string
+		expected []map[string]string
+		err      bool
+	}{
+		{
+			err: true,
+		},
+		{
+			paths:   []string{filepath.Join("testdata", "NetworkInterfaces.plist")},
+			queries: []string{"Interfaces/#BSD Name/SCNetworkInterfaceType/FireWire"},
+			expected: []map[string]string{
+				map[string]string{
+					"fullkey": "Interfaces/fw0/SCNetworkInterfaceType",
+					"key":     "SCNetworkInterfaceType",
+					"parent":  "Interfaces/fw0",
+					"value":   "FireWire",
+				}},
+		},
+		{
+			paths: []string{filepath.Join("testdata", "com.apple.launchservices.secure.plist")},
+			queries: []string{
+				"LSHandlers/LSHandlerURLScheme=>htt*/LSHandlerRole*",
+				"LSHandlers/LSHandlerContentType=>*html/LSHandlerRole*",
+			},
+			expected: []map[string]string{
+				map[string]string{"fullkey": "LSHandlers/5/LSHandlerRoleAll", "key": "LSHandlerRoleAll", "parent": "LSHandlers/5", "value": "com.choosyosx.choosy"},
+				map[string]string{"fullkey": "LSHandlers/6/LSHandlerRoleAll", "key": "LSHandlerRoleAll", "parent": "LSHandlers/6", "value": "com.choosyosx.choosy"},
+				map[string]string{"fullkey": "LSHandlers/7/LSHandlerRoleAll", "key": "LSHandlerRoleAll", "parent": "LSHandlers/7", "value": "com.choosyosx.choosy"},
+				map[string]string{"fullkey": "LSHandlers/8/LSHandlerRoleAll", "key": "LSHandlerRoleAll", "parent": "LSHandlers/8", "value": "com.google.chrome"},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		rows, err := plistTable.generate(context.TODO(), mockQueryContext(tt.paths, tt.queries))
+		if tt.err {
+			require.Error(t, err)
+			continue
+		}
+
+		// Delete the path and query keys, so we don't need to enumerate them in the test case.
+		for _, row := range rows {
+			delete(row, "path")
+			delete(row, "query")
+		}
+
+		require.NoError(t, err)
+		require.EqualValues(t, tt.expected, rows)
+	}
+
+}
+
+func mockQueryContext(paths []string, queries []string) table.QueryContext {
+	pathConstraints := make([]table.Constraint, len(paths))
+	for i, path := range paths {
+		pathConstraints[i].Expression = path
+	}
+	queryConstraints := make([]table.Constraint, len(queries))
+	for i, q := range queries {
+		queryConstraints[i].Expression = q
+	}
+
+	queryContext := table.QueryContext{
+		Constraints: map[string]table.ConstraintList{
+			"path":  table.ConstraintList{Constraints: pathConstraints},
+			"query": 
table.ConstraintList{Constraints: queryConstraints}, + }, + } + + return queryContext +} diff --git a/pkg/osquery/tables/plist/testdata/NetworkInterfaces.plist b/pkg/osquery/tables/plist/testdata/NetworkInterfaces.plist new file mode 100644 index 000000000..92c92d3f0 --- /dev/null +++ b/pkg/osquery/tables/plist/testdata/NetworkInterfaces.plist @@ -0,0 +1,277 @@ + + + + + Interfaces + + + Active + + BSD Name + en0 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 0 + SCNetworkInterfaceInfo + + UserDefinedName + Wi-Fi + + SCNetworkInterfaceType + IEEE80211 + + + Active + + BSD Name + en1 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 1 + SCNetworkInterfaceInfo + + UserDefinedName + Thunderbolt 1 + + SCNetworkInterfaceType + Ethernet + + + Active + + BSD Name + en2 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 2 + SCNetworkInterfaceInfo + + UserDefinedName + Thunderbolt 2 + + SCNetworkInterfaceType + Ethernet + + + Active + + BSD Name + en3 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 3 + SCNetworkInterfaceInfo + + UserDefinedName + Thunderbolt 3 + + SCNetworkInterfaceType + Ethernet + + + Active + + BSD Name + en4 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 4 + SCNetworkInterfaceInfo + + UserDefinedName + Thunderbolt 4 + + SCNetworkInterfaceType + Ethernet + + + Active + + BSD Name + en5 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 5 + SCNetworkInterfaceInfo + + USB Product Name + iBridge + UserDefinedName + iBridge + idProduct + 34304 + idVendor + 1452 + + SCNetworkInterfaceType + Ethernet + + + BSD Name + en6 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 6 + SCNetworkInterfaceInfo + + UserDefinedName + Bluetooth PAN + + SCNetworkInterfaceType + Ethernet + + + BSD Name + en7 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 7 + SCNetworkInterfaceInfo + + USB Product Name + Belkin USB_C LAN + UserDefinedName + Belkin USB-C LAN + idProduct + 33107 + idVendor + 3034 + + SCNetworkInterfaceType + Ethernet + + + Active + + BSD Name + en8 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 8 + SCNetworkInterfaceInfo + + USB Product Name + USB 10_100_1000 LAN + UserDefinedName + USB 10/100/1000 LAN + idProduct + 33107 + idVendor + 3034 + + SCNetworkInterfaceType + Ethernet + + + BSD Name + en9 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 9 + SCNetworkInterfaceInfo + + USB Product Name + iPhone + UserDefinedName + iPhone + idProduct + 4776 + idVendor + 1452 + + SCNetworkInterfaceType + Ethernet + + + BSD Name + en10 + IOBuiltin + + IOInterfaceNamePrefix + en + IOInterfaceType + 6 + IOInterfaceUnit + 10 + SCNetworkInterfaceInfo + + UserDefinedName + Display Ethernet + + SCNetworkInterfaceType + Ethernet + + + BSD Name + fw0 + IOBuiltin + + IOInterfaceNamePrefix + fw + IOInterfaceType + 144 + IOInterfaceUnit + 0 + SCNetworkInterfaceInfo + + UserDefinedName + Display FireWire + + SCNetworkInterfaceType + FireWire + + + Model + MacBookPro14,3 + + diff --git a/pkg/osquery/tables/plist/testdata/com.apple.launchservices.secure.plist b/pkg/osquery/tables/plist/testdata/com.apple.launchservices.secure.plist new file mode 100644 index 000000000..98d43d030 --- /dev/null +++ 
b/pkg/osquery/tables/plist/testdata/com.apple.launchservices.secure.plist @@ -0,0 +1,295 @@ + + + + + LSHandlers + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + xcbot + + + LSHandlerContentType + public.svg-image + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + org.mozilla.firefox + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.logmein.mac.gotoopener + LSHandlerURLScheme + citrixonline488 + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + keybase.electron + LSHandlerURLScheme + web+stellar + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.facetime + LSHandlerURLScheme + facetime + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.choosyosx.choosy + LSHandlerURLScheme + http + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.choosyosx.choosy + LSHandlerURLScheme + https + + + LSHandlerContentType + public.html + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.choosyosx.choosy + + + LSHandlerContentType + public.xhtml + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.google.chrome + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + org.whispersystems.signal-desktop + LSHandlerURLScheme + sgnl + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.choosyosx.choosy + LSHandlerURLScheme + x-choosy + + + LSHandlerContentType + com.apple.installer-package + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.installer + + + LSHandlerContentType + com.apple.installer-meta-package + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.installer + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.bluejeans.nw.app + LSHandlerURLScheme + bjn + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + xcpref + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + xcdevice + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + xcdoc + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + apple-reference-documentation + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.logmein.mac.gotoopener + LSHandlerURLScheme + gotoopener + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + xcode + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.logmein.mac.gotoopener + LSHandlerURLScheme + citrixonline + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.google.chrome + LSHandlerURLScheme + webcal + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + keybase.electron + LSHandlerURLScheme + keybase + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.apple.dt.xcode + LSHandlerURLScheme + x-source-tag + + + LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + com.logmein.mac.gotoopener + LSHandlerURLScheme + gotoopener488 + + 
+ LSHandlerPreferredVersions + + LSHandlerRoleAll + - + + LSHandlerRoleAll + us.zoom.xos + LSHandlerURLScheme + zoomphonecall + + + +