From 59418df23e3c05cea6f2845de86879efbd126f23 Mon Sep 17 00:00:00 2001 From: Mike Goldsmith Date: Mon, 17 Jun 2024 11:45:16 +0100 Subject: [PATCH] feat: Allow URL encoded dataset in libhoney endpoint paths (#1199) ## Which problem is this PR solving? Allow URL encoded dataset names in /event and /batch endpoint. For example, to send data to a dataset with the name `foo/bar`, you can now send data like this: ``` curl -i -H "x-honeycomb-team: " -X POST 'http://localhost:8080/1/events/foo%2Fbar' -d '{"name":"mike"}' ``` The dataset name `foo%2Fbar` is URL decoded into `foo/bar` and used as the dataset name. When forwarding events to the Honeycomb API, the dataset name is URL encoded again to ensure it's received into HNY correctly. ## Short description of the changes - Update mux router to allow encoded paths - Add utility function to get the dataset from mux vars and url escape the value - Add tests for utility function - Simplify batch event processing by removing `batchedEventToEvent`, adding `getSampleRate` batchEvent member func and re-using apiHost and dataset --- route/route.go | 81 +++++++++++++++--------------- route/route_test.go | 117 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 39 deletions(-) diff --git a/route/route.go b/route/route.go index 003716b5df..f7166fd29b 100644 --- a/route/route.go +++ b/route/route.go @@ -168,6 +168,7 @@ func (r *Router) LnS(incomingOrPeer string) { // require an auth header for events and batches authedMuxxer := muxxer.PathPrefix("/1/").Methods("POST").Subrouter() + authedMuxxer.UseEncodedPath() authedMuxxer.Use(r.apiKeyChecker) // handle events and batches @@ -388,8 +389,10 @@ func (r *Router) requestToEvent(req *http.Request, reqBod []byte) (*types.Event, sampleRate = 1 } eventTime := getEventTime(req.Header.Get(types.TimestampHeader)) - vars := mux.Vars(req) - dataset := vars["datasetName"] + dataset, err := getDatasetFromRequest(req) + if err != nil { + return nil, err + } apiHost, err := r.Config.GetHoneycombAPI() if err != nil { @@ -447,6 +450,15 @@ func (r *Router) batch(w http.ResponseWriter, req *http.Request) { return } + dataset, err := getDatasetFromRequest(req) + if err != nil { + r.handlerReturnWithError(w, ErrReqToEvent, err) + } + apiHost, err := r.Config.GetHoneycombAPI() + if err != nil { + r.handlerReturnWithError(w, ErrReqToEvent, err) + } + apiKey := req.Header.Get(types.APIKeyHeader) if apiKey == "" { apiKey = req.Header.Get(types.APIKeyHeaderShort) @@ -460,17 +472,15 @@ func (r *Router) batch(w http.ResponseWriter, req *http.Request) { batchedResponses := make([]*BatchResponse, 0, len(batchedEvents)) for _, bev := range batchedEvents { - ev, err := r.batchedEventToEvent(req, bev, apiKey, environment) - if err != nil { - batchedResponses = append( - batchedResponses, - &BatchResponse{ - Status: http.StatusBadRequest, - Error: fmt.Sprintf("failed to convert to event: %s", err.Error()), - }, - ) - debugLog.WithField("error", err).Logf("event from batch failed to process event") - continue + ev := &types.Event{ + Context: req.Context(), + APIHost: apiHost, + APIKey: apiKey, + Dataset: dataset, + Environment: environment, + SampleRate: bev.getSampleRate(), + Timestamp: bev.getEventTime(), + Data: bev.Data, } err = r.processEvent(ev, reqID) @@ -670,32 +680,6 @@ func (r *Router) getMaybeCompressedBody(req *http.Request) (io.Reader, error) { return reader, nil } -func (r *Router) batchedEventToEvent(req *http.Request, bev batchedEvent, apiKey string, environment string) (*types.Event, error) { - sampleRate := bev.SampleRate - if sampleRate == 0 { - sampleRate = 1 - } - eventTime := bev.getEventTime() - // TODO move the following 3 lines outside of this loop; they could be done - // once for the entire batch instead of in every event. - vars := mux.Vars(req) - dataset := vars["datasetName"] - apiHost, err := r.Config.GetHoneycombAPI() - if err != nil { - return nil, err - } - return &types.Event{ - Context: req.Context(), - APIHost: apiHost, - APIKey: apiKey, - Dataset: dataset, - Environment: environment, - SampleRate: uint(sampleRate), - Timestamp: eventTime, - Data: bev.Data, - }, nil -} - type batchedEvent struct { Timestamp string `json:"time"` MsgPackTimestamp *time.Time `msgpack:"time,omitempty"` @@ -711,6 +695,13 @@ func (b *batchedEvent) getEventTime() time.Time { return getEventTime(b.Timestamp) } +func (b *batchedEvent) getSampleRate() uint { + if b.SampleRate == 0 { + return defaultSampleRate + } + return uint(b.SampleRate) +} + // getEventTime tries to guess the time format in our time header! // Allowable options are // * RFC3339Nano @@ -969,3 +960,15 @@ func (r *Router) AddOTLPMuxxer(muxxer *mux.Router) { otlpMuxxer.HandleFunc("/logs", r.postOTLPLogs).Name("otlp_logs") otlpMuxxer.HandleFunc("/logs/", r.postOTLPLogs).Name("otlp_logs") } + +func getDatasetFromRequest(req *http.Request) (string, error) { + dataset := mux.Vars(req)["datasetName"] + if dataset == "" { + return "", fmt.Errorf("missing dataset name") + } + dataset, err := url.PathUnescape(dataset) + if err != nil { + return "", err + } + return dataset, nil +} diff --git a/route/route_test.go b/route/route_test.go index e66ff6c9ed..537a353158 100644 --- a/route/route_test.go +++ b/route/route_test.go @@ -9,6 +9,7 @@ import ( "io" "net/http" "net/http/httptest" + "net/url" "strings" "testing" "time" @@ -612,3 +613,119 @@ func TestGRPCHealthProbeWatch(t *testing.T) { sentMessage := mockServer.GetSentMessages()[0] assert.Equal(t, grpc_health_v1.HealthCheckResponse_SERVING, sentMessage.Status) } + +func TestGetDatasetFromRequest(t *testing.T) { + testCases := []struct { + name string + datasetName string + expectedDatasetName string + expectedError error + }{ + { + name: "empty dataset name", + datasetName: "", + expectedError: fmt.Errorf("missing dataset name"), + }, + { + name: "dataset name with invalid URL encoding", + datasetName: "foo%2", + expectedError: url.EscapeError("%2"), + }, + { + name: "normal dataset name", + datasetName: "foo", + expectedDatasetName: "foo", + }, + { + name: "dataset name with numbers", + datasetName: "foo123", + expectedDatasetName: "foo123", + }, + { + name: "dataset name with hyphen", + datasetName: "foo-bar", + expectedDatasetName: "foo-bar", + }, + { + name: "dataset name with underscore", + datasetName: "foo_bar", + expectedDatasetName: "foo_bar", + }, + { + name: "dataset name with tilde", + datasetName: "foo~bar", + expectedDatasetName: "foo~bar", + }, + { + name: "dataset name with period", + datasetName: "foo.bar", + expectedDatasetName: "foo.bar", + }, + { + name: "dataset name with URL encoded hyphen", + datasetName: "foo%2Dbar", + expectedDatasetName: "foo-bar", + }, + { + name: "dataset name with URL encoded underscore", + datasetName: "foo%5Fbar", + expectedDatasetName: "foo_bar", + }, + { + name: "dataset name with URL encoded tilde", + datasetName: "foo%7Ebar", + expectedDatasetName: "foo~bar", + }, + { + name: "dataset name with URL encoded period", + datasetName: "foo%2Ebar", + expectedDatasetName: "foo.bar", + }, + { + name: "dataset name with URL encoded forward slash", + datasetName: "foo%2Fbar", + expectedDatasetName: "foo/bar", + }, + { + name: "dataset name with URL encoded colon", + datasetName: "foo%3Abar", + expectedDatasetName: "foo:bar", + }, + { + name: "dataset name with URL encoded square brackets", + datasetName: "foo%5Bbar%5D", + expectedDatasetName: "foo[bar]", + }, + { + name: "dataset name with URL encoded parentheses", + datasetName: "foo%28bar%29", + expectedDatasetName: "foo(bar)", + }, + { + name: "dataset name with URL encoded curly braces", + datasetName: "foo%7Bbar%7D", + expectedDatasetName: "foo{bar}", + }, + { + name: "dataset name with URL encoded percent", + datasetName: "foo%25bar", + expectedDatasetName: "foo%bar", + }, + { + name: "dataset name with URL encoded ampersand", + datasetName: "foo%26bar", + expectedDatasetName: "foo&bar", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + req, _ := http.NewRequest("GET", "/1/events/dataset", nil) + req = mux.SetURLVars(req, map[string]string{"datasetName": tc.datasetName}) + + dataset, err := getDatasetFromRequest(req) + assert.Equal(t, tc.expectedError, err) + assert.Equal(t, tc.expectedDatasetName, dataset) + }) + } +}