Skip to content

Commit

Permalink
Add a default query size if limit operator is missing (ENT-34)
Browse files Browse the repository at this point in the history
Completes [ENT-34](https://linear.app/hasura/issue/ENT-34/add-size=big-int-in-all-queries)

- Basic logic for adding a default query size: [3dcd393](3dcd393)
- Tests: [c098b0d](c098b0d)
- Add env var to make the default size configurable by used: [c360787](c360787)
  • Loading branch information
m-Bilal authored Oct 18, 2024
2 parents 4993fed + 0122ae6 commit 0365cc9
Show file tree
Hide file tree
Showing 5 changed files with 312 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

- Add a default query size of 10,000 to all queries ([#31](https://github.com/hasura/ndc-elasticsearch/pull/31))

## [1.0.3]

### Changed
Expand Down
3 changes: 3 additions & 0 deletions ci/templates/connector-metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ supportedEnvironmentVariables:
description: The path to the Certificate Authority (CA) certificate for verifying the Elasticsearch server's SSL certificate.
- name: ELASTICSEARCH_INDEX_PATTERN
description: The pattern for matching Elasticsearch indices, potentially including wildcards, used by the connector.
- name: ELASTICSEARCH_DEFAULT_RESULT_SIZE
description: The default query size when no limit is applied. Defaults to 10,000.
defaultValue: "10000"
commands:
update: hasura-elasticsearch update
cliPlugin:
Expand Down
6 changes: 6 additions & 0 deletions connector/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"

"github.com/hasura/ndc-elasticsearch/elasticsearch"
"github.com/hasura/ndc-elasticsearch/types"
"github.com/hasura/ndc-sdk-go/connector"
"github.com/hasura/ndc-sdk-go/schema"
Expand Down Expand Up @@ -114,6 +115,9 @@ func executeQuery(ctx context.Context, configuration *types.Configuration, state

// prepareElasticsearchQuery prepares an Elasticsearch query based on the provided query request.
func prepareElasticsearchQuery(ctx context.Context, request *schema.QueryRequest, state *types.State, index string) (map[string]interface{}, error) {
// Set the user configured default result size in ctx
ctx = context.WithValue(ctx, elasticsearch.DEFAULT_RESULT_SIZE_KEY, elasticsearch.GetDefaultResultSize())

query := map[string]interface{}{
"_source": map[string]interface{}{
"excludes": []string{"*"},
Expand All @@ -139,6 +143,8 @@ func prepareElasticsearchQuery(ctx context.Context, request *schema.QueryRequest
// Set the limit
if request.Query.Limit != nil {
query["size"] = *request.Query.Limit
} else {
query["size"] = ctx.Value(elasticsearch.DEFAULT_RESULT_SIZE_KEY).(int)
}

// Set the offset
Expand Down
281 changes: 281 additions & 0 deletions connector/query_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
package connector

import (
"context"
"encoding/json"
"fmt"
"sort"
"testing"

"github.com/hasura/ndc-elasticsearch/types"
"github.com/hasura/ndc-sdk-go/schema"
"github.com/stretchr/testify/assert"
)

func TestPrepareElasticsearchQuery(t *testing.T) {
tests := []struct {
name string
ndcRequest string
expectedQuery string
}{
{
name: "Simple_Query_001",
ndcRequest: ndcRequest_001,
expectedQuery: expectedQuery_001,
},
{
name: "Simple_Query_With_Limit_002",
ndcRequest: ndcRequest_002,
expectedQuery: expectedQuery_002,
},
{
name: "Nested_Query_003",
ndcRequest: ndcRequest_003,
expectedQuery: expectedQuery_003,
},
{
name: "Nested_Query_With_Limit_004",
ndcRequest: ndcRequest_004,
expectedQuery: expectedQuery_004,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
state := &types.State{}

ctx = context.WithValue(ctx, "postProcessor", &types.PostProcessor{})

var request schema.QueryRequest
err := json.Unmarshal([]byte(tt.ndcRequest), &request)
assert.NoError(t, err)

query, err := prepareElasticsearchQuery(ctx, &request, state, request.Collection)
assert.NoError(t, err)

// this correction is added because sometimes the order of _source array would change which resulted in the tests being flaky
query, err = sortSourceArray(query)
assert.NoError(t, err)

queryJson, err := json.MarshalIndent(query, "", " ")
assert.NoError(t, err)

assert.JSONEq(t, tt.expectedQuery, string(queryJson))
})
}
}

// A helper function to sort the _source array in the query
//
// Required because the order of the _source array in the query is not fixed, and the tests were flaky due to this
func sortSourceArray(query map[string]interface{}) (map[string]interface{}, error) {
source, ok := query["_source"].([]string)
if !ok {
return nil, fmt.Errorf("expected _source to be of type []string, got %T", query["_source"])
}

sort.Strings(source)
query["_source"] = source
return query, nil
}

const ndcRequest_001 = `{
"arguments": {},
"collection": "customers",
"collection_relationships": {},
"query": {
"fields": {
"id": {
"column": "_id",
"type": "column"
},
"name": {
"column": "name",
"type": "column"
}
}
}
}`

const expectedQuery_001 = `{
"_source": [
"_id",
"name"
],
"size": 10000
}`

const ndcRequest_002 = `{
"arguments": {},
"collection": "customers",
"collection_relationships": {},
"query": {
"fields": {
"id": {
"column": "_id",
"type": "column"
},
"name": {
"column": "name",
"type": "column"
}
},
"limit": 500
}
}`

const expectedQuery_002 = `{
"_source": [
"_id",
"name"
],
"size": 500
}`

const ndcRequest_003 = `{
"arguments": {},
"collection": "transactions",
"collection_relationships": {},
"query": {
"fields": {
"customerId": {
"column": "customer_id",
"type": "column"
},
"id": {
"column": "_id",
"type": "column"
},
"timestamp": {
"column": "timestamp",
"type": "column"
},
"transactionDetails": {
"column": "transaction_details",
"fields": {
"fields": {
"fields": {
"currency": {
"column": "currency",
"type": "column"
},
"itemId": {
"column": "item_id",
"type": "column"
},
"itemName": {
"column": "item_name",
"type": "column"
},
"price": {
"column": "price",
"type": "column"
},
"quantity": {
"column": "quantity",
"type": "column"
}
},
"type": "object"
},
"type": "array"
},
"type": "column"
},
"transactionId": {
"column": "transaction_id",
"type": "column"
}
}
}
}`

const expectedQuery_003 = `{
"_source": [
"_id",
"customer_id",
"timestamp",
"transaction_details.currency",
"transaction_details.item_id",
"transaction_details.item_name",
"transaction_details.price",
"transaction_details.quantity",
"transaction_id"
],
"size": 10000
}`

const ndcRequest_004 = `{
"arguments": {},
"collection": "transactions",
"collection_relationships": {},
"query": {
"fields": {
"customerId": {
"column": "customer_id",
"type": "column"
},
"id": {
"column": "_id",
"type": "column"
},
"timestamp": {
"column": "timestamp",
"type": "column"
},
"transactionDetails": {
"column": "transaction_details",
"fields": {
"fields": {
"fields": {
"currency": {
"column": "currency",
"type": "column"
},
"itemId": {
"column": "item_id",
"type": "column"
},
"itemName": {
"column": "item_name",
"type": "column"
},
"price": {
"column": "price",
"type": "column"
},
"quantity": {
"column": "quantity",
"type": "column"
}
},
"type": "object"
},
"type": "array"
},
"type": "column"
},
"transactionId": {
"column": "transaction_id",
"type": "column"
}
},
"limit": 20
}
}`

const expectedQuery_004 = `{
"_source": [
"_id",
"customer_id",
"timestamp",
"transaction_details.currency",
"transaction_details.item_id",
"transaction_details.item_name",
"transaction_details.price",
"transaction_details.quantity",
"transaction_id"
],
"size": 20
}`
18 changes: 18 additions & 0 deletions elasticsearch/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ import (
"errors"
"fmt"
"os"
"strconv"
"strings"

"github.com/elastic/go-elasticsearch/v8"
)

const esMaxResultSize = 10000
const DEFAULT_RESULT_SIZE_KEY = "esDefaultResultSize"

// getConfigFromEnv retrieves elastic search configuration from environment variables.
func getConfigFromEnv() (*elasticsearch.Config, error) {
esConfig := elasticsearch.Config{}
Expand Down Expand Up @@ -49,3 +53,17 @@ func getConfigFromEnv() (*elasticsearch.Config, error) {

return &esConfig, nil
}

func GetDefaultResultSize() int {
defaultResultSize := os.Getenv("ELASTICSEARCH_DEFAULT_RESULT_SIZE")
if defaultResultSize == "" {
return esMaxResultSize
}

size, err := strconv.Atoi(defaultResultSize)
if err != nil {
return esMaxResultSize
}

return size
}

0 comments on commit 0365cc9

Please sign in to comment.