From 3ee0216d7bb652a817153532cf0d43248cf81489 Mon Sep 17 00:00:00 2001 From: Sreeram Date: Fri, 5 Jan 2024 15:11:21 +0530 Subject: [PATCH 1/7] add: AWS s3 helper to download files from s3 --- aws/s3.go | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 aws/s3.go diff --git a/aws/s3.go b/aws/s3.go new file mode 100644 index 0000000..922b362 --- /dev/null +++ b/aws/s3.go @@ -0,0 +1,191 @@ +package aws + +import ( + "context" + "io" + "net/url" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/skit-ai/vcore/errors" + "github.com/skit-ai/vcore/log/slog" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3manager" +) + +const ( + // Regex for S3 URLs, VPCE interface endpoint + vpceUrlPattern = "^((.+)\\.)?" + // maybe a bucket name + "(bucket|accesspoint|control)\\.vpce-[-a-z0-9]+\\." + // VPC endpoint DNS name + "s3[.-]" + // S3 service name + "(([-a-z0-9]+)\\.)?" + // region name, optional for us-east-1 + "vpce\\." + + "(amazonaws\\.com|c2s\\.ic\\.gov|sc2s\\.sgov\\.gov)" + vpceUrlPatternBucketIdx = 2 + vpceUrlPatternRegionIdx = 5 + + // Regex for S3 URLs, public S3 endpoint + nonVpceUrlPattern = "^((.+)\\.)?" + // maybe a bucket name + "s3[.-](website[-.])?(accelerate\\.)?(dualstack[-.])?" + // S3 service name with optional features + "(([-a-z0-9]+)\\.)?" + // region name, optional for us-east-1 + "(amazonaws\\.com|c2s\\.ic\\.gov|sc2s\\.sgov\\.gov)" + nonVpceUrlPatternBucketIdx = 2 + nonVpceUrlPatternRegionIdx = 7 +) + +var ( + vpceUrlRegex = regexp.MustCompile(vpceUrlPattern) + nonVpceUrlRegex = regexp.MustCompile(nonVpceUrlPattern) +) + +// S3URL holds interesting pieces after parsing a s3 URL +type S3URL struct { + IsPathStyle bool + Bucket string + Key string + Region string +} + +// DownloadFile downloads a file from s3 based on the key and writes it into WriteAt. +func (u S3URL) DownloadFile(ctx context.Context, w io.WriterAt) error { + sess, err := session.NewSession(&aws.Config{ + Region: aws.String(u.Region), // Specify the region where the bucket is located + }) + if err != nil { + return errors.NewError("Error creating session", err, false) + } + + downloader := s3manager.NewDownloader(sess) + + numBytes, err := downloader.DownloadWithContext(ctx, w, &s3.GetObjectInput{ + Bucket: aws.String(u.Bucket), + Key: aws.String(u.Key), + }) + + if err != nil { + return errors.NewError("Error downloading file", err, false) + } + + slog.Debug("Downloaded file", "size", numBytes) + + return nil +} + +// ParseAmazonS3URL parses an HTTP/HTTPS URL for an S3 resource and returns an +// S3URL object. +// +// S3 URLs come in two flavors: virtual hosted-style URLs and path-style URLs. +// Virtual hosted-style URLs have the bucket name as the first component of the +// hostname, e.g. +// +// https://mybucket.s3.us-east-1.amazonaws.com/a/b/c +// +// Path-style URLs have the bucket name as the first component of the path, e.g. +// +// https://s3.us-east-1.amazonaws.com/mybucket/a/b/c +func ParseAmazonS3URL(s3URL *url.URL) (S3URL, error) { + output, err := parseBucketAndRegionFromHost(s3URL.Host, vpceUrlRegex, vpceUrlPatternBucketIdx, vpceUrlPatternRegionIdx) + if err != nil { + output, err = parseBucketAndRegionFromHost(s3URL.Host, nonVpceUrlRegex, nonVpceUrlPatternBucketIdx, nonVpceUrlPatternRegionIdx) + if err != nil { + return S3URL{}, err + } + } + + output.IsPathStyle = output.Bucket == "" + + path := s3URL.Path + + if output.IsPathStyle { + // no bucket name in the authority, parse it from the path + output.IsPathStyle = true + + // grab the encoded path so we don't run afoul of '/'s in the bucket name + if path == "/" || path == "" { + } else { + path = path[1:] + index := strings.Index(path, "/") + if index == -1 { + // https://s3.amazonaws.com/bucket + output.Bucket = path + output.Key = "" + } else if index == (len(path) - 1) { + // https://s3.amazonaws.com/bucket/ + output.Bucket = strings.TrimRight(path, "/") + output.Key = "" + } else { + // https://s3.amazonaws.com/bucket/key + output.Bucket = path[:index] + output.Key = path[index+1:] + } + } + } else { + // bucket name in the host, path is the object key + if path == "/" || path == "" { + output.Key = "" + } else { + output.Key = path[1:] + } + } + + if strings.EqualFold(output.Region, "external-1") { + output.Region = "us-east-1" + } else if output.Region == "" { + // s3 bucket URL in us-east-1 doesn't include region + output.Region = "us-east-1" + } + + return output, nil +} + +func parseBucketAndRegionFromHost(host string, re *regexp.Regexp, bucketIdx, regionIdx int) (S3URL, error) { + result := re.FindStringSubmatch(host) + if result != nil && len(result) > bucketIdx && len(result) > regionIdx { + return S3URL{ + Bucket: result[bucketIdx], + Region: result[regionIdx], + }, nil + } else { + return S3URL{}, errors.NewError("no match", nil, false) + } +} + +// DownloadFileFromS3 takes an S3 URL and a filePath, downloads the file from s3 and stores it in the filePath. +func DownloadFileFromS3(ctx context.Context, downloadURL, filePath string) error { + parsedURL, err := url.Parse(downloadURL) + if err != nil { + return err + } + + // Parse s3 URL to extract region, key and bucket. + s3URL, err := ParseAmazonS3URL(parsedURL) + if err != nil { + return errors.NewError("Failed to parse URL", err, false) + } + + // Create file path + err = os.MkdirAll(filepath.Dir(filePath), os.ModePerm) + if err != nil { + return errors.NewError("Unable to create directory", err, false) + } + + // Create a local file to write to + f, err := os.Create(filePath) + if err != nil { + return errors.NewError("Error creating file", err, false) + } + + defer func() { + // Ensure file is closed even if an error occurs + if f != nil { + f.Close() + } + }() + + return s3URL.DownloadFile(ctx, f) +} From 6fe8c1a58a8ce1a95275a4c5f0553a70c0c50ce8 Mon Sep 17 00:00:00 2001 From: Sreeram Date: Fri, 5 Jan 2024 15:13:20 +0530 Subject: [PATCH 2/7] update: aws sdk version --- go.mod | 10 +++++----- go.sum | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index ce86d4b..dcdda6f 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/Vernacular-ai/gorm v1.11.3 - github.com/aws/aws-sdk-go v1.44.153 + github.com/aws/aws-sdk-go v1.49.15 github.com/getsentry/sentry-go v0.15.0 github.com/go-kit/log v0.2.1 github.com/google/go-cmp v0.5.9 @@ -94,11 +94,11 @@ require ( go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.8.0 // indirect - golang.org/x/crypto v0.3.0 // indirect - golang.org/x/net v0.3.0 // indirect + golang.org/x/crypto v0.14.0 // indirect + golang.org/x/net v0.17.0 // indirect golang.org/x/oauth2 v0.2.0 // indirect - golang.org/x/sys v0.3.0 // indirect - golang.org/x/text v0.5.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/api v0.103.0 // indirect diff --git a/go.sum b/go.sum index 8d55842..b64794f 100644 --- a/go.sum +++ b/go.sum @@ -210,6 +210,8 @@ github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgI github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go v1.44.153 h1:KfN5URb9O/Fk48xHrAinrPV2DzPcLa0cd9yo1ax5KGg= github.com/aws/aws-sdk-go v1.44.153/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.49.15 h1:aH9bSV4kL4ziH0AMtuYbukGIVebXddXBL0cKZ1zj15k= +github.com/aws/aws-sdk-go v1.49.15/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -687,6 +689,8 @@ golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.3.0 h1:a06MkbcxBrEFc0w0QIZWXrH/9cCX6KJyWbBOIwAn+7A= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -777,6 +781,8 @@ golang.org/x/net v0.0.0-20221014081412-f15817d10f9b/go.mod h1:YDH+HFinaLZZlnHAfS golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.3.0 h1:VWL6FNY2bEEmsGVKabSlHu5Irp34xmMRoqb/9lF9lxk= golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -894,6 +900,8 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -909,6 +917,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 5fb646ae20093f4bfef1d67148dd225c7adceb3c Mon Sep 17 00:00:00 2001 From: Sreeram Date: Fri, 5 Jan 2024 15:16:27 +0530 Subject: [PATCH 3/7] update: x/crypto version --- go.mod | 6 +++--- go.sum | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dcdda6f..3e9dd2c 100644 --- a/go.mod +++ b/go.mod @@ -94,11 +94,11 @@ require ( go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.8.0 // indirect - golang.org/x/crypto v0.14.0 // indirect + golang.org/x/crypto v0.17.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/oauth2 v0.2.0 // indirect - golang.org/x/sys v0.13.0 // indirect - golang.org/x/text v0.13.0 // indirect + golang.org/x/sys v0.15.0 // indirect + golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/api v0.103.0 // indirect diff --git a/go.sum b/go.sum index b64794f..5e48785 100644 --- a/go.sum +++ b/go.sum @@ -691,6 +691,8 @@ golang.org/x/crypto v0.3.0 h1:a06MkbcxBrEFc0w0QIZWXrH/9cCX6KJyWbBOIwAn+7A= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= +golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -902,6 +904,7 @@ golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -919,6 +922,7 @@ golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From c18aa5b8f84211cf63fb78cd44bf4f0c6b15c0eb Mon Sep 17 00:00:00 2001 From: Sreeram Date: Fri, 5 Jan 2024 18:05:46 +0530 Subject: [PATCH 4/7] add: support for VPC endpoint in download from s3 --- aws/s3.go | 33 +++++++++++++++++++++------------ go.sum | 2 ++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/aws/s3.go b/aws/s3.go index 922b362..a523db8 100644 --- a/aws/s3.go +++ b/aws/s3.go @@ -26,6 +26,7 @@ const ( "(([-a-z0-9]+)\\.)?" + // region name, optional for us-east-1 "vpce\\." + "(amazonaws\\.com|c2s\\.ic\\.gov|sc2s\\.sgov\\.gov)" + vpceUrlPatternHostIdx = 0 vpceUrlPatternBucketIdx = 2 vpceUrlPatternRegionIdx = 5 @@ -46,6 +47,7 @@ var ( // S3URL holds interesting pieces after parsing a s3 URL type S3URL struct { IsPathStyle bool + EndPoint string Bucket string Key string Region string @@ -54,7 +56,8 @@ type S3URL struct { // DownloadFile downloads a file from s3 based on the key and writes it into WriteAt. func (u S3URL) DownloadFile(ctx context.Context, w io.WriterAt) error { sess, err := session.NewSession(&aws.Config{ - Region: aws.String(u.Region), // Specify the region where the bucket is located + Region: aws.String(u.Region), // Specify the region where the bucket is located + Endpoint: aws.String(u.EndPoint), }) if err != nil { return errors.NewError("Error creating session", err, false) @@ -89,12 +92,9 @@ func (u S3URL) DownloadFile(ctx context.Context, w io.WriterAt) error { // // https://s3.us-east-1.amazonaws.com/mybucket/a/b/c func ParseAmazonS3URL(s3URL *url.URL) (S3URL, error) { - output, err := parseBucketAndRegionFromHost(s3URL.Host, vpceUrlRegex, vpceUrlPatternBucketIdx, vpceUrlPatternRegionIdx) + output, err := parseBucketAndRegionFromHost(s3URL.Host) if err != nil { - output, err = parseBucketAndRegionFromHost(s3URL.Host, nonVpceUrlRegex, nonVpceUrlPatternBucketIdx, nonVpceUrlPatternRegionIdx) - if err != nil { - return S3URL{}, err - } + return S3URL{}, errors.NewError("parsing host failed", err, false) } output.IsPathStyle = output.Bucket == "" @@ -143,15 +143,24 @@ func ParseAmazonS3URL(s3URL *url.URL) (S3URL, error) { return output, nil } -func parseBucketAndRegionFromHost(host string, re *regexp.Regexp, bucketIdx, regionIdx int) (S3URL, error) { - result := re.FindStringSubmatch(host) - if result != nil && len(result) > bucketIdx && len(result) > regionIdx { +func parseBucketAndRegionFromHost(host string) (S3URL, error) { + result := vpceUrlRegex.FindStringSubmatch(host) + if result != nil && len(result) > vpceUrlPatternBucketIdx && len(result) > vpceUrlPatternRegionIdx { return S3URL{ - Bucket: result[bucketIdx], - Region: result[regionIdx], + EndPoint: result[vpceUrlPatternHostIdx], + Bucket: result[vpceUrlPatternBucketIdx], + Region: result[vpceUrlPatternRegionIdx], }, nil } else { - return S3URL{}, errors.NewError("no match", nil, false) + result = nonVpceUrlRegex.FindStringSubmatch(host) + if result != nil && len(result) > vpceUrlPatternBucketIdx && len(result) > vpceUrlPatternRegionIdx { + return S3URL{ + Bucket: result[nonVpceUrlPatternBucketIdx], + Region: result[nonVpceUrlPatternRegionIdx], + }, nil + } else { + return S3URL{}, errors.NewError("failed to match URL", nil, false) + } } } diff --git a/go.sum b/go.sum index 5e48785..b7e5c68 100644 --- a/go.sum +++ b/go.sum @@ -904,6 +904,7 @@ golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -922,6 +923,7 @@ golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From ec32c6bf1fbbffd425b34f072cb1a72cd31ed198 Mon Sep 17 00:00:00 2001 From: Sreeram Date: Thu, 11 Jan 2024 20:03:40 +0530 Subject: [PATCH 5/7] fix: naming for regex var in s3 --- aws/s3.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/aws/s3.go b/aws/s3.go index a523db8..8177851 100644 --- a/aws/s3.go +++ b/aws/s3.go @@ -20,28 +20,28 @@ import ( const ( // Regex for S3 URLs, VPCE interface endpoint - vpceUrlPattern = "^((.+)\\.)?" + // maybe a bucket name + vpceURLPattern = "^((.+)\\.)?" + // maybe a bucket name "(bucket|accesspoint|control)\\.vpce-[-a-z0-9]+\\." + // VPC endpoint DNS name "s3[.-]" + // S3 service name "(([-a-z0-9]+)\\.)?" + // region name, optional for us-east-1 "vpce\\." + "(amazonaws\\.com|c2s\\.ic\\.gov|sc2s\\.sgov\\.gov)" - vpceUrlPatternHostIdx = 0 - vpceUrlPatternBucketIdx = 2 - vpceUrlPatternRegionIdx = 5 + vpceURLPatternHostIdx = 0 + vpceURLPatternBucketIdx = 2 + vpceURLPatternRegionIdx = 5 // Regex for S3 URLs, public S3 endpoint - nonVpceUrlPattern = "^((.+)\\.)?" + // maybe a bucket name + nonVpceURLPattern = "^((.+)\\.)?" + // maybe a bucket name "s3[.-](website[-.])?(accelerate\\.)?(dualstack[-.])?" + // S3 service name with optional features "(([-a-z0-9]+)\\.)?" + // region name, optional for us-east-1 "(amazonaws\\.com|c2s\\.ic\\.gov|sc2s\\.sgov\\.gov)" - nonVpceUrlPatternBucketIdx = 2 - nonVpceUrlPatternRegionIdx = 7 + nonVpceURLPatternBucketIdx = 2 + nonVpceURLPatternRegionIdx = 7 ) var ( - vpceUrlRegex = regexp.MustCompile(vpceUrlPattern) - nonVpceUrlRegex = regexp.MustCompile(nonVpceUrlPattern) + vpceUrlRegex = regexp.MustCompile(vpceURLPattern) + nonVpceUrlRegex = regexp.MustCompile(nonVpceURLPattern) ) // S3URL holds interesting pieces after parsing a s3 URL @@ -145,18 +145,18 @@ func ParseAmazonS3URL(s3URL *url.URL) (S3URL, error) { func parseBucketAndRegionFromHost(host string) (S3URL, error) { result := vpceUrlRegex.FindStringSubmatch(host) - if result != nil && len(result) > vpceUrlPatternBucketIdx && len(result) > vpceUrlPatternRegionIdx { + if result != nil && len(result) > vpceURLPatternBucketIdx && len(result) > vpceURLPatternRegionIdx { return S3URL{ - EndPoint: result[vpceUrlPatternHostIdx], - Bucket: result[vpceUrlPatternBucketIdx], - Region: result[vpceUrlPatternRegionIdx], + EndPoint: result[vpceURLPatternHostIdx], + Bucket: result[vpceURLPatternBucketIdx], + Region: result[vpceURLPatternRegionIdx], }, nil } else { result = nonVpceUrlRegex.FindStringSubmatch(host) - if result != nil && len(result) > vpceUrlPatternBucketIdx && len(result) > vpceUrlPatternRegionIdx { + if result != nil && len(result) > vpceURLPatternBucketIdx && len(result) > vpceURLPatternRegionIdx { return S3URL{ - Bucket: result[nonVpceUrlPatternBucketIdx], - Region: result[nonVpceUrlPatternRegionIdx], + Bucket: result[nonVpceURLPatternBucketIdx], + Region: result[nonVpceURLPatternRegionIdx], }, nil } else { return S3URL{}, errors.NewError("failed to match URL", nil, false) From 44ac54bed7571c9fa0890e1b5bc2bc8eb09cfdc0 Mon Sep 17 00:00:00 2001 From: Sreeram Date: Mon, 15 Jan 2024 15:25:46 +0530 Subject: [PATCH 6/7] add: source for s3 URL regex --- aws/s3.go | 1 + 1 file changed, 1 insertion(+) diff --git a/aws/s3.go b/aws/s3.go index 8177851..2dbe318 100644 --- a/aws/s3.go +++ b/aws/s3.go @@ -20,6 +20,7 @@ import ( const ( // Regex for S3 URLs, VPCE interface endpoint + // Source - https://github.com/aws/amazon-ssm-agent/blob/mainline/agent/s3util/s3uri.go vpceURLPattern = "^((.+)\\.)?" + // maybe a bucket name "(bucket|accesspoint|control)\\.vpce-[-a-z0-9]+\\." + // VPC endpoint DNS name "s3[.-]" + // S3 service name From 911c7cf9133fa75bb9279d2b7d319860f1dfd01a Mon Sep 17 00:00:00 2001 From: Sreeram Date: Mon, 15 Jan 2024 15:35:08 +0530 Subject: [PATCH 7/7] update: add error msg for url parse --- aws/s3.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws/s3.go b/aws/s3.go index 2dbe318..62a289b 100644 --- a/aws/s3.go +++ b/aws/s3.go @@ -169,13 +169,13 @@ func parseBucketAndRegionFromHost(host string) (S3URL, error) { func DownloadFileFromS3(ctx context.Context, downloadURL, filePath string) error { parsedURL, err := url.Parse(downloadURL) if err != nil { - return err + return errors.NewError("Failed to parse URL", err, false) } // Parse s3 URL to extract region, key and bucket. s3URL, err := ParseAmazonS3URL(parsedURL) if err != nil { - return errors.NewError("Failed to parse URL", err, false) + return errors.NewError("Failed to parse URL as s3 URL", err, false) } // Create file path