From 1a8768d79e36dc5cea9031775a4e24cc520fe0ae Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Wed, 6 Sep 2023 16:04:04 +0300 Subject: [PATCH 1/5] Update masker.rb --- lib/fluent/plugin/pan/masker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fluent/plugin/pan/masker.rb b/lib/fluent/plugin/pan/masker.rb index 688c6cd..c4249a3 100644 --- a/lib/fluent/plugin/pan/masker.rb +++ b/lib/fluent/plugin/pan/masker.rb @@ -35,7 +35,7 @@ def mask_if_found_pan(orgval) pan = match.split("").select { |i| i =~ /\d/ }.map { |j| j.to_i } if valid?(pan) - match = @mask + match = match.gsub(@regexp, @mask) else match end From 79742268bde1543dca3b352dd4eee9b5d2318a0e Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Wed, 6 Sep 2023 16:09:05 +0300 Subject: [PATCH 2/5] Update fluent-plugin-pan-anonymizer.gemspec --- fluent-plugin-pan-anonymizer.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fluent-plugin-pan-anonymizer.gemspec b/fluent-plugin-pan-anonymizer.gemspec index dace0ed..6bd365f 100644 --- a/fluent-plugin-pan-anonymizer.gemspec +++ b/fluent-plugin-pan-anonymizer.gemspec @@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) Gem::Specification.new do |spec| spec.name = "fluent-plugin-pan-anonymizer" - spec.version = "0.0.1" + spec.version = "0.0.1.1" spec.authors = ["Hiroaki Sano"] spec.email = ["hiroaki.sano.9stories@gmail.com"] From 13903909becf89801bdcb82ca1a6cfdcfc97b4fd Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Wed, 6 Sep 2023 17:20:11 +0300 Subject: [PATCH 3/5] Update README.md --- README.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9854a65..b743483 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ A Fluent filter plugin to anonymize records which have PAN (Primary Account Numb Inspired by [fluent-plugin-anonymizer](https://github.com/y-ken/fluent-plugin-anonymizer). +**N.B.:** This fork adds ability to allow Regex capture group usage, so that you can mask partially. The configuration shows how to set first 6 and last 4 numbers available while masking the values in between. See the example below. + # Requirements - fluentd: v0.14.x or later @@ -11,15 +13,16 @@ Inspired by [fluent-plugin-anonymizer](https://github.com/y-ken/fluent-plugin-an # Installation -``` -gem install fluent-plugin-pan-anonymizer +```shell +fluent-gem install specific_install +fluent-gem specific_install https://github.com/zbalkan/fluent-plugin-pan-anonymizer.git ``` # Configuration NOTE: Card numbers in the example don't exist in the world. -``` +```XML @type dummy tag dummy @@ -62,6 +65,71 @@ NOTE: Card numbers in the example don't exist in the world. Card numbers were masked with given configuration except `time` key and `4567890123456789` in "hello inquiry code is 4567890123456789". `4567890123456789` is not a valid card number. + +## A more complex example + +This example reads logs of an application called `sample`, masks and saves under `/var/log/masked/` so that you can use the masked version. This example uses `td-agent`. + +```XML + + @type tail + # update the path + path /var/log/sample.log + pos_file /var/log/td-agent/sample.log.pos + + # Use the source application name as a tag below: + tag sample + + # We don't care about the type and format of log. + # We will explicitly assume that it is plain text. + + @type none + + + +# Use the name of application used in the "tag" above + + @type pan_anonymizer + ignore_keys time + + + # mastercard + formats /(5[1-5][0-9]{2}(?:\ |\-|)[0-9]{2})[0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)([0-9]{4})/ + checksum_algorithm luhn + mask \1******\2 + + + # visa + formats /(4[0-9]{3}(?:\ |\-|)[0-9]{2})[0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)([0-9]{4})/ + checksum_algorithm luhn + mask \1******\2 + + + # amex + formats /((?:34|37)[0-9]{2}(?:\ |\-|)[0-9]{2})[0-9]{4}(?:\ |\-|)[0-9]{1}([0-9]{4})/ + checksum_algorithm luhn + mask \1******\2 + + + +# Use the name of application used in the "tag" above + + @type file + # Logs will be saved under this folder + # Name will be buffer..log + # At the end of the day, it will rename the file as + # buffer..log + path /var/log/masked + append true + + +# Push fluentd messages to stdout + +``` # License Apache License, Version 2.0 From 0faff0c92953860675da85ac19bcb76d5e58c61e Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Mon, 17 Jun 2024 21:04:35 +0300 Subject: [PATCH 4/5] Reverted the changes to readme --- README.md | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index b743483..63359be 100644 --- a/README.md +++ b/README.md @@ -4,21 +4,18 @@ A Fluent filter plugin to anonymize records which have PAN (Primary Account Numb Inspired by [fluent-plugin-anonymizer](https://github.com/y-ken/fluent-plugin-anonymizer). -**N.B.:** This fork adds ability to allow Regex capture group usage, so that you can mask partially. The configuration shows how to set first 6 and last 4 numbers available while masking the values in between. See the example below. - -# Requirements +## Requirements - fluentd: v0.14.x or later - Ruby: 2.4 or later -# Installation +## Installation ```shell -fluent-gem install specific_install -fluent-gem specific_install https://github.com/zbalkan/fluent-plugin-pan-anonymizer.git +gem install fluent-plugin-pan-anonymizer ``` -# Configuration +## Configuration NOTE: Card numbers in the example don't exist in the world. @@ -54,9 +51,9 @@ NOTE: Card numbers in the example don't exist in the world. ``` -## The result of the example given above +### The result of the example given above -``` +```syslog 2018-11-13 22:01:35.074963000 +0900 dummy: {"time":12345678901234567,"subject":"xxxxxx","user_inquiry":"hi, my card number is 9999999999999999 !"} 2018-11-13 22:01:36.001053000 +0900 dummy: {"time":12345678901234568,"subject":"xxxxxx","user_inquiry":"hello inquiry code is 4567890123456789"} 2018-11-13 22:01:37.021032000 +0900 dummy: {"time":12345678901234569,"subject":"I am xxxx-xxxx-xxxx-xxxx","user_inquiry":"xxxx-xxxx-xxxx-xxxx is my number"} @@ -65,8 +62,7 @@ NOTE: Card numbers in the example don't exist in the world. Card numbers were masked with given configuration except `time` key and `4567890123456789` in "hello inquiry code is 4567890123456789". `4567890123456789` is not a valid card number. - -## A more complex example +### A more complex example This example reads logs of an application called `sample`, masks and saves under `/var/log/masked/` so that you can use the masked version. This example uses `td-agent`. @@ -76,7 +72,7 @@ This example reads logs of an application called `sample`, masks and saves under # update the path path /var/log/sample.log pos_file /var/log/td-agent/sample.log.pos - + # Use the source application name as a tag below: tag sample @@ -130,6 +126,6 @@ This example reads logs of an application called `sample`, masks and saves under ``` -# License +## License Apache License, Version 2.0 From 43364e6252d4c0fc35bcf87342ee76a1088753ca Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Mon, 17 Jun 2024 21:04:50 +0300 Subject: [PATCH 5/5] Updated version number --- fluent-plugin-pan-anonymizer.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fluent-plugin-pan-anonymizer.gemspec b/fluent-plugin-pan-anonymizer.gemspec index 6bd365f..a8fd2f9 100644 --- a/fluent-plugin-pan-anonymizer.gemspec +++ b/fluent-plugin-pan-anonymizer.gemspec @@ -4,8 +4,8 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) Gem::Specification.new do |spec| spec.name = "fluent-plugin-pan-anonymizer" - spec.version = "0.0.1.1" - spec.authors = ["Hiroaki Sano"] + spec.version = "0.0.2" + spec.authors = ["Hiroaki Sano", "Zafer Balkan"] spec.email = ["hiroaki.sano.9stories@gmail.com"] spec.summary = %q{Fluentd filter plugin to anonymize credit card numbers.}