From c02d31eb926856951cd8e186c6d865315e5682ad Mon Sep 17 00:00:00 2001
From: Henning Schulz
Date: Thu, 21 Nov 2019 10:21:42 +0100
Subject: [PATCH] CONT-285: Let the user decide whether epsilon or avg_transition_tolerance should be used.

---
 clustinator/input.py |  2 +-
 clustinator/main.py  | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/clustinator/input.py b/clustinator/input.py
index 3d84af5..0780164 100644
--- a/clustinator/input.py
+++ b/clustinator/input.py
@@ -11,7 +11,7 @@ def __init__(self, sessions_json):
         self.data = json.loads(sessions_json)
 
     def cluster_param(self):
-        return self.data['avg-transition-tolerance'], self.data['min-sample-size']
+        return self.data.get('avg-transition-tolerance'), self.data.get('epsilon'), self.data['min-sample-size']
 
     def get_header(self):
         header_dict = {}
diff --git a/clustinator/main.py b/clustinator/main.py
index a1d3149..f9210fd 100644
--- a/clustinator/main.py
+++ b/clustinator/main.py
@@ -27,13 +27,16 @@ def start(self):
         start_time = datetime.now()
 
         data_input = Input(self.sessions_file)
-        avg_tolerance, min_samples = data_input.cluster_param()
+        avg_tolerance, epsilon, min_samples = data_input.cluster_param()
         header = data_input.get_header()
         app_id = data_input.get_app_id()
         tailoring = data_input.get_tailoring()
         start_micros, interval_start_micros, end_micros = data_input.get_range_micros()
 
-        print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Clustering for app-id', app_id, 'with avg. transition tolerance', avg_tolerance, 'and min-sample-size', min_samples)
+        if epsilon is None:
+            print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Clustering for app-id', app_id, 'with avg. transition tolerance', avg_tolerance, 'and min-sample-size', min_samples)
+        else:
+            print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Clustering for app-id', app_id, 'with epsilon', epsilon, 'and min-sample-size', min_samples)
 
         print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Loading previous Markov chains...')
         prev_behavior_model = BehaviorModel(app_id, tailoring, interval_start_micros)
@@ -49,7 +52,9 @@ def start(self):
 
         print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Converting to CSR...')
         csr_matrix = matrix.as_csr_matrix()
-        epsilon = (len(matrix.states()) - 1) * avg_tolerance
+
+        if epsilon is None:
+            epsilon = (len(matrix.states()) - 1) * avg_tolerance
 
         print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Matrix creation done. Starting the clustering with epsilon', epsilon, '...')
 