From e3fca647c4d6489cac2188ef4843df13626a78f8 Mon Sep 17 00:00:00 2001 From: Henning Schulz Date: Fri, 20 Mar 2020 16:59:23 +0100 Subject: [PATCH] CONT-335: Buffer clustering results in local file. --- clustinator/main.py | 26 +++++++++++---- clustinator/session_matrix_buffer.py | 49 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/clustinator/main.py b/clustinator/main.py index e261884..dd9c861 100644 --- a/clustinator/main.py +++ b/clustinator/main.py @@ -75,14 +75,28 @@ def start(self): csr_matrix = matrix.as_csr_matrix() print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Matrix creation done.') - appenders = {'dbscan': DbscanAppender(epsilon, avg_tolerance, matrix.states(), min_samples, prev_behavior_models, matrix.label_encoder), - 'kmeans': KmeansAppender(prev_behavior_models, k, max_iterations, num_seedings, convergence_tolerance, n_jobs, dimensions, quantile_range), - 'minimum-distance': MinimumDistanceAppender(prev_behavior_models, matrix.label_encoder, dimensions, radius_factor, num_seedings, min_samples)} - print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Appending the new sessions using', append_strategy) - appender = appenders[append_strategy] - appender.append(csr_matrix) + appender = self.matrix_buffer.load_appender(app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor) + + if appender: + print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Reusing the buffered session appender.') + else: + print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Creating and executing a new appender...') + + appenders = {'dbscan': DbscanAppender(epsilon, avg_tolerance, matrix.states(), min_samples, prev_behavior_models, matrix.label_encoder), + 'kmeans': KmeansAppender(prev_behavior_models, k, 
max_iterations, num_seedings, convergence_tolerance, n_jobs, dimensions, quantile_range), + 'minimum-distance': MinimumDistanceAppender(prev_behavior_models, matrix.label_encoder, dimensions, radius_factor, num_seedings, min_samples)} + + appender = appenders[append_strategy] + appender.append(csr_matrix) + + self.matrix_buffer.store_appender(app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor, appender) + cluster_means = appender.cluster_means labels = appender.labels cluster_mapping = appender.cluster_mapping diff --git a/clustinator/session_matrix_buffer.py b/clustinator/session_matrix_buffer.py index 706e3e8..5ee09a6 100644 --- a/clustinator/session_matrix_buffer.py +++ b/clustinator/session_matrix_buffer.py @@ -51,5 +51,54 @@ def load(self, app_id, tailoring, start_micros, end_micros): else: return None + def _appender_file_path(self, app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor): + """ + Builds the path of the pickle file inside the buffer directory that belongs to the passed attributes. + """ + + return os.path.join(self.directory, 'appender-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}.pickle'.format(app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor)) + + def store_appender(self, app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor, appender): + """ + Stores a session appender to a binary file, which can be loaded again based on the passed attributes. 
+ """ + + if self.noop: + return + + file_path = self._appender_file_path(app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor) + + with open(file_path, 'wb') as appender_file: + pickle.dump(appender, appender_file) + + def load_appender(self, app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor): + """ + Loads a session appender from the corresponding binary file, if one exists. + :return: The stored session appender or None if there is none. + """ + + if self.noop: + return None + + file_path = self._appender_file_path(app_id, tailoring, start_micros, end_micros, append_strategy, + avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance, + dimensions, quantile_range, radius_factor) + + if os.path.isfile(file_path): + with open(file_path, 'rb') as appender_file: + return pickle.load(appender_file) + else: + return None + session_matrix_buffer_noop = SessionMatrixBuffer(None, True)