Skip to content

Commit

Permalink
CONT-335: Buffer clustering results in local file.
Browse files Browse the repository at this point in the history
  • Loading branch information
Henning-Schulz committed Mar 20, 2020
1 parent a45794e commit e3fca64
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 6 deletions.
26 changes: 20 additions & 6 deletions clustinator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,28 @@ def start(self):
csr_matrix = matrix.as_csr_matrix()
print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Matrix creation done.')

appenders = {'dbscan': DbscanAppender(epsilon, avg_tolerance, matrix.states(), min_samples, prev_behavior_models, matrix.label_encoder),
'kmeans': KmeansAppender(prev_behavior_models, k, max_iterations, num_seedings, convergence_tolerance, n_jobs, dimensions, quantile_range),
'minimum-distance': MinimumDistanceAppender(prev_behavior_models, matrix.label_encoder, dimensions, radius_factor, num_seedings, min_samples)}

print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Appending the new sessions using', append_strategy)

appender = appenders[append_strategy]
appender.append(csr_matrix)
appender = self.matrix_buffer.load_appender(app_id, tailoring, start_micros, end_micros, append_strategy,
avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance,
dimensions, quantile_range, radius_factor)

if appender:
print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Reusing the buffered session appender.')
else:
print(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'Creating and executing a new appender...')

appenders = {'dbscan': DbscanAppender(epsilon, avg_tolerance, matrix.states(), min_samples, prev_behavior_models, matrix.label_encoder),
'kmeans': KmeansAppender(prev_behavior_models, k, max_iterations, num_seedings, convergence_tolerance, n_jobs, dimensions, quantile_range),
'minimum-distance': MinimumDistanceAppender(prev_behavior_models, matrix.label_encoder, dimensions, radius_factor, num_seedings, min_samples)}

appender = appenders[append_strategy]
appender.append(csr_matrix)

self.matrix_buffer.store_appender(app_id, tailoring, start_micros, end_micros, append_strategy,
avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance,
dimensions, quantile_range, radius_factor, appender)

cluster_means = appender.cluster_means
labels = appender.labels
cluster_mapping = appender.cluster_mapping
Expand Down
42 changes: 42 additions & 0 deletions clustinator/session_matrix_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,47 @@ def load(self, app_id, tailoring, start_micros, end_micros):
else:
return None

def _appender_file_path(self, app_id, tailoring, start_micros, end_micros, append_strategy,
                        avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance,
                        dimensions, quantile_range, radius_factor):
    """
    Builds the path of the pickle file that buffers an appender for the passed attributes.
    :return: A path inside self.directory whose file name is 'appender-', followed by all
             attributes joined with dashes (in parameter order), followed by '.pickle'.
    """

    key_parts = (app_id, tailoring, start_micros, end_micros, append_strategy,
                 avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings,
                 convergence_tolerance, dimensions, quantile_range, radius_factor)

    # format(part) renders every value exactly like an empty '{}' placeholder would.
    file_name = 'appender-' + '-'.join(format(part) for part in key_parts) + '.pickle'

    return os.path.join(self.directory, file_name)

def store_appender(self, app_id, tailoring, start_micros, end_micros, append_strategy,
                   avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance,
                   dimensions, quantile_range, radius_factor, appender):
    """
    Stores a session appender to a binary file, which can be loaded again based on the passed attributes.
    Does nothing if this buffer is a no-op (self.noop is set).
    :param appender: The appender object to be pickled; all other parameters only determine the file name.
    """

    if self.noop:
        return

    file_path = self._appender_file_path(app_id, tailoring, start_micros, end_micros, append_strategy,
                                         avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings,
                                         convergence_tolerance, dimensions, quantile_range, radius_factor)

    # Use a context manager so the handle is flushed and closed as soon as the dump
    # finishes, instead of relying on garbage collection to close it.
    with open(file_path, 'wb') as appender_file:
        pickle.dump(appender, appender_file)

def load_appender(self, app_id, tailoring, start_micros, end_micros, append_strategy,
                  avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings, convergence_tolerance,
                  dimensions, quantile_range, radius_factor):
    """
    Loads a session appender from the corresponding binary file, if one exists.
    :return: The stored appender object, or None if there is no such file or this buffer is a no-op.
    """

    if self.noop:
        return None

    file_path = self._appender_file_path(app_id, tailoring, start_micros, end_micros, append_strategy,
                                         avg_tolerance, epsilon, min_samples, k, max_iterations, num_seedings,
                                         convergence_tolerance, dimensions, quantile_range, radius_factor)

    if not os.path.isfile(file_path):
        return None

    # NOTE(security): unpickling can execute arbitrary code embedded in the file, so the
    # buffer directory must only ever contain files written by store_appender.
    # The context manager closes the handle right after reading instead of leaking it.
    with open(file_path, 'rb') as appender_file:
        return pickle.load(appender_file)


# Module-level buffer instance constructed with (None, True).
# NOTE(review): presumably directory=None and noop=True, i.e. a buffer whose store/load
# calls do nothing — confirm against SessionMatrixBuffer.__init__.
session_matrix_buffer_noop = SessionMatrixBuffer(None, True)

0 comments on commit e3fca64

Please sign in to comment.