Fixed a potential bug that may lead to positional misalignment between pred and gt on different devices. #111

Merged: 2 commits, Jan 16, 2024
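
The gist of the fix, as reflected in the new evaluation helpers in this diff: parsed templates are now matched to groundtruth rows by log content instead of by row position, so the comparison no longer depends on the order in which results were produced. A minimal sketch of the difference (hypothetical data, not the PR's exact code):

```python
import pandas as pd

# hypothetical groundtruth and parsed output, written in different orders
groundtruth = pd.DataFrame({"Content": ["connect from 1.2.3.4", "user root logged in"],
                            "EventTemplate": ["connect from <*>", "user <*> logged in"]})
result = pd.DataFrame({"log": ["user root logged in", "connect from 1.2.3.4"],
                       "template": ["user <*> logged in", "connect from <*>"]})

# positional comparison (old behaviour): row 0 of each frame refers to different logs
positional_match = result["template"][0] == groundtruth["EventTemplate"][0]   # False

# content-based lookup (new behaviour): find the groundtruth row for this exact log line
gt_template = groundtruth.loc[groundtruth["Content"] == result["log"][0], "EventTemplate"].values[0]
content_match = result["template"][0] == gt_template                          # True
```
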
158 changes: 76 additions & 82 deletions logparser/DivLog/DivLog.py
@@ -11,6 +11,7 @@
from random import sample
from sklearn.model_selection import train_test_split
from openai.embeddings_utils import get_embedding, cosine_similarity
from collections import Counter


def dpp(kernel_matrix, max_length, epsilon=1E-10):
@@ -63,15 +64,73 @@ def DPPsplit(log_list, groundtruth_template, candidate_idx):
test_templates = [groundtruth_template[idx] for idx in test_idx]
return test_logs, cand_logs, test_templates, cand_templates

def evaluateGA(dataset, groundtruth, result):
# calculate parsing accuracy
def evaluatePA(groundtruth, result):
# the number of parsed logs may be smaller than the number of groundtruth logs
length = len(result['template'])
if length == 0: return 0
correct = 0
for i in range(length):
if result['template'][i] == groundtruth.loc[groundtruth['Content'] == result['log'][i]]['EventTemplate'].values[0]:
correct += 1
return correct/length

# correctly identified templates over the total number of identified templates
def evaluatePTA(groundtruth, result):
# generate a "template: list of logs" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result['template'])):
if groundtruth['EventTemplate'][idx] not in oracle_tem_dict:
oracle_tem_dict[groundtruth['EventTemplate'][idx]] = [groundtruth['Content'][idx]]
else: oracle_tem_dict[groundtruth['EventTemplate'][idx]].append(groundtruth['Content'][idx])

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result['template'])):
if result['template'][idx] not in result_tem_dict:
result_tem_dict[result['template'][idx]] = [result['log'][idx]]
else: result_tem_dict[result['template'][idx]].append(result['log'][idx])

correct_num = 0
for key in result_tem_dict.keys():
if key not in oracle_tem_dict: continue
else:
if Counter(oracle_tem_dict[key]) == Counter(result_tem_dict[key]): correct_num += 1

return correct_num/len(result_tem_dict)

# correctly identified templates over the total number of oracle templates
def evaluateRTA(groundtruth, result):
# generate a "template: list of logs" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result['template'])):
if groundtruth['EventTemplate'][idx] not in oracle_tem_dict:
oracle_tem_dict[groundtruth['EventTemplate'][idx]] = [groundtruth['Content'][idx]]
else: oracle_tem_dict[groundtruth['EventTemplate'][idx]].append(groundtruth['Content'][idx])

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result['template'])):
if result['template'][idx] not in result_tem_dict:
result_tem_dict[result['template'][idx]] = [result['log'][idx]]
else: result_tem_dict[result['template'][idx]].append(result['log'][idx])

correct_num = 0
for key in oracle_tem_dict.keys():
if key not in result_tem_dict: continue
else:
if Counter(oracle_tem_dict[key]) == Counter(result_tem_dict[key]): correct_num += 1

return correct_num/len(oracle_tem_dict)

# calculate grouping accuracy
def evaluateGA(groundtruth, result):
# load logs and templates
df_groundtruth = pd.read_csv(groundtruth)
df_parsedlog = pd.read_csv(result)
compared_list = df_parsedlog['log'].tolist()
compared_list = result['log'].tolist()

# select groundtruth logs that have been parsed
parsed_idx = []
for idx, row in df_groundtruth.iterrows():
for idx, row in groundtruth.iterrows():
if row['Content'] in compared_list:
parsed_idx.append(idx)
compared_list.remove(row['Content'])
@@ -81,11 +140,11 @@ def evaluateGA(dataset, groundtruth, result):
print("Wrong number of groundtruth logs!")
return 0

df_groundtruth = df_groundtruth.loc[parsed_idx]
groundtruth = groundtruth.loc[parsed_idx]

# grouping
groundtruth_dict = {}
for idx, row in df_groundtruth.iterrows():
for idx, row in groundtruth.iterrows():
if row['EventTemplate'] not in groundtruth_dict:
# create a new key
groundtruth_dict[row['EventTemplate']] = [row['Content']]
@@ -94,7 +153,7 @@ def evaluateGA(dataset, groundtruth, result):
groundtruth_dict[row['EventTemplate']].append(row['Content'])

result_dict = {}
for idx, row in df_parsedlog.iterrows():
for idx, row in result.iterrows():
if row['template'] not in result_dict:
# create a new key
result_dict[row['template']] = [row['log']]
@@ -153,7 +212,7 @@ def __init__(self,
self.log_test, self.log_cand, self.gt_test, self.gt_cand = self.splitCandidates(self.log_path, self.cand_ratio, self.split_method)

# build lookup map
# self.lookUpMap = self.buildLookupMap(self.map_path)
self.lookUpMap = self.buildLookupMap(self.map_path)

# generate lookup map
def buildLookupMap(self, map_path):
@@ -267,73 +326,6 @@ def generatePrompt(self, log, nearest_num=5):
similarist_gt = self.gt_cand[idxes[0]]
return prompt, similarist_gt

# compare if template is correctly extracted: if yes, return 1; else return 0
def compareTemplate(self, tpl_1, tpl_2):
token_list_1 = tpl_1.split()
token_list_2 = tpl_2.split()
if (len(token_list_1) != len(token_list_2)): return 0
length = len(token_list_1)
for i in range(length):
if (token_list_1[i] != token_list_2[i]): return 0
return 1;

# calculate parsing accuracy
def evaluatePA(self, result):
# len(result) may smaller than len(groundtruth)
length = len(result)
if length == 0: return 0
correct = 0
for i in range(length):
correct += self.compareTemplate(result[i], self.gt_test[i])
return correct/length

# correctly identified templates over total num of identified template
def evaluatePTA(self, result):
# generate a "template: log indexes list" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result)):
if self.gt_test[idx] not in oracle_tem_dict:
oracle_tem_dict[self.gt_test[idx]] = [idx]
else: oracle_tem_dict[self.gt_test[idx]].append(idx)

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result)):
if result[idx] not in result_tem_dict:
result_tem_dict[result[idx]] = [idx]
else: result_tem_dict[result[idx]].append(idx)

correct_num = 0
for key in result_tem_dict.keys():
if key not in oracle_tem_dict: continue
else:
if oracle_tem_dict[key] == result_tem_dict[key]: correct_num += 1

return correct_num/len(result_tem_dict)

# correctly identified templates over total num of oracle template
def evaluateRTA(self, result):
oracle_tem_dict = {}
for idx in range(len(result)):
if self.gt_test[idx] not in oracle_tem_dict:
oracle_tem_dict[self.gt_test[idx]] = [idx]
else: oracle_tem_dict[self.gt_test[idx]].append(idx)

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result)):
if result[idx] not in result_tem_dict:
result_tem_dict[result[idx]] = [idx]
else: result_tem_dict[result[idx]].append(idx)

correct_num = 0
for key in oracle_tem_dict.keys():
if key not in result_tem_dict: continue
else:
if oracle_tem_dict[key] == result_tem_dict[key]: correct_num += 1

return correct_num/len(oracle_tem_dict)

def writeResult(self, result, path, limit):
output = pd.DataFrame(data={"log": self.log_test[:limit], "template": result})
output.to_csv(path, index=False)
@@ -380,8 +372,8 @@ def BatchParse(self, model, model_name, limit, N=5):
prompt=instruction + "\n\n\n" + prompt + "<prompt>:" + line.strip() + "\n<extraction>: ",
temperature=temperature,
max_tokens=token_len)
except: # if interrupt by request busy
print("Request busy, log {} is now waiting ...".format(line_idx))
except Exception as e: # if exception occurs
print(e)
re_id += 1
if re_id < 5:
time.sleep(0.1)
@@ -417,10 +409,12 @@ def BatchParse(self, model, model_name, limit, N=5):
df = pd.DataFrame(columns=['Dataset', 'Parsing Accuracy', 'Precision Template Accuracy', 'Recall Template Accuracy', 'Grouping Accuracy'])
else:
df = pd.read_csv("DivLog_bechmark_result.csv")
PA = self.evaluatePA(answer_list)
PTA = self.evaluatePTA(answer_list)
RTA = self.evaluateRTA(answer_list)
GA = evaluateGA(self.dataset, self.log_path, self.result_path)
df_groundtruth = pd.read_csv(self.log_path)
df_parsedlog = pd.read_csv(self.result_path)
PA = evaluatePA(df_groundtruth, df_parsedlog)
PTA = evaluatePTA(df_groundtruth, df_parsedlog)
RTA = evaluateRTA(df_groundtruth, df_parsedlog)
GA = evaluateGA(df_groundtruth, df_parsedlog)
print("{}:\t PA:\t{:.6f}\tPTA:\t{:.6f}\tRTA:\t{:.6f}\tGA:\t{:.6f}".format(self.dataset, PA, PTA, RTA, GA))
if self.dataset not in df['Dataset'].values:
df.loc[len(df)] = [self.dataset, PA, PTA, RTA, GA]
16 changes: 16 additions & 0 deletions logparser/DivLog/README.md
@@ -40,6 +40,22 @@ If you wish to re-run all the results (which may cost much time and api budget),
rm -r results
```

#### Attention:

OpenAI has [shut down](https://platform.openai.com/docs/deprecations/2023-07-06-gpt-and-embeddings) the *Text Completion API* for the GPT-3 model series (`ada`, `babbage`, `curie`, `davinci`) as of January 4th, 2024. If you wish to apply the DivLog framework with the OpenAI *Chat Completion API* and re-run all the results, you need to modify the API request in `BatchParse` of `DivLog.py`. Specifically, replace the original request built for the GPT-3 models with a Chat Completion request:

```python
### Replace it
response = openai.Completion.create(
model=model,
prompt=instruction + "\n\n\n" + prompt + "<prompt>:" + line.strip() + "\n<extraction>: ",
temperature=temperature,
max_tokens=token_len)
```

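A minimal sketch of what the replacement might look like with the pre-1.0 `openai` Python package that the rest of the script uses (how `instruction` and the few-shot `prompt` are split across the `system` and `user` messages is an assumption, not part of the original code):

```python
### Possible replacement (sketch)
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # or any other Chat Completion model
    messages=[
        {"role": "system", "content": instruction},
        {"role": "user", "content": prompt + "<prompt>:" + line.strip() + "\n<extraction>: "},
    ],
    temperature=temperature,
    max_tokens=token_len)

# The extracted template is then read from the message content
# instead of the former `text` field:
# answer = response["choices"][0]["message"]["content"].strip()
```
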
More details about APIs can be found [here](https://platform.openai.com/docs/api-reference/chat).


### Benchmark

Running the benchmark script on the Loghub_2k datasets, you can obtain the following results.