-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcalculate_baseline.py
129 lines (112 loc) · 4.37 KB
/
calculate_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
import csv, globalparameter
from numpy import random
from sklearn.metrics import precision_score
def _full_text_predictions(name, csv_path):
    """Return one 0/1 prediction per row of the CSV file at ``csv_path``.

    A row is predicted 1 only when ``name`` occurs as a substring in all
    three of: columns 9-43 joined with spaces, columns 45-63 joined with
    spaces, and column 65. Columns 44 and 64 are not inspected.
    NOTE(review): the original variable names call these sections work
    experience, education and skills -- confirm against the CSV schema.

    Raises:
        IndexError: if a row has fewer than 66 columns.
    """
    predictions = []
    with open(csv_path) as f:
        for row in csv.reader(f):
            work_exp = ' '.join(row[9:44])
            education = ' '.join(row[45:64])
            # Indexed eagerly (not inside the boolean test) so a truncated
            # row fails loudly instead of being silently scored as 0.
            skills = row[65]
            matched = (name in work_exp
                       and name in education
                       and name in skills)
            predictions.append(1 if matched else 0)
    return predictions


def baseline_full_text(name, folderpath_pos, folderpath_neg):
    """Print the precision of the "keyword in every section" baseline.

    Every row of the positive file is given true label 1 and every row of
    the negative file true label 0. A row is predicted 1 when ``name``
    appears in all three inspected column groups (see
    ``_full_text_predictions``). The result of ``precision_score`` is
    printed; nothing is returned.

    Args:
        name: Substring searched for in each row.
        folderpath_pos: Path of the CSV *file* holding the positive rows
            (despite the parameter name it is passed straight to ``open``).
        folderpath_neg: Path of the CSV *file* holding the negative rows.
    """
    pos_list = _full_text_predictions(name, folderpath_pos)
    neg_list = _full_text_predictions(name, folderpath_neg)

    # Ground-truth labels follow the actual row counts. They were previously
    # hard-coded to 500 + 500, which made precision_score raise on any input
    # whose files did not contain exactly 500 rows each.
    y_true = [1] * len(pos_list) + [0] * len(neg_list)

    score = precision_score(y_true, pos_list + neg_list)
    print('baseline_precision_full_text_of ' + name + ' : {}'.format(score))
def _work_exp_skills_predictions(name, csv_path):
    """Return one 0/1 prediction per row of the CSV file at ``csv_path``.

    A row is predicted 1 only when ``name`` occurs as a substring in both
    columns 9-43 joined with spaces and column 65.
    NOTE(review): the original variable names call these sections work
    experience and skills -- confirm against the CSV schema.

    Raises:
        IndexError: if a row has fewer than 66 columns.
    """
    predictions = []
    with open(csv_path) as f:
        for row in csv.reader(f):
            work_exp = ' '.join(row[9:44])
            # Indexed eagerly (not inside the boolean test) so a truncated
            # row fails loudly instead of being silently scored as 0.
            skills = row[65]
            matched = name in work_exp and name in skills
            predictions.append(1 if matched else 0)
    return predictions


def baseline_work_exp(name, folderpath_pos, folderpath_neg):
    """Print the precision of the "keyword in work-exp and skills" baseline.

    Every row of the positive file is given true label 1 and every row of
    the negative file true label 0. A row is predicted 1 when ``name``
    appears in both inspected column groups (see
    ``_work_exp_skills_predictions``). The result of ``precision_score`` is
    printed; nothing is returned.

    Args:
        name: Substring searched for in each row.
        folderpath_pos: Path of the CSV *file* holding the positive rows
            (despite the parameter name it is passed straight to ``open``).
        folderpath_neg: Path of the CSV *file* holding the negative rows.
    """
    pos_list = _work_exp_skills_predictions(name, folderpath_pos)
    neg_list = _work_exp_skills_predictions(name, folderpath_neg)

    # Ground-truth labels follow the actual row counts. They were previously
    # hard-coded to 500 + 500, which made precision_score raise on any input
    # whose files did not contain exactly 500 rows each.
    y_true = [1] * len(pos_list) + [0] * len(neg_list)

    score = precision_score(y_true, pos_list + neg_list)
    print('baseline_precision_work_exp_skills_of ' + name + ' : {}'.format(score))