-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpunctuation_location.py
executable file
·39 lines (33 loc) · 1.58 KB
/
punctuation_location.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
import nltk
import init
import chunker
#returns [1] if the token following the answer in the document is one of < , . " : ! >
#and returns [0] otherwise
def punc_loc(question, candidate):
    """Report whether the candidate answer is directly followed by punctuation.

    Args:
        question: The question text. It is not read by this function.
        candidate: A tuple laid out as
            ``(answer, doc_num, index, features, q_id)``. Only the first three
            fields are read: the answer text, the id handed to
            ``init.get_doc``, and the position of the answer's first word in
            the cleaned, whitespace-split document.

    Returns:
        ``[1]`` when the token right after the answer is one of
        ``, . " : !``; ``[0]`` otherwise, including when there is no token
        after the answer (answer at the end of the document, or an index
        that falls outside the document).
    """
    answer, doc_num, index = candidate[0], candidate[1], candidate[2]

    # Both texts go through the same cleaning step so that word counts line up.
    # NOTE(review): the comparison below only matches when a punctuation mark
    # is a token of its own, so this assumes clean_punctuation separates
    # punctuation from words -- confirm against chunker.
    doc_words = chunker.clean_punctuation(init.get_doc(doc_num)).split()
    answer_words = chunker.clean_punctuation(answer).split()

    # Position of the token immediately after the last word of the answer.
    next_index = index + len(answer_words)

    # Nothing follows the answer (or the index is invalid): report "no
    # punctuation" instead of raising IndexError or, for a negative index,
    # silently wrapping around to the end of the document.
    if not 0 <= next_index < len(doc_words):
        return [0]

    if doc_words[next_index] in (',', '.', '"', ':', '!'):
        return [1]
    return [0]
#the test case below uses a hard-coded document instead of the real one, so that the answer's word index is known
def test():
    """Run punc_loc on a hard-coded document and return the inputs used.

    Stores a known sentence in ``init.global_doc_dict`` under a document id,
    prints the result of ``punc_loc`` for an answer at a known word index,
    and returns the same arguments so the caller can re-run the check.

    Returns:
        A ``(question, candidate)`` pair, where ``candidate`` is the tuple
        ``(answer, doc_num, index, features, q_id)``.
    """
    question = 'What was the name, of the first Russian astronaut to do a spacewalk?'
    doc = 'The name of the first Russian astronaut to do a spacewalk is Aleksei A. Leonov!'
    doc_num = "LA072490-0034"
    # presumably init.get_doc reads from this dict -- verify against init
    init.global_doc_dict[doc_num] = doc

    # Candidate tuples carry five fields (answer, doc_num, index, features,
    # q_id). The q_id slot was missing here, which breaks the unpacking in
    # punc_loc; punc_loc never uses q_id, so None serves as a placeholder.
    # 'Aleksei' is word 12 (0-based) of the whitespace-split document.
    candidate = ('Aleksei A. Leonov', doc_num, 12, {}, None)
    print(punc_loc(question, candidate))
    return (question, candidate)
if __name__ == "__main__":
    # test() prints the feature value once itself; emit a blank line and then
    # print the feature again, computed from the inputs test() returned.
    question, candidate = test()
    print('')
    print(punc_loc(question, candidate))