-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrRow.py
38 lines (30 loc) · 1.11 KB
/
trRow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from bs4 import BeautifulSoup
class trRow:
    """
    Represents a row in the transcript table, each row contains:
    - Time information
    - Speaker information
    - Transcript of speaker dialogue

    All three values are extracted once, at construction time, by looking
    up the "th", "strong" and "p" elements of the row, converting each to
    a string and stripping the known markup from it.
    """

    def __init__(self, tr_in):
        """
        Parse the time, speaker and text out of one table row.

        tr_in: object exposing ``find(tag_name)``; presumably a
            BeautifulSoup ``<tr>`` tag -- confirm against the caller.

        An element that ``find`` reports as missing (``None``) ends up as
        the string "None" in the corresponding attribute, because each
        result is passed through ``str()``.
        """
        self.tr = tr_in
        self.text = str(self.tr.find("p"))
        self.speaker = str(self.tr.find("strong"))
        self.time = str(self.tr.find("th"))

        # Remove tags from speaker text
        self.speaker = self.speaker.replace("<strong>", "")
        self.speaker = self.speaker.replace("</strong>", "")

        # Remove tags from transcript text
        # NOTE(review): the closing "</p>" is intentionally left untouched
        # here, exactly as before -- callers may already be handling it.
        self.text = self.text.replace("<br/>", "")
        self.text = self.text.replace('<p class="short_transcript">', '')

        # Remove tags from time text.
        # Keep only what precedes the first "<br".  partition() hands back
        # the whole string when there is no "<br"; slicing with str.find()
        # would use its -1 "not found" result and silently drop the last
        # character of the time.
        self.time = self.time.partition("<br")[0]
        self.time = self.time.replace('<p class="short_transcript">', '')
        self.time = self.time.replace("<th>", '')
        # The closing tag normally sits after the "<br" and is already cut
        # off above; it only survives when the cell had no "<br" at all.
        self.time = self.time.replace("</th>", '')
        self.time = self.time.replace("\n", '')

    def getText(self) -> str:
        """Return the transcript text extracted from the row's "p" element."""
        return self.text

    def getSpeaker(self) -> str:
        """Return the speaker name extracted from the row's "strong" element."""
        return self.speaker

    def getTime(self) -> str:
        """Return the time string extracted from the row's "th" element."""
        return self.time