Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

添加读取评论模块。 并更新test case #60

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ def answer_test(answer_url):
answer.to_txt()
# 把答案输出为markdown文件
answer.to_md()
#该回答下的所有评论
all_comments = answer.get_comments()


print question
# <zhihu.Question instance at 0x7f0b25d13f80>
Expand All @@ -101,6 +104,11 @@ def answer_test(answer_url):
print visit_times # 输出: 改答案所属问题被浏览次数


# 输出: 所有答主在该问题下的评论
for c in all_comments :
if c.get_answer_author_flag():
print c.get_content()

def user_test(user_url):
user = User(user_url)
# 获取用户ID
Expand Down Expand Up @@ -282,6 +290,7 @@ def main():
question_test(url)
answer_url = "http://www.zhihu.com/question/24269892/answer/29960616"
answer_test(answer_url)

user_url = "http://www.zhihu.com/people/jixin"
user_test(user_url)
collection_url = "http://www.zhihu.com/collection/36750683"
Expand All @@ -293,6 +302,7 @@ def main():
test()



# Script entry point: call main() only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

73 changes: 73 additions & 0 deletions zhihu.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ def get_visit_times(self):
return int(soup.find("meta", itemprop="visitsCount")["content"])



class User:
user_url = None
# session = None
Expand Down Expand Up @@ -1173,6 +1174,27 @@ def get_voters(self):
voter_id = voter_info.a["title"].encode("utf-8")
yield User(voter_url, voter_id)

def get_comments(self):
    """Yield a ``Comment`` for every comment posted under this answer.

    The answer's ``data-aid`` attribute is read from the parsed answer
    page (parsing it first via ``self.parser()`` if needed), then the
    ``AnswerCommentListV2`` endpoint is queried and every
    ``div.zm-item-comment`` element of the response is wrapped in a
    ``Comment``.

    This is a generator. It yields nothing when the answer element
    cannot be located or when the response contains no comments.
    """
    if self.soup is None:
        self.parser()

    answer_div = self.soup.find(
        "div", {"class": lambda x: x and "zm-item-answer" in x.split()})
    if answer_div is None:
        # Previously detected indirectly through the TypeError raised by
        # subscripting None; the diagnostic message is kept unchanged.
        print('type error in get comments')
        return

    request_url = 'http://www.zhihu.com/node/AnswerCommentListV2'
    # The endpoint expects a JSON object serialized into the "params"
    # query-string field.
    query = {"params": '{"answer_id":"%d"}' % int(answer_div["data-aid"])}
    r = requests.get(request_url, params=query)
    comment_soup = BeautifulSoup(r.content, "lxml")

    for comment in comment_soup.findAll("div", {"class": "zm-item-comment"}):
        # Keep the best-effort handling of malformed markup, but skip only
        # the offending comment instead of ending the whole iteration.
        try:
            item = Comment(comment["data-id"], comment)
        except TypeError:
            print('type error in get comments')
            continue
        yield item

class Collection:
url = None
Expand Down Expand Up @@ -1293,3 +1315,54 @@ def get_top_i_answers(self, n):
if j > n:
break
yield answer


class Comment:
    """A single comment parsed from the comment list of an answer.

    Attributes:
        comment_id: identifier passed in by the caller.
        soup: the parsed HTML element of this comment.
        author: ``User`` who wrote the comment, or ``None`` when the
            author could not be determined from the markup.
        content: comment text with whitespace-separated fragments joined
            by single spaces (empty when the content element is missing).
        question_author_flag: True when the header marks the commenter as
            the question asker.
        answer_author_flag: True when the header marks the commenter as
            the answer author.
    """
    comment_id = None
    soup = None
    # Defaults so the getters are safe even when parsing finds nothing.
    author = None
    content = u""

    def __init__(self, comment_id, soup):
        """Store the id and markup, then parse author, flags and content."""
        self.comment_id = comment_id
        self.soup = soup
        self.author = None
        self.content = u""
        self.question_author_flag = False
        self.answer_author_flag = False
        self.parser()

    def setFlag(self, input):
        """Set the role flags according to the role label text.

        ``input`` is the text that follows the author name in the comment
        header; ``None`` or empty text leaves both flags untouched.
        (The parameter name shadows the builtin but is kept for
        backward compatibility.)
        """
        if not input:
            return
        if u"提问者" in input:
            self.question_author_flag = True
        if u"作者" in input:
            self.answer_author_flag = True

    def _parse_header(self, header):
        """Extract the author and role flags from the comment header div."""
        children = header.contents
        first = children[0] if children else None
        text_type = type(u"")

        # Anonymous commenters appear as a bare text node, not as a link.
        # Only call strip() on real text nodes: on a tag, attribute access
        # would be treated as a child lookup rather than a string method.
        if isinstance(first, text_type) and first.strip() == u"匿名用户":
            self.author = User(None, u"匿名用户")
            if len(children) > 1:
                self.setFlag(children[1].string)
            return

        link = header.find("a", {"class": "zg-link"})
        if link is None:
            return
        self.author = User(link['href'], link.string)
        role = link.next_sibling
        if role is not None:
            self.setFlag(role.string)

    def parser(self):
        """Populate author, role flags and content from ``self.soup``."""
        soup = self.soup
        header = soup.find("div", {"class": "zm-comment-hd"})
        if header is not None:
            self._parse_header(header)

        body = soup.find("div", {"class": "zm-comment-content"})
        if body is not None:
            self.content = u" ".join(body.stripped_strings)

    def get_author(self):
        """Return the comment author, or ``None`` if it is unknown."""
        return self.author

    def get_content(self):
        """Return the comment text.

        On Windows the text is returned GBK-encoded; elsewhere it is
        returned as stored.
        """
        content = self.content
        if platform.system() == 'Windows':
            # The parsed content is already text; only byte strings need
            # decoding before being re-encoded as GBK. Calling decode()
            # on text raised an error for non-ASCII comments.
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            return content.encode('gbk')
        return content

    def get_question_author_flag(self):
        """Return True if the commenter is the question asker."""
        return self.question_author_flag

    def get_answer_author_flag(self):
        """Return True if the commenter is the answer author."""
        return self.answer_author_flag