forked from wewanna/appropriate-filetering
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathcrawler.py
41 lines (33 loc) · 965 Bytes
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from selenium import webdriver
import argparse
import csv
from time import sleep
# Command-line interface: three required positional arguments.
parser = argparse.ArgumentParser()
# type=int turns non-numeric input into a clean usage error instead of a
# ValueError traceback from a later int() call.
parser.add_argument('N', type=int,
                    help='number of posts to crawl')
parser.add_argument('startpage', type=int,
                    help='post number to start from; the crawl counts down from it')
parser.add_argument('output',
                    help='path of the CSV file that receives the scraped comments')
args = parser.parse_args()

N = args.N
start = args.startpage
output_filename = args.output
print('N = %d, start=%d, output=%s' % (N, start, output_filename))

# Chrome session driven through the chromedriver binary in the current directory.
driver = webdriver.Chrome('./chromedriver')
# URL prefix of a single post; the post number is appended to it.
base_url = 'https://gall.dcinside.com/board/view/?id=game1_new&no='

# newline='' is required by the csv module: without it, embedded newlines in a
# comment are not round-tripped correctly and Windows gets an extra blank line
# after every row.  An explicit UTF-8 encoding keeps the crawl from aborting
# with UnicodeEncodeError when a comment contains characters the platform's
# default codec cannot represent.
output_file = open(output_filename, 'w', newline='', encoding='utf-8')
wr = csv.writer(output_file)
def read(url):
    """Load one page and append its comments to the CSV output.

    Navigates the module-level ``driver`` to *url*, collects every element
    matching the CSS selector ``p.usertxt.ub-word``, echoes each element's
    text to stdout and writes it through the module-level ``wr`` as the row
    ``[0, text]`` (the first column is always the constant 0).

    The loop stops once more than 7000 rows have been written for the page,
    i.e. right after the 7001st row.
    NOTE(review): confirm whether a cap of exactly 7000 rows was intended.

    Returns None.
    """
    driver.get(url)
    print('get %s' % url)
    elements = driver.find_elements_by_css_selector('p.usertxt.ub-word')
    # `written` is the number of rows emitted so far, including this one.
    for written, element in enumerate(elements, start=1):
        text = element.text
        print(text)
        wr.writerow([0, text])
        if written > 7000:
            break
# Crawl N posts, walking the post number downward from `start`.
try:
    for i in range(N):
        read(base_url + str(start - i))
        # Fixed 2-second pause after each page load
        # (presumably to avoid hammering the site; confirm).
        sleep(2)
finally:
    # Runs even when a page load or a write raises, so rows already buffered
    # are flushed to disk and the browser session is always shut down.
    try:
        output_file.close()
    finally:
        # quit() ends the WebDriver session; without it the Chrome and
        # chromedriver processes can outlive the script.
        driver.quit()