-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathexample.py
63 lines (55 loc) · 1.96 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# last edit date: 2016/10/19
# author: Forec
# LICENSE
# Copyright (c) 2015-2017, Forec <[email protected]>
# Permission to use, copy, modify, and/or distribute this code for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import os, sys
import jieba, codecs, math
import jieba.posseg as pseg
names = {}
relationships = {}
lineNames = []
# count names
jieba.load_userdict("dict.txt")
with codecs.open("to_train.txt", "r", "utf8") as f:
for line in f.readlines():
poss = pseg.cut(line)
lineNames.append([])
for w in poss:
if w.flag != "nr" or len(w.word) < 2:
continue
lineNames[-1].append(w.word)
if names.get(w.word) is None:
names[w.word] = 0
relationships[w.word] = {}
names[w.word] += 1
# explore relationships
for line in lineNames:
for name1 in line:
for name2 in line:
if name1 == name2:
continue
if relationships[name1].get(name2) is None:
relationships[name1][name2]= 1
else:
relationships[name1][name2] = relationships[name1][name2]+ 1
# output
with codecs.open("node.txt", "w", "gbk") as f:
f.write("Id Label Weight\r\n")
for name, times in names.items():
f.write(name + " " + name + " " + str(times) + "\r\n")
with codecs.open("edge.txt", "w", "gbk") as f:
f.write("Source Target Weight\r\n")
for name, edges in relationships.items():
for v, w in edges.items():
if w > 10:
f.write(name + " " + v + " " + str(w) + "\r\n")