-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacademic_network.py
73 lines (60 loc) · 2.51 KB
/
academic_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json
import requests
from bs4 import BeautifulSoup
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
# Module-level memo for get_dblp_coauthors, keyed by DBLP profile URL.
cache = {}
def get_dblp_coauthors(url):
    """Scrape co-author publication counts from a DBLP author page.

    Only the "2020s" decade section of the page is examined (the
    ``header#the2020s`` element and its following publication list).

    Parameters:
        url: DBLP author-page URL.

    Returns:
        A mapping of co-author name -> number of shared multi-author
        publications. Returns an empty dict on any fetch failure so
        callers can always iterate ``.items()`` (the original returned
        a list here, which crashed the caller). Results are memoized
        in the module-level ``cache``.
    """
    if url in cache:
        return cache[url]
    try:
        # Timeout guards against hanging forever on an unresponsive host.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data from {url}: {exc}")
        return {}
    if response.status_code != 200:
        print(f"Failed to fetch data from {url}, status code: {response.status_code}")
        return {}
    soup = BeautifulSoup(response.content, 'html.parser')
    coauthors = defaultdict(int)
    section = soup.find('header', id='the2020s')
    if section:
        publications = section.find_next('ul', class_='publ-list')
        if publications:
            for pub in publications.find_all('li', class_='entry'):
                authors = pub.select('span[itemprop="author"] span[itemprop="name"]')
                author_names = [author.text for author in authors]
                # Single-author entries carry no collaboration signal.
                if len(author_names) > 1:
                    # NOTE(review): the original compared each name against
                    # the URL (`author != url`), which is always true, so the
                    # page owner was never excluded. Excluding them properly
                    # would require knowing the owner's display name; the
                    # dead check is removed without changing behavior.
                    for author in author_names:
                        coauthors[author] += 1
    cache[url] = coauthors
    return coauthors
def get_all_coauthors(professor_urls):
    """Aggregate collaboration counts across several professors.

    Parameters:
        professor_urls: mapping of professor name -> DBLP page URL.

    Returns:
        A mapping of (professor_name, coauthor_name) pairs to the total
        number of shared publications found on the professor's page.
    """
    totals = defaultdict(int)
    for name, page_url in professor_urls.items():
        found = get_dblp_coauthors(page_url)
        print(f"Coauthors for {name}: {found}")
        for partner, shared in found.items():
            totals[(name, partner)] += shared
    return totals
def build_network_graph(collaboration_count, min_weight=1):
    """Build an undirected collaboration graph from pair counts.

    Parameters:
        collaboration_count: mapping of (professor, coauthor) name pairs
            to publication counts.
        min_weight: pairs with a count below this threshold are dropped.

    Returns:
        A networkx ``Graph`` whose edges carry a ``weight`` attribute.
    """
    graph = nx.Graph()
    for pair, weight in collaboration_count.items():
        if weight < min_weight:
            continue
        left, right = pair
        graph.add_edge(left, right, weight=weight)
    print(f"Nodes: {graph.nodes()}")
    print(f"Edges: {graph.edges(data=True)}")
    return graph
def export_network_data(G, filename):
data = {
'nodes': [{'data': {'id': node, 'label': node}} for node in G.nodes()],
'edges': [{'data': {'source': u, 'target': v, 'weight': d['weight']}} for u, v, d in G.edges(data=True)]
}
with open(filename, 'w') as f:
json.dump(data, f, indent=2)
if __name__ == "__main__":
    # Map of professor display name -> DBLP profile URL.
    # Populate this before running; it is intentionally left empty here.
    professor_urls = {
    }
    # (Removed `major_professors = list(professor_urls.keys())` — it was
    # computed but never used anywhere in the script.)
    collaboration_count = get_all_coauthors(professor_urls)
    # min_weight=1 keeps every observed collaboration; raise it to prune
    # weak edges from the exported graph.
    G = build_network_graph(collaboration_count, min_weight=1)
    export_network_data(G, 'network_data.json')