-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathworker1.py
134 lines (119 loc) · 4.09 KB
/
worker1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import csv
import datetime
import glob
import json
import os
import shutil
import subprocess
import sys
import time

from bs4 import BeautifulSoup
# The GitHub token is the single required command-line argument.
try:
    token = sys.argv[1]
except IndexError:
    # os._exit() skips interpreter cleanup, including flushing stdout, so the
    # message is flushed explicitly; otherwise it is lost when stdout is a pipe.
    print("not all parameters", flush=True)
    os._exit(0)

# Authenticate the gh CLI. The token is passed on stdin instead of being
# spliced into a shell command line, so it is never interpreted by a shell.
subprocess.run(
    ['gh', 'auth', 'login', '-h', 'github.com', '--with-token'],
    input=(token + '\n').encode(),
)

# Today's result files, plus the "now" files that carry state between runs.
t = datetime.datetime.today()
repoCSV = './results/repo_' + t.strftime('%Y%m%d') + '_h.csv'
depCSV = './results/dep_' + t.strftime('%Y%m%d') + '_h.csv'
repoCSVup = './results/repo_now_h.csv'
depCSVup = './results/dep_now_h.csv'

# Seed today's files from the previous state, only when both files exist.
if os.path.exists(repoCSVup) and os.path.exists(depCSVup):
    shutil.copy(repoCSVup, repoCSV)
    shutil.copy(depCSVup, depCSV)

# Collect the clone URLs of Java repositories: up to 20 search pages of 100
# results each, keeping the first occurrence of every URL in search order.
listURL = list()
_seen_urls = set()
for _page in range(1, 21):
    time.sleep(1)  # pause between consecutive search requests
    _result = subprocess.run(
        [
            'gh', 'api',
            '-H', 'Accept: application/vnd.github+json',
            '-H', 'X-GitHub-Api-Version: 2022-11-28',
            '/search/repositories?q=language:java&per_page=100&page=' + str(_page),
        ],
        stdout=subprocess.PIPE,
    )
    try:
        data = json.loads(_result.stdout.decode('utf-8'))
    except ValueError:
        # Empty or non-JSON output (e.g. gh failed): stop paging, same as for
        # a reply that carries no 'items'.
        break
    _items = data.get('items') if isinstance(data, dict) else None
    if not _items:
        break
    for _item in _items:
        _url = _item['clone_url']
        if _url not in _seen_urls:
            _seen_urls.add(_url)
            listURL.append(_url)
def depTocsv(fileNamePOM, filenameCSV):
    """Register the dependencies of one POM file in the dependency CSV.

    The CSV holds one row per distinct (group, artifact) pair; the row's id is
    a single-bit value written as hex (``hex(2**n)`` for the n-th data row).
    Pairs found in the POM that are not yet listed are appended with the next
    free bit, and the CSV is rewritten.

    Args:
        fileNamePOM: path of the ``pom.xml`` to scan.
        filenameCSV: path of the dependency CSV (created if missing).

    Returns:
        The bitwise OR of the ids of all dependencies found in the POM, as an
        int. Returns 0, leaving the CSV untouched, when the POM cannot be read
        or parsed.
    """
    # Column names as stored on disk; the spelling 'aritfact' is kept so that
    # existing CSV files stay consistent.
    header = ['id', 'group', 'aritfact']

    rows = []
    if os.path.exists(filenameCSV):
        with open(filenameCSV, newline='') as f:
            rows = list(csv.reader(f))
    if not rows:
        # Missing or empty file: start from the header so that bit numbering,
        # which is derived from the row count, begins at 2**0.
        rows = [header]

    # Best effort: an unreadable or malformed POM must not abort the caller,
    # but KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        with open(fileNamePOM, 'r') as f:
            pom_text = f.read()
        dependencies = BeautifulSoup(pom_text, "xml").find_all('dependency')
    except Exception:
        return 0

    # Index the data rows by exact (group, artifact). Matching on exact columns
    # prevents a value that merely appears somewhere in another row (or in the
    # header) from being mistaken for this dependency. The first row wins when
    # a pair is listed more than once.
    known = {}
    for row in rows[1:]:
        if len(row) >= 3:
            known.setdefault((row[1], row[2]), row[0])

    mask = 0
    for dep in dependencies:
        dep_xml = str(dep)
        if '<groupId>' not in dep_xml or '<artifactId>' not in dep_xml:
            continue
        group = dep_xml.split('<groupId>')[1].split('</groupId>')[0]
        artifact = dep_xml.split('<artifactId>')[1].split('</artifactId>')[0]
        key = (group, artifact)
        if key not in known:
            # Next free bit: one per data row, the header row excluded.
            known[key] = hex(1 << (len(rows) - 1))
            rows.append([known[key], group, artifact])
        mask |= int(known[key], 16)

    with open(filenameCSV, 'w', newline='') as outcsv:
        writer = csv.writer(outcsv, lineterminator='\n')
        for row in rows:
            if len(row) != 3:
                print(row)  # report malformed rows
            # Slicing keeps the first three fields and cannot raise on a short
            # row, so the already-truncated file is always written completely.
            writer.writerow(row[:3])
    return mask
# Per-repository results: one row per repository with its build-file type and
# the OR of the dependency ids returned by depTocsv(), written as hex.
_repo_rows = []
if os.path.exists(repoCSV):
    with open(repoCSV, newline='') as f:
        _repo_rows = list(csv.reader(f))
if not _repo_rows:
    _repo_rows = [['user', 'repo', 'type', 'orcount']]

# Repositories already recorded, keyed on the exact (user, repo) columns so
# that a value appearing in some other column cannot cause a false match.
_done = {(row[0], row[1]) for row in _repo_rows[1:] if len(row) >= 2}

for _clone_url in listURL:
    # https://github.com/<user>/<repo>.git -> path components 3 and 4
    _parts = _clone_url.split('/')
    _user, _repo = _parts[3], _parts[4]
    if (_user, _repo) in _done:
        continue

    # Fresh checkout. Only the working tree is scanned, so a shallow clone
    # yields the same files without downloading the full history.
    shutil.rmtree('/tmp/works', ignore_errors=True)
    subprocess.run(['git', 'clone', '--depth', '1', _clone_url, '/tmp/works'])

    # pom.xml takes precedence; build.gradle is only looked for when no
    # pom.xml exists anywhere in the tree.
    _or_count = 0
    _pom_files = glob.glob("/tmp/works/**/pom.xml", recursive=True)
    for _pom in _pom_files:
        _or_count |= int(depTocsv(_pom, depCSV))
    if _pom_files:
        _build_type = 'pom.xml'
    elif glob.glob("/tmp/works/**/build.gradle", recursive=True):
        _build_type = 'build.gradle'
    else:
        _build_type = 'unknown'

    _repo_rows.append([_user, _repo, _build_type, hex(_or_count)])
    _done.add((_user, _repo))

    # Rewrite the CSV after every repository so progress survives a crash.
    with open(repoCSV, 'w', newline='') as outcsv:
        writer = csv.writer(outcsv, lineterminator='\n')
        for row in _repo_rows:
            # Slicing cannot raise on a short row, so the file is never left
            # half-written.
            writer.writerow(row[:4])

# Publishing step, currently disabled:
# os.system('git config --global http.postBuffer 524288000')
# os.system('git config --local user.email \"[email protected]\"')
# os.system('git config --local user.name \"klssstis\"')
# os.system('git remote -v')
# os.system('git add --all')
# os.system('git commit -m \"local result\"')
# os.system('git push "https://klssstis:'+token+'@github.com/klssstis/dependsDB.git" HEAD:main --force')

# Publish today's files as the new "now" state for the next run.
for _src, _dst in ((repoCSV, repoCSVup), (depCSV, depCSVup)):
    if os.path.exists(_src):
        shutil.copy(_src, _dst)