-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
80 lines (66 loc) · 2.7 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
from __future__ import print_function
import requests
from bs4 import BeautifulSoup
from random import randint
from time import sleep
def printCollectionContent(openings, level, levelIdentation, identation, key, value):
    """Print a single ``[key] => value`` entry of a collection.

    The ``[key] =>`` label is printed first, prefixed by ``levelIdentation``
    and ``identation``. If the type name of ``value`` is one of the keys of
    ``openings`` the value is printed recursively through ``printCollection``
    at ``level + 1``; otherwise it is formatted as text and printed on the
    same line as the label.

    Args:
        openings: Mapping whose keys are the type names treated as nested
            collections.
        level: Nesting depth of the collection that owns this entry.
        levelIdentation: Indentation prefix of the owning collection.
        identation: Extra indentation applied to each entry.
        key: Index or dictionary key of the entry.
        value: The entry's value.
    """
    print(u"{}{}[{}] =>".format(levelIdentation, identation, key), end=" ")
    if type(value).__name__ in openings:
        printCollection(value, level + 1)
        return
    text = u"{}".format(value)
    # On Python 2 the formatted text is ``unicode`` (not ``str``) and is
    # encoded to UTF-8 bytes before printing, as before. On Python 3 the text
    # is already ``str``; encoding it would make print() show ``b'...'``.
    if not isinstance(text, str):
        text = text.encode('utf-8')
    print(text)
def printCollection(collection, level = 0):
    """Pretty-print a list, tuple or dict to standard output.

    Prints the capitalized type name, the opening bracket, one line per entry
    (delegated to ``printCollectionContent``, which recurses into nested
    collections) and finally the closing bracket. Brackets are indented by
    ``level + 1`` indentation units when ``level`` is greater than zero, and
    not at all at the top level.

    Args:
        collection: The list, tuple or dict to print. Any other type raises
            ``KeyError`` after its type name has been printed.
        level: Current nesting depth; 0 for the outermost collection.
    """
    unit = " "
    openers = {'list': "[", 'tuple': "(", 'dict': "{"}
    closers = {'list': "]", 'tuple': ")", 'dict': "}"}

    pad = unit * (level + 1) if level > 0 else ""
    kind = type(collection).__name__

    print(kind.capitalize())
    print("{}{}".format(pad, openers[kind]))

    # The lookup above already rejected anything that is not a list, tuple
    # or dict, so a non-dict here is always an indexable sequence.
    entries = collection.items() if kind == "dict" else enumerate(collection)
    for label, item in entries:
        printCollectionContent(openers, level, pad, unit, label, item)

    print("{}{}".format(pad, closers[kind]))
def lreplace(oldText, newText, subject):
    """Replace the last occurrence of ``oldText`` in ``subject`` with ``newText``.

    Args:
        oldText: Substring to look for.
        newText: Text inserted in place of the last occurrence of ``oldText``.
        subject: String to search in.

    Returns:
        ``subject`` with its last occurrence of ``oldText`` replaced, or
        ``subject`` unchanged when ``oldText`` does not occur in it.
    """
    lastSubstringIndex = subject.rfind(oldText)
    # rfind() returns -1 when there is no match; slicing with -1 would
    # silently corrupt the string, so return it untouched instead.
    if lastSubstringIndex == -1:
        return subject
    head = subject[:lastSubstringIndex]
    tail = subject[lastSubstringIndex + len(oldText):]
    return head + newText + tail
def getKeywordFromString(text):
    """Turn a display string into a lowercase, hyphen-separated keyword.

    Spaces and apostrophes become hyphens, the text is lowercased and
    parentheses are dropped. A few keywords are then mapped to an
    alternative spelling through a fixed alias table.

    Args:
        text: The string to convert.

    Returns:
        The alias for the generated keyword if one is defined, otherwise the
        generated keyword itself.
    """
    aliases = {
        'alcaraz-carlos': 'alcaraz-garfia-carlos',
        'kwon-soonwoo': 'kwon-soon-woo',
        'svitolina-elina': 'monfils-elina',
        'mayar-sherif-ahmed-abdul-aziz': 'sherif-mayar',
        'meligeni-alves-felipe': 'meligeni-rodrigues-alves-felipe',
    }

    slug = text.replace(" ", "-").lower()
    # Applied after lowercasing, in this order: apostrophes turn into
    # hyphens, parentheses are removed.
    for needle, substitute in (("'", "-"), ("(", ""), (")", "")):
        slug = slug.replace(needle, substitute)

    return aliases.get(slug, slug)
def getStringFromKeyword(text):
    """Convert a hyphen-separated keyword into a title-cased string.

    Every hyphen is turned into a space and the result is title-cased.

    Args:
        text: The hyphen-separated keyword.

    Returns:
        The title-cased, space-separated string.
    """
    words = text.split("-")
    return " ".join(words).title()
def getSoup(url, headersSoup=None, timeout=30):
    """Download ``url`` and return its content parsed with BeautifulSoup.

    The request is attempted up to three times, pausing a random 3-5 seconds
    between attempts. If every attempt fails, an error message is printed and
    the program is terminated.

    Args:
        url: Address of the page to download.
        headersSoup: Optional dict of HTTP headers. When omitted, a default
            ``User-Agent`` header is sent.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot block forever.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``lxml`` parser.

    Raises:
        SystemExit: With exit status 1 when all attempts failed.
    """
    if headersSoup is None:
        headersSoup = {"User-Agent" : "BreakSystem Scraper/1.0"}

    maxAttempts = 3
    for attempt in range(1, maxAttempts + 1):
        try:
            response = requests.get(url, headers=headersSoup, timeout=timeout)
            break
        # Deliberately broad: any failure while fetching is treated as
        # retryable, matching the best-effort behaviour of this helper.
        except Exception as e:
            if attempt == maxAttempts:
                print("[ERROR] Connection error for the website {}: {}".format(url, e))
                # Raise SystemExit directly instead of calling the
                # site-provided exit() helper, and report failure with a
                # non-zero status.
                raise SystemExit(1)
            # Only wait when another attempt will follow.
            sleep(randint(3, 5))

    return BeautifulSoup(response.content, "lxml")