-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparse.py
129 lines (118 loc) · 4.42 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import re
from bs4 import BeautifulSoup
from utils import SourceUrlType, StickerType
# begin deprecated
def parse_page(content: bytes, source: SourceUrlType):
    """Dispatch a downloaded page to the parser registered for ``source``.

    Args:
        content: Raw page bytes, handed unchanged to the selected parser.
        source: Which kind of URL the page came from; selects the parser and
            decides whether the page is treated as an emoji pack.

    Returns:
        Whatever the selected parser returns: a
        ``(real_pack_id, title, id_list, sticker_type)`` tuple.

    Raises:
        ValueError: If ``source`` has no registered parser.
    """
    parsers = {
        SourceUrlType.YABE: parse_page_yabe,
        SourceUrlType.YABE_EMOJI: parse_page_yabe,
        SourceUrlType.LINE: parse_page_line,
        SourceUrlType.LINE_EMOJI: parse_page_line,
    }
    try:
        parser = parsers[source]
    except KeyError:
        # Same exception type as before, but now it says what was rejected.
        raise ValueError(f"Unsupported source type: {source!r}") from None

    # The *_EMOJI sources share a parser with their sticker counterparts and
    # differ only by this flag.
    emoji = source in (SourceUrlType.LINE_EMOJI, SourceUrlType.YABE_EMOJI)
    return parser(content, emoji)
def parse_page_yabe(content: bytes, emoji: bool):
    """Extract pack metadata from a page in the YABE layout.

    Args:
        content: Raw HTML of the page.
        emoji: True when the page describes an emoji pack, False for stickers.

    Returns:
        A ``(real_pack_id, title, id_list, sticker_type)`` tuple.
        ``real_pack_id`` is ``0`` unless ``emoji`` is set and a sticon URL is
        found, in which case it is the hex path segment captured from the
        first such URL. ``id_list`` holds the numeric ids (as strings) in the
        order the ``li.stickerSub`` items appear.

    Raises:
        Exception: If the page has no ``div.stickerData div.title`` element.
    """
    soup = BeautifulSoup(content, "html5lib")

    # Icons inside the stickerData box decide the sticker type.
    sound_icon = soup.select_one("div.stickerData div.talkIcon")
    motion_icon = soup.select_one("div.stickerData div.moveIcon")
    popup_icon = soup.select_one("div.stickerData div.PopUpIcon")

    if popup_icon:
        if sound_icon:
            sticker_type = StickerType.POPUP_AND_SOUND_STICKER
        else:
            sticker_type = StickerType.POPUP_STICKER
    elif sound_icon:
        if motion_icon:
            sticker_type = StickerType.ANIMATED_AND_SOUND_STICKER
        else:
            sticker_type = StickerType.STATIC_WITH_SOUND_STICKER
    elif motion_icon:
        if emoji:
            sticker_type = StickerType.ANIMATED_EMOJI
        else:
            sticker_type = StickerType.ANIMATED_STICKER
    elif emoji:
        sticker_type = StickerType.EMOJI
    else:
        sticker_type = StickerType.STATIC_STICKER

    title_tag = soup.select_one("div.stickerData div.title")
    if not title_tag:
        raise Exception("Error: Title not found")
    title = title_tag.text

    # Emoji and sticker image URLs use different path schemes.
    if emoji:
        id_regex = r"sticonshop/v1/sticon/[a-f0-9]+/iPhone/(\d+)(?:_animation)?.png"
    else:
        id_regex = r"stickershop/v1/sticker/(\d+)/\w+/sticker.png"
    sticker_id_pattern = re.compile(id_regex, flags=re.IGNORECASE)
    # Same URL shape as the emoji id pattern, but capturing the hex segment.
    real_pack_id_pattern = re.compile(
        r"sticonshop/v1/sticon/([a-f0-9]+)/iPhone/\d+(?:_animation)?.png",
        flags=re.IGNORECASE,
    )

    real_pack_id = 0
    id_list = []
    for item in soup.find_all("li", {"class": "stickerSub"}):
        markup = str(item)
        id_match = sticker_id_pattern.search(markup)
        if id_match:
            id_list.append(id_match.group(1))
        # Only the first hex segment found is kept as the pack id.
        if emoji and not real_pack_id:
            pack_match = real_pack_id_pattern.search(markup)
            if pack_match:
                real_pack_id = pack_match.group(1)

    return real_pack_id, title, id_list, sticker_type
def parse_page_line(content: bytes, emoji: bool):
    """Extract pack metadata from a page in the LINE layout.

    Args:
        content: Raw HTML of the page.
        emoji: True when the page describes an emoji pack, False for stickers.

    Returns:
        A ``(0, title, id_list, sticker_type)`` tuple. The first element is
        always ``0`` for this layout. ``id_list`` holds the unique numeric ids
        (as strings) in the order they first appear on the page.

    Raises:
        Exception: If neither a ``p.mdCMN38Item01Ttl`` nor any ``h3`` element
            is present to supply the title.
    """
    if emoji:
        sticker_type = StickerType.EMOJI
    else:
        sticker_type = StickerType.STATIC_STICKER

    soup = BeautifulSoup(content, "html5lib")

    # A badge <span> on the page identifies the sticker type.
    if soup.find("span", {"class": "MdIcoFlash_b"}):
        sticker_type = StickerType.POPUP_STICKER
    elif soup.find("span", {"class": "MdIcoFlashAni_b"}):
        sticker_type = StickerType.POPUP_AND_SOUND_STICKER
    elif soup.find("span", {"class": "MdIcoPlay_b"}):
        if emoji:
            sticker_type = StickerType.ANIMATED_EMOJI
        else:
            sticker_type = StickerType.ANIMATED_STICKER
    # NOTE(review): this branch previously re-tested "MdIcoFlash_b", which the
    # first branch already handles, so ANIMATED_AND_SOUND_STICKER could never
    # be selected. "MdIcoAni_b" is believed to be the animation+sound badge
    # class - confirm against a live store page.
    elif soup.find("span", {"class": "MdIcoAni_b"}):
        sticker_type = StickerType.ANIMATED_AND_SOUND_STICKER
    elif soup.find("span", {"class": "MdIcoSound_b"}):
        sticker_type = StickerType.STATIC_WITH_SOUND_STICKER

    title_tag = soup.find("p", {"class": "mdCMN38Item01Ttl"})
    fallback_title_tag = soup.find("h3")
    if title_tag:
        title = title_tag.text
    elif fallback_title_tag:
        print("Title not found, using the first h3 instead")
        title = fallback_title_tag.text
    else:
        # Dump the parsed document to help diagnose the unexpected layout.
        print(soup)
        raise Exception("Error: Title not found")

    # Emoji and sticker image URLs use different path schemes.
    if emoji:
        sticker_id_pattern = re.compile(
            r"sticonshop/v1/sticon/[a-f0-9]+/iPhone/(\d+)(?:_animation)?.png",
            flags=re.IGNORECASE,
        )
    else:
        sticker_id_pattern = re.compile(
            r"stickershop/v1/sticker/(\d+)/\w+/sticker.png", flags=re.IGNORECASE
        )

    id_list = []
    for span in soup.find_all("span", {"class": "mdCMN09Image"}):
        # The image URL is searched for in the inline style; spans without a
        # style attribute are skipped instead of raising KeyError.
        match = sticker_id_pattern.search(span.get("style") or "")
        if match:
            id_list.append(match.group(1))

    # De-duplicate while keeping first-seen order (set() ordering is
    # arbitrary, which made the result differ between runs).
    id_list = list(dict.fromkeys(id_list))

    # for line id, conversion is not needed, real_id returns 0
    return 0, title, id_list, sticker_type
# end deprecated