-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfetch.py
78 lines (69 loc) · 2.51 KB
/
fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from urllib.parse import unquote
from setup_selenium import driver, goto, xpath, css_selector
try:
from secret import *
except:
import os
# from time import sleep
from hash_string import hash
from time import sleep
#import tele_bot
def getSender():
    """Return the text of the first ``<strong>`` tag inside the story view.

    Works on whatever page the shared ``driver`` currently has loaded:
    locates the element with id ``m_story_permalink_view``, then the first
    ``<strong>`` descendant, and returns its ``innerText`` (presumably the
    post author's name -- confirm against the page markup).
    """
    story_view = driver.find_element("id", "m_story_permalink_view")
    name_tag = story_view.find_element("tag name", "strong")
    return name_tag.get_attribute("innerText")
def getPost():
    """Return the body text of the post on the currently loaded page.

    Reads the ``innerText`` of the story container, removes every
    'See translation' label, and drops everything that precedes the first
    blank line (double newline). The double newline itself is kept, and
    when no blank line exists the whole text is returned.

    Returns:
        The extracted text, or ``''`` when the container cannot be located
        or read (best-effort: lookup errors are swallowed on purpose).
    """
    try:
        container = xpath("//*[@id='m_story_permalink_view']/div[1]/div/div")
        text = container.get_attribute("innerText").replace('See translation', '')
    except Exception:
        # Deliberately broad (missing element, missing attribute, ...), but
        # no longer a bare ``except`` so Ctrl-C / SystemExit still propagate.
        return ''
    # find() returns -1 when there is no blank line; clamp to 0 to keep it all.
    start = max(0, text.find('\n\n'))
    return text[start:]
def _unwrap_redirect(href):
    """Return the URL wrapped in a ``...php?u=<encoded-url>`` redirect link."""
    # Anchor on the 'php?u=' marker instead of the first '=' in the URL so an
    # earlier '=' cannot shift the cut point.
    target = unquote(href.partition('php?u=')[2])
    marker = target.find('fbclid')
    if marker != -1:
        # Also drop the '?' or '&' separator that precedes the parameter.
        target = target[:max(marker - 1, 0)]
    return target


def getExtras():
    """Collect attachment text, an outbound link and images for the post.

    Operates on the page currently loaded in the shared ``driver`` and then
    navigates away from it: every photo link found is opened on the ``web``
    host to read the image ``src``.

    Returns:
        A two-item list ``[images, extra]`` where ``images`` is a list of
        image ``src`` values and ``extra`` is the attachment text followed,
        when a redirect link was found, by a newline and the unwrapped URL.
    """
    try:
        attachment = css_selector("div[data-ft='{\"tn\":\"H\"}']")
        extra_text = attachment.get_attribute("innerText") or ''
    except Exception:
        # The attachment block is optional; treat any lookup failure as
        # "no extra text" without trapping KeyboardInterrupt / SystemExit.
        extra_text = ''

    anchors = driver.find_elements(
        "xpath", "//div[@id='m_story_permalink_view']/div[1]//a[@href]"
    )
    extra_link = ''
    photo_links = []
    for anchor in anchors:
        href = anchor.get_attribute('href')
        if not href:
            continue
        if 'php?u=' in href:
            # NOTE(review): only the last redirect link is kept; earlier
            # ones are overwritten.
            extra_link = '\n' + _unwrap_redirect(href)
        elif href.startswith('https://mbasic.facebook.com/photo.php?'):
            photo_links.append(href)

    final_images = []
    for photo_link in photo_links:
        driver.get(photo_link.replace('mbasic', 'web'))
        # Poll for the image element: up to 10 attempts, one second apart.
        for _ in range(10):
            try:
                src = xpath(
                    "//img[@data-visualcompletion='media-vc-image']"
                ).get_attribute('src')
            except Exception:
                sleep(1)
            else:
                final_images.append(src)
                break
    return [final_images, extra_text + extra_link]
def fetch(permalink, saved_posts):
    """Scrape one group post and return it, unless it is already saved unchanged.

    Args:
        permalink: Identifier of the post; it is appended to the group's
            ``/permalink/`` URL and used as the key into ``saved_posts``.
        saved_posts: Mapping of permalink to the stringified hash of the
            post text recorded previously.

    Returns:
        ``None`` when ``saved_posts`` already holds the same hash for this
        permalink. Otherwise a dict with the keys ``link``, ``text``,
        ``hash``, ``sender``, ``extra`` and ``image``; ``extra`` ends with
        '#updated_post' when the permalink was saved before with a
        different hash.

    Raises:
        KeyError: If the ``group_link`` environment variable is not set.
    """
    # Local import: at module level ``os`` is only imported on the fallback
    # path of the ``secret`` import, so it is not guaranteed to be bound.
    import os

    goto(
        'mbasic.facebook.com/groups/' + os.environ['group_link']
        + '/permalink/' + str(permalink)
    )
    text = getPost()
    digest = str(hash(text))

    previously_saved = permalink in saved_posts
    if previously_saved and saved_posts[permalink] == digest:
        return None

    sender = getSender() + ' [Jump To Post ↗]'
    images, extra = getExtras()
    if previously_saved:
        extra += '\n#updated_post'

    return {
        'link': permalink,
        'text': text,
        'hash': digest,
        'sender': sender,
        'extra': extra,
        'image': images,
    }