-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathtweet samp.py
67 lines (55 loc) · 3.16 KB
/
tweet samp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 13 12:02:21 2019
@author: Gopi
"""
import os

import pandas as pd
import tweepy as twt
# Twitter API credentials.
# SECURITY: secrets should not live in source control. Each value is taken
# from the environment when the corresponding variable is set; the literals
# are kept only as a backward-compatible fallback and should be revoked and
# removed once the environment variables are in place.
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY", "Kq4mCtnOSPiNwA9ArvYq03DE7"
)
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "aWBfVbrJWppmEy3mAbrjUHa6Y8AKU6qkCBZwA6ZpAO8BEFaoC2",
)
access_key = os.environ.get(
    "TWITTER_ACCESS_KEY", "529590041-eZXHHkluorWkdRZRWiVYW3GVBuvr3VXt84cZcDYA"
)
access_secret = os.environ.get(
    "TWITTER_ACCESS_SECRET", "rqlG8jzmKTPU3bZoCwgRnOUoD5UYOx8KDjhoXySPrR3mI"
)

# Module-level list that get_all_tweets() fills with the downloaded tweet
# objects.
alltweets = []
# Column order of the exported table; must stay in step with _tweet_row().
_TWEET_COLUMNS = [
    "time",
    "hashtags",
    "user_mentions",
    "favorite_count",
    "geo",
    "id_str",
    "lang",
    "place",
    "retweet_count",
    "retweeted",
    "source",
    "text",
    "location",
    "name",
    "time_zone",
    "utc_offset",
]


def _tweet_row(tweet):
    """Return one tweet's exported fields as strings, in _TWEET_COLUMNS order."""
    user = tweet._json["user"]
    fields = [
        tweet.created_at,
        tweet.entities["hashtags"],
        tweet.entities["user_mentions"],
        tweet.favorite_count,
        tweet.geo,
        tweet.id_str,
        tweet.lang,
        tweet.place,
        tweet.retweet_count,
        tweet.retweeted,
        tweet.source,
        tweet.text,
        user["location"],
        user["name"],
        user["time_zone"],
        user["utc_offset"],
    ]
    # Every cell is stored as its string form, as the CSV export expects.
    return [str(value) for value in fields]


def get_all_tweets(screen_name):
    """Download a user's timeline, save it as CSV and return it as a DataFrame.

    The timeline is requested in pages of up to 200 tweets, walking backwards
    with ``max_id`` until a request returns no tweets. The result is written
    to ``<screen_name>_tweets.csv`` in the current working directory.

    Args:
        screen_name: Twitter handle whose timeline is fetched.

    Returns:
        A pandas DataFrame with one row per tweet and the columns listed in
        ``_TWEET_COLUMNS``; every value is stored as a string. The frame is
        empty (columns only) when the account has no tweets.

    Side effects:
        Prints a progress line after each follow-up request, writes the CSV
        file, and replaces the contents of the module-level ``alltweets``
        list with the tweets fetched by this call.
    """
    # The module is imported as ``twt``; the bare name ``tweepy`` is undefined.
    auth = twt.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = twt.API(auth)

    # Collect into a local list so repeated calls never mix accounts.
    collected = []
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)
    collected.extend(new_tweets)

    # An empty first page skips the loop, so collected[-1] is always valid.
    while new_tweets:
        # Ask only for tweets strictly older than the oldest one seen so far.
        oldest = collected[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest
        )
        collected.extend(new_tweets)
        print("...%s tweets downloaded so far" % (len(collected)))

    rows = [_tweet_row(tweet) for tweet in collected]
    tweets_df = pd.DataFrame(rows, columns=_TWEET_COLUMNS)
    tweets_df.to_csv(screen_name + "_tweets.csv")

    # Keep the shared list available to existing readers, but holding only
    # this call's tweets (slice assignment mutates it in place).
    alltweets[:] = collected
    return tweets_df
# Run the sample download only when the file is executed as a script, so that
# importing the module does not contact the API or write a CSV file.
if __name__ == "__main__":
    I_srk = get_all_tweets("iamsrk")
    # Another account that can be fetched the same way:
    # cadd_centre_tweets = get_all_tweets("DreamZoneSchool")