-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitter_get_users_from_list.py
100 lines (77 loc) · 2.93 KB
/
twitter_get_users_from_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import sys
from datetime import datetime
from time import sleep
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy
from twitter import Api, TwitterError
# --- PostgreSQL connection settings --------------------------------------
DB_HOST = "localhost"
DB_USER = "postgres"
DB_PASS = "_"
DB_DB = "twitter"  # name of the database the collected accounts are written to

# --- Twitter API credentials ---------------------------------------------
TWITTER_CONSUMER_API_KEY = "_"
TWITTER_CONSUMER_API_SECRET_KEY = "_"
TWITTER_ACCESS_TOKEN = "_-_"
TWITTER_ACCESS_TOKEN_SECRET = "_"

# Seconds to wait after a rate-limit error before retrying (930 s).
# NOTE(review): presumably a 15-minute rate-limit window plus a 30 s margin.
TWITTER_DELAY_ON_RATE_LIMIT = 15 * 60 + 30
# SQLAlchemy engine for the PostgreSQL database the accounts are stored in.
# The user name and password are percent-encoded before being placed in the
# URL: characters such as '@', ':', '/' or '%' would otherwise be read as URL
# syntax and produce a wrong or unparsable connection string.
engine = sqlalchemy.create_engine(
    "postgresql://{user}:{password}@{host}/{database}".format(
        user=quote_plus(DB_USER),
        password=quote_plus(DB_PASS),
        host=DB_HOST,
        database=DB_DB,
    )
)
# Column layout of the in-memory accounts table: one row per
# (account, list) pair collected from the API.
account_activity = [
    'id',
    'screen_name',
    'list_slug',
    'created_at',
    'last_activity',
]

# Starts out empty; rows are added while the lists are walked.
df_accounts = pd.DataFrame(columns=account_activity)
# Twitter API client authenticated with the credentials configured above.
# Keyword arguments spell out which credential goes into which slot.
api = Api(
    consumer_key=TWITTER_CONSUMER_API_KEY,
    consumer_secret=TWITTER_CONSUMER_API_SECRET_KEY,
    access_token_key=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)
# Verify the credentials and obtain the authenticated account, whose id is
# used to look up its lists.
try:
    user_data = api.VerifyCredentials()
except TwitterError as e:
    # API errors are expected as a list of {'message': ..., 'code': ...}
    # dicts; any other payload shape simply has no code to inspect.
    details = e.message
    first = details[0] if isinstance(details, list) and details else None
    code = first.get('code') if isinstance(first, dict) else None

    if code != 88:
        # Only code 88 (rate limit) can be fixed by waiting. Code 32
        # (authentication failure) and every other error are fatal: exit
        # with a message and a non-zero status instead of reaching
        # `user_data.id` below with `user_data` never assigned.
        sys.exit('ERROR: Twitter' + str(e))

    print(first.get('message'), code)
    # Rate limited: wait out the delay, showing the elapsed seconds on a
    # single console line, then try once more.
    for x in range(0, TWITTER_DELAY_ON_RATE_LIMIT):
        print(x, end='\r')
        sleep(1)
    try:
        user_data = api.VerifyCredentials()
    except TwitterError as retry_error:
        # The retry failed as well; give up rather than crash with a traceback.
        sys.exit('ERROR: Twitter' + str(retry_error))

user_id = user_data.id
# Ask the API for the lists of the authenticated user. A Twitter error here
# is fatal: the script stops and reports the error text.
try:
    lists = api.GetLists(user_id)
except TwitterError as err:
    failure = 'ERROR: Twitter' + str(err)
    sys.exit(failure)
# Layout of the timestamp strings parsed below; the UTC offset is matched
# literally as '+0000'.
TWITTER_DATE_FORMAT = '%a %b %d %H:%M:%S +0000 %Y'

# Collect one row per (list, member) pair. Rows are gathered in a plain list
# and turned into a DataFrame once at the end: growing a frame row by row
# with `.loc[len(df)] = ...` copies the data on every insert, which makes
# the loop quadratic in the number of members.
rows = []
for lista in lists:
    list_items = api.GetListMembers(slug=lista.slug, list_id=lista.id)
    for y in list_items:
        created_at = datetime.strptime(y.created_at, TWITTER_DATE_FORMAT)
        if y.status is None:
            # No status attached to the member: use the account creation
            # time as the last known activity.
            last_activity = created_at
        else:
            last_activity = datetime.strptime(y.status.created_at,
                                              TWITTER_DATE_FORMAT)
        rows.append([y.id,
                     y.screen_name,
                     lista.slug,
                     created_at,
                     last_activity])

# Same columns, in the same order, as the `account_activity` layout.
df_accounts = pd.DataFrame(rows, columns=account_activity)
# Normalise the collected rows and append them to the 'accounts' table.

# pd.to_numeric / pd.to_datetime return a NEW Series and leave their input
# untouched, so the result has to be assigned back for the conversion to
# take effect. Unparsable values become NaN / NaT (errors='coerce').
df_accounts['id'] = pd.to_numeric(df_accounts['id'],
                                  errors='coerce')
df_accounts.set_index('id',
                      inplace=True)

# NOTE(review): keep=False removes EVERY copy of a duplicated row, not just
# the extra ones; confirm this is intended rather than keep='first'.
# The comparison covers the remaining columns only, not the 'id' index.
df_accounts.drop_duplicates(keep=False,
                            inplace=True)
df_accounts['last_activity'] = pd.to_datetime(df_accounts['last_activity'],
                                              errors='coerce')

# Explicit SQL column types, keyed by column name ('id' is the index column).
dtype = {'id': sqlalchemy.types.BIGINT(),
         'screen_name': sqlalchemy.types.VARCHAR(length=16),
         'list_slug': sqlalchemy.types.VARCHAR(length=25),
         'created_at': sqlalchemy.types.DATE(),
         'last_activity': sqlalchemy.types.DATE(),
         }

# Rows are appended to an existing 'accounts' table (if_exists='append').
df_accounts.to_sql('accounts', con=engine, if_exists='append', dtype=dtype)