Tweet User Info
Main keywords: pilkada, pilgub, sihar sitorus, djarot, edy rahmayadi, ijeck, jr saragih, ance selian
Date range: 2018-01-18 to 2018-01-24
import os
import numpy as np
import datetime as DT
import psycopg2
import matplotlib.pyplot as plt
from matplotlib import dates
from mpl_toolkits.basemap import Basemap, cm
import seaborn as sns
import pandas as pd
# Notebook display settings. The full option names are spelled out because
# pandas resolves abbreviated keys by pattern matching, which stops working as
# soon as a second option matches the same fragment.
pd.set_option('display.max_rows', 10)  # truncate long frames to 10 rows
pd.set_option('display.expand_frame_repr', False)  # do not wrap wide frames
pd.set_option('display.max_colwidth', 160)  # show up to 160 characters per cell
## Set Seaborn style
sns.set_style("darkgrid")

## PostgreSQL connection
# Every connection setting can be overridden through the environment so the
# credentials do not have to live in the notebook. The fallbacks are the values
# this notebook has always used, so it still runs unchanged without them.
# NOTE(review): the fallback password is committed in plain text -- consider
# dropping the default once the environment variables are in place.
con = psycopg2.connect(
    host=os.environ.get('PANTAW_DB_HOST', 'localhost'),
    database=os.environ.get('PANTAW_DB_NAME', 'pantawdb'),
    user=os.environ.get('PANTAW_DB_USER', 'postgres'),
    password=os.environ.get('PANTAW_DB_PASSWORD', 'postgres123'),
)
# `con` and `cur` stay open at module level for the cells that follow.
cur = con.cursor()
# Search terms and (inclusive) date window of the analysis. Kept as plain
# variables so the notebook can be re-run for another keyword set or period
# without editing the SQL text.
keywords = ['sihar sitorus', 'djarot', 'edy rahmayadi', 'jr saragih',
            'ijeck', 'ance selian', 'pilgub', 'pilkada']
date_from = '2018-01-01'
date_to = '2018-01-22'

# One "tweet_text like %s" clause per keyword. The values themselves are bound
# by psycopg2 as query parameters, so the '%' wildcards never appear in the
# SQL text and need no escaping.
like_clauses = ' or '.join(['tweet_text like %s'] * len(keywords))
query = """select tu.name, tu.location,
tu.followerscount,
tu.statusescount,
tu.friendscount,
tc.*
from new_tweets_clean tc
inner join tweet_users tu on tc.user_id = tu.user_id
where ({likes}) and
to_char(tc.tweet_createdat, 'YYYY-MM-DD') between %s and %s
order by tweet_createdat asc
""".format(likes=like_clauses)
cur.execute(query, ['%' + keyword + '%' for keyword in keywords] + [date_from, date_to])

# Build the frame with the column names reported by the cursor.
colnames = [desc[0] for desc in cur.description]
df = pd.DataFrame(cur.fetchall(), columns=colnames)
# Missing values become the string 'N-A'; later cells filter on that marker.
df = df.replace(np.nan, 'N-A', regex=True)
import sys

# Python 2 only: make implicit str/unicode conversions use UTF-8.
# Python 3 already defaults to UTF-8 and has no builtin `reload`, so the hack
# is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    # reload(sys) re-initialises the module, which would discard any
    # replacement std streams (e.g. the ones a notebook kernel installs), so
    # keep them and put them back afterwards.
    _saved_streams = (sys.stdin, sys.stdout, sys.stderr)
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = _saved_streams
# Collapse the tweet rows to one row per distinct combination of the user
# columns; the unnamed column 0 produced by size() holds the number of tweet
# rows in each combination.
_user_columns = ['user_id','name','location','followerscount','statusescount','friendscount']
_count_columns = ['followerscount','statusescount','friendscount']
dfuser = df[_user_columns].groupby(_user_columns).size().to_frame().reset_index()
dfuser[_count_columns] = dfuser[_count_columns].astype(int)


def _top_15_by(metric):
    """Return the 15 rows of dfuser with the highest `metric`, as (name, metric)."""
    ranked = dfuser[['name', metric]].sort_values(by=metric, ascending=False)
    return ranked.head(15)


dfuser_fol = _top_15_by('followerscount')
dfuser_frn = _top_15_by('friendscount')
dfuser_sts = _top_15_by('statusescount')
def _show_user_ranking(ranking, count_column, title):
    """Draw and show one 12x5 bar chart of `count_column` per user name.

    Returns the (figure, axes) pair created for the chart.
    """
    figure, axes = plt.subplots(figsize=(12,5), dpi=100)
    # barplot draws on the current axes, i.e. the one created just above.
    sns.barplot(x='name', y=count_column, data=ranking)
    plt.xlabel('Username')
    plt.xticks(rotation=90)
    plt.title(title)
    plt.show()
    return figure, axes


figuser_1, axuser_1 = _show_user_ranking(dfuser_fol, 'followerscount', 'Tweets Users - Most Followers')

figuser_2, axuser_2 = _show_user_ranking(dfuser_frn, 'friendscount', 'Tweets Users - Most Friends')

figuser_3, axuser_3 = _show_user_ranking(dfuser_sts, 'statusescount', 'Tweets Users - Most Statuses ')

Tweets Info
Most Favorited Tweet
# Most Favorited Tweet
# The count column may contain the 'N-A' placeholder next to the integers.
# Comparing on a numeric view (placeholder -> NaN, which never passes `> 0`)
# selects the same rows as the old "> 0 and != 'N-A'" test, without relying on
# str-vs-int ordering, which is a TypeError on Python 3.
_fav_counts = pd.to_numeric(df['tweet_favoritedcount'], errors='coerce')
df.loc[_fav_counts > 0, ['tweet_text','tweet_favoritedcount']].sort_values(by=['tweet_favoritedcount'], ascending=False).head(10)
| | tweet_text | tweet_favoritedcount |
|---|---|---|
| 638 | Di Indonesia saat pilkada bini nya di suruh pake jilbab | 27 |
| 282 | Polisi seolah menutup mata disaat kubu anis mainin isu SARA pd waktu kampanye pilgub dki yg lalu | 21 |
| 234 | Ingat pilkadaTaipan dan gerombolan pembela penista membaurJangan lupakan merekaPilih partai yg jelas membela ke | 13 |
| 717 | Lakoni aja tarung di pilkada Jateng tapi jangan pake cara kayak di DKI kemarin Bisa menang anda hebat | 12 |
| 329 | Byk org stress kalah pilkada | 12 |
| 474 | Melawan lupa sandal jepit kaos oblong kopi tahun pilkada udh depan mata Jgn salah pi | 11 |
| 316 | Wes yo wesskrg sdh pd pilihan msg saatnya bertarung scra fair spt dlm pilkada DKI jkt y | 11 |
| 219 | ini admin bni syariah matanya buta dng brutalnya pilkada DKi kemarin | 10 |
| 60 | Dukung cawalkot bandung NuRul Arifin utk pilkadabangkitlah perempuan istimewa | 10 |
| 92 | pengalaman Pilpres pilgub DKI dan Perpu Ormas membuat kita faham | 10 |
Most Re-Tweeted Tweet
# Most Retweeted Tweet
pd.set_option('display.max_colwidth', 160)
# Compare on a numeric view of the count column: the 'N-A' placeholder becomes
# NaN and is excluded by `> 0`. A direct comparison lets placeholder rows
# through on Python 2 (any str sorts above any int) and raises on Python 3.
_rt_counts = pd.to_numeric(df['tweet_retweetcount'], errors='coerce')
df.loc[_rt_counts > 0, ['tweet_text','tweet_retweetcount']].sort_values(by=['tweet_retweetcount'], ascending=False).head(10)
| | tweet_text | tweet_retweetcount |
|---|---|---|
| 110 | Maka dgn keputusan terbaru dari tersebut maka koalisi yg kami bangun dgn PKS utk pilkada Jabar dengan demik | 171 |
| 281 | Jika umat Islam Indonesia yang dari total pendudukpemilih bersatu Insya Allah semua pilkada propinsi dimenangkan Cagu | 154 |
| 154 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 359 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 68 | Hati dng mrk sejarah sdh membuktikan koalisi mrk dari pilpres ke pilgub DKI strategi mrk yg paling menonjol jualan fitnah | 92 |
| 0 | Preside kita ini saudara jangan karena pilkada kita pecah | 88 |
| 169 | Dulu di pilkada DKI ada akun dan yg berada di pihak A | 50 |
| 342 | Partai politik yg bermusuhan di pilkada DKI tp berkoalisi di pilkada kota lainJelas bhw tidak ada partai yg berjuang ut | 50 |
| 12 | kite masih inget dg netral di Pilpres hasilnya kita nikmati skrg kita juga ingat netral di pilgub DKI kita juga ing | 47 |
| 187 | sdh kita jalani amp tahun politik banyak pilkada Agustus sdh pendaftaran Capres Siapa Capres yg sdr duk | 46 |
import matplotlib.dates as mdates
from collections import Counter

days = dates.DayLocator()

# Tally tweets per calendar day, considering only timestamps after 2018-01-10.
_recent_stamps = df['tweet_createdat'].loc[df['tweet_createdat']>'2018-01-10']
counted_dates = Counter(stamp.date() for stamp in _recent_stamps)

# Turn the tally into a two-column frame: 'date' (as YYYY-MM-DD text) and
# 'tweet_count'.
counted_series = pd.Series(counted_dates)
counted_series.index = pd.to_datetime(counted_series.index)
counted_series = counted_series.to_frame()
counted_series.index.names = ['date']
counted_series = counted_series.reset_index().rename(columns={0: 'tweet_count'})
counted_series['date'] = counted_series['date'].dt.strftime('%Y-%m-%d')

# Horizontal bars, one per day, with a bar thickness of 0.2.
fig = plt.figure(figsize=(12,8), dpi=120)
ax = fig.add_subplot(111)
ax.barh(counted_series['date'], counted_series['tweet_count'], 0.2)
ax.set_ylabel('Date')
ax.set_xlabel('Tweet Count')
ax.set_title('Total Tweet Per - Day')
plt.show()

## Tweet of selected Keyword Day-by-day
# Days of the month to draw, one line per day.
# NOTE: tweets are grouped by day-of-month only, so this assumes the loaded
# data covers a single month.
days_to_view = [16,17,18,19,20,21]

# Count tweets per (day-of-month, hour). `as_index=False` is deliberately not
# passed: with it, pandas >= 1.1 makes size() return a DataFrame, which has no
# to_frame(). Without it size() is a MultiIndexed Series on every version.
grp = df[['tweet_createdat']].groupby(
    by=[df.tweet_createdat.map(lambda x: x.day),
        df.tweet_createdat.map(lambda x: x.hour)]).size().to_frame()
grp.index.names = ['day','hour']
grp.reset_index(inplace=True)
grp = grp.rename(columns={0: 'tweet_count'})

# One sub-frame per requested day, in the same order as days_to_view.
grpf = [grp.loc[grp['day'] == datedays] for datedays in days_to_view]

fig = plt.figure(figsize=(12,4), dpi=120)
ax = fig.add_subplot(111)
lns = []
for datedays, day_rows in zip(days_to_view, grpf):
    # ax.plot returns a list of line handles; collect them for the legend.
    lns += ax.plot(day_rows.hour, day_rows.tweet_count, '-', label='day-'+str(datedays))
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Tweet Count")
ax.set_xlim([0, 24])
plt.xticks(rotation=90)
plt.title('Tweet Volume Hourly- Day by Day')
plt.show()

## Tweet volume per user location
# Keep rows whose location is neither the 'N-A' placeholder nor empty, then
# reduce them to one row per distinct (tweet_id, location) pair.
_has_location = (df['location']!='N-A') & (df['location']!='')
df_location_unique = df.loc[_has_location, ['tweet_id','location']].groupby(['tweet_id','location']).size().to_frame()
df_location_unique.index.names = ['tweet_id','location']
df_location_unique = df_location_unique.reset_index().rename(columns={0: 'count'})

fig3, ax3 = plt.subplots(figsize=(12,10), dpi=180)

# Only locations with more than `locn` rows are drawn.
locn = 40
locfilter = df_location_unique[['location']].groupby(['location']).filter(lambda rows: len(rows) > locn)
_rows_per_location = locfilter.groupby(['location']).size()
_rows_per_location.plot(kind='barh', stacked=True, title="Tweet volume per-City", ax=ax3)
plt.xlabel('Count')
plt.show()

# Grouped bar chart: number of distinct tweets per sentiment label and day.
figsent = plt.figure(figsize=(12,6), dpi=120)
axsent = figsent.add_subplot(111)

# Rows with a sentiment value, created after 2018-01-13.
# .copy() makes this an independent frame, so the column assignment below does
# not write into a slice of df (chained assignment / SettingWithCopyWarning).
dfsentiment_perday = df[['tweet_id','sentiment','tweet_createdat']].loc[
    (df['sentiment']!='N-A') & (df['tweet_createdat']>'2018-01-13')].copy()
# Reduce the timestamp to its calendar date so the pivot groups by day.
dfsentiment_perday['tweet_createdat'] = dfsentiment_perday['tweet_createdat'].apply(lambda x: x.date())

# Rows = date, columns = sentiment, cells = distinct tweet ids
# (0 where a day has no tweet with that sentiment).
sctsentiment_perday = dfsentiment_perday.pivot_table(
    values='tweet_id', index='tweet_createdat', columns='sentiment',
    aggfunc=lambda x: len(x.unique())).fillna(0)
sctsentiment_perday.plot(kind='bar', stacked=False, ax=axsent)
plt.title('All Sentiment - By Date')
plt.ylabel(r"Tweet Count")
plt.show()

# Same chart as the all-locations sentiment plot, restricted to users whose
# location text contains `city` (case-insensitive).
city='medan'
figsent_city = plt.figure(figsize=(12,6), dpi=120)
axsent_city = figsent_city.add_subplot(111)

# .copy() makes this an independent frame, so the column assignment below does
# not write into a slice of df (chained assignment / SettingWithCopyWarning).
dfsentiment_perday_city = df[['tweet_id','sentiment','tweet_createdat']].loc[
    (df['sentiment']!='N-A')
    & (df['tweet_createdat']>'2018-01-13')
    & (df.location.str.contains(city, case=False))].copy()
# Reduce the timestamp to its calendar date so the pivot groups by day.
dfsentiment_perday_city['tweet_createdat'] = dfsentiment_perday_city['tweet_createdat'].apply(lambda x: x.date())

# Rows = date, columns = sentiment, cells = distinct tweet ids
# (0 where a day has no tweet with that sentiment).
sctsentiment_perday_city = dfsentiment_perday_city.pivot_table(
    values='tweet_id', index='tweet_createdat', columns='sentiment',
    aggfunc=lambda x: len(x.unique())).fillna(0)
sctsentiment_perday_city.plot(kind='bar', stacked=False, ax=axsent_city)
plt.title('Sentiment - By Date for city: %s'%city)
plt.ylabel(r"Tweet Count")
plt.show()

Tweet Geo Pointing
# Tweets that carry a position. The position is parsed from `tweet_geo`, so
# that column must be present too, not only `tweet_coordinate`.
df_tweetgeo = df[['tweet_id','tweet_text','tweet_coordinate','tweet_geo']].loc[
    (df['tweet_coordinate']!='N-A') & (df['tweet_geo']!='N-A')]

# The Google API key can be supplied through the GOOGLE_API_KEY environment
# variable; the fallback is the key this notebook has always used.
# NOTE(review): the fallback key is committed in plain text -- consider
# rotating it and dropping the default.
GAPI = os.environ.get("GOOGLE_API_KEY", "AIzaSyBLAgXu_kWSFNroganG4Ff_TPI-OBR8QWI")
from pygeocoder import Geocoder
from pygeolib import GeocoderError
import ast
import decimal
# The context precision governs Decimal arithmetic; constructing a Decimal from
# text keeps every digit regardless of this setting.
decimal.getcontext().prec = 46 # Change 46 to the precision you want.

# `tweet_geo` is text of the form "{a,b}"; the first number is used as the
# latitude and the second as the longitude.
lats=[]
lons=[]
for geo_text in df_tweetgeo['tweet_geo']:
    # Calling the methods on the value itself works for both str and unicode
    # (the unbound str.replace form rejects unicode on Python 2).
    lat_text, lon_text = geo_text.strip('{}').split(',')[:2]
    lats.append(decimal.Decimal(lat_text))
    lons.append(decimal.Decimal(lon_text))

maps = Basemap(width=12000000,height=9000000,projection='lcc', lat_1=25.,lat_2=45, lat_0=6,lon_0=113.)
import warnings
warnings.filterwarnings('ignore')
figmap, axmap = plt.subplots(figsize=(12,8),dpi=120)
maps.drawcoastlines()
maps.drawcountries()
maps.fillcontinents(color = 'coral',lake_color='aqua')
maps.drawmapboundary(fill_color='aqua')
# Calling the Basemap instance projects lon/lat into map coordinates.
x,y = maps(lons, lats)
maps.plot(x, y, 'bo', markersize=3)
plt.show()
