Tweet User Info


Main keywords: pilkada, pilgub, sihar sitorus, djarot, edy rahmayadi, ijeck, jr saragih, ance selian

Date range: 2018-01-18 to 2018-01-24
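
The query below spells out one LIKE clause per keyword. An equivalent, parameterized way to build the same filter from this keyword list is sketched here (a sketch only, assuming the same new_tweets_clean/tweet_users schema and the psycopg2 cursor created below):

# A sketch: build the keyword filter from the list instead of hard-coding each LIKE term.
keywords = ['pilkada', 'pilgub', 'sihar sitorus', 'djarot', 'edy rahmayadi',
            'ijeck', 'jr saragih', 'ance selian']

like_terms = " or ".join("tweet_text like %s" for _ in keywords)
query = ("select tu.name, tu.location, tu.followerscount, tu.statusescount, tu.friendscount, tc.* "
         "from new_tweets_clean tc "
         "inner join tweet_users tu on tc.user_id = tu.user_id "
         "where (" + like_terms + ") "
         "and to_char(tc.tweet_createdat, 'YYYY-MM-DD') between %s and %s "
         "order by tweet_createdat asc")
params = ['%' + k + '%' for k in keywords] + ['2018-01-01', '2018-01-22']
# cur.execute(query, params)  # same cursor as in the block below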

import os
import numpy as np
import datetime as DT
import psycopg2

import matplotlib.pyplot as plt
from matplotlib import dates

from mpl_toolkits.basemap import Basemap, cm

import seaborn as sns

import pandas as pd
pd.set_option('display.max_rows', 10)

pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 160)

# Set the Seaborn plot style
sns.set_style("darkgrid")

# PostgreSQL: pull tweets that match the keywords, joined with their user info
con = psycopg2.connect(host='localhost', database='pantawdb', user='postgres', password='postgres123')

cur = con.cursor()
cur.execute("""select tu.name, tu.location,
                tu.followerscount,
                tu.statusescount,
                tu.friendscount,
                tc.*
                from new_tweets_clean tc
                inner join tweet_users tu on tc.user_id = tu.user_id
                where (tweet_text like '%sihar sitorus%' or 
                    tweet_text like '%djarot%' or 
                    tweet_text like '%edy rahmayadi%' or
                    tweet_text like '%jr saragih%' or 
                    tweet_text like '%ijeck%' or
                    tweet_text like '%ance selian%' or tweet_text like '%pilgub%' or tweet_text like '%pilkada%') and
                    to_char(tc.tweet_createdat, 'YYYY-MM-DD') between '2018-01-01' and '2018-01-22'
                order by tweet_createdat asc
                """)
colnames = [desc[0] for desc in cur.description]
df = pd.DataFrame(cur.fetchall(), columns=colnames)
df = df.replace(np.nan, 'N-A', regex=True)
# Python 2 only: make UTF-8 the default string encoding for the tweet text
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

usercols = ['user_id', 'name', 'location', 'followerscount', 'statusescount', 'friendscount']
dfuser = df[usercols].groupby(usercols).size().to_frame()
dfuser.reset_index(inplace=True)
dfuser[['followerscount','statusescount','friendscount']] = dfuser[['followerscount','statusescount','friendscount']].astype(int)
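
The groupby above is only used to collapse df to one row per distinct user; drop_duplicates gives essentially the same user table more directly (a sketch over the same df, not used below):

# One row per distinct user, without the groupby/size round-trip.
dfuser_alt = df[['user_id', 'name', 'location', 'followerscount',
                 'statusescount', 'friendscount']].drop_duplicates()
dfuser_alt[['followerscount', 'statusescount', 'friendscount']] = \
    dfuser_alt[['followerscount', 'statusescount', 'friendscount']].astype(int)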

dfuser_fol = dfuser[['name','followerscount']].sort_values(by='followerscount', ascending=False).head(15)
dfuser_frn = dfuser[['name','friendscount']].sort_values(by='friendscount', ascending=False).head(15)
dfuser_sts = dfuser[['name','statusescount']].sort_values(by='statusescount', ascending=False).head(15)

figuser_1, axuser_1 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='followerscount', data=dfuser_fol, ax=axuser_1)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Followers')
plt.show()

[figure: Tweet Users - Most Followers]



figuser_2, axuser_2 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='friendscount', data=dfuser_frn, ax=axuser_2)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Friends')
plt.show()

[figure: Tweet Users - Most Friends]

figuser_3, axuser_3 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='statusescount', data=dfuser_sts, ax=axuser_3)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Statuses')
plt.show()

[figure: Tweet Users - Most Statuses]


Tweets Info

Most Favorited Tweet

# Most Favorited Tweet
df[['tweet_text','tweet_favoritedcount']] \
    .loc[(df['tweet_favoritedcount'] > 0) & (df['tweet_favoritedcount'] != 'N-A')] \
    .sort_values(by=['tweet_favoritedcount'], ascending=False).head(10)
|     | tweet_text | tweet_favoritedcount |
|-----|------------|----------------------|
| 638 | Di Indonesia saat pilkada bini nya di suruh pake jilbab | 27 |
| 282 | Polisi seolah menutup mata disaat kubu anis mainin isu SARA pd waktu kampanye pilgub dki yg lalu | 21 |
| 234 | Ingat pilkadaTaipan dan gerombolan pembela penista membaurJangan lupakan merekaPilih partai yg jelas membela ke | 13 |
| 717 | Lakoni aja tarung di pilkada Jateng tapi jangan pake cara kayak di DKI kemarin Bisa menang anda hebat | 12 |
| 329 | Byk org stress kalah pilkada | 12 |
| 474 | Melawan lupa sandal jepit kaos oblong kopi tahun pilkada udh depan mata Jgn salah pi | 11 |
| 316 | Wes yo wesskrg sdh pd pilihan msg saatnya bertarung scra fair spt dlm pilkada DKI jkt y | 11 |
| 219 | ini admin bni syariah matanya buta dng brutalnya pilkada DKi kemarin | 10 |
| 60  | Dukung cawalkot bandung NuRul Arifin utk pilkadabangkitlah perempuan istimewa | 10 |
| 92  | pengalaman Pilpres pilgub DKI dan Perpu Ormas membuat kita faham | 10 |
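
Since df.replace(np.nan, 'N-A') can leave string values in tweet_favoritedcount, the sort above compares mixed types; a numeric-safe variant (a sketch) coerces the column first:

# Coerce the favourite counts to numbers so the sort is purely numeric; 'N-A' becomes NaN.
fav = df[['tweet_text', 'tweet_favoritedcount']].copy()
fav['tweet_favoritedcount'] = pd.to_numeric(fav['tweet_favoritedcount'], errors='coerce')
fav[fav['tweet_favoritedcount'] > 0].sort_values(by='tweet_favoritedcount', ascending=False).head(10)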

Most Retweeted Tweet

# Most Retweeted Tweet
df[['tweet_text','tweet_retweetcount']] \
    .loc[df['tweet_retweetcount'] > 0] \
    .sort_values(by=['tweet_retweetcount'], ascending=False).head(10)
|     | tweet_text | tweet_retweetcount |
|-----|------------|--------------------|
| 110 | Maka dgn keputusan terbaru dari tersebut maka koalisi yg kami bangun dgn PKS utk pilkada Jabar dengan demik | 171 |
| 281 | Jika umat Islam Indonesia yang dari total pendudukpemilih bersatu Insya Allah semua pilkada propinsi dimenangkan Cagu | 154 |
| 154 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 359 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 68  | Hati dng mrk sejarah sdh membuktikan koalisi mrk dari pilpres ke pilgub DKI strategi mrk yg paling menonjol jualan fitnah | 92 |
| 0   | Preside kita ini saudara jangan karena pilkada kita pecah | 88 |
| 169 | Dulu di pilkada DKI ada akun dan yg berada di pihak A | 50 |
| 342 | Partai politik yg bermusuhan di pilkada DKI tp berkoalisi di pilkada kota lainJelas bhw tidak ada partai yg berjuang ut | 50 |
| 12  | kite masih inget dg netral di Pilpres hasilnya kita nikmati skrg kita juga ingat netral di pilgub DKI kita juga ing | 47 |
| 187 | sdh kita jalani amp tahun politik banyak pilkada Agustus sdh pendaftaran Capres Siapa Capres yg sdr duk | 46 |
from collections import Counter

# Count tweets per calendar day (after 2018-01-10)
counted_dates = Counter(df['tweet_createdat'].loc[df['tweet_createdat'] > '2018-01-10'].apply(lambda x: x.date()))

counted_series = pd.Series(counted_dates)
counted_series.index = pd.to_datetime(counted_series.index)

counted_series = counted_series.to_frame()
counted_series.index.names = ['date']
counted_series.reset_index(inplace=True)
counted_series = counted_series.rename(columns= {0: 'tweet_count'})
counted_series['date'] = counted_series['date'].dt.strftime('%Y-%m-%d')
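
The Counter/Series round-trip above can also be done with pandas alone; this sketch produces the same per-day counts (assuming tweet_createdat holds valid timestamps; it is not used by the plot below):

# Per-day tweet counts without collections.Counter.
created = pd.to_datetime(df['tweet_createdat'])
daily = created[created > '2018-01-10'].dt.date.value_counts().sort_index()
daily = daily.rename('tweet_count').rename_axis('date').reset_index()
daily['date'] = pd.to_datetime(daily['date']).dt.strftime('%Y-%m-%d')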

fig = plt.figure(figsize=(12,8), dpi=120)
ax  = fig.add_subplot(111)

ax.barh(counted_series.date, counted_series.tweet_count, height=0.2)

plt.ylabel('Date')
plt.xlabel('Tweet Count')
plt.title('Total Tweets per Day')
plt.show()

[figure: Total Tweets per Day]

## Hourly Tweet Volume of Selected Keywords, Day by Day

days_to_view = [16,17,18,19,20,21]

grp = df[['tweet_createdat']].groupby(by=[df.tweet_createdat.map(lambda x: x.day),
                                          df.tweet_createdat.map(lambda x: x.hour)]).size().to_frame()

grp.index.names = ['day', 'hour']
grp.reset_index(inplace=True)
grp = grp.rename(columns={0: 'tweet_count'})


grpf=[]
for datedays in days_to_view:
    grpf.append(grp.loc[(grp['day']==datedays)])

fig = plt.figure( figsize=(12,4), dpi=120 )
ax  = fig.add_subplot(111)

lns=[]
for i, datedays in enumerate(days_to_view):
    # keep the line handles so one combined legend can be built below
    lns += ax.plot(grpf[i].hour, grpf[i].tweet_count, '-', label='day-' + str(datedays))

labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc=0)

ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Tweet Count")

ax.set_xlim([0, 24])

plt.xticks(rotation=90)
plt.title('Tweet Volume Hourly - Day by Day')
plt.show()

[figure: Tweet Volume Hourly - Day by Day]
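
The per-day loop and manual legend above can also be collapsed into a single pivot, letting pandas draw one line per day (a sketch using the same grp and days_to_view):

# One line per selected day, pivoting the day/hour counts in one step.
hourly = grp[grp['day'].isin(days_to_view)].pivot_table(index='hour', columns='day', values='tweet_count').fillna(0)
hourly.plot(figsize=(12, 4), xlim=(0, 24), title='Tweet Volume Hourly - Day by Day')
plt.xlabel('Time (h)')
plt.ylabel('Tweet Count')
plt.show()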

## Tweet Volume per City

df_location_unique = df[['tweet_id','location']] \
    .loc[(df['location'] != 'N-A') & (df['location'] != '')] \
    .groupby(['tweet_id','location']).size().to_frame()

df_location_unique.index.names=['tweet_id','location']
df_location_unique.reset_index(inplace=True)
df_location_unique = df_location_unique.rename(columns={0: 'count'})

fig3, ax3 = plt.subplots(figsize=(12,10), dpi=180)

# Keep only locations that appear in more than `locn` tweets
locn = 40
locfilter = df_location_unique[['location']].groupby(['location']).filter(lambda x: len(x) > locn)

locfilter.groupby(['location']).size().plot(kind='barh', stacked=True, title="Tweet volume per-City", ax=ax3)
plt.xlabel('Count')
plt.show()

[figure: Tweet volume per-City]
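
The groupby/filter pattern above keeps only locations with more than locn tweets; value_counts gives the same cut a bit more directly (a sketch):

# Same per-city bars via value_counts: keep locations with more than `locn` tweets.
loc_counts = df_location_unique['location'].value_counts()
loc_counts[loc_counts > locn].sort_values().plot(kind='barh', figsize=(12, 10), title='Tweet volume per-City')
plt.xlabel('Count')
plt.show()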

figsent = plt.figure( figsize=(12,6), dpi=120 )

axsent  = figsent.add_subplot(111)

dfsentiment_perday = df[['tweet_id','sentiment','tweet_createdat']] \
    .loc[(df['sentiment'] != 'N-A') & (df['tweet_createdat'] > '2018-01-13')].copy()
dfsentiment_perday['tweet_createdat'] = dfsentiment_perday['tweet_createdat'].apply(lambda x: x.date())

sctsentiment_perday = dfsentiment_perday.pivot_table(values='tweet_id', index='tweet_createdat', columns='sentiment', \
                                                     aggfunc=lambda x: len(x.unique())).replace(np.nan,0)

sctsentiment_perday.plot(kind='bar', stacked=False, ax=axsent)
plt.title('All Sentiment - By Date')
plt.ylabel(r"Tweet Count")

plt.show()

[figure: All Sentiment - By Date]

city='medan'

figsent_city = plt.figure( figsize=(12,6), dpi=120 )
axsent_city  = figsent_city.add_subplot(111)

dfsentiment_perday_city = df[['tweet_id','sentiment','tweet_createdat']] \
    .loc[(df['sentiment'] != 'N-A') &
         (df['tweet_createdat'] > '2018-01-13') &
         (df.location.str.contains(city, case=False))].copy()
dfsentiment_perday_city['tweet_createdat'] = dfsentiment_perday_city['tweet_createdat'].apply(lambda x: x.date())
sctsentiment_perday_city = dfsentiment_perday_city.pivot_table(values='tweet_id', index='tweet_createdat', \
                                                               columns='sentiment', \
                                                               aggfunc=lambda x: len(x.unique())).replace(np.nan,0)

sctsentiment_perday_city.plot(kind='bar', stacked=False, ax=axsent_city)
plt.title('Sentiment - By Date for city: %s'%city)
plt.ylabel(r"Tweet Count")
plt.show()

[figure: Sentiment - By Date for city: medan]


Tweet Geolocation Map

df_tweetgeo = df[['tweet_id','tweet_text','tweet_coordinate','tweet_geo']].loc[(df['tweet_coordinate']!='N-A')]

import decimal
decimal.getcontext().prec = 46 # Change 46 to the precision you want.

# tweet_geo is stored as a "{lat,lon}" string; strip the braces and parse each part
lats = []
lons = []
for index, row in df_tweetgeo.iterrows():
    lats.append(decimal.Decimal(row['tweet_geo'].split(',')[0].replace("{", "")))
    lons.append(decimal.Decimal(row['tweet_geo'].split(',')[1].replace("}", "")))
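
Decimal with 46-digit precision is far more than a map needs; a vectorised alternative that parses the same "{lat,lon}" strings into plain floats is sketched here (not used by the plot below, which keeps the Decimal lists):

# Parse "{lat,lon}" into floats in one vectorised pass; float precision is ample for plotting.
latlon = df_tweetgeo['tweet_geo'].str.strip('{}').str.split(',', expand=True).astype(float)
lats_f = latlon[0].tolist()
lons_f = latlon[1].tolist()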

# Lambert conformal map, 12,000 km x 9,000 km, centred near the Indonesian archipelago
maps = Basemap(width=12000000, height=9000000, projection='lcc', lat_1=25., lat_2=45, lat_0=6, lon_0=113.)

import warnings
warnings.filterwarnings('ignore')

figmap, axmap = plt.subplots(figsize=(12,8),dpi=120)

maps.drawcoastlines()
maps.drawcountries()
maps.fillcontinents(color = 'coral',lake_color='aqua')

maps.drawmapboundary(fill_color='aqua')
x,y = maps(lons, lats)

maps.plot(x, y, 'bo', markersize=3)

plt.show()

[figure: map of geotagged tweets]