Tweet User Info


Main keywords: pilkada, pilgub, sihar sitorus, djarot, edy rahmayadi, ijeck, jr saragih, ance selian

Date range: 2018-01-18 to 2018-01-24
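
The query below spells out one LIKE clause per keyword. An equivalent, parameterized way to build the same filter from this keyword list is sketched here (a sketch only, assuming the same new_tweets_clean/tweet_users schema and the psycopg2 cursor created below):

# A sketch: build the keyword filter from the list instead of hard-coding each LIKE term.
keywords = ['pilkada', 'pilgub', 'sihar sitorus', 'djarot', 'edy rahmayadi',
            'ijeck', 'jr saragih', 'ance selian']

like_terms = " or ".join("tweet_text like %s" for _ in keywords)
query = ("select tu.name, tu.location, tu.followerscount, tu.statusescount, tu.friendscount, tc.* "
         "from new_tweets_clean tc "
         "inner join tweet_users tu on tc.user_id = tu.user_id "
         "where (" + like_terms + ") "
         "and to_char(tc.tweet_createdat, 'YYYY-MM-DD') between %s and %s "
         "order by tweet_createdat asc")
params = ['%' + k + '%' for k in keywords] + ['2018-01-01', '2018-01-22']
# cur.execute(query, params)  # same cursor as in the block below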

import os
import numpy as np
import datetime as DT
import psycopg2

import matplotlib.pyplot as plt
from matplotlib import dates

from mpl_toolkits.basemap import Basemap, cm

import seaborn as sns

import pandas as pd
pd.set_option('display.max_rows', 10)

pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 160)

# Set the Seaborn plot style
sns.set_style("darkgrid")

# PostgreSQL: pull tweets that match the keywords, joined with their user info
con = psycopg2.connect(host='localhost', database='pantawdb', user='postgres', password='postgres123')

cur = con.cursor()
cur.execute("""select tu.name, tu.location,
                tu.followerscount,
                tu.statusescount,
                tu.friendscount,
                tc.*
                from new_tweets_clean tc
                inner join tweet_users tu on tc.user_id = tu.user_id
                where (tweet_text like '%sihar sitorus%' or 
                    tweet_text like '%djarot%' or 
                    tweet_text like '%edy rahmayadi%' or
                    tweet_text like '%jr saragih%' or 
                    tweet_text like '%ijeck%' or
                    tweet_text like '%ance selian%' or tweet_text like '%pilgub%' or tweet_text like '%pilkada%') and
                    to_char(tc.tweet_createdat, 'YYYY-MM-DD') between '2018-01-01' and '2018-01-22'
                order by tweet_createdat asc
                """)
colnames = [desc[0] for desc in cur.description]
df = pd.DataFrame(cur.fetchall(), columns=colnames)
df = df.replace(np.nan, 'N-A', regex=True)
# Python 2 only: make UTF-8 the default string encoding for the tweet text
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

usercols = ['user_id', 'name', 'location', 'followerscount', 'statusescount', 'friendscount']
dfuser = df[usercols].groupby(usercols).size().to_frame()
dfuser.reset_index(inplace=True)
dfuser[['followerscount','statusescount','friendscount']] = dfuser[['followerscount','statusescount','friendscount']].astype(int)
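
The groupby above is only used to collapse df to one row per distinct user; drop_duplicates gives essentially the same user table more directly (a sketch over the same df, not used below):

# One row per distinct user, without the groupby/size round-trip.
dfuser_alt = df[['user_id', 'name', 'location', 'followerscount',
                 'statusescount', 'friendscount']].drop_duplicates()
dfuser_alt[['followerscount', 'statusescount', 'friendscount']] = \
    dfuser_alt[['followerscount', 'statusescount', 'friendscount']].astype(int)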

dfuser_fol = dfuser[['name','followerscount']].sort_values(by='followerscount', ascending=False).head(15)
dfuser_frn = dfuser[['name','friendscount']].sort_values(by='friendscount', ascending=False).head(15)
dfuser_sts = dfuser[['name','statusescount']].sort_values(by='statusescount', ascending=False).head(15)

figuser_1, axuser_1 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='followerscount', data=dfuser_fol, ax=axuser_1)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Followers')
plt.show()

[figure: Tweet Users - Most Followers]



figuser_2, axuser_2 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='friendscount', data=dfuser_frn, ax=axuser_2)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Friends')
plt.show()

[figure: Tweet Users - Most Friends]

figuser_3, axuser_3 = plt.subplots(figsize=(12,5), dpi=100)

sns.barplot(x='name', y='statusescount', data=dfuser_sts, ax=axuser_3)
plt.xlabel('Username')
plt.xticks(rotation=90)
plt.title('Tweet Users - Most Statuses')
plt.show()

[figure: Tweet Users - Most Statuses]


Tweets Info

Most Favorited Tweet

# Most Favorited Tweet
df[['tweet_text','tweet_favoritedcount']] \
    .loc[(df['tweet_favoritedcount'] > 0) & (df['tweet_favoritedcount'] != 'N-A')] \
    .sort_values(by=['tweet_favoritedcount'], ascending=False).head(10)
|     | tweet_text | tweet_favoritedcount |
|-----|------------|----------------------|
| 638 | Di Indonesia saat pilkada bini nya di suruh pake jilbab | 27 |
| 282 | Polisi seolah menutup mata disaat kubu anis mainin isu SARA pd waktu kampanye pilgub dki yg lalu | 21 |
| 234 | Ingat pilkadaTaipan dan gerombolan pembela penista membaurJangan lupakan merekaPilih partai yg jelas membela ke | 13 |
| 717 | Lakoni aja tarung di pilkada Jateng tapi jangan pake cara kayak di DKI kemarin Bisa menang anda hebat | 12 |
| 329 | Byk org stress kalah pilkada | 12 |
| 474 | Melawan lupa sandal jepit kaos oblong kopi tahun pilkada udh depan mata Jgn salah pi | 11 |
| 316 | Wes yo wesskrg sdh pd pilihan msg saatnya bertarung scra fair spt dlm pilkada DKI jkt y | 11 |
| 219 | ini admin bni syariah matanya buta dng brutalnya pilkada DKi kemarin | 10 |
| 60  | Dukung cawalkot bandung NuRul Arifin utk pilkadabangkitlah perempuan istimewa | 10 |
| 92  | pengalaman Pilpres pilgub DKI dan Perpu Ormas membuat kita faham | 10 |
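
Since df.replace(np.nan, 'N-A') can leave string values in tweet_favoritedcount, the sort above compares mixed types; a numeric-safe variant (a sketch) coerces the column first:

# Coerce the favourite counts to numbers so the sort is purely numeric; 'N-A' becomes NaN.
fav = df[['tweet_text', 'tweet_favoritedcount']].copy()
fav['tweet_favoritedcount'] = pd.to_numeric(fav['tweet_favoritedcount'], errors='coerce')
fav[fav['tweet_favoritedcount'] > 0].sort_values(by='tweet_favoritedcount', ascending=False).head(10)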

Most Retweeted Tweet

# Most Retweeted Tweet
df[['tweet_text','tweet_retweetcount']] \
    .loc[df['tweet_retweetcount'] > 0] \
    .sort_values(by=['tweet_retweetcount'], ascending=False).head(10)
|     | tweet_text | tweet_retweetcount |
|-----|------------|--------------------|
| 110 | Maka dgn keputusan terbaru dari tersebut maka koalisi yg kami bangun dgn PKS utk pilkada Jabar dengan demik | 171 |
| 281 | Jika umat Islam Indonesia yang dari total pendudukpemilih bersatu Insya Allah semua pilkada propinsi dimenangkan Cagu | 154 |
| 154 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 359 | Gw mendeteksi para konglomerat aseng proxy RRC bermain di banyak kubu untuk pilkada Jabar Waspada pula di Jateng amp Jatim | 125 |
| 68  | Hati dng mrk sejarah sdh membuktikan koalisi mrk dari pilpres ke pilgub DKI strategi mrk yg paling menonjol jualan fitnah | 92 |
| 0   | Preside kita ini saudara jangan karena pilkada kita pecah | 88 |
| 169 | Dulu di pilkada DKI ada akun dan yg berada di pihak A | 50 |
| 342 | Partai politik yg bermusuhan di pilkada DKI tp berkoalisi di pilkada kota lainJelas bhw tidak ada partai yg berjuang ut | 50 |
| 12  | kite masih inget dg netral di Pilpres hasilnya kita nikmati skrg kita juga ingat netral di pilgub DKI kita juga ing | 47 |
| 187 | sdh kita jalani amp tahun politik banyak pilkada Agustus sdh pendaftaran Capres Siapa Capres yg sdr duk | 46 |
from collections import Counter

# Count tweets per calendar day (after 2018-01-10)
counted_dates = Counter(df['tweet_createdat'].loc[df['tweet_createdat'] > '2018-01-10'].apply(lambda x: x.date()))

counted_series = pd.Series(counted_dates)
counted_series.index = pd.to_datetime(counted_series.index)

counted_series = counted_series.to_frame()
counted_series.index.names = ['date']
counted_series.reset_index(inplace=True)
counted_series = counted_series.rename(columns= {0: 'tweet_count'})
counted_series['date'] = counted_series['date'].dt.strftime('%Y-%m-%d')
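
The Counter/Series round-trip above can also be done with pandas alone; this sketch produces the same per-day counts (assuming tweet_createdat holds valid timestamps; it is not used by the plot below):

# Per-day tweet counts without collections.Counter.
created = pd.to_datetime(df['tweet_createdat'])
daily = created[created > '2018-01-10'].dt.date.value_counts().sort_index()
daily = daily.rename('tweet_count').rename_axis('date').reset_index()
daily['date'] = pd.to_datetime(daily['date']).dt.strftime('%Y-%m-%d')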

fig = plt.figure(figsize=(12,8), dpi=120)
ax  = fig.add_subplot(111)

ax.barh(counted_series.date, counted_series.tweet_count, height=0.2)

plt.ylabel('Date')
plt.xlabel('Tweet Count')
plt.title('Total Tweets per Day')
plt.show()

[figure: Total Tweets per Day]

## Hourly Tweet Volume of Selected Keywords, Day by Day

days_to_view = [16,17,18,19,20,21]

grp = df[['tweet_createdat']].groupby(by=[df.tweet_createdat.map(lambda x: x.day),
                                          df.tweet_createdat.map(lambda x: x.hour)]).size().to_frame()

grp.index.names = ['day', 'hour']
grp.reset_index(inplace=True)
grp = grp.rename(columns={0: 'tweet_count'})


grpf=[]
for datedays in days_to_view:
    grpf.append(grp.loc[(grp['day']==datedays)])

fig = plt.figure( figsize=(12,4), dpi=120 )
ax  = fig.add_subplot(111)

lns=[]
for i, datedays in enumerate(days_to_view):
    # keep the line handles so one combined legend can be built below
    lns += ax.plot(grpf[i].hour, grpf[i].tweet_count, '-', label='day-' + str(datedays))

labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc=0)

ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Tweet Count")

ax.set_xlim([0, 24])

plt.xticks(rotation=90)
plt.title('Tweet Volume Hourly - Day by Day')
plt.show()

[figure: Tweet Volume Hourly - Day by Day]
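
The per-day loop and manual legend above can also be collapsed into a single pivot, letting pandas draw one line per day (a sketch using the same grp and days_to_view):

# One line per selected day, pivoting the day/hour counts in one step.
hourly = grp[grp['day'].isin(days_to_view)].pivot_table(index='hour', columns='day', values='tweet_count').fillna(0)
hourly.plot(figsize=(12, 4), xlim=(0, 24), title='Tweet Volume Hourly - Day by Day')
plt.xlabel('Time (h)')
plt.ylabel('Tweet Count')
plt.show()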

## Tweet Volume per City

df_location_unique = df[['tweet_id','location']] \
    .loc[(df['location'] != 'N-A') & (df['location'] != '')] \
    .groupby(['tweet_id','location']).size().to_frame()

df_location_unique.index.names=['tweet_id','location']
df_location_unique.reset_index(inplace=True)
df_location_unique = df_location_unique.rename(columns={0: 'count'})

fig3, ax3 = plt.subplots(figsize=(12,10), dpi=180)

# Keep only locations that appear in more than `locn` tweets
locn = 40
locfilter = df_location_unique[['location']].groupby(['location']).filter(lambda x: len(x) > locn)

locfilter.groupby(['location']).size().plot(kind='barh', stacked=True, title="Tweet volume per-City", ax=ax3)
plt.xlabel('Count')
plt.show()

[figure: Tweet volume per-City]
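
The groupby/filter pattern above keeps only locations with more than locn tweets; value_counts gives the same cut a bit more directly (a sketch):

# Same per-city bars via value_counts: keep locations with more than `locn` tweets.
loc_counts = df_location_unique['location'].value_counts()
loc_counts[loc_counts > locn].sort_values().plot(kind='barh', figsize=(12, 10), title='Tweet volume per-City')
plt.xlabel('Count')
plt.show()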

figsent = plt.figure( figsize=(12,6), dpi=120 )

axsent  = figsent.add_subplot(111)

dfsentiment_perday = df[['tweet_id','sentiment','tweet_createdat']] \
    .loc[(df['sentiment'] != 'N-A') & (df['tweet_createdat'] > '2018-01-13')].copy()
dfsentiment_perday['tweet_createdat'] = dfsentiment_perday['tweet_createdat'].apply(lambda x: x.date())

sctsentiment_perday = dfsentiment_perday.pivot_table(values='tweet_id', index='tweet_createdat', columns='sentiment', \
                                                     aggfunc=lambda x: len(x.unique())).replace(np.nan,0)

sctsentiment_perday.plot(kind='bar', stacked=False, ax=axsent)
plt.title('All Sentiment - By Date')
plt.ylabel(r"Tweet Count")

plt.show()

[figure: All Sentiment - By Date]

city='medan'

figsent_city = plt.figure( figsize=(12,6), dpi=120 )
axsent_city  = figsent_city.add_subplot(111)

dfsentiment_perday_city = df[['tweet_id','sentiment','tweet_createdat']] \
    .loc[(df['sentiment'] != 'N-A') &
         (df['tweet_createdat'] > '2018-01-13') &
         (df.location.str.contains(city, case=False))].copy()
dfsentiment_perday_city['tweet_createdat'] = dfsentiment_perday_city['tweet_createdat'].apply(lambda x: x.date())
sctsentiment_perday_city = dfsentiment_perday_city.pivot_table(values='tweet_id', index='tweet_createdat', \
                                                               columns='sentiment', \
                                                               aggfunc=lambda x: len(x.unique())).replace(np.nan,0)

sctsentiment_perday_city.plot(kind='bar', stacked=False, ax=axsent_city)
plt.title('Sentiment - By Date for city: %s'%city)
plt.ylabel(r"Tweet Count")
plt.show()

[figure: Sentiment - By Date for city: medan]


Tweet Geolocation Map

df_tweetgeo = df[['tweet_id','tweet_text','tweet_coordinate','tweet_geo']].loc[(df['tweet_coordinate']!='N-A')]

import decimal
decimal.getcontext().prec = 46 # Change 46 to the precision you want.

# tweet_geo is stored as a "{lat,lon}" string; strip the braces and parse each part
lats = []
lons = []
for index, row in df_tweetgeo.iterrows():
    lats.append(decimal.Decimal(row['tweet_geo'].split(',')[0].replace("{", "")))
    lons.append(decimal.Decimal(row['tweet_geo'].split(',')[1].replace("}", "")))
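
Decimal with 46-digit precision is far more than a map needs; a vectorised alternative that parses the same "{lat,lon}" strings into plain floats is sketched here (not used by the plot below, which keeps the Decimal lists):

# Parse "{lat,lon}" into floats in one vectorised pass; float precision is ample for plotting.
latlon = df_tweetgeo['tweet_geo'].str.strip('{}').str.split(',', expand=True).astype(float)
lats_f = latlon[0].tolist()
lons_f = latlon[1].tolist()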

# Lambert conformal map, 12,000 km x 9,000 km, centred near the Indonesian archipelago
maps = Basemap(width=12000000, height=9000000, projection='lcc', lat_1=25., lat_2=45, lat_0=6, lon_0=113.)

import warnings
warnings.filterwarnings('ignore')

figmap, axmap = plt.subplots(figsize=(12,8),dpi=120)

maps.drawcoastlines()
maps.drawcountries()
maps.fillcontinents(color = 'coral',lake_color='aqua')

maps.drawmapboundary(fill_color='aqua')
x,y = maps(lons, lats)

maps.plot(x, y, 'bo', markersize=3)

plt.show()

[figure: map of geotagged tweets]