# twitter_sentimental_analysis.py

# -*- coding: utf-8 -*-
"""Twitter sentimental analysis

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1kJxJQT2WDWug_1mYOgG1m1DpUNDD7Y4Y
"""

# Sentiment analysis of Twitter

#importing libraries
import tweepy
from textblob import TextBlob
from wordcloud import WordCloud
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
nltk.download('averaged_perceptron_tagger')
# spaCy has to be imported before its loader is used; the original line
# referenced `spacy` without any import and failed with NameError.
import spacy

# Load the "en_core_web_sm" spaCy pipeline (the model package must be installed).
nlp = spacy.load("en_core_web_sm")

# Twitter credentials.
# The original `{Enter ...}` placeholders were not valid Python and stopped the
# whole script from parsing. Each value is now read from an environment
# variable so secrets stay out of the source file; the placeholder text is only
# a fallback and must be replaced (or the variable set) before authenticating.
import os

consumerKey = os.environ.get("TWITTER_CONSUMER_KEY", "Enter Consumer Key")
consumerSecret = os.environ.get("TWITTER_CONSUMER_SECRET", "Enter Consumer Secret")
accessToken = os.environ.get("TWITTER_ACCESS_TOKEN", "Enter Access Token")
accessTokenSecret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "Enter Access token Secret")

# Build the OAuth handler from the consumer key/secret pair.
authenticate = tweepy.OAuthHandler(consumerKey, consumerSecret)

# Attach the account's access token and its secret to the handler.
authenticate.set_access_token(accessToken, accessTokenSecret)

# Create the API client used for every request below; the
# wait_on_rate_limit flag is passed through as True.
api = tweepy.API(authenticate, wait_on_rate_limit=True)

# Fetch up to 150 statuses from the "elonmusk" timeline. tweet_mode="extended"
# is requested because the code below reads each status' `full_text`.
posts = api.user_timeline(screen_name="elonmusk", count=150, lang="en", tweet_mode="extended")

# Print the first 15 tweets, numbered from 1. enumerate() replaces the
# hand-maintained counter; the printed text is unchanged.
print("The first 15 tweets are: \n")
for i, tweet in enumerate(posts[0:15], start=1):
    print(f"{i}) {tweet.full_text}", '\n')
    
# Collect the text of every fetched tweet into a single-column DataFrame.
tweet_texts = [tweet.full_text for tweet in posts]
df = pd.DataFrame(tweet_texts, columns=['tweets'])

# Preview the first 10 rows (a bare expression: its value is only rendered
# when run interactively, e.g. in a notebook cell).
df.head(10)

# Cleaning the data: strips @mentions, '#' symbols, retweet markers and links.
def cleantext(text):
    """Return *text* with Twitter-specific noise removed.

    Removes @mentions (handles made of letters, digits and underscores), the
    '#' character of hashtags (the tag word itself is kept), the "RT" retweet
    marker together with its trailing whitespace, and http/https links.
    """
    # Raw strings keep regex escapes such as \s and \S from being parsed as
    # (invalid) Python string escapes.
    text = re.sub(r"@[A-Za-z0-9_]+", "", text)
    text = re.sub(r"#", "", text)
    # \b keeps "RT" inside a longer word (e.g. "START now") from being removed.
    text = re.sub(r"\bRT\s+", "", text)
    text = re.sub(r"https?://\S+", "", text)
    return text

# Overwrite the raw tweet text with its cleaned form.
cleaned_tweets = df['tweets'].apply(cleantext)
df['tweets'] = cleaned_tweets

# Preview the cleaned rows (bare expression; only rendered interactively).
df.head()

# Subjectivity score of a piece of text
def Subjectivity(text):
    """Return the subjectivity value that TextBlob reports for *text*."""
    blob = TextBlob(text)
    return blob.sentiment.subjectivity

# Polarity score of a piece of text
def Polarity(text):
    """Return the polarity value that TextBlob reports for *text*."""
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Tokenization followed by part-of-speech tagging
def tokenize(text):
    """Split *text* into word tokens and return NLTK's POS tagging of them."""
    tokens = nltk.word_tokenize(text)
    return nltk.pos_tag(tokens)

# Derive the per-tweet feature columns from the tweet text, in the same order
# as before: subjectivity, polarity, then the tagged tokens.
for column_name, transform in (
    ('subjectivity', Subjectivity),
    ('polarity', Polarity),
    ('tokenize', tokenize),
):
    df[column_name] = df['tweets'].apply(transform)

# Preview the first 10 rows (bare expression; only rendered interactively).
df.head(10)

# Tweet analysis: map a polarity score to a sentiment label

def Analysis(score):
    """Return 'Positive', 'Neutral' or 'Negative' for a polarity *score*.

    A score above zero is positive, a score equal to zero is neutral, and
    anything else falls through to negative.
    """
    if score > 0:
        return 'Positive'
    if score == 0:
        return 'Neutral'
    return 'Negative'

# Label every tweet from its polarity score and store the label in 'Review'.
review_labels = df['polarity'].apply(Analysis)
df['Review'] = review_labels

# Preview the labelled rows (bare expression; only rendered interactively).
df.head()

# Print the negative tweets, numbered from 1, in sorted order
# (polarity descending, so the least negative tweet comes first).
print('Printing negative tweets:\n')
# mergesort is stable, so tweets with equal polarity keep their relative order.
negativeDF = df.sort_values(by=['polarity'], ascending=False, kind='mergesort')
# Select with a boolean mask and iterate the values. The original used
# negativeDF['Review'][i], which looks rows up by index label: that ignored
# the sort order and only worked with a default 0..n-1 index.
negative_tweets = negativeDF.loc[negativeDF['Review'] == 'Negative', 'tweets']
for j, tweet in enumerate(negative_tweets, start=1):
    print(str(j) + ') ' + tweet)
    print()

# Print the positive tweets, numbered from 1, in sorted order
# (polarity ascending, so the least positive tweet comes first).
print("Printing positive tweets: \n")
# mergesort is stable, so tweets with equal polarity keep their relative order.
positiveDF = df.sort_values(by=['polarity'], kind='mergesort')
# Filter on positiveDF itself: the original tested negativeDF['Review'][i]
# (a copy-paste slip) and indexed by label, which ignored the sort order.
positive_tweets = positiveDF.loc[positiveDF['Review'] == 'Positive', 'tweets']
for j, tweet in enumerate(positive_tweets, start=1):
    print(str(j) + ') ' + tweet)
    print()

# Print the neutral tweets, numbered from 1.
print("Printing Neutral tweets: \n")
# mergesort is stable, so tweets with equal polarity keep their relative order.
neutralDF = df.sort_values(by=['polarity'], kind='mergesort')
# Filter on neutralDF itself: the original took its loop bound from
# positiveDF and indexed rows by label, so it depended on another frame and
# on a default 0..n-1 index.
neutral_tweets = neutralDF.loc[neutralDF['Review'] == 'Neutral', 'tweets']
for j, tweet in enumerate(neutral_tweets, start=1):
    print(str(j) + ') ' + tweet)
    print()
        
# Scatter plot of subjectivity (y) against polarity (x) for every tweet.
plt.figure(figsize=(10, 8))
plt.scatter(df['polarity'], df['subjectivity'])
plt.title('sentimental Analysis')
plt.xlabel('Polarity')
plt.ylabel('subjectivity')
# Save before showing: once plt.show() has returned the figure is typically
# closed or cleared, so the original savefig-after-show wrote a blank image.
plt.savefig('sub&polarity.png')
plt.show()

# Share of each sentiment label among all rows of df, in percent rounded to
# one decimal. Each round(...) is a bare expression, so its value is only
# rendered when run interactively.

# Negative tweets
ntweets = df.loc[df['Review'] == 'Negative']
round(len(ntweets) / len(df) * 100, 1)

# Positive tweets
ptweets = df.loc[df['Review'] == 'Positive']
round(len(ptweets) / len(df) * 100, 1)

# Neutral tweets
netweets = df.loc[df['Review'] == 'Neutral']
round(len(netweets) / len(df) * 100, 1)

# Number of tweets per sentiment label (bare expression; only rendered
# when run interactively).
df['Review'].value_counts()

# Bar chart of the label counts.
df['Review'].value_counts().plot(kind='bar')
# Title and axis labels are set after plotting: pandas can set the x-label
# itself from the index name, which would replace a label set beforehand.
plt.title('Sentiment Analysis')
plt.xlabel('Sentiment')
plt.ylabel('Counts')
# Save before showing: once plt.show() has returned the figure is typically
# closed or cleared, so the original savefig-after-show wrote a blank image.
plt.savefig('percentage.png')
plt.show()