In [1]:
# ! pip install tweepy
In [1]:
# Import package
import tweepy

# my twitter API key, you are free to use! since my website doesn't have many visitors.

# Saya young
access_token = "1330365234-xDjSixFZfSeboDSkHS0WgNvOu5zZw4HeUL8ijVq"
access_token_secret = "QuhhHxIMSxVC2QhVqaxtdgZtc4pyJBWVg2C6D5IHCH9ph"
consumer_key = "JES0pDVJW2WCscy1LhFFMxz4A"
consumer_secret = "uOoW3PCx8nI0kIfsifXfCibYwaeMrHh73TrV2TyuILL9vR9Bdx"


# Pass OAuth details to tweepy's OAuth handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)


# api = tweepy.API(auth)
# api.update_status('tweepy + oauth!')
  • a Tweet listener that creates a file called 'tweets.txt'

  • collects streaming tweets as .jsons and writes them to the file 'tweets.txt'

  • once 100 tweets have been streamed, the listener closes the file and stops listening.

http://tweepy.readthedocs.io/en/v3.5.0/streaming_how_to.html#summary

In [3]:
# class use json
import json

class MyStreamListener(tweepy.StreamListener):
    
    def __init__(self, api=None):
        # inherit class attributes
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0
        self.file = open("tweets.txt", "w+")

    def on_status(self, status):
        tweet = status._json
        
        self.file.write( json.dumps(tweet) + '\n' )
        
        self.num_tweets += 1
        if self.num_tweets < 1000:
            return True
        else:
            return False
        self.file.close()

    def on_error(self, status):
        print(status)
In [4]:
# Initialize Stream listener
l = MyStreamListener()

# Create you Stream object with authentication
stream = tweepy.Stream(auth, l)


# Filter Twitter Streams to capture data by the keywords:
stream.filter(track=['Trump stupid','Trump Hillary','Hillary stupid','Trump daughter'], async=True)

Load and explore your Twitter data

  • Now that you've got your Twitter data sitting locally in a text file, it's time to explore it!
  • Within the for loop initiated by for line in tweets_file:, load each tweet into a variable tweet using json.loads(), then append tweet to tweets_data using the append() method.
In [10]:
# Import package
import json

# Initialize empty list to store tweets: tweets_data
tweets_data = []

# Open connection to file
h=open('tweets.txt','r')

# Read in tweets and store in list: tweets_data
for i in h:
    try:
        print 'O',
        tmp=json.loads(i)
        tweets_data.append(tmp)
    except:
        print 'X',
h.close()
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O X

Twitter data to DataFrame

  • Now that you have the Twitter data in a list of dictionaries tweets_data, where each dictionary corresponds to a single tweet, it's time to extract the text of the tweets, along with the language of the tweet.
  • The text in a tweet t1 is stored as the value t1['text']; similarly, the language is stored in t1['lang']
In [11]:
import pandas as pd
pd.DataFrame(tweets_data).head(1)
Out[11]:
contributors coordinates created_at display_text_range entities extended_entities extended_tweet favorite_count favorited filter_level ... quoted_status_id quoted_status_id_str retweet_count retweeted retweeted_status source text timestamp_ms truncated user
0 None None Fri Feb 10 02:10:56 +0000 2017 NaN {u'user_mentions': [{u'indices': [3, 15], u'sc... NaN NaN 0 False low ... NaN NaN 0 False {u'contributors': None, u'truncated': True, u'... <a href="http://twitter.com/download/iphone" r... RT @GeorgeTakei: Mr. Trump, you--and minions l... 1486692656536 False {u'follow_request_sent': None, u'profile_use_b...

1 rows × 33 columns

In [12]:
# Build DataFrame of tweet texts and languages
df = pd.DataFrame(tweets_data, columns=['text', 'lang'])
print df.shape
# Print head of DataFrame
df.head(3)
(999, 2)
Out[12]:
text lang
0 RT @GeorgeTakei: Mr. Trump, you--and minions l... en
1 RT @chelseahandler: Trump says his daughter ha... en
2 RT @GeorgeTakei: Mr. Trump, you--and minions l... en

text statistics

  • a way not using Pandas
In [13]:
import re

def word_in_text(word, tweet):
    word = word.lower()
    text = tweet.lower()
    match = re.search(word, tweet)

    if match:
        return True
    return False
In [14]:
# Initialize list to store tweet counts
[Trump, stupid, girl, hillary] = [0, 0, 0, 0]

# Iterate through df, counting the number of tweets in which
# each candidate is mentioned
for index, row in df.iterrows():
    Trump += word_in_text('trump', row['text'].lower())
    stupid += word_in_text('stupid', row['text'].lower())
    girl += word_in_text('girl', row['text'].lower())
    hillary += word_in_text('hillary', row['text'].lower())
print Trump, stupid, girl, hillary
897 57 6 430

stat using Pandas

  • regular expression

  • case=False

    • not case sensitive
In [15]:
# pd.Series.str.contains?
In [16]:
df['text'].str.contains('hillary',case=False).sum()
Out[16]:
430
  • regular expression, does not match upper letter TRUMP
In [17]:
df['text'].str.contains('[Tt]rump').sum()
Out[17]:
889

create a simple one, just to print out the posts

In [2]:
#override tweepy.StreamListener to add logic to on_status
class test(tweepy.StreamListener):
    
    def __init__(self):
        
        # inherit class attributes
        super(test, self).__init__()
#         tweepy.StreamListener.__init__(self)
        
        self.num=0
           

    def on_status(self, status):
        self.num+=1
        print self.num
        print(status.text)
        if self.num==10:
            
            #returning False in on_data disconnects the stream
            return False
        
    def on_error(self, status):
        print(status)

use the defined class test to print results

In [4]:
# Initialize Stream listener
l = test()

# Create you Stream object with authentication
stream = tweepy.Stream(auth, l)

# Filter Twitter Streams to capture data by the keywords:
stream.filter(track=['Trump stupid','Trump Hillary','Hillary','Trump daughter'], async=True)
1
RT @asamjulian: Count your blessings everyone, dealing with liberal courts is infinitely better than a President Hillary Clinton. 😊 #9thCir…
2
RT @Harlan: Yeah, you're 0-2 for presidential campaigns. 

Keep up the awesome work, Hillary. https://t.co/89Dkx0g1pa
3
RT @BlackAwakening2: Check Out Hillary Hating From The Side Line And Then Kellyanne Conway's Priceless Comeback😂😂😂 #9thCircuitCourt https:/…
4
RT @easynan2: Bernie showed Trump the way. First to hack, refuse to show tax returns, cheat in the caucuses. When Hillary won, he… 
5
Hillary Clinton had the best response to Trump's 'Muslim ban' defeat https://t.co/Bc5rpqmKaL #hillaryclinton
6
Boom! KellyAnne just trolled Hillary Clinton's 9th Circuit tweet https://t.co/OSOXJKER3Y
7
RT @chelseahandler: Trump says his daughter has been treated ‘so unfairly’ by Nordstrom. Oh, was she detained for 19 hours when she tried t…
8
RT @EricBoehlert: imagined if she's asked her son-in-law for advice abt the raid and then she never bothered to show up in Situation… 
9
RT @xtremediagroup: @greggutfeld, Because they relied on notifications from Hillary's server.
10
RT @khmld9t5: Glad hear someone else calling out DNC 4 commiting fraud 2 push Hillary to win -- and she simply screwed us all by… 

Plotting your Twitter data

In [20]:
'capitalize string'.capitalize()
1
RT @Avraham5772: She like mad dog https://t.co/9Chk2F7dAf
Out[20]:
'Capitalize string'
2
https://t.co/HdHEcwvNlX
In [22]:
# Import packages
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Set seaborn style
sns.set(color_codes=True)

# Create a list of labels:cd
cd = ['hillary', 'trump', 'stupid', 'girl']

# Plot histogram
ax = sns.barplot(cd, [hillary, Trump, stupid, girl],alpha=.6)
ax.set(ylabel="count")
plt.show()
In [ ]:
 
In [ ]: