tweepy¶

http://tweepy.readthedocs.io/en/v3.5.0/¶

# ! pip install tweepy

# Import package
import tweepy

# Twitter API credentials used to build the OAuth handler.
# Each value is read from an environment variable when one is set, so you can
# run the notebook with your own keys without editing the code. The author's
# original keys (shared on purpose, "free to use") remain as fallbacks so the
# notebook behaves as before when no variable is defined.
# NOTE(review): secrets committed in plain text are readable by anyone with
# access to this file; consider revoking these keys and dropping the fallbacks.
import os

# Saya young
access_token = os.environ.get(
    "TWITTER_ACCESS_TOKEN",
    "1330365234-xDjSixFZfSeboDSkHS0WgNvOu5zZw4HeUL8ijVq")
access_token_secret = os.environ.get(
    "TWITTER_ACCESS_TOKEN_SECRET",
    "QuhhHxIMSxVC2QhVqaxtdgZtc4pyJBWVg2C6D5IHCH9ph")
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY",
    "JES0pDVJW2WCscy1LhFFMxz4A")
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "uOoW3PCx8nI0kIfsifXfCibYwaeMrHh73TrV2TyuILL9vR9Bdx")


# Pass OAuth details to tweepy's OAuth handler:
# the handler is created from the consumer key pair, then the access token
# pair is attached to it. The resulting `auth` object is what the Stream
# objects created later in this notebook receive for authentication.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)


# api = tweepy.API(auth)
# api.update_status('tweepy + oauth!')

`MyStreamListener` is a tweet listener that creates a file called 'tweets.txt'.
It collects streaming tweets as JSON and writes them to 'tweets.txt', one tweet per line.
Once 1000 tweets have been streamed, the listener stops listening (and is meant to close the file).

http://tweepy.readthedocs.io/en/v3.5.0/streaming_how_to.html#summary ¶

# class use json
import json

class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores incoming tweets in a file as JSON lines.

    Every status received is serialised with ``json.dumps`` and written as a
    single line. Once ``max_tweets`` statuses have been written the file is
    closed and ``on_status`` returns False, which disconnects the stream.
    """

    def __init__(self, api=None, max_tweets=1000, path="tweets.txt"):
        """Open the output file and reset the tweet counter.

        Args:
            api: Optional API object, forwarded to the base-class initialiser.
            max_tweets: Number of tweets to store before stopping.
            path: File the tweets are written to; it is truncated on open.
        """
        # inherit class attributes
        super(MyStreamListener, self).__init__(api)
        self.num_tweets = 0
        self.max_tweets = max_tweets
        self.file = open(path, "w+")

    def on_status(self, status):
        """Append one tweet to the file.

        Returns:
            True while more tweets are wanted, False once the limit has been
            reached (or if the file has already been closed).
        """
        # A status delivered after the limit was hit must not touch the
        # closed file; just ask again for a disconnect.
        if self.file.closed:
            return False

        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')

        self.num_tweets += 1
        if self.num_tweets < self.max_tweets:
            return True

        # Close BEFORE returning so buffered data is flushed to disk; placed
        # after the return statement this call would never run and the last
        # records could be lost or truncated.
        self.file.close()
        return False

    def on_error(self, status):
        """Print the error status passed to the listener."""
        print(status)

filter keywords¶

https://dev.twitter.com/streaming/overview/request-parameters#track ¶

# Initialize Stream listener
l = MyStreamListener()

# Create your Stream object with authentication
stream = tweepy.Stream(auth, l)


# Filter Twitter Streams to capture data by the keywords.
# Each list element is one track phrase (see the `track` documentation linked
# above for how words inside a phrase and separate phrases are combined).
# NOTE(review): `async` is a reserved word from Python 3.7 on, so this call is
# only valid on older interpreters; later tweepy releases reportedly rename
# the argument to `is_async` - confirm before upgrading.
stream.filter(track=['Trump stupid','Trump Hillary','Hillary stupid','Trump daughter'], async=True)

Load and explore your Twitter data¶

Now that you've got your Twitter data sitting locally in a text file, it's time to explore it!

Within the for loop initiated by for line in tweets_file:, load each tweet into a variable tweet using json.loads(), then append tweet to tweets_data using the append() method.

# Import package
import json

# Initialize empty list to store tweets: tweets_data
tweets_data = []

# Open connection to file
h=open('tweets.txt','r')

# Read in tweets and store in list: tweets_data
for i in h:
    try:
        print 'O',
        tmp=json.loads(i)
        tweets_data.append(tmp)
    except:
        print 'X',
h.close()

O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O 
X

Twitter data to DataFrame¶

Now that you have the Twitter data in a list of dictionaries tweets_data, where each dictionary corresponds to a single tweet, it's time to extract the text of the tweets, along with the language of the tweet.

The text in a tweet t1 is stored as the value t1['text']; similarly, the language is stored in t1['lang']

import pandas as pd
# Preview the first tweet with every field of the tweet dictionaries as a
# column (the expression value is what the notebook displays).
pd.DataFrame(tweets_data).head(1)

# Build DataFrame of tweet texts and languages
# (only the 'text' and 'lang' keys of each tweet dictionary are kept)
df = pd.DataFrame(tweets_data, columns=['text', 'lang'])
# Show (number of rows, number of columns)
print df.shape
# Print head of DataFrame
df.head(3)

(999, 2)

text statistics¶

A way that does not use the pandas string methods: a plain `re` search inside a loop.

import re

def word_in_text(word, tweet):
    """Return True if the pattern *word* occurs in *tweet*, ignoring case.

    Args:
        word: Pattern to look for. It is passed to ``re.search`` and is
            therefore interpreted as a regular expression.
        tweet: Text to search.

    Returns:
        True when a match is found anywhere in ``tweet``, otherwise False.
    """
    # The previous version lower-cased a copy of the tweet but then searched
    # the untouched original, so the match was case-sensitive. IGNORECASE
    # gives the intended behaviour without rewriting the pattern itself.
    return re.search(word, tweet, re.IGNORECASE) is not None

# Initialize list to store tweet counts
[Trump, stupid, girl, hillary] = [0, 0, 0, 0]

# Iterate through df, counting the number of tweets in which
# each candidate is mentioned
for index, row in df.iterrows():
    Trump += word_in_text('trump', row['text'].lower())
    stupid += word_in_text('stupid', row['text'].lower())
    girl += word_in_text('girl', row['text'].lower())
    hillary += word_in_text('hillary', row['text'].lower())
print Trump, stupid, girl, hillary

897 57 6 430

stat using Pandas¶

`str.contains` treats the pattern as a regular expression.
Passing `case=False`
- makes the match case-insensitive.

# pd.Series.str.contains?
# (the commented line above is an IPython help query for str.contains)

# Count the tweets whose text contains "hillary" in any letter case:
# contains() yields one boolean per tweet and sum() counts the True values.
df['text'].str.contains('hillary',case=False).sum()

430

With a regular expression: the pattern `[Tt]rump` matches "Trump" and "trump" only, so an all-uppercase "TRUMP" is not counted.

# Count the tweets whose text contains "Trump" or "trump"; apart from the
# first letter the pattern is matched case-sensitively.
df['text'].str.contains('[Tt]rump').sum()

889

create a simple one, just to print out the posts¶

#override tweepy.StreamListener to add logic to on_status
class test(tweepy.StreamListener):
    
    def __init__(self):
        
        # inherit class attributes
        super(test, self).__init__()
#         tweepy.StreamListener.__init__(self)
        
        self.num=0
           

    def on_status(self, status):
        self.num+=1
        print self.num
        print(status.text)
        if self.num==10:
            
            #returning False in on_data disconnects the stream
            return False
        
    def on_error(self, status):
        print(status)

use the defined class test to print results¶

# Initialize Stream listener (the printing listener defined above)
l = test()

# Create your Stream object with authentication
stream = tweepy.Stream(auth, l)

# Filter Twitter Streams to capture data by the keywords.
# NOTE(review): `async` is a reserved word from Python 3.7 on, so this call is
# only valid on older interpreters; later tweepy releases reportedly rename
# the argument to `is_async` - confirm before upgrading.
stream.filter(track=['Trump stupid','Trump Hillary','Hillary','Trump daughter'], async=True)

1
RT @asamjulian: Count your blessings everyone, dealing with liberal courts is infinitely better than a President Hillary Clinton. 😊 #9thCir…
2
RT @Harlan: Yeah, you're 0-2 for presidential campaigns. 

Keep up the awesome work, Hillary. https://t.co/89Dkx0g1pa
3
RT @BlackAwakening2: Check Out Hillary Hating From The Side Line And Then Kellyanne Conway's Priceless Comeback😂😂😂 #9thCircuitCourt https:/…
4
RT @easynan2: Bernie showed Trump the way. First to hack, refuse to show tax returns, cheat in the caucuses. When Hillary won, he… 
5
Hillary Clinton had the best response to Trump's 'Muslim ban' defeat https://t.co/Bc5rpqmKaL #hillaryclinton
6
Boom! KellyAnne just trolled Hillary Clinton's 9th Circuit tweet https://t.co/OSOXJKER3Y
7
RT @chelseahandler: Trump says his daughter has been treated ‘so unfairly’ by Nordstrom. Oh, was she detained for 19 hours when she tried t…
8
RT @EricBoehlert: imagined if she's asked her son-in-law for advice abt the raid and then she never bothered to show up in Situation… 
9
RT @xtremediagroup: @greggutfeld, Because they relied on notifications from Hillary's server.
10
RT @khmld9t5: Glad hear someone else calling out DNC 4 commiting fraud 2 push Hillary to win -- and she simply screwed us all by…

Plotting your Twitter data¶

# Demo of str.capitalize(): the result shown by the notebook is
# 'Capitalize string' (first character upper-cased).
'capitalize string'.capitalize()

1
RT @Avraham5772: She like mad dog https://t.co/9Chk2F7dAf

'Capitalize string'

2
https://t.co/HdHEcwvNlX

# Import packages
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Set seaborn style
sns.set(color_codes=True)

# Bar labels, listed in the same order as the counts passed to barplot below
cd = ['hillary', 'trump', 'stupid', 'girl']

# Draw a bar chart of the keyword counts (one bar per label).
# x and y are passed by keyword: the keyword names work on old and new
# seaborn alike, whereas newer releases no longer accept them positionally.
ax = sns.barplot(x=cd, y=[hillary, Trump, stupid, girl], alpha=.6)
ax.set(ylabel="count")
plt.show()

	text	lang
0	RT @GeorgeTakei: Mr. Trump, you--and minions l...	en
1	RT @chelseahandler: Trump says his daughter ha...	en
2	RT @GeorgeTakei: Mr. Trump, you--and minions l...	en

tweepy¶

http://tweepy.readthedocs.io/en/v3.5.0/¶

http://tweepy.readthedocs.io/en/v3.5.0/streaming_how_to.html#summary¶

filter keywords¶

https://dev.twitter.com/streaming/overview/request-parameters#track¶

Load and explore your Twitter data¶

Twitter data to DataFrame¶

text statistics¶

stat using Pandas¶

create a simple one, just to print out the posts¶

use the defined class test to print results¶

Plotting your Twitter data¶

http://tweepy.readthedocs.io/en/v3.5.0/streaming_how_to.html#summary ¶

https://dev.twitter.com/streaming/overview/request-parameters#track ¶