# Plot with Seaborn

Statistical Plotting with Seaborn

Seaborn_1
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


### load a dataset online from seaborn¶

In [2]:
# Fetch seaborn's example "tips" dataset (downloaded from seaborn's online
# data repository) into a DataFrame used by every cell below.
tip = sns.load_dataset('tips')

In [3]:
# Summarize the columns: dtypes, non-null counts and memory footprint.
tip.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill    244 non-null float64
tip           244 non-null float64
sex           244 non-null category
smoker        244 non-null category
day           244 non-null category
time          244 non-null category
size          244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 6.8 KB

In [4]:
# Preview the first three rows to see what each column holds.
tip.head(n=3)

Out[4]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3

### visualizing regressions¶

• Plot data and regression model fits across a FacetGrid.
In [5]:
# Scatter of tip vs. total bill with a fitted linear regression line.
# x, y and data are passed by keyword because current seaborn no longer
# accepts them positionally, and the facet height is set with `height`
# (the parameter was called `size` before seaborn 0.9).
sns.lmplot(x='total_bill', y='tip', data=tip, height=3, aspect=2)

Out[5]:
<seaborn.axisgrid.FacetGrid at 0x7f71804eb950>

### group by categorical column¶

In [6]:
# One regression panel per value of 'sex', laid out as columns.
# `height` replaces the `size` keyword, which was renamed in seaborn 0.9.
sns.lmplot(x='total_bill', y='tip', data=tip, height=3,
           col='sex')

Out[6]:
<seaborn.axisgrid.FacetGrid at 0x7f717db077d0>

### plot group data in the same graph¶

In [7]:
# Both groups share a single axes; 'sex' is encoded by color instead of by panel.
# `height` replaces the `size` keyword, which was renamed in seaborn 0.9.
sns.lmplot(x='total_bill', y='tip', data=tip, height=3, aspect=2,
           hue='sex', palette='Set1')

Out[7]:
<seaborn.axisgrid.FacetGrid at 0x7f71804eba90>

### plot Residuals¶

• residplot()
In [8]:
# Reminder of the column names before plotting residuals.
tip.head(n=1)

Out[8]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
In [9]:
# Residuals of a linear regression of tip on total_bill, drawn in red.
sns.residplot(data=tip, x='total_bill', y='tip', color='indianred')

Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f717cf9e7d0>

## Higher-order regressions¶

• When there are more complex relationships between two variables, a simple first order regression is often not sufficient to accurately capture the relationship between the variables. Seaborn makes it simple to compute and visualize regressions of varying orders.

• sns.regplot()

• the function sns.lmplot() is a higher-level interface to sns.regplot().

• A principal difference between sns.lmplot() and sns.regplot() is the way in which matplotlib options are passed (sns.regplot() is more permissive).

• For both sns.lmplot() and sns.regplot(), the keyword order is used to control the order of polynomial regression.

• The function sns.regplot() uses the argument scatter=None to prevent plotting the scatter plot points again.

In [10]:
# Reminder of the column names before fitting higher-order regressions.
tip.head(n=1)

Out[10]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
In [11]:
# Generate a scatter plot of 'total_bill' and 'tip' using semi-transparent red circles
plt.scatter(tip['total_bill'], tip['tip'], label='data', color='red', marker='o', alpha=.5)

# Overlay polynomial regressions of 'tip' on 'total_bill' of order 1 (blue),
# 2 (green) and 3 (purple). scatter=False stops regplot from drawing the points
# a second time (any falsy value, e.g. None, has the same effect). The legend
# label is derived from the order so the two can never disagree.
for poly_order, line_color in [(1, 'blue'), (2, 'green'), (3, 'purple')]:
    sns.regplot(x='total_bill', y='tip', data=tip, scatter=False,
                order=poly_order, color=line_color,
                label='order {}'.format(poly_order))

# Add a legend and display the plot
plt.legend(loc='upper right')
plt.show()


## Visualizing univariate distributions¶

### stripplot and swarmplot¶

In [12]:
sns.stripplot(y= 'tip', data=tip)
plt.ylabel('tip ($)')  Out[12]: <matplotlib.text.Text at 0x7f717ce6aa50> In [13]: sns.stripplot(x='day', y='tip', data=tip) plt.ylabel('tip ($)')

Out[13]:
<matplotlib.text.Text at 0x7f717ce22710>
In [14]:
sns.stripplot(x='day', y='tip', data=tip, size=4, jitter=True)
plt.ylabel('tip ($)')  Out[14]: <matplotlib.text.Text at 0x7f717cc93750> In [15]: sns.swarmplot(x='day', y='tip', data=tip) plt.ylabel('tip ($)')

Out[15]:
<matplotlib.text.Text at 0x7f717cca2ad0>
In [16]:
sns.swarmplot(x='day', y='tip', data=tip, hue='sex',  palette='Set1')
plt.ylabel('tip ($)')  Out[16]: <matplotlib.text.Text at 0x7f717cb27350> In [17]: sns.swarmplot(x='tip', y='day', data=tip, hue='sex', orient='h') plt.ylabel('tip ($)')

Out[17]:
<matplotlib.text.Text at 0x7f717ca7a690>

## Violin plot¶

In [18]:
# Left panel: box plot of tips per day.
plt.subplot(1,2,1)
sns.boxplot(x='day', y='tip', data=tip)
plt.ylabel('tip ($)')

# Right panel: violin plot of the same data for side-by-side comparison.
plt.subplot(1,2,2)
sns.violinplot(x='day', y='tip', data=tip)
plt.ylabel('tip ($)')

# Keep the two panels' labels from overlapping.
plt.tight_layout()

In [19]:
# Light-gray violins with no inner annotation serve as a backdrop...
sns.violinplot(x='day', y='tip', data=tip, inner=None,
               color='lightgray')

# ...for the individual observations, drawn on top as small jittered points.
sns.stripplot(x='day', y='tip', data=tip, size=4,
              jitter=True)

# A lone '$' is drawn verbatim by matplotlib, so no backslash is needed
# ('\$' is an invalid escape sequence in a normal Python string).
plt.ylabel('tip ($)')

Out[19]:
<matplotlib.text.Text at 0x7f717ca25dd0>

## Visualizing multivariate distributions¶

### Joint plots¶

In [20]:
# Joint scatter of total_bill vs. tip with each variable's marginal distribution.
# `height` replaces the `size` keyword, which was renamed in seaborn 0.9.
sns.jointplot(x='total_bill', y='tip', data=tip, height=5)

Out[20]:
<seaborn.axisgrid.JointGrid at 0x7f717ca34a10>

### Using kind='kde'¶

• kernel density estimate (KDE) of the joint distribution
In [21]:
# Same joint plot, drawn as kernel density estimates instead of a scatter.
# `height` replaces the `size` keyword, which was renamed in seaborn 0.9.
sns.jointplot(x='total_bill', y='tip', data=tip,
              kind='kde', height=5)

Out[21]:
<seaborn.axisgrid.JointGrid at 0x7f717ce8b050>

## Pair plot¶

In [22]:
# Grid of pairwise relationships between the columns of `tip`.
# `height` (per-facet height) replaces the `size` keyword renamed in seaborn 0.9.
sns.pairplot(tip, height=2)

Out[22]:
<seaborn.axisgrid.PairGrid at 0x7f717c398c50>
In [23]:
# Pairwise grid again, colored by 'sex' and with a regression fit in each panel.
sns.pairplot(data=tip, kind='reg', hue='sex')

Out[23]:
<seaborn.axisgrid.PairGrid at 0x7f717b9e4b10>