# Import NumPy, pandas, SciPy.stats, and matplotlib.
import numpy as np
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline

# Load Roger Federer's match data. The CSV path is built from the player's
# name with each space replaced by a hyphen, and is looked up under the
# data/ directory (relative to the current working directory).
player = 'Roger Federer'
filename = "data/{name}.csv".format(name='-'.join(player.split(' ')))
df = pd.read_csv(filename)

# Sanity-check the loaded data: report how many columns there are, then
# show the trailing rows restricted to the first eight columns.
print("Number of columns: " + str(df.shape[1]))
df.loc[:, df.columns[:8]].tail()

Number of columns: 70

# Reduce each match to two ratios over the total number of points played:
# the fraction of points won and the fraction of points that were aces.
# NOTE(review): 'player1' is presumably the player whose file was loaded
# (Federer) -- confirm against the dataset description.
npoints = df['player1 total points total']
points = df['player1 total points won'].div(npoints)
aces = df['player1 aces'].div(npoints)

# Plot the two ratios against each other; the '.' format draws point
# markers only (no connecting line).
plt.plot(points, aces, '.')
plt.xlabel('% of points won')
plt.ylabel('% of aces')
plt.xlim(0.0, 1.0)
# Only the lower y-limit is pinned; the upper limit stays automatic.
plt.ylim(0.0)

(0.0, 0.16)

# Assemble a new DataFrame holding only the two ratio columns, then discard
# every row in which either value is missing.
df_bis = pd.DataFrame({'points': points, 'aces': aces})
df_bis = df_bis.dropna()
df_bis.tail()

# Pairwise Pearson correlation coefficients between the columns
# ('pearson' is the default method, spelled out here for clarity).
df_bis.corr(method='pearson')

# Binarize both variables by splitting at their medians: a row is True when
# its value is strictly greater than the column median, False otherwise
# (values equal to the median therefore fall in the False group).
df_bis['result'] = df_bis['points'].gt(df_bis['points'].median())
df_bis['manyaces'] = df_bis['aces'].gt(df_bis['aces'].median())

# Build the contingency table counting how often each combination of the
# two binary variables ('result' x 'manyaces') occurs.
# The table is bound to a name instead of being picked up later through
# IPython's `_` (last displayed output): `_` does not exist in a plain
# Python script and silently points at something else if any other cell is
# evaluated in between.
contingency = pd.crosstab(df_bis['result'], df_bis['manyaces'])
contingency

# Chi-squared test of independence on the contingency table. The result
# holds, in order: the test statistic, the p-value, the degrees of freedom,
# and the table of expected frequencies.
# NOTE: per the SciPy documentation, Yates' continuity correction is applied
# by default when there is a single degree of freedom, as for a 2x2 table.
st.chi2_contingency(contingency)

(27.809858855369555,
 1.3384233799633629e-07,
 1,
 array([[ 257.25024343,  256.74975657],
        [ 256.74975657,  256.25024343]]))

	year	tournament	start date	type	surface	draw	atp points	atp ranking
1174	2012	Australian Open, Australia	16.01.2012	GS	Outdoor: Hard	Draw: 128	720	3
1175	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1176	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1177	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1178	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3

	aces	points
1173	0.024390	0.585366
1174	0.039855	0.471014
1175	0.046512	0.639535
1176	0.020202	0.606061
1177	0.069364	0.531792

manyaces	False	True
result
False	300	214
True	214	299

月: 2016年3月

iPython notebookチャレンジ(カイ二乗検定)

RstanでCVRの前後比較をするためのコード

ディープラーニングの基礎まとめ

OS X YosemiteへのTensorFlowのインストールと簡易な分類モデルの実行

	aces	points
aces	1.000000	0.255457
points	0.255457	1.000000