#NumPy,pandas,SciPy.stats,matplotlibをインポートする。
import numpy as np
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline

# Load Roger Federer's data. The CSV is looked up under the "data/"
# directory (relative to the working directory); its file name is the
# player's name with every space replaced by a hyphen.
player = 'Roger Federer'
filename = "data/{name}.csv".format(name='-'.join(player.split(' ')))
df = pd.read_csv(filename)

# Inspect the data: report how many columns were read, then show the last
# rows restricted to the first eight columns.
print("Number of columns: " + str(df.shape[1]))
df.loc[:, df.columns[:8]].tail()

Number of columns: 70

# For each row, express the points won and the aces as a share of the total
# number of points, then scatter-plot the two shares against each other.
npoints = df['player1 total points total']
points = df['player1 total points won'].div(npoints)
aces = df['player1 aces'].div(npoints)

# NOTE(review): the axis labels say "%" but the plotted values are plain
# ratios (counts divided by npoints), not multiplied by 100.
plt.plot(points, aces, '.')
plt.xlabel('% of points won')
plt.ylabel('% of aces')
plt.xlim(0.0, 1.0)
plt.ylim(0.0)

(0.0, 0.16)

# Collect the two ratios into a new two-column DataFrame and discard the
# rows that contain a missing value.
df_bis = pd.DataFrame({'points': points, 'aces': aces})
df_bis = df_bis.dropna()
df_bis.tail()

# Compute the pairwise Pearson correlation coefficients of the columns.
df_bis.corr(method='pearson')

# Binarize both variables: each flag is True where the value is strictly
# greater than that column's median.
df_bis['result'] = df_bis['points'].gt(df_bis['points'].median())
df_bis['manyaces'] = df_bis['aces'].gt(df_bis['aces'].median())

# Build the contingency table holding the frequency of every combination of
# the two binary flags. It is bound to a name so the test below does not
# depend on IPython's `_` (last displayed output), which is undefined in
# plain Python and silently refers to something else if any other
# expression is evaluated in between.
contingency = pd.crosstab(df_bis['result'], df_bis['manyaces'])
contingency

# Chi-squared test of independence on the contingency table. The result
# holds, in order: the test statistic, the p-value, the degrees of freedom
# and the table of expected frequencies.
st.chi2_contingency(contingency)

(27.809858855369555,
 1.3384233799633629e-07,
 1,
 array([[ 257.25024343,  256.74975657],
        [ 256.74975657,  256.25024343]]))

	year	tournament	start date	type	surface	draw	atp points	atp ranking
1174	2012	Australian Open, Australia	16.01.2012	GS	Outdoor: Hard	Draw: 128	720	3
1175	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1176	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1177	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3
1178	2012	Doha, Qatar	02.01.2012	250	Outdoor: Hard	Draw: 32	90	3

	aces	points
1173	0.024390	0.585366
1174	0.039855	0.471014
1175	0.046512	0.639535
1176	0.020202	0.606061
1177	0.069364	0.531792

manyaces	False	True
result
False	300	214
True	214	299

IPython Notebookチャレンジ(カイ二乗検定)

コメントを残すコメントをキャンセル

	aces	points
aces	1.000000	0.255457
points	0.255457	1.000000

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル