Python 求相关系数
两组序列数据,求两者的线性相关系数。
1:使用numpy
import random

import numpy as np

# Two samples of 20 random integers, each drawn from the closed range [0, 10].
a = [random.randint(0, 10) for t in range(20)]
b = [random.randint(0, 10) for t in range(20)]

# Build a 2 x 20 matrix first: by default np.cov and np.corrcoef treat each
# row as one variable and each column as one observation.
ab = np.array([a, b])

# Covariance matrix of the two samples.
print(np.cov(ab))
# Correlation coefficient matrix of the two samples.
print(np.corrcoef(ab))
2:使用pandas
import pandas as pd

# Compute the covariance and the correlation coefficient with pandas.
# A DataFrame is used as the data structure; the ab matrix is transposed so
# that each sample becomes one column.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])

# Covariance of columns A and B.
print(dfab.A.cov(dfab.B))
# Correlation coefficient of columns A and B.
print(dfab.A.corr(dfab.B))
3:使用原生函数
import random
import math
# Two samples of 20 random integers, each drawn from the closed range [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]
# Compute the arithmetic mean.
def mean(x):
    """Return the arithmetic mean of the values in x.

    x must support both sum() and len(). True division is used, so integer
    inputs produce a float. An empty x raises ZeroDivisionError.
    """
    return sum(x) / len(x)
# Compute the difference between every data point and the mean.
def de_mean(x):
    """Return a list holding each value of x minus the mean of x.

    The mean is computed once up front and reused for every element.
    """
    x_bar = mean(x)
    return [x_i - x_bar for x_i in x]
# Helper functions: dot product and sum_of_squares.
def dot(v, w):
    """Return the dot product of v and w: the sum of element-wise products.

    The inputs are paired with zip(), so when their lengths differ the extra
    elements of the longer one are ignored. Two empty inputs give 0.
    """
    return sum(v_i * w_i for v_i, w_i in zip(v, w))
def sum_of_squares(v):
    """Return the sum of the squares of the elements of v (v dotted with itself)."""
    return dot(v, v)
# Variance.
def variance(x):
    """Return the sample variance of x.

    The sum of squared deviations from the mean is divided by n - 1 rather
    than n, so x needs at least two values; fewer raise ZeroDivisionError.
    """
    n = len(x)
    deviations = de_mean(x)
    return sum_of_squares(deviations) / (n - 1)
# Standard deviation.
def standard_deviation(x):
    """Return the sample standard deviation of x: the square root of variance(x)."""
    return math.sqrt(variance(x))
# Covariance.
def covariance(x, y):
    """Return the sample covariance of x and y.

    The dot product of the two deviation lists is divided by n - 1, where n
    is the common length of x and y.

    Raises:
        ValueError: if x and y have different lengths. Without this check the
            dot product would silently drop the unpaired tail of the longer
            input while still dividing by len(x) - 1.
    """
    n = len(x)
    if n != len(y):
        raise ValueError("covariance requires x and y to have the same length")
    return dot(de_mean(x), de_mean(y)) / (n - 1)
# Correlation coefficient.
def correlation(x, y):
    """Return the linear correlation coefficient of x and y.

    The value is covariance(x, y) divided by both sample standard
    deviations. If either standard deviation is not positive, the quotient
    is undefined and 0 is returned instead.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    # At least one series has no spread: avoid dividing by zero.
    return 0
# Show both samples first, then the sample standard deviation of each.
for series in (a, b):
    print(series)
for series in (a, b):
    print(standard_deviation(series))
print(correlation(a,b))
4:使用R,SPSS,Excel