Python 求相关系数

两组序列数据,求两者的线性相关系数。

1:使用numpy

import random
import numpy as np
# Two independent samples of 20 integers, each drawn from 0..10 inclusive.
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]
# Stack the samples as the two rows of a single 2 x 20 array.
ab = np.array([a, b])
# Print the covariance matrix of the rows, then their correlation matrix
# (numpy treats each row as one variable by default).
print(np.cov(ab))
print(np.corrcoef(ab))

2:使用pandas

import pandas as pd
# 使用 pandas 计算协方差、相关系数
# 使用 DataFrame 作为数据结构,为方便计算,我们会将 ab 矩阵转置
# Transpose so each sample becomes a column, then label the columns.
# NOTE: the column names must use plain ASCII quotes; typographic quotes
# are not valid string delimiters in Python and fail at parse time.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])
# Covariance between columns A and B
print(dfab.A.cov(dfab.B))
# Correlation coefficient between columns A and B
print(dfab.A.corr(dfab.B))

3:使用原生函数

import random
import math
# Fresh samples for the pure-Python version: 20 integers each, 0..10 inclusive.
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

#计算平均值
def mean(x):
  """Return the arithmetic mean of the values in ``x``.

  ``x`` must support both ``sum()`` and ``len()``. An empty ``x`` raises
  ``ZeroDivisionError`` from the division.
  """
  total = sum(x)
  count = len(x)
  return total / count

# 计算每一项数据与均值的差
def de_mean(x):
  """Return a list holding each element of ``x`` minus the mean of ``x``."""
  center = mean(x)
  deviations = []
  for value in x:
    deviations.append(value - center)
  return deviations

# 辅助计算函数 dot product 、sum_of_squares
def dot(v, w):
  """Return the dot product of ``v`` and ``w``.

  Elements are paired with ``zip``, so if the inputs differ in length the
  extra elements of the longer one are ignored.
  """
  products = [left * right for left, right in zip(v, w)]
  return sum(products)

def sum_of_squares(v):
  """Return the dot product of ``v`` with itself (sum of squared elements)."""
  squared_total = dot(v, v)
  return squared_total

# 方差
def variance(x):
  """Return the sample variance of ``x``.

  The sum of squared deviations from the mean is divided by ``len(x) - 1``,
  so a single-element ``x`` raises ``ZeroDivisionError``.
  """
  degrees_of_freedom = len(x) - 1
  squared_spread = sum_of_squares(de_mean(x))
  return squared_spread / degrees_of_freedom

# 标准差
def standard_deviation(x):
  """Return the sample standard deviation: the square root of ``variance(x)``."""
  spread = variance(x)
  return math.sqrt(spread)

# 协方差
def covariance(x, y):
  """Return the sample covariance of ``x`` and ``y``.

  The dot product of the two de-meaned sequences is divided by
  ``len(x) - 1``.

  Args:
    x: sequence of numbers (must support ``len()``).
    y: sequence of numbers with the same length as ``x``.

  Returns:
    The sample covariance as a number.

  Raises:
    ValueError: if ``x`` and ``y`` have different lengths.
    ZeroDivisionError: if ``x`` has exactly one element (the denominator
      ``n - 1`` is zero).
  """
  n = len(x)
  # dot() pairs elements with zip(), which silently drops the tail of the
  # longer input; combined with a denominator taken from len(x) alone that
  # would yield a meaningless result, so reject mismatched inputs up front.
  if len(y) != n:
    raise ValueError(
        "covariance requires inputs of equal length, got %d and %d"
        % (n, len(y)))
  return dot(de_mean(x), de_mean(y)) / (n - 1)

# 相关系数
def correlation(x, y):
  """Return the correlation coefficient of ``x`` and ``y``.

  The covariance is divided by both sample standard deviations. When either
  standard deviation is not strictly positive, 0 is returned instead.
  """
  sd_x = standard_deviation(x)
  sd_y = standard_deviation(y)
  # Guard: without two positive spreads the ratio below is not computed.
  if not (sd_x > 0 and sd_y > 0):
    return 0
  return covariance(x, y) / sd_x / sd_y

# Show the two raw samples.
print(a)
print(b)
# Sample standard deviation of each sample.
print(standard_deviation(a))
print(standard_deviation(b))
# Correlation coefficient between the two samples.
print(correlation(a,b))

4:使用 R、SPSS、Excel