Statistical 2019-01-01

均值标准误差

已知原概率密度函数的标准差和样本容量，预测样本均值的抽样分布的方差．
$$\sigma_{\bar{x}}^2 = \frac{\sigma ^2}{n}$$

样本均值抽样分布的标准差通常称作 均值标准差，也称作 均值标准误差
$$\sigma_{\bar{x}} = \frac{\sigma}{\sqrt{n}}$$

方差的代码和样本均值的抽样分布代码都在之前写过了，可以试试把它们结合起来验证一下公式的正确性．

还是再来一下代码吧．
代码是一个讨厌的东西，但是不得不承认它是一个很好的工具．这意味着让它为你工作所带来的好处，必须比花时间调弄它的代价更划算．

# 只是在代码最后添加了计算标准差
print('原来分布标准差', np.std(L))
print('计算样本均值抽样分布标准差', np.std(L)/np.sqrt(SIZE))
print('样本均值抽样分布标准差', np.std(random_num(SIZE, 10000, L)[1]))

两个结果并不会完全一样（存在抽样误差），所以样本的数量不要设得太小（这里是10000）
代码变量名字起得不好：SIZE才是样本容量，相当于公式里面的n

import numpy as np 
import matplotlib.pyplot as plt 
from mpl_toolkits.axisartist.axislines import SubplotZero
import random
import warnings


def daw(SIZE, N, L):
    """Animate how the distribution of sample means builds up.

    For every sample count in ``N`` a fresh batch of samples is drawn from
    ``L`` with ``random_num``, the frequency of each distinct sample mean is
    computed with ``stats_num``, and the frequencies are scattered onto one
    shared axes. After the last batch, its frequencies are joined by a red
    line ordered by mean value; interactive mode is then switched off and
    ``plt.show()`` is called.

    Args:
        SIZE: Number of values drawn for each sample (the sample size).
        N: Iterable of sample counts; one batch is drawn and plotted per entry.
        L: Population of values to sample from.
    """
    fig = plt.figure(figsize=(7, 5), dpi=120, facecolor='#FF9966', edgecolor='c')
    # ``axisbg`` was deprecated in Matplotlib 2.0 and later removed;
    # ``facecolor`` is its replacement.
    ax1 = SubplotZero(fig, 1, 1, 1, facecolor='#FF9966')
    fig.add_subplot(ax1)
    ax1.axis['right', 'top'].set_visible(False)
    plt.ion()

    # Loop-invariant decorations: a vertical marker at the population mean
    # and a fixed y-range, drawn once instead of once per batch.
    mean = sum(L)/len(L)
    ax1.plot((mean, mean), (-10, 1600), 'g-.')
    ax1.set_ylim(-10, 1600)

    x, y = [], []  # keeps the final curve well-defined when N is empty
    for i in N:
        plt.pause(0.0001)  # let the GUI redraw between batches
        _, S_mean = random_num(SIZE, i, L)
        x, y = stats_num(S_mean)
        ax1.scatter(x, y, s=15, alpha=0.4)

    # Connect the frequencies of the last batch from left to right.
    ordered = sorted(zip(x, y), key=lambda pair: pair[0])
    ax1.plot([pair[0] for pair in ordered], [pair[1] for pair in ordered], 'r-')
    plt.ioff()
    plt.show()


## Random sampling
def random_num(size, n, l):
    """Draw ``n`` samples of ``size`` values each from ``l`` with replacement.

    Args:
        size: Number of values drawn per sample.
        n: Number of samples to draw.
        l: Sequence of population values passed to ``random.choice``.

    Returns:
        A tuple ``(S, S_mean)``. ``S`` is an ``(n, size + 1)`` array created
        with ``np.zeros`` whose rows hold the drawn values followed by their
        mean; ``S_mean`` is a list with the ``n`` sample means in draw order.
    """
    table = np.zeros((n, size + 1))
    means = []
    for row in range(n):
        draws = [random.choice(l) for _ in range(size)]
        average = sum(draws) / size
        # The last column of each row stores that sample's mean.
        table[row, :size] = draws
        table[row, size] = average
        means.append(average)
    return table, means


## Count occurrences
def stats_num(S_mean):
    """Count how many times each distinct value occurs in ``S_mean``.

    Args:
        S_mean: Sequence of hashable values (the sample means).

    Returns:
        A tuple ``(x, y)`` of equal-length lists: ``x`` holds the distinct
        values in set-iteration order and ``y[k]`` is the number of times
        ``x[k]`` appears in ``S_mean``.
    """
    from collections import Counter  # local import keeps this block self-contained

    # One counting pass instead of a full list.count() scan per distinct value.
    counts = Counter(S_mean)
    x = list(set(S_mean))
    y = [counts[value] for value in x]
    return x, y


if __name__ == "__main__":
    L = [1, 1, 3, 4, 6, 6]      # population values; repeats set the frequencies
    SIZE = 100                  # sample size: values drawn per sample
    N = range(100, 10001, 100)  # sample counts, one plotted batch per entry

    # Compare the standard error predicted by sigma / sqrt(n) with the
    # standard deviation of 10000 simulated sample means.
    population_std = np.std(L)
    print('原来分布标准差', population_std)
    print('计算样本均值抽样分布标准差', population_std/np.sqrt(SIZE))
    simulated_means = random_num(SIZE, 10000, L)[1]
    print('样本均值抽样分布标准差', np.std(simulated_means))

    warnings.filterwarnings('ignore')  # silence all warnings before plotting
    daw(SIZE, N, L)