本文共 2849 字,大约阅读时间需要 9 分钟。
https://mp.weixin.qq.com/s/tZXa9GsqkT49YK7bZ83TuA
GMM的本质并不是聚类,而是生成符合观测数据的分布。即使是20个高斯分布可拟合出当前的数据。import mathimport copyimport numpy as npimport matplotlib.pyplot as pltisdebug = False# 指定k个高斯分布参数,这里k=2。2个高斯分布具有相同均方差Sigma,均值分别为Mu1,Mu2。def ini_data_1(Sigma,Mu1,Mu2,k,N): global X global Mu global Expectations X = np.zeros((1,N)) Mu = np.random.random(2) Expectations = np.zeros((N,k)) for i in range(0,N): if np.random.random(1) > 0.5: X[0,i] = np.random.normal() * Sigma + Mu1 else: X[0,i] = np.random.normal() * Sigma + Mu2 print(X)def trans(pix): sum = 0 for i in range(len(pix)): sum = sum * 10 + pix[i] return sum def ini_data_2(k,N): global X global Mu global Expectations Mu = np.random.random(2) # 初始化Mu Expectations = np.zeros((N,k)) pixArr1 = [] num = 0 csvFile = open('data.txt','r') mini = 1000000 maxn = 0 pix = 0 with csvFile as f: for line in f.readlines(): pix = [int(i) for i in line.split('\t')[0:6]] pixx = trans(pix) # print(pixx) if pixx <= mini: mini = pixx if pixx >= maxn: maxn = pixx pixArr1.append([pixx]) num+=1 X = np.zeros([1,N]) Y = np.array(pixArr1) print(Y) X = np.transpose(Y) print(X)def ini_data_3(k,N): global X global Mu global Expectations Mu = np.random.random(2) # 初始化Mu Expectations = np.zeros((N,k)) X = np.zeros([1,N]) X = np.array([-67,-48,6,8,14,16,23,24,28,29,41,49,56,60,75])# EM算法:步骤1,计算E[zij]def e_step(Sigma,k,N): global Expectations global Mu global X for i in range(0,N): Denom = 0 for j in range(0,k): Denom += math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) for j in range(0,k): Numer = math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) Expectations[i,j] = Numer / Denom # EM算法:M步,求最大化E[zij]的参数Mudef m_step(k,N): global Expectations global X for j in range(0,k): Numer = 0 Denom = 0 for i in range(0,N): Numer += Expectations[i,j]*X[0,i] Denom += Expectations[i,j] Mu[j] = Numer / Denom # 算法迭代iter_num次,或达到精度Epsilon停止迭代def run(Sigma,k,N,iter_num,Epsilon): # ini_data_1(k,N) # ini_data_2(k,N) ini_data_3(k,N) print(u"初始:", Mu) for i in range(iter_num): Old_Mu = copy.deepcopy(Mu) e_step(Sigma,k,N) m_step(k,N) print(i,Mu) if sum(abs(Mu-Old_Mu)) < Epsilon: # 如果精度达到要求,那么停止迭代 breakif __name__ == '__main__': run(10,2,15,1000,0.0001) # run(6515,11722,34351,2,72276,69454,0.0001) plt.hist(X[0,:],200) plt.show()