2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 08 16:16:36 2016
@author: SumaiWong
"""
import numpy as np
import pandas as pd
from numpy import dot
from numpy.linalg import inv
# Load the iris table. The raw string keeps the backslash in the Windows path
# literal instead of relying on '\i' being an unrecognised escape sequence.
iris = pd.read_csv(r'D:\iris.csv')
dummy = pd.get_dummies(iris['Species'])  # one indicator column per Species value
iris = pd.concat([iris, dummy], axis=1)
iris = iris.iloc[0:100, :]  # keep only the first one hundred rows
# Feature matrix: the first four columns, selected by position
# (.ix was removed from pandas; .iloc is the positional equivalent).
X = iris.iloc[:, 0:4]
# Label column vector of shape (n, 1) built from the 'setosa' indicator.
# Series.reshape no longer exists, so reshape the underlying ndarray instead.
Y = iris['setosa'].values.reshape(len(iris), 1)
def GDA(Y, X):
    """Estimate the parameters of a two-class Gaussian discriminant model.

    Both classes share one covariance matrix, estimated by pooling the
    within-class scatter of the rows labelled 1 and the rows labelled 0.

    Parameters
    ----------
    Y : array-like
        Labels, one per row of ``X``, given as shape ``(n,)`` or ``(n, 1)``.
        Rows are assigned to a class by comparing the label with 1 and 0.
    X : numpy.ndarray or pandas.DataFrame
        Feature matrix with one row per sample and any number of columns.

    Returns
    -------
    tuple
        ``(theta1, theta0, mu1, mu0, sigma)`` where ``theta1`` is the mean of
        the labels, ``theta0`` is ``1 - theta1``, ``mu1`` / ``mu0`` are the
        per-feature means of the rows labelled 1 / 0 (same container type
        that ``X[mask].mean(axis=0)`` yields), and ``sigma`` is the pooled
        ``(d, d)`` covariance as an ndarray.

    Raises
    ------
    ValueError
        If ``Y`` and ``X`` do not contain the same number of samples.
    """
    # Flatten the labels so the boolean masks are 1-D row selectors; this
    # works for both ndarray and DataFrame inputs.
    labels = np.asarray(Y).ravel()
    if len(labels) != len(X):
        raise ValueError("Y and X must contain the same number of samples")

    theta1 = labels.mean()      # proportion of class 1 (for 0/1 labels)
    theta0 = 1 - labels.mean()  # proportion of class 0 (for 0/1 labels)

    X_1 = X[labels == 1]
    X_0 = X[labels == 0]
    mu1 = X_1.mean(axis=0)  # feature mean vector of class 1
    mu0 = X_0.mean(axis=0)  # feature mean vector of class 0

    # Within-class scatter via X'X - n * mu mu'. The column-vector shape is
    # derived from the data instead of assuming exactly four features.
    rows_1 = np.asarray(X_1, dtype=float)
    rows_0 = np.asarray(X_0, dtype=float)
    col_mu1 = np.asarray(mu1, dtype=float).reshape(-1, 1)
    col_mu0 = np.asarray(mu0, dtype=float).reshape(-1, 1)
    A = dot(rows_1.T, rows_1) - len(rows_1) * dot(col_mu1, col_mu1.T)
    B = dot(rows_0.T, rows_0) - len(rows_0) * dot(col_mu0, col_mu0.T)

    # Pooled covariance: total within-class scatter divided by the sample count.
    sigma = (A + B) / len(X)
    return theta1, theta0, mu1, mu0, sigma
clear;
close all;
%%%%%%%%%%%%%%%%%%%%%%%%% Generate the experimental data sets
rand('state',0);
% The samples below are drawn with randn (inside multivrandn), which keeps its
% own legacy generator state, so seed it as well to make the data reproducible.
randn('state',0);
sigma_matrix1=eye(2);
sigma_matrix2=50*eye(2);
u1=[0,0];
u2=[30,30];
m1=100;
m2=300;   % number of samples
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SM1 data set: two Gaussian clusters
Y1=multivrandn(u1,m1,sigma_matrix1);
Y2=multivrandn(u2,m2,sigma_matrix2);
scatter(Y1(:,1),Y1(:,2),'bo');
hold on;
scatter(Y2(:,1),Y2(:,2),'r*');
title('SM1数据集');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SM2 data set: four Gaussian clusters
u11=[0,0];
u22=[5,5];
u33=[10,10];
u44=[15,15];
m=600;    % samples per cluster
sigma_matrix3=2*eye(2);   % covariance shared by all four clusters
Y11=multivrandn(u11,m,sigma_matrix3);
Y22=multivrandn(u22,m,sigma_matrix3);
Y33=multivrandn(u33,m,sigma_matrix3);
Y44=multivrandn(u44,m,sigma_matrix3);
figure(2);
scatter(Y11(:,1),Y11(:,2),'bo');
hold on;
scatter(Y22(:,1),Y22(:,2),'r*');
scatter(Y33(:,1),Y33(:,2),'go');
scatter(Y44(:,1),Y44(:,2),'c*');
title('SM2数据集');
function Y = multivrandn(u,m,sigma_matrix)
%MULTIVRANDN Draw Gaussian samples with a given mean and covariance matrix.
%   Y = MULTIVRANDN(u, m, sigma_matrix) returns an m-by-n matrix whose rows
%   are samples, where n = length(u).
%
%   u            - mean vector of length n (row or column)
%   m            - number of samples to draw
%   sigma_matrix - n-by-n covariance matrix; it is passed to chol, so it
%                  must be symmetric positive definite
n=length(u);
c = chol(sigma_matrix);       % upper-triangular factor with c'*c = sigma_matrix
X=randn(m,n);                 % m standard-normal samples, one per row
Y=X*c+ones(m,1)*u(:).';       % colour the samples, then shift every row by the mean
end