# [Output settings]
# setwd("C:/Users/lst89/Documents/mvexer5") # working directory (disabled)
options(digits = 4)      # print numbers with 4 significant digits
par(mar = c(4, 4, 2, 1)) # plot margins: bottom, left, top, right
# Chapter 2, p57, exercise 2-1
# 5 x 5 matrix R. The matrix is symmetric, so the row layout shown here is
# the same matrix that column-major filling produces.
R <- matrix(
  c(1,    0.8,  0.26, 0.67, 0.34,
    0.8,  1,    0.33, 0.59, 0.34,
    0.26, 0.33, 1,    0.37, 0.21,
    0.67, 0.59, 0.37, 1,    0.35,
    0.34, 0.34, 0.21, 0.35, 1),
  nrow = 5, ncol = 5
)
R        # show the input matrix
solve(R) # inverse of R
# symmetric = TRUE tells eigen() to treat R as a symmetric matrix
R.e <- eigen(R, symmetric = TRUE)
R.e      # eigenvalues and eigenvectors of R
# Spectral reconstruction V %*% diag(lambda) %*% t(V); it should reproduce R
with(R.e, vectors %*% diag(values) %*% t(vectors))
# Chapter 2, p57, exercise 2-2
library(openxlsx) # package used to read Excel workbooks
E2.2 <- read.xlsx('mvexer5.xlsx', 'E2.2') # sheet "E2.2" of mvexer5.xlsx
E2.2
breaks <- seq(0, 3000, by = 300) # class boundaries 0, 300, ..., 3000 (width 300)
breaks
# Frequency histogram of column X over those classes; the 7 colours in
# col = 1:7 are recycled across the bars.
hist(E2.2$X, breaks, col = 1:7, xlab = "工资(元)", ylab = "频数")
# Same histogram with freq = FALSE, i.e. bar heights are densities
hist(E2.2$X, breaks, freq = FALSE, col = 1:7, xlab = "工资(元)", ylab = "频率")
# Running total of X
Cumsum <- cumsum(E2.2$X)
Cumsum # print the running totals (the variable, not the base function cumsum)
M <- seq(0, 96000, by = 3000)
# NOTE(review): this draws a histogram of the running totals of X, which is
# not a cumulative frequency distribution of X, and hist() stops with an
# error if any running total exceeds 96000 -- confirm the intent against
# the exercise.
hist(Cumsum, M, freq = FALSE, col = 1:12, las = 3, xlab = "工资(元)", ylab = "累积频率")
# Histogram object over the classes 900, 1200, ..., 3000, used for the table below
H <- hist(E2.2$X, breaks = seq(900, 3000, 300))
names(H)
# Table of class midpoints, counts, relative frequencies (density * class
# width 300) and cumulative relative frequencies
data.frame('组中距' = H$mids, '频数' = H$counts, '频率' = H$density*300, '累积频率' = cumsum(H$density*300))
# Chapter 2, p57, exercise 2-3
library(openxlsx) # package used to read Excel workbooks
# Load sheet "E2.3" of mvexer5.xlsx
E2.3 <- read.xlsx("mvexer5.xlsx", sheet = "E2.3")
E2.3
str(E2.3)     # structure: column names and types
summary(E2.3) # basic descriptive statistics per column
# Chapter 3, P84, exercise 2.1
library(openxlsx)
# rowNames = TRUE turns the first (character) column of the sheet into the
# row names of the data frame, for later use.
E3.2 <- read.xlsx("mvexer5.xlsx", sheet = "E3.2", rowNames = TRUE)
E3.2
# Alternative route: in mvexer5.xlsx select A1:E22 on the exercise sheet and
# copy it to the clipboard, then read the clipboard into the data frame dat.
dat <- read.table("clipboard", header = TRUE)
dat
# Data-frame label conversion helper.
# Moves the first column of `df` into the row names and returns the
# remaining columns as a data frame.
#   df: a data frame whose first column holds the row labels.
#   returns: `df` without its first column, with row names taken from it.
msa.X <- function(df){
  # drop = FALSE keeps the result a data frame even when only one column is
  # left; without it a two-column input collapses to a plain vector and the
  # row-name assignment below fails.
  X = df[, -1, drop = FALSE]
  rownames(X) = df[,1] # use the first column's values as row names
  X # return the relabelled data frame
}
# Rebuild E3.2 from the clipboard data, using its first column as row names
E3.2 <- msa.X(dat)
E3.2
# Bar chart of the column means (MARGIN = 2 means column-wise)
barplot(apply(E3.2, MARGIN = 2, FUN = mean))
# Bar chart of the row means, with axis labels rotated (las = 3)
barplot(apply(E3.2, MARGIN = 1, FUN = mean), las = 3)
# Bar chart of the column means (repeated)
barplot(apply(E3.2, MARGIN = 2, FUN = mean))
# Bar chart of the column medians
barplot(apply(E3.2, MARGIN = 2, FUN = median))
# Column medians again, one colour per bar
barplot(apply(E3.2, MARGIN = 2, FUN = median), col = 1:8)
# Box plots, one per column
boxplot(E3.2)
# Box plots drawn horizontally
boxplot(E3.2, horizontal = TRUE)
# Chapter 4, p119, exercise 2-1
library(openxlsx) # Excel reader (loaded here as in the other exercises)
# Copy the exercise data to the clipboard before running this line
E4.1 <- read.table("clipboard", header = TRUE)
E4.1
# Hand-entered copy of the data. It is defined before the first plot because
# plot() below needs x and y.
# Judging by their magnitudes and by the later prediction for x = 1000
# policies, x is the weekly number of new policies and y the weekly
# overtime hours -- confirm against the exercise.
y <- c(3.5,1,4,2,1,3,4.5,1.5,3,5)
x <- c(825,215,1070,550,480,920,1350,325,670,1215)
# Scatter plot: y (overtime hours) on the horizontal axis and x (policies) on
# the vertical axis, so that the variables match the axis labels.
plot(y,x,main = '散点图',xlab = '每周加班时间(小时)',ylab = '每周签发的新保单数目(张)')
cor(E4.1) # correlation matrix
# lm() on a bare data frame regresses its first column on the remaining ones
lm4.1 <- lm(E4.1)
lm4.1
# Estimate of the error variance: residual sum of squares / residual df
square_sigma <- sum(residuals(lm4.1)^2)/df.residual(lm4.1)
square_sigma
# The same estimate by hand, using rounded coefficients typed in manually
# (x regressed on y).
y_hat <- 46.15 + 251.17*y
s <- t(y_hat - x)%*%(y_hat - x)/(length(x)-1-1)
s
(summary(lm4.1)$s)^2 # model-based value, for comparison
# Analysis of variance: regression SS / total SS (coefficient of determination)
SR <- t(y_hat - mean(x))%*%(y_hat - mean(x))
ST <- t(x - mean(x))%*%(x - mean(x))
s_R <- SR/ST
s_R
(summary(lm4.1)$r.squared) # model-based R^2, for comparison
anova(lm4.1)
# Residual analysis of the regression
res <- residuals(lm4.1)
res
# NOTE(review): the horizontal variable is y, but the label names the number
# of new policies -- confirm which variable this plot is meant to use.
plot(y,res,main='残差散点图',xlab='每周签发的新保单数目',ylab='残差')
plot(lm4.1) # standard lm diagnostic plots
# Overtime needed for 1000 policies
# ee4.1 collects the hand-entered vectors x and y (defined earlier in this
# script) so that both fits below have a data source.
ee4.1 <- data.frame(x = x, y = y)
# Fit x on y and predict x at y = 1000
lm4.1_1 <- lm(x ~ y,data = ee4.1)
predict(lm4.1_1,newdata = data.frame(y = 1000))
# Fit y on x and predict y at x = 1000; this is the fit that matches the
# question stated above.
lm4.1_1 <- lm(y ~ x,data = ee4.1)
predict(lm4.1_1,newdata = data.frame(x = 1000))
# Chapter 4, p119, exercise 2-2
library(openxlsx)
# Sheet "E4.2" of mvexer5.xlsx; its first column becomes the row names
E4.2 <- read.xlsx("mvexer5.xlsx", sheet = "E4.2", rowNames = TRUE)
# Fit the multiple linear regression of y on x1 and x2, then display it
lm4.2 <- lm(y ~ x1 + x2, data = E4.2)
lm4.2
最坏情况下线性时间的选择算法,实际上比平均情况下线性时间的选择要复杂很多(《算法导论》上连伪代码都没有)。问题在于:书中快速排序所用的划分过程要求枢纽元位于最后一个位置,而这里给出的是枢纽元的值,因此要先搜索一遍找到它的位置,再把它交换到末尾。
如果采用 Hoare 划分法,就没有这个要求,也不用搜索,不过算法和书上描述的就稍有不同了。
另外,因为代码复杂,所以对于随机输入,此算法较慢。
下面是采用 Hoare 划分的选择算法代码(C++):
# include <ctime>
# include <cstdlib>
# include <iostream>
// Exchange the values of x and y in place.
inline void swap(int &x, int &y)
{
    int temp = x;
    x = y;
    y = temp;
}
// A[p..r]
int hoarePartitionX(int *A, int p, int r, int x)
{
int i = p - 1
int j = r + 1
for()
{
while( A[--j] >x)
while( A[++i] <x)
if(i<j)
{
swap(A[i], A[j])
}
else
{
return j
}
}
}
// Sort A[0..size-1] into ascending order in place using insertion sort.
// A size of 0 or 1 leaves the array untouched.
void insertionSort(int *A, int size)
{
    for (int j = 1; j < size; j += 1)
    {
        int key = A[j];
        int i = j - 1;
        // Shift every element greater than key one slot to the right.
        while (i >= 0 && A[i] > key)
        {
            A[i + 1] = A[i];
            i -= 1;
        }
        A[i + 1] = key;
    }
}
// return the ith smallest element of A[p..r]
int select(int *A, int p, int r, int i)
{
if(p == r) // only one element, just return
{
return A[p]
}
// #1. groupNum &rest
int groupNum = (r - p + 1) / 5// not counting the rest
int rest = (r - p + 1) % 5
// #2. sort the groups
for(int t=0t<groupNumt+=1)
{
insertionSort(A + p + t*5, 5)
}
if(rest != 0)
{
insertionSort(A + p + groupNum * 5, rest)
}
// #3. get the mid value x
int *mids
if(rest == 0)
mids = new int[groupNum]
else
mids = new int[groupNum+1]
for(int t=0t<groupNumt+=1)
{
mids[t] = A[ p + t*5 + 2 ]
}
if(rest != 0)
{
mids[groupNum] = A[ p + groupNum*5 + (rest-1)/2 ]
}
int x
if( rest == 0 )
{
x = select(mids, 0, groupNum-1, (groupNum-1) / 2 + 1)
}
else
{
x = select(mids, 0, groupNum, groupNum / 2 + 1)
}
delete []mids
// #4. partition with x
int k = hoarePartitionX(A, p, r, x) - p + 1// so the value A[p+k-1] is the kth smallest
// #5.
if(i <= k)
{
return select(A, p, p+k-1, i)
}
else
{
return select(A, p+k, r, i-k)
}
}
int main()
{
int array[100]
for(int i=0i<100i+=1)
array[i] = i
for(int i=0i<100i+=1)
{
int rnd = rand()%100
swap(array[0], array[rnd])
}
std::cout <<select(array, 0, 99, 82)
std::cin.get()
return 0
}