数据科学资源共享
暨南大学 王斌会
Toggle navigation
数据科学资源共享
1. 数据分析简介@
2. Excel数据分析:
___ 数据统计分析
___及Python应用
3. R语言数据分析:
___ 基本数据分析
___ 多元统计分析
___ 计量经济分析
___ 统计模拟实验
4.Python数据分析:
___ *数据可视化*
___ 数据挖掘应用
___ 时间序列模型
5. 数据分析应用#
___ 数据库及应用
___ 案例分析应用
关于我们
3.3-3 计量例子代码
2017-01-26 16:45:43
1593
0
0
rstat
## 基本设置 ``` rm(list=ls()) options(digits=4) par(mar=c(4,4,1.5,1)+0.1,cex=0.8) x=rnorm(20); plot(x,type='b') source("EmRa.R") #EmRa自定义函数 ``` ## 1 引论 ##### ``` X=c(42.55,51.27,60.38,69.10,82.34,92.63,106.83,125.82,153.01,176.36,200.17,241.66,287.79,348.04,456.22,542.24,595.22,732.11,897.38,1006.14,1105.31) X # R语言是用变量名来显示数据的 plot(X) Y=ts(X,start=1993);Y plot(Y);grid() T=1991:2010;T summary(lm(Y~T)) summary(lm(log(Y)~T)) MEdata=read.csv("MEdata.csv"); MEdata names(MEdata) #显示变量名 nrow(MEdata) #数据集行数 ncol(MEdata) #数据集列数 summary(MEdata) plot(MEdata,gap=0) #matplot(MEdata[,1],MEdata[,-1],type='l') #tsME=ts(MEdata[,-1],start=1978);tsME #ts(MEdata,start=1978);grid() ts.plot(MEdata) ts.plot(log(MEdata)) tsME=ts(MEdata,start=1978) ts.plot(tsME) # plot(tsME,plot.type='single') plot.ts(tsME) # plot(tsME,plot.type='multiple') #legend(1980,3000,legend=names(MEdata)) TAX #未解析前 attach(MEdata) #变量解析,MEdata中的变量名可单独使用 TAX #解析后 detach(MEdata) head(UG) tail(UG) dim(UG) names(UG) summary(UG) TAX #去解析后 plot(TAX~GDP) LM=lm(TAX~GDP,data=MEData) LM summary(LM) names(LM) coef(LM) a=coef(LM)[1]; a # 这将得到截距a的值 b=coef(LM)[2]; b # 这将得到斜率b的值 Fit= fitted(LM) # 拟合值 = LM$fitted e=resid(LM) # 模型残差 = LM$resid cbind(TAX,Fit,e) plot(TAX,Fit);abline(0,1) plot(e,type='b');abline(h=0) lm.plot(LM) # 模型拟合图与残差图 predict(LM,data.frame(GDP=6500)) SLM=summary(LM);SLM names(SLM) SLM$r.sq SLM$adj.r.sq SLM$fstat SLM$coef t=SLM$coef[2,3];t P=SLM$coef[2,4];P ``` ## 2 经典回归分析模型 ###### ``` 【例2.1-1 模拟实验】 n=25 x=1:n y=1+2*x plot(x,y);abline(1,2) set.seed(123) e=rnorm(n,0,1) # e ~ N(0,1) y=1+2*x+e # y = 1+ 2x + e lm2.1=lm(y~x) #拟合回归模型 summary(lm2.1) plot(x,y); abline(lm2.1) R2=summary(lm2.1)$r.sq; R2 【例2.1-2】 plot(TAX~GDP,data=MEdata) lm2.2=lm(TAX~GDP,data=MEdata);lm2.2 summary(lm2.2) abline(lm2.2) data.frame(MEdata$GDP,MEdata$TAX,yhat=lm2.2$fitted,resid=lm2.2$resid) plot(predict(lm2.2)) predict(lm2.2) # = fitted(lm2.2) predict(lm2.2,data.frame(GDP=900)) #估计GDP=900时的TAX值 predict(lm2.2,data.frame(GDP=6000)) #预测GDP=6000时的TAX值 summary(lm2.2)$r.sq lm.plot(lm2.2) summary(lm(TAX~EXP)) summary(lm(TAX~IE)) summary(lm(TAX~RS)) summary(lm(TAX~COM)) summary(lm(TAX~INV)) summary(lm(TAX~DEP)) 【例2.1-3】 set.seed(123) n=30 x1=runif(n); x2=runif(n); e=0.2*rnorm(n) # e ~ N(0,0.22) y=1+2*x1+3*x2+ e # y = 1+2x1+3x2+e lm2.3=lm(y~x1+x2);lm2.3 summary(lm2.3) lm(y~-1+x1+x2) 【例2.1-4】 lm2.4=lm(TAX~GDP+EXP+IE+RS+COM+INV+DEP,data=MEdata) lm2.4 summary(lm2.4) slm2.4=summary(lm2.4);slm2.4 names(slm2.4) R2=slm2.4$r.sq; R2 sqrt(R2) adjR2=slm2.4$adj.r.sq; adjR2 plot(MEdata,cex.labels=1) #pairs(MEdata,gap=0,cex.labels=1) step2.4=step(lm2.4);step2.4 summary(step2.4) predict(step2.4,data.frame(EXP=1500,IE=2800,RS=2600,DEP=5000)) tsME=ts(MEdata,start=1978) # 加时间序列 ts.plot(tsME);grid() # 多变量时间序列图 ts.plot(log(tsME));grid() # 对数时间序列图 summary(lm(log(TAX)~GDP+EXP+IE+RS+COM+INV+DEP)) summary(lm(log(TAX)~log(GDP)+EXP+IE+RS+COM+INV+DEP)) summary(lm(log(TAX)~log(GDP)+log(EXP)+IE+RS+COM+INV+DEP)) summary(lm(log(TAX)~log(GDP)+log(EXP)+log(IE)+RS+COM+INV+DEP)) summary(lm(log(TAX)~log(GDP)+log(EXP)+log(IE)+log(RS)+COM+INV+DEP)) summary(lm(log(TAX)~log(GDP)+log(EXP)+log(IE)+log(RS)+log(COM)+INV+DEP)) summary(lm(log(TAX)~log(GDP)+log(EXP)+log(IE)+log(RS)+log(COM)+log(INV)+log(DEP))) summary(lm(TAX~log(GDP)+EXP+IE+RS+COM+INV+DEP,data=MEdata)) 【例2.5 模拟线性相关】 n=30; x=1:n; y=1+2*x #无误差项,直线关系 plot(x,y) cor(x,y) set.seed(12345) n=30; x=1:n; e=rnorm(n); y=1+0.5*x+e # 有误差项,直线相关 plot(x,y) cor(x,y) cor(GDP,TAX) cor(TAX,GDP) cor.test(GDP,TAX) pairs(MEdata,gap=0,cex.labels=1) # = plot(MEdata) cor(MEdata) matcor.test(MEdata) pcor.test(MEdata[,c('TAX','GDP','EXP','IE')]) cor.test(TAX,GDP) pcor.test(data.frame(TAX,GDP)) pcor.test(data.frame(TAX,GDP,EXP)) pcor.test(MEdata[,c('TAX','GDP','EXP')]) pcor.test(data.frame(TAX,GDP,EXP,IE)) lm2.9=lm(TAX~GDP+EXP+IE+RS+COM+INV+DEP) R2=summary(lm2.9)$r.sq; R2 R=sqrt(R2) lm.plot(lm2.9) 【例2.10】 T=1978:2013 plot(T,GDP);abline(v=1993,lty=3) plot(T,TAX);abline(v=1993,lty=3) D=ifelse(T>1993,1,0);D plot(GDP,TAX) abline(h=MEdata[T==1993,'TAX'],v=MEdata[T==1993,'GDP'],lty=3) summary(lm(TAX~GDP)) summary(lm(TAX~GDP+D)) summary(lm(TAX~GDP+GDP*D)) plot(TAX~GDP); abline(3.1137,0.1179,lty=3);abline(-76.53,0.2011) 2.4 非线性模型 set.seed(1234) n=30; x=1:n; e=rnorm(n) y=3+10*log(x) plot(x,y+e,ylab="y=3+10*log(x)+e");lines(x,y) y=0.1*exp(0.2*x) plot(x,y+e,ylab="y=0.1*exp(0.2*x)");lines(x,y) y=-0.03*x^2 plot(x,y+e,ylab="y=-0.03*x^2");lines(x,y) y=1+30/x plot(x,y+e,ylab="y=1+30/x");lines(x,y) y=1+0.5*x+0.03*x^2 plot(x,y+e,ylab="y=1+30/x");lines(x,y) y=MEdata$TAX t=1978:2013 # 时间轴 x=t-1977 # 为建模方便,生成1:36序列 plot(t,y,'b') lm1=lm(y~x); summary(lm1) plot(t,y);lines(t,fitted(lm1)) lm2=lm(y~log(x));summary(lm2) #拟合对数曲线 plot(t,y);lines(t,fitted(lm2)) #加对数回归线 lm3=lm(log(y)~x);summary(lm3) exp(coef(lm3)[1]) plot(t,y);lines(t,exp(fitted(lm3))) lm4=lm(log(y)~log(x));summary(lm4) exp(coef(lm4)) plot(t,y);lines(t,exp(fitted(lm4))) lm5=lm(y~x+I(x^2));summary(lm5) plot(t,y);lines(t,fitted(lm5)) nls1=nls(y~a*exp(b*x),start=list(a=3,b=0.15)); nls1 summary(nls1) plot(t,y);lines(t,fitted(nls1)) predict(nls1,data.frame(x=37)) #CDdata=read.table("clipboard",header=T);CDdata # 选取数据区域,然后拷贝到剪切板 #CDdata=read.csv("http://202.116.1.177/EmRa/CDdata.csv");CDdata t=1:12 Y=c(26.74,34.81,44.72,57.46,73.84,88.45,105.82,126.16,150.9,181.6,204.3,222.8) K=c(23.66,30.55,38.12,46.77,56.45,67.15,78.92,91.67,105.5,121.3,128.6,132.5) L=c(26,28,32,36,41,45,48,52,56,60,66,70) CDdata=data.frame(t,Y,K,L) #简单C-D生产函数模型 CDlm1=lm(log(Y)~log(K)+log(L));summary(CDlm1) #直线化模型 exp(coef(CDlm1)) CDnls1=nls(Y~A*K^a*L^b,start=list(A=0.1,a=0.6,b=1)); #非线性模型 summary(CDnls1) CDlm2=lm(log(Y)~t+log(K)+log(L));summary(CDlm2) #直线化模型 CDnls2=nls(Y~A*exp(m*t)*K^a*L^b,start=list(A=0.1,m=0.03,a=0.6,b=0.5)) #非线性模型 summary(CDnls2) predict(CDnls2,data.frame(t=13,K=140,L=75)) #模型CDnls2预测 predict(CDnls2,data.frame(t=13,K=150,L=80)) #模型CDnls2预测 GY=Growth.rate(Y);GY GK=Growth.rate(K);GK GL=Growth.rate(L);GL a=coef(CDnls2)[[3]] b=coef(CDnls2)[[4]] GA=GY-a*GK-b*GL;GA PA=GA/GY*100;PA PK=GK/GY*100;PK PL=GL/GY*100;PL ``` ## 3 非典型回归分析模型 ##### ### 3.1 模型诊断 ``` 【例3.1】 x1=c(10,8,13,9,11,14,6,4,12,7,5) y1=c(8.04,6.95,7.58,8.81,8.33,9.96,7.24,4.26,10.84,4.82,5.68) y2=c(9.14,8.14,8.74,8.77,9.26,8.1,6.13,3.1,9.13,7.26,4.74) y3=c(7.46,6.77,12.74,7.11,7.81,8.84,6.08,5.39,8.15,6.44,5.73) x2=c(8,8,8,8,8,8,8,19,8,8,8) y4=c(6.58,5.76,7.71,8.84,8.47,7.04,5.25,12.5,5.56,7.91,6.89) M1=lm(y1~x1);summary(M1) M2=lm(y2~x1);summary(M2) M3=lm(y3~x1);summary(M3) M4=lm(y4~x2);summary(M4) par(mfrow=c(2,2),mar=c(4,4,1,0.75)+0.25,cex=0.8) plot(y1~x1);abline(M1) plot(y2~x1);abline(M2) plot(y3~x1);abline(M3) plot(y4~x2);abline(M4) par(mfrow=c(1,1)) e1=resid(M1);e2=resid(M2);e3=resid(M3);e4=resid(M4) #残差 cbind(e1,e2,e3,e4) shapiro.test(e1) shapiro.test(e2) shapiro.test(e3) shapiro.test(e4) par(mfrow=c(2,2),mar=c(4,4,1,0.75)+0.25,cex=0.8) #分别做残差图 plot(e1,type='h',lty=3);points(e1);abline(h=0) plot(e2,type='h',lty=3);points(e2);abline(h=0) plot(e3,type='h',lty=3);points(e3);abline(h=0) plot(e4,type='h',lty=3);points(e4);abline(h=0) par(mfrow=c(1,1)) h1=leverage(M1);h2=leverage(M2); h3=leverage(M3);h4=leverage(M4) cbind(h1,h2,h3,h4) h0=2*(p=1)/(n=length(h1));h0 r1=rstudent(M1);r2=rstudent(M2); r3=rstudent(M3);r4=rstudent(M4); #学生化残差 cbind(r1,r2,r3,r4) par(mfrow=c(2,2),cex=0.75) #分别做学生化残差图 plot(r1,type='h',lty=3);points(r1);abline(h=0) plot(r2,type='h',lty=3);points(r2);abline(h=0) plot(r3,type='h',lty=3);points(r3);abline(h=0) plot(r4,type='h',lty=3);points(r4);abline(h=0) par(mfrow=c(1,1)) par(mfrow=c(2,2),cex=0.75) e2.3=resid(lm2.3) hist(e2.3) qqnorm(e2.3);qqline(e2.3) plot(e2.3,type='h',lty=3);points(e2.3);abline(h=0) plot(rstudent(lm2.3),type='h',lty=3);points(rstudent(lm2.3));abline(h=0) par(mfrow=c(1,1)) lm.diag(lm2.3) cbind(lm.diag(lm2.3),influence.measures(lm2.3)$infmat[,10:12]) residual.plot(M1,type='index') residual.plot(M1,type='predictor') 多重共线性 #lm.test(lm(TAX~GDP,data=MEdata)) summary(lm(TAX~GDP,data=MEdata)) summary(lm(TAX~GDP+EXP,data=MEdata)) summary(lm(TAX~GDP+EXP+IE,data=MEdata)) summary(lm(TAX~GDP+EXP+IE+RS,data=MEdata)) summary(lm(TAX~GDP+EXP+IE+RS+COM,data=MEdata)) summary(lm(TAX~GDP+EXP+IE+RS+COM+INV,data=MEdata)) summary(lm(TAX~GDP+EXP+IE+RS+COM+INV+DEP,data=MEdata)) X=MEdata[,2:8];X #取自变量 cor(X) #自变量相关阵 lamda=eigen(cor(X))$value;lamda #特征值法eigen (CN=max(lamda)/min(lamda)) #条件数法 R2=summary(lm(GDP~EXP+IE+RS+COM+INV+DEP))$r.sq;R2 # 决定系数 R=sqrt(R2);R (VIF1=1/(1-R2)) #方差扩大因子VIF1 library(bstats) #需先安装包bstats vif(lm(TAX~GDP+EXP+IE+RS+COM+INV+DEP,data=MEdata)) ``` ### 3.2 异方差 ``` set.seed(12345) n=50; x=runif(n); y=0.2+0.3*x+x*rnorm(n,0,0.1) plot(x,y) M1=lm(y~x); summary(M1) e=resid(M1) plot(x,e) #winpar() Fm=lm(e^2~x+I(x^2)) R2=summary(Fm)$r.sq n=nrow(Fm$model) m=ncol(Fm$model) W=n*R2;W P=1-pchisq(W,m-1); P library(bstats) white.test(M1) bptest(M1,varformula=~x+I(x^2)) summary(lm(abs(e)~x))$coef summary(lm(abs(e)~I(x^2)))$coef summary(lm(abs(e)~sqrt(x)))$coef summary(M1) M2=lm(y~x,weight=1/abs(e));summary(M2) white.test(M2) M3=lm(TAX~IE+COM+INV,data=MEdata);summary(M3) e3=resid(M3) plot(TAX,abs(e3)) white.test(M3) summary(lm(TAX~IE+COM+INV,weight=1/abs(e3),MEdata)) fm=lm(TAX~GDP,data=MEdata);summary(fm) e=resid(fm); #模型残差 plot(GDP,abs(e)) fm1=lm(TAX~IE,weight=1/abs(e),data=MEdata); summary(fm1) plot(TAX,abs(e)) plot(IE,abs(e)) white.test(fm) ``` ### 3.3 自相关 ``` y=log(TAX); x=log(GDP) lm3.3=lm(y~x);summary(lm3.3) lm.plot(lm3.3) et=resid(lm3.3) plot(et,type='b') library(lmtest) dwtest(lm3.3) bgtest(lm3.3,order=1) bgtest(lm3.3,order=2) y=log(TAX); x=log(GDP) n=length(y) yt=y[-1]; yt_1=y[1:(n-1)] xt=x[-1]; xt_1=x[1:(n-1)] lm3.4=lm(yt~yt_1+xt+xt_1) summary(lm3.4) dwtest(lm3.4) bgtest(lm3.4,order=1) x=1:9; x L_(x,1,na.is=F) #不包含缺失值NA,下同 L_(x,2,na.is=F) L_(x,3,na.is=F) x_1=L_(x,1) #包含缺失值NA,下同 x_2=L_(x,2) x_3=L_(x,3) cbind(x,x_1,x_2,x_3) y=log(TAX); x=log(GDP) lm3.5=lm(y~L_(y)+x+L_(x));summary(lm3.5) ``` ## 4 经典时间序列模型 ##### ### 4.1 时间序列概念 ``` 【例4.1-1】模拟数据---平稳时间序列 set.seed(123) #设置随机种子数 yt=rnorm(50,0,1) plot(yt,type='b'); abline(h=0) yt_1=L_(yt,na.is=T) plot(yt,yt_1);abline(lm(yt~yt_1)) #相关图 cor(yt,yt_1,"complete") acf(yt)$ac Box.test(yt) # Box.test(yt,type='Box-Pierce') Box.test(yt,type='Ljung-Box') ``` ### 4.2 自回归AR模型 ##### ``` set.seed(1234) # 模拟AR(1) n=50; y1=rep(0,n) for(t in 2:n) { y1[t]=0.8*y1[t-1]+rnorm(1) } plot(y1,type='o'); acf(y1) #cor.test(y,L_(y,6)) Box.test(y1) Box.test(y1,type='Ljung-Box') ar(y1,method="yule-walker") #ar.yw(y2) ar(y1,method="ols") #ar.ols(y1) #y1_1=L_(y1,na.in=T) lm4.1=lm(y1~L_(y1)); lm4.1 summary(lm4.1) set.seed(123) # 模拟 AR(2) y2=arima.sim(n=100,list(ar=c(0.7,-0.5)));y2 plot(y2,type='o'); #acf(y2) pacf(y2)$ac[1:5] Box.test(y2,type='Box-Pierce') Box.test(y2,type='Ljung-Box') AR2=arima(y2,order=c(2,0,0)) arima.test(AR2) #summary(lm(y2~L_(y2,1,na.in=T)+L_(y2,2,na.is=T))) AR2=arima(y2,order=c(2,0,0),include.mean=F) arima.test(AR2) tsdiag(AR2) arima.test(y2,order=c(2,0,0)) arima.test(y2,order=c(2,0,0),include.mean = F) pacf(y1)$ac[1:5] pacf.test(yt) pacf(y2)$ac[1:5] library(tseries) #Pt=get.hist.quote('000001.ss',quote="Close",start='2003-01-01',end='2013-12-30',quiet=TRUE) #Rt=read.csv("Return.csv")$Rt Pt=read.csv("http://emlab.jnu.edu.cn/EmRa/SCIdata.csv")$Pt #从SCIdata.csv文件中读取上证指数 plot(Pt,type='l') #上证指数图 Rt=log(Pt[-1]/L_(Pt,na.is=F))*100 #滞后函数L_会损失1组数据,需剔除Rt中第1组数据 head(Rt) plot(Rt,type='l',ylim=c(-10,10));abline(h=0) #上证指数对数收益率图 pacf(Rt) Box.test(Rt,lag=1) Box.test(Rt,lag=2) Box.test(Rt,lag=3) Box.test(Rt,lag=4) AR4=arima(Rt,order=c(4,0,0),include.mean=F) arima.test(AR4) AR4=arima(Rt,order=c(4,0,0),include.mean=F,fixed=c(0,0,0,NA),transform.pars=FALSE) arima.test(AR4) ``` ### 4.3 移动平均MA模型 ``` set.seed(123456) #模拟MA(1) y3=arima.sim(n=100,list(ma=0.8)) plot(y3,type='o') #acf.test(y3) acf(y3)$ac[1:5] MA1=arima(y3,order=c(0,0,1),include.mean=FALSE) arima.test(MA1) acf(Rt) MA4=arima(Rt,order=c(0,0,4),include.mean=F) arima.test(MA4) MA4=arima(Rt,order=c(0,0,4),include.mean=F,fixed=c(0,0,0,NA)) arima.test(MA4) ``` ### 4.4 自回归移动平均ARMA模型 ``` set.seed(12345) # ARMA(1,1): yt=0.8yt-1+et-0.6et-1 y4=arima.sim(model=list(ar=0.8,ma=0.6),n=100) plot(y4,type='o') acf(y4) #acf.test(y4) pacf(y4) #pacf.test(y4) a11.y4=arima(y4,order=c(1,0,1),include.mean=F) arima.test(a11.y4) a21.y4=arima(y4,order=c(2,0,1),include.mean=F) arima.test(a21.y4) a22.y4=arima(y4,order=c(2,0,2),include.mean=F) arima.test(a22.y4) #predict(ARMA11,n.ahead=3) library(forecast) #需先安装包forecast aa.y4=auto.arima(y4); aa.y4 arima.test(aa.y4) aa.Rt=auto.arima(Rt);aa.Rt #Rt=read.csv("Return.csv")$Rt summary(aa.Rt) a22.Rt=arima(Rt,order=c(2,0,2),include.mean=F) a22.Rt arima.test(a22.Rt) predict(a22.Rt,n.ahead=5) a44.Rt=arima(Rt,order=c(4,0,4),include.mean=F) arima.test(a44.Rt) predict(a44.Rt,n.ahead=5) costant<-function(model){ p=length(model$coef) (1-sum(model$coef[1:(p-1)]))*model$coef[p] } costant(AR1) ``` ### 4.5 分布滞后与自回归模型 ##### ``` TAX_1=L_(TAX,na.in=T);GDP_1=L_(GDP,na.in=T) lm3.4=lm(TAX~TAX_1+GDP+GDP_1);summary(lm3.4) dwtest(lm3.3) dwtest(lm3.4) bgtest(lm3.4,order=1) bgtest(lm3.4,order=2) IE_1=L_(IE,na.in=T); INV_1=L_(INV,na.in=T); lm3.5=lm(TAX~TAX_1+GDP+GDP_1+IE+IE_1+INV+INV_1);summary(lm3.5) TAX_2=L_(TAX,2,na.in=T);GDP_2=L_(GDP,2,na.in=T) lm3.5=lm(TAX~TAX_1+TAX_2+GDP+GDP_1+GDP_2,data=MEdata);summary(lm3.5) dwtest(lm3.5) library(lmtest) Yt=log(GDP); Xt=log(INV) summary(lm(Yt~Xt)) #0阶滞后回归 summary(lm(Yt~Xt+L_(Xt))) #1阶滞后回归 summary(lm(Yt~Xt+L_(Xt,1)+L_(Xt,2))) #2阶滞后回归 summary(lm(Yt~Xt+L_(Xt,1)+L_(Xt,2)+L_(Xt,3))) #3阶滞后回归 #经验加权估计 Z1=Xt+1/2*L_(Xt,1)+1/4*L_(Xt,2)+1/8*L_(Xt,3); Z2=1/4*Xt+1/2*L_(Xt,1)+2/3*L_(Xt,2)+1/4*L_(Xt,3); Z3=1/4*Xt+1/4*L_(Xt,1)+1/4*L_(Xt,2)+1/4*L_(Xt,3); #cbind(X,Y,Z1,Z2,Z3) fm1=lm(Yt~Z1);summary(fm1) fm2=lm(Yt~Z2);summary(fm2) fm3=lm(Yt~Z3);summary(fm3) Yt=log(TAX); Xt=log(GDP) summary(lm(Yt~Xt+L_(Xt))) summary(lm(Yt~Xt+L_(Yt))) a.Yt=arima(Yt,order=c(1,0,0),xreg=Xt) arima.test(a.Yt) ``` ## 5 扩展时间序列模型 ##### ### 5.1 非平稳时间序列模型 ``` 【例5.1-1】模拟数据---平稳时间序列 set.seed(12345) xt=rnorm(50,0,1.5) plot(xt,type='b');abline(h=0) 【例5.1-2】模拟数据---非平稳时间序列 set.seed(12345) ut=rnorm(50,0,1.5) xt=cumsum(ut) plot(xt,type='b');abline(h=0) GDP=ts(MEdata$GDP,start=1978) plot(GDP,type='b') x=1:9; x diff(x) # diff(x,1) diff(x,d=2) diff(x,d=1,l = 2) # diff(x,diff=1,lag=2) #随机游走序列xt的差分图 plot(diff(xt),type='b'); abline(h=0) plot(diff(GDP)) lnGDP=log(GDP) plot(lnGDP) plot(diff(lnGDP)) ##### ADF检验 ##### Xt=MEdata$GDP;Xt tt=1978:2010 plot(tt,lngdp,type='b') #GDP序列图5.1-4.png adf.test(Xt) plot(D_(Xt),type='b') summary(lm(Xt~tt+L_(Xt)+D_(L_(Xt)))) summary(lm(Xt~L_(Xt)+D_(L_(Xt)))) summary(lm(Xt~-1+L_(Xt)+D_(L_(Xt)))) ## 单整检验 set.seed(12345) ut=rnorm(50,0,1.5) xt=cumsum(ut) plot(xt,type='b');abline(h=0) set.seed(123) Yt=cumsum(rnorm(100)) ADF.test(Yt,type="trend") ADF.test(Yt,type="drift") ADF.test(Yt,type="none") ADF.test(diff(Yt),type="trend") ADF.test(diff(Yt),type="drift") ADF.test(diff(Yt),type="none") lnGDP=ts(log(MEdata$GDP),start=1978) ADF.test(lnGDP,type="trend") ADF.test(lnGDP,type="drift") ADF.test(lnGDP,type="none") ADF.test(diff(lnGDP),type="trend") ADF.test(diff(lnGDP),type="drift") ADF.test(diff(lnGDP),type="none") ## 模拟ARIMA(1,1,0): Dyt=0.8Dyt-1+et set.seed(123456) A11=arima.sim(n=200,list(order=c(1,1,0),ar=0.8)) plot(A11) plot(diff(A11)) #acf(A11)$ac[1:5] #pacf(A11)$ac[1:5] arima(A11,order = c(1,1,0)) ## 模拟ARIMA(2,1,1): set.seed(123456) A111=arima.sim(n=200,list(order=c(2,1,1),ar=c(0.8,-0.5),ma=-0.7)); plot(A111) plot(diff(A111)) arima(A111,order = c(2,1,1)) A100.GDP=arima(log(GDP),order=c(1,0,0));A100.GDP arima.test(A100.GDP) tsdiag(a100.GDP) A101.GDP=arima(log(GDP),order=c(1,0,1)); Aa101.GDP Aa110.GDP=arima(log(GDP),order=c(1,1,0));A110.GDP arima.test(A110.GDP) tsdiag(A110.GDP) A111.GDP=arima(log(GDP),order=c(1,1,1));arima.test(A111.GDP) A210.GDP=arima(log(GDP),order=c(2,1,0));arima.test(A210.GDP) ``` ### 5.2 协整和ECM的实证分析 #### ``` ## 模拟协整数据 set.seed(123) n=100 u1=rnorm(n) xt=cumsum(u1) u2=rnorm(n) yt=0.6*xt+u2 plot(xt,type='l',ylab='');lines(yt,lty=2) legend("topleft",c("xt","yt"),lty=1:2,bty="n") plot(diff(xt),type='l',ylab='',ylim=c(-5,5)); lines(diff(yt),lty=3);abline(h=0) legend("topleft",c("diff(xt)","diff(yt)"),lty=1:2,bty="n") ADF.test(diff(xt),type="trend") ADF.test(diff(xt),type="drift") ADF.test(diff(xt),type="none") ADF.test(diff(yt),type="trend") ADF.test(diff(yt),type="drift") ADF.test(diff(yt),type="none") lm5.2=lm(yt~xt);summary(lm5.2) et=resid(lm5.2) plot(et,type='h',ylim=c(-4,4));abline(h=0) ADF.test(et,type="none") #残差平稳性检验 #ecm=resid(fm)[-n];ecm #ecm = L_(resid(fm));summary(lm(D_(y)~D_(x)+ecm)) ecmt_1=head(resid(lm5.2),-1); #相当于误差的一阶滞后ecmt_1 #cbind(resid(fm)[-c(99,100)],head(resid(fm),-2)) summary(lm(diff(yt)~diff(xt)+ecmt_1)) library(lmtest) grangertest(xt~yt) grangertest(yt~xt) #IE和GDP协整检验及误差修正模型 attach(MEdata) GDP=ts(GDP,start=1978) IE=ts(IE,start=1978) plot(GDP,ylab='');lines(IE,lty=2); legend("topleft",c("GDP","IE"),lty=1:2,bty="n") lnGDP=log(GDP); lnIE=log(IE) plot(lnGDP,ylab=''); lines(lnIE,lty=2) legend("topleft",c("log(GDP)","log(IE)"),lty=1:2,bty="n") ADF.test(lnIE,type="trend") ADF.test(lnIE,type="drift") ADF.test(lnIE,type="none") ADF.test(diff(lnIE),type="trend") ADF.test(diff(lnIE),type="drift") ADF.test(diff(lnIE),type="none") fm=lm(lnIE~lnGDP);summary(fm) et=resid(fm) plot(et,type="h",ylim=c(-0.8,0.8));abline(h=0) #adf.test(resid) ADF.test(et,type="none") #残差平稳性检验 #summary(lm(D_(lnIE)~D_(lnGDP)+ L_(resid))) ecmt_1=head(et,-1);ecmt_1 #et-1 fm1=lm(diff(lnIE)~diff(lnGDP)+ecmt_1) summary(fm1) plot(resid(fm1),type='h') library(lmtest) grangertest(lnIE~lnGDP) grangertest(lnIE~lnGDP,order=2) grangertest(lnGDP~lnIE) grangertest(lnGDP~lnIE,order=2) ``` ### 5.3 异方差时间序列模型 ##### ``` ###【例5.3-1】模拟研究 #simulate ARCH(1) ## 模拟ARCH(1) set.seed(123456) ht1=garch.sim(alpha=c(0.02,0.8),n=500) plot(ht1,type='l') #,ylab='rt',xlab='t',main="Simulated ARCH(1)") #qqnorm(ht1);qqline(ht1) acf(ht1) pacf(ht1) acf(ht1^2) pacf(ht1^2) library(tseries) G01=garch(ht1,order=c(0,1),trace=F);summary(G01) G01.Rt=garch(Rt,order=c(0,1)); summary(G01.Rt) G11.Rt=garch(Rt,order=c(1,1),trace=F); summary(G11.Rt) #simulate GARCH(1,1) set.seed(123456) ht11=garch.sim(alpha=c(0.1,0.08),beta=0.9,n=500) plot(ht11,type='l') Box.test(ht11) acf(ht11) pacf(ht11) acf(ht11^2) pacf(ht11^2) G11=garch(ht11,order=c(1,1),trace=F) summary(G11) e11=resid(GARCH11) e11 acf(e11[-1]^2) pacf(e11[-1]^2) GARCH22=garch(ht11,order=c(1,2),trace=F) summary(GARCH22) arma11=arma(abs(ht11),order=c(1,1));summary(arma11) m1=garch(r.cref,order=c(1,1));summary(m1) gBox(m1,method='squared') plot(resid(m1),type='h') qqnorm(resid(m1));qqline(resid(m1)) ## 股票数据 #Pt=get.hist.quote(instrument="^gspc",start="2003-01-01",quote="Close") Pt=read.csv("http://emlab.jnu.edu.cn/EmRa/SCIdata.csv")$Pt #从SCIdata.csv文件中读取上证指数 Rt=log(Pt[-1]/L_(Pt,na.is=F))*100 #滞后函数L_会损失1组数据,需剔除Rt中第1组数据 plot(Rt,type='l',ylim=c(-10,10));abline(h=0) #上证指数对数收益率图 plot(Rt,type='l');abline(h=0) hist(Rt,breaks=50,main="") qqnorm(Rt);qqline(Rt) shapiro.test(Rt) #jarque.bera.test(Rt) acf(Rt) pacf(Rt) Box.test(Rt,lag=1) Box.test(Rt,lag=2) Box.test(Rt,lag=3) Box.test(Rt,lag=4) ADF.test(Rt,type="none") lm.Rt=lm(Rt~1);lm.Rt;summary(lm.Rt) et=resid(lm.Rt) plot(et,type='l',ylim=c(-15,15));abline(h=0) acf(et) Box.test(et) Box.test(et,lag=1,type='Ljung-Box') Box.test(et,lag=2) Box.test(et,lag=10) acf(et^2) pacf(et^2) Box.test(et^2) library(tseries) summary(garch(Rt,order=c(0,1),trace=F)) summary(garch(Rt,order=c(0,5),trace=F)) summary(garch(Rt,order=c(0,8),trace=F)) summary(garch(Rt,order=c(0,9),trace=F)) summary(garch(Rt,order=c(1,1),trace=F)) summary(garch(Rt,order=c(2,1),trace=F)) arima.test(arima(Rt,order=c(4,0,0),include.mean=F)) AR4=arima(Rt,order=c(4,0,0),include.mean=F,fixed=c(0,0,0,NA),transform.pars=FALSE) arima.test(AR4) detach(MEdata) ```
上一篇:
3.3-2 计量教程大纲
下一篇:
3.4-1 统计实验与模拟
0
赞
1593 人读过
新浪微博
微信
腾讯微博
QQ空间
人人网
提交评论
立即登录
, 发表评论.
没有帐号?
立即注册
0
条评论
More...
文档导航
没有帐号? 立即注册