# THIS SCRIPT INCLUDES # 1. A SNIPPET ENABLING THE PARALLEL COMPUTING IF MULTIPLE-CORE CPU IS USED # 2. CK(R, L) FUNCTION GIVING THE ASYMPTOTIC PRINCIPAL COMPONENTS METHOD OF Connor and Korajczyk (1988). # 3. CC(FC, PC) FUNCTION GIVING THE CANONICAL CORRELATIONS (CC) BETWEEN FACTOR CANDIDATES (FC) AND CK's PC # 4. CC_test(FC, PC, rho, alpha=0.025) FUNCTION TESTING CORRELATION BETWEEN CANONICAL VARIATE PAIRS ################################################################################################ # PARALLEL COMPUTING ####################################################################### # IT DOES SAVE THE TIME ON SIMULATION, BUT IT CANNOT STRICTLY USE THE RANDOM SEED. ################################################################################################ #require(parallel) #require(doParallel) #PARALLEL <- parallel :: detectCores () #if ( PARALLEL >1) { # PARALLEL <- # tryCatch ({ workers <- makeCluster(PARALLEL) # registerDoParallel(workers) # on.exit(stopCluster(workers), add = TRUE) # PARALLEL # }, # error = function(Err){ # warning(sprintf("Error creating % d clusters ", # PARALLEL ) ) # 1}) # clusterSetRNGStream(workers, 123456789) #} else if ( PARALLEL < 0) { # warning("PARALLEL must be a non - negative integer ") #} ################################################################################################ # USER-DEFINED FUNCTIONS####################################################################### ################################################################################################ CK <- function(R, L, demean=T){ # THIS FUNCTION OUTPUTS ESTIMATES OF L COMMON FACTORS FROM A T x N MATRIX OF RETURNS R; # THE METHOD USED IS THE ASYMPTOTIC PRINCIPAL COMPONENTS METHOD OF Connor and Korajczyk (1988). # THE CODE IS FROM MATLAB CODE BY Chris Jones, SEE HERE http://www.runmycode.org/companion/view/91 T = dim(R)[1] N = dim(R)[2] if (demean==T){ Mean = colMeans(R) R = R - matrix( rep(Mean,each=T),byrow=F,nrow=T) } if (any(is.na(R))){ s = cov(t(R), use = 'pairwise') } else{ s = (1/T) * R %*% t(R) } tmp = eigen(s) PC = tmp$vectors[,1:L] eigs = tmp$values cum_var = cumsum(eigs)/sum(eigs) plot(cum_var,type='h') return(PC) } CC <- function(FC, PC){ # THIS FUNCTION COMPUTES CANONICAL CORRELATIONS (CC) BETWEEN FACTOR CANDIDATES (FC) AND CK's PC # IT ALSO COMPUTES COEFFICIENTS FOR CONSTRUCTING CANONICAL VARIATES # IT IMPLEMENTS THE STEP 4 to 6 IN THE PROTOCOL C = cov(FC, PC, use = "pairwise.complete.obs") # C: K x L MATRIX, CROSS-COVARIANCE MATRIX ON PAGE 13 V_f = var(FC, na.rm = TRUE, use = "pairwise.complete.obs") # V_f: K x K MATRIX, COVARIANCE MATRIX FOR FACTOR ON PAGE 14 V_e = var(PC, na.rm = TRUE, use = "pairwise.complete.obs") # V_e: L x L MATRIX, COVARIANCE MATRIX FOR REAL EIGENVECTORS ON PAGE 14 tmp = geigen(C, V_f, V_e) # SOLVING IT BY EIGEN_DECOMPOSITION return(list(rho = tmp$values, # RHO: L x 1 VECTORS OF CC FC.coef = tmp$Lmat, # FC.coef: K x L MATRIX, MAKING FC TO CANONICAL VARIATES, a ON PAGE 14 PC.coef = tmp$Mmat)) # PC>coef: L x L MATRIX, MAKING PC TO CANONICAL VARIATES, b ON PAGE 14 } CC_test <- function(FC, PC, rho, alpha=0.025){ # THIS FUNCTION TESTS CORRELATION BETWEEN CANONICAL VARIATE PAIRS # THE METHOD USED IS PROVIDED BY PROF. Kuntara Pukthuanthong # THE REFERENCE IS JOHNSON AND WICHERN (2007, CH10 P566) # IT OUTPUTS THE T-STATISTICS AND ITS P-VALUE BASED ON CHI-SQAURE DISTRIBUTION # NOTATION IS CONSISTENT WITH THE SPREADSHEET IN RECENT EMAIL. p = dim(PC)[1] n = dim(PC)[2] m = dim(FC)[2] k = min(n, m) # NUMBER OF CANONICAL VARIATE PAIRS ev = (1 - rho^2) product = rev(cumprod(rev(ev))) multiplier = -(p - 1 - 0.5*(m+n+1)) # initialize df = t = ChiSq = vector("numeric", k) for (i in 1:k) { ChiSq[i] = multiplier * log(product[i]) df[i] = (m - i + 1) * (n - i + 1) t[i] = sqrt(2*ChiSq[i]) - sqrt(2*df[i]-1) } cv = qchisq(1-alpha, df) # ONE-TAILED, 2.5% CUTOFF BASED ON THE CHI-SQUARE DISTRIBUTION decision = (ChiSq >= cv) ret = data.frame(t.stat=t, Chi.Square=ChiSq, DF=df, Critical.Value = cv, Significance=decision) return(ret) } KRS_nec <- function(R,FC){ PC = CK(R, 10) cc1 = CC(FC,PC) # COMPUTE CANONICAL CORRELATION AND COEFFICIENTS rho = cc1$rho result.cc = CC_test(FC,PC,rho) # TESTING CANONICAL CORRELATION AND FINDING OUT THE CORRESPONDING CANONICAL VARIATES ## THE FOLLOWING CODE OUTPUTS SOMETHING LIKE TABLE 3, PANEL B: SIGNIFICANCE LEVELS FOR FACTOR CANDIDATES (FC) Y = PC %*% cc1$PC.coef # Y: T x L MATRIX, CK PC CANONICAL VARIATE X = as.matrix(FC) # X: T x K MATRIX, ACTUAL FC n.FC = dim(FC)[2] L = min(10, n.FC) t = matrix(NA,nrow=L,ncol=n.FC) # t: L x n.k MATRIX, RECORD t-statistics IN REGRESSION Y ON X for(iter in 1:L){ # REGRESS EVERY CANONICAL VARIATES OF PC ON ELEMENTS IN FC tmp = summary(lm(Y[,iter]~X)) t[iter,] = tmp$coefficients[-1,3] } coef = t coef.sig = t[result.cc$Significance,] return(list(coef=coef, coef.sig=coef.sig)) } A_SDF <- function(R,demean=F){ t = nrow(R) n = ncol(R) if (demean==T){ Mean = colMeans(R) R = R - matrix( rep(Mean,each=t),byrow=F,nrow=t) # Mean = rowMeans(R) # R = R - matrix( rep(Mean, each=n), byrow=TRUE, nrow=t) rowSums(ginv(R %*% t(R))%*% R) * t } else{ rowSums(solve(R %*% t(R)) %*% R) * t } } LW_cov <- function(x, shrink=NULL){ t = nrow(x) n = ncol(x) x = sweep(x, 2, colMeans(x)) sample = (1/t) * t(x) %*% x var=diag(sample) sqrtvar= sqrt(var) rBar = ( sum(sample / (sqrtvar %*% t(sqrtvar))) - n ) / (n*(n-1)) prior = rBar * (sqrtvar %*% t(sqrtvar)) diag(prior) = var if( is.null(shrink)){ y = x^2 phiMat = t(y) %*% y / t - 2*(t(x)%*%x) * sample/t + sample^2 phi = sum(phiMat) term1 = t(x^3) %*% x / t help = t(x) %*% x / t helpDiag=diag(help) term2 = matrix(helpDiag,ncol=n,nrow=n) * sample term3 = help * matrix(var,ncol=n,nrow=n) term4 = matrix(var,ncol=n,nrow=n) * sample thetaMat = term1 - term2 - term3 + term4 diag(thetaMat) = 0 rho = sum(diag(phiMat)) + rBar* sum( (1/ sqrtvar) %*% t(sqrtvar) * thetaMat) gamma = norm(sample-prior, type='F')^2 kappa = (phi - rho) / gamma shrinkage = max(0, min(1, kappa/t)) } else{ shrinkage = shrink } return(list(sample=shrinkage*prior + (1-shrinkage)*sample, shrinkage = shrinkage) ) } LW_PC <- function(R, L=10){ tmp = LW_cov(R) cov_LW = tmp$sample tmp = eigen(cov_LW) R %*% tmp$vectors[,1:10] }