# THIS SCRIPT INCLUDES
# 1. A SNIPPET ENABLING THE PARALLEL COMPUTING IF MULTIPLE-CORE CPU IS USED
# 2. CK(R, L) FUNCTION GIVING THE ASYMPTOTIC PRINCIPAL COMPONENTS METHOD OF
#    Connor and Korajczyk (1988).
# 3. CC(FC, PC) FUNCTION GIVING THE CANONICAL CORRELATIONS (CC) BETWEEN
#    FACTOR CANDIDATES (FC) AND CK's PC
# 4. CC_test(FC, PC, rho, alpha=0.025) FUNCTION TESTING CORRELATION BETWEEN
#    CANONICAL VARIATE PAIRS

################################################################################
# PARALLEL COMPUTING ###########################################################
# IT DOES SAVE THE TIME ON SIMULATION, BUT IT CANNOT STRICTLY USE THE RANDOM
# SEED.
################################################################################
#require(parallel)
#require(doParallel)
#PARALLEL <- parallel::detectCores()
#if (PARALLEL > 1) {
#  PARALLEL <-
#    tryCatch({
#      workers <- makeCluster(PARALLEL)
#      registerDoParallel(workers)
#      on.exit(stopCluster(workers), add = TRUE)
#      PARALLEL
#    },
#    error = function(Err) {
#      warning(sprintf("Error creating %d clusters", PARALLEL))
#      1
#    })
#  clusterSetRNGStream(workers, 123456789)
#} else if (PARALLEL < 0) {
#  warning("PARALLEL must be a non-negative integer")
#}

################################################################################
# USER-DEFINED FUNCTIONS #######################################################
################################################################################

# CK: estimate L common factors from a T x N matrix of returns R using the
# asymptotic principal components method of Connor and Korajczyk (1988).
# THE CODE IS FROM MATLAB CODE BY Chris Jones, SEE HERE
# http://www.runmycode.org/companion/view/91
#
# Arguments:
#   R        numeric matrix (or data frame coercible by as.matrix) with one
#            row per period and one column per asset.
#   L        number of factors to extract; must satisfy 1 <= L <= nrow(R).
#   demean   if TRUE, subtract each column's mean (NAs ignored when computing
#            the mean) before forming the cross-product matrix.
#   use_cov  if TRUE, decompose cov(t(R)) computed on pairwise-complete
#            observations; otherwise decompose R %*% t(R) / nrow(R).
#
# Returns:
#   nrow(R) x L matrix holding the eigenvectors that belong to the L largest
#   eigenvalues, with columns named APC_1, ..., APC_L. The result is always a
#   matrix, including when L == 1.
CK <- function(R, L, demean = TRUE, use_cov = FALSE) {
  R <- as.matrix(R)
  K <- nrow(R)
  stopifnot(is.numeric(L), length(L) == 1L, L >= 1, L <= K)

  if (demean == TRUE) {
    # Remove the per-column mean from every row.
    R <- sweep(R, 2L, colMeans(R, na.rm = TRUE))
  }

  if (use_cov == TRUE) {
    s <- cov(t(R), use = "pairwise.complete.obs")
  } else {
    # tcrossprod(R) is R %*% t(R), built so that it is exactly symmetric.
    s <- tcrossprod(R) / K
  }

  # Both candidate matrices are symmetric; saying so keeps eigen() on the
  # real-valued symmetric path regardless of floating-point noise.
  decomp <- eigen(s, symmetric = TRUE)

  # drop = FALSE keeps a one-column matrix when L == 1 so that colnames<-
  # below does not fail on a bare vector.
  PC <- decomp$vectors[, seq_len(L), drop = FALSE]
  colnames(PC) <- paste("APC", seq_len(L), sep = "_")
  PC
}

# CC: canonical correlations between factor candidates (FC) and the principal
# components returned by CK (PC), together with the coefficients used to build
# the canonical variates.
# IT IMPLEMENTS THE STEP 4 to 6 IN THE PROTOCOL
#
# Arguments:
#   FC  observations x K matrix of factor candidates (a plain vector is
#       treated as a single column).
#   PC  observations x L matrix of principal components (a plain vector is
#       treated as a single column).
#
# Returns a list with:
#   rho      canonical correlations (the `values` element from geigen)
#   FC.coef  coefficients turning FC into canonical variates, a ON PAGE 14
#   PC.coef  coefficients turning PC into canonical variates, b ON PAGE 14
#
# NOTE(review): geigen() is neither defined nor loaded in this file. The
# three-matrix call and the values/Lmat/Mmat fields look like fda::geigen;
# confirm that the caller attaches that package before using CC.
CC <- function(FC, PC) {
  FC <- as.matrix(FC)
  PC <- as.matrix(PC)

  # C: K x L cross-covariance matrix (PAGE 13).
  C <- cov(FC, PC, use = "pairwise.complete.obs")
  # V_f: K x K covariance matrix of the factor candidates (PAGE 14).
  V_f <- var(FC, use = "pairwise.complete.obs")
  # V_e: L x L covariance matrix of the principal components (PAGE 14).
  V_e <- var(PC, use = "pairwise.complete.obs")

  # Generalized eigenproblem solved by the external geigen().
  tmp <- geigen(C, V_f, V_e)

  list(
    rho = tmp$values,
    FC.coef = tmp$Lmat,
    PC.coef = tmp$Mmat
  )
}

# CC_test: sequential chi-square test of the correlation between canonical
# variate pairs.
# THE METHOD USED IS PROVIDED BY PROF. Kuntara Pukthuanthong
# THE REFERENCE IS JOHNSON AND WICHERN (2007, CH10 P566)
# NOTATION IS CONSISTENT WITH THE SPREADSHEET IN RECENT EMAIL.
#
# Arguments:
#   FC     observations x m matrix of factor candidates (a plain vector is
#          treated as one column).
#   PC     p x n matrix of principal components (p observations).
#   rho    canonical correlations, one per canonical variate pair; the first
#          min(n, m) entries are reported, but every entry supplied enters the
#          products described below.
#   alpha  upper-tail probability used for the chi-square critical value.
#
# Returns a data frame with one row per canonical variate pair and columns
#   t.stat          sqrt(2 * Chi.Square) - sqrt(2 * DF - 1)
#   Chi.Square      -(p - 1 - (m + n + 1) / 2) * log(prod_{j >= i}(1 - rho_j^2))
#   DF              (m - i + 1) * (n - i + 1)
#   Critical.Value  qchisq(1 - alpha, DF)
#   Significance    TRUE when Chi.Square >= Critical.Value
CC_test <- function(FC, PC, rho, alpha = 0.025) {
  # NROW/NCOL treat a plain vector as a single column, so a single factor
  # candidate passed as a vector is handled like a one-column matrix.
  p <- NROW(PC)
  n <- NCOL(PC)
  m <- NCOL(FC)
  k <- min(n, m) # NUMBER OF CANONICAL VARIATE PAIRS
  idx <- seq_len(k)

  # tail_prod[i] is the product of (1 - rho_j^2) over j = i, ..., length(rho).
  tail_prod <- rev(cumprod(rev(1 - rho^2)))
  multiplier <- -(p - 1 - 0.5 * (m + n + 1))

  chi_sq <- multiplier * log(tail_prod[idx])
  dof <- as.numeric((m - idx + 1) * (n - idx + 1))
  t_stat <- sqrt(2 * chi_sq) - sqrt(2 * dof - 1)

  # One-tailed cutoff at level alpha from the chi-square distribution.
  cv <- qchisq(1 - alpha, dof)

  data.frame(
    t.stat = t_stat,
    Chi.Square = chi_sq,
    DF = dof,
    Critical.Value = cv,
    Significance = chi_sq >= cv
  )
}