""" JNPRW Instrumental Variables Estimator for Cross-Sectional Asset Pricing Tests =============================================================================== Reference: Jegadeesh, N., Noh, J., Pukthuanthong, K., Roll, R., and Wang, J. (2019). "Empirical Tests of Asset Pricing Models with Individual Assets: Resolving the Errors-in-Variables Bias in Risk Premium Estimation." Journal of Financial Economics, 133(2), 273-298. https://doi.org/10.1016/j.jfineco.2019.02.010 Summary: Standard Fama-MacBeth (1973) cross-sectional regressions suffer from errors-in-variables (EIV) bias when using individual stocks as test assets, because betas are estimated with error. Grouping stocks into portfolios mitigates this bias but can mask cross-sectional variation in risk and returns (Roll, 1977; Lewellen, Nagel, and Shanken, 2010). This module implements the JNPRW instrumental variables estimator, which allows the use of individual stocks while delivering consistent estimates of risk premiums. The key idea: estimate betas from two non-overlapping subsamples (odd months and even months within a rolling window), then use one set as instruments for the other. Because the measurement errors in the two sets are uncorrelated, the IV estimator eliminates the EIV bias. The estimator is N-consistent (Proposition 1 in JNPRW): it converges to the true ex post risk premium as the number of stocks increases, even for finite T. The associated t-statistics are well-specified in small samples. Usage: from jnprw_iv import JNPRWEstimator estimator = JNPRWEstimator( daily_stock_returns=daily_df, # DataFrame: date, permno, ret monthly_stock_returns=monthly_df, # DataFrame: date, permno, ret factor_returns=factor_df, # DataFrame: date, factor1, factor2, ... rf_column='RF', # Risk-free rate column in factor_df factor_columns=['MKT', 'SMB', 'HML'], rolling_window_months=36, # Rolling window for beta estimation min_obs_per_stock=100, # Minimum daily obs per subsample ) results = estimator.run() print(results.summary()) Author: K. Pukthuanthong Date: March 2026 License: MIT """ import numpy as np import pandas as pd from scipy import stats from typing import List, Optional, Tuple, Dict import warnings class JNPRWEstimator: """ Instrumental Variables estimator for cross-sectional risk premium estimation using individual stocks. Implements the IV estimator from Equation (4) in JNPRW (2019, JFE): gamma_IV,t = (B_IV @ B_EV')^{-1} @ B_IV @ r_t' where B_IV and B_EV are beta matrices estimated from non-overlapping subsamples (odd and even months within a rolling window of daily data). Parameters ---------- daily_stock_returns : pd.DataFrame Daily stock returns with columns: ['date', 'permno', 'ret'] Returns should be in decimal form (0.01 = 1%). 'date' should be datetime or convertible to datetime. monthly_stock_returns : pd.DataFrame Monthly stock returns with columns: ['date', 'permno', 'ret'] Used as dependent variable in the cross-sectional regression. Returns should be in decimal form. factor_returns_daily : pd.DataFrame Daily factor returns with columns: ['date'] + factor_columns + [rf_column] All returns in decimal form. factor_returns_monthly : pd.DataFrame Monthly factor returns with columns: ['date'] + factor_columns + [rf_column] All returns in decimal form. Used to compute monthly excess returns. factor_columns : list of str Names of the factor columns (e.g., ['Mkt-RF', 'SMB', 'HML']). rf_column : str Name of the risk-free rate column. Default: 'RF'. rolling_window_months : int Number of months in the rolling window for beta estimation. Default: 36 (as in JNPRW simulations). min_obs_per_subsample : int Minimum number of daily observations required per stock per subsample (odd or even months) to estimate a beta. Default: 100. min_stocks_per_month : int Minimum number of stocks required in a cross-section for the month to be included. Default: 100. """ def __init__( self, daily_stock_returns: pd.DataFrame, monthly_stock_returns: pd.DataFrame, factor_returns_daily: pd.DataFrame, factor_returns_monthly: pd.DataFrame, factor_columns: List[str], rf_column: str = 'RF', rolling_window_months: int = 36, min_obs_per_subsample: int = 100, min_stocks_per_month: int = 100, ): self.daily_ret = daily_stock_returns.copy() self.monthly_ret = monthly_stock_returns.copy() self.factor_daily = factor_returns_daily.copy() self.factor_monthly = factor_returns_monthly.copy() self.factor_columns = factor_columns self.rf_column = rf_column self.window = rolling_window_months self.min_obs = min_obs_per_subsample self.min_stocks = min_stocks_per_month self.K = len(factor_columns) self._prepare_data() def _prepare_data(self): """Parse dates and create year-month identifiers.""" self.daily_ret['date'] = pd.to_datetime(self.daily_ret['date']) self.monthly_ret['date'] = pd.to_datetime(self.monthly_ret['date']) self.factor_daily['date'] = pd.to_datetime(self.factor_daily['date']) self.factor_monthly['date'] = pd.to_datetime(self.factor_monthly['date']) # Year-month identifiers self.daily_ret['ym'] = ( self.daily_ret['date'].dt.year * 100 + self.daily_ret['date'].dt.month ) self.monthly_ret['ym'] = ( self.monthly_ret['date'].dt.year * 100 + self.monthly_ret['date'].dt.month ) self.factor_daily['ym'] = ( self.factor_daily['date'].dt.year * 100 + self.factor_daily['date'].dt.month ) self.factor_monthly['ym'] = ( self.factor_monthly['date'].dt.year * 100 + self.factor_monthly['date'].dt.month ) # Merge daily stock returns with daily factor returns self.daily_merged = self.daily_ret.merge( self.factor_daily[['date'] + self.factor_columns], on='date', how='inner' ) # Compute monthly excess returns self.monthly_merged = self.monthly_ret.merge( self.factor_monthly[['ym', self.rf_column]], on='ym', how='inner' ) self.monthly_merged['ret_excess'] = ( self.monthly_merged['ret'] - self.monthly_merged[self.rf_column] ) # Sorted unique months for the monthly cross-sections self.months = sorted(self.monthly_merged['ym'].unique()) print(f"JNPRW IV Estimator initialized:") print(f" Factors: {self.factor_columns}") print(f" Rolling window: {self.window} months") print(f" Daily observations: {len(self.daily_merged):,}") print(f" Monthly observations: {len(self.monthly_merged):,}") print(f" Cross-section months: {len(self.months)}") def _get_rolling_months(self, target_ym: int) -> List[int]: """ Get the list of year-months in the rolling window ending at (and including) target_ym. """ idx = self.months.index(target_ym) if target_ym in self.months else -1 if idx < self.window: return [] return self.months[idx - self.window + 1: idx + 1] def _split_odd_even(self, window_months: List[int]) -> Tuple[List[int], List[int]]: """ Split a list of year-months into odd-indexed and even-indexed months. Following JNPRW: odd months (1st, 3rd, 5th, ...) and even months (2nd, 4th, 6th, ...) within the window. Note: "odd" and "even" refer to the position within the window, not the calendar month number. This ensures each subsample has exactly half the months. """ odd_months = [m for i, m in enumerate(window_months) if i % 2 == 0] even_months = [m for i, m in enumerate(window_months) if i % 2 == 1] return odd_months, even_months def _estimate_betas( self, stock_permnos: np.ndarray, month_subset: List[int] ) -> Dict[int, np.ndarray]: """ Estimate factor betas for each stock using daily returns from the given subset of months. For each stock i, runs OLS: r_{i,tau} = a_i + beta_i' @ f_tau + epsilon_{i,tau} where tau indexes daily observations within the month subset. Parameters ---------- stock_permnos : array of int PERMNOs of stocks to estimate betas for. month_subset : list of int Year-months to use for estimation. Returns ------- betas : dict mapping permno -> np.ndarray of shape (K,) Estimated factor betas for each stock. """ # Filter daily data to the month subset daily_sub = self.daily_merged[ self.daily_merged['ym'].isin(month_subset) ] betas = {} for permno in stock_permnos: stock_data = daily_sub[daily_sub['permno'] == permno] if len(stock_data) < self.min_obs: continue y = stock_data['ret'].values X = stock_data[self.factor_columns].values X = np.column_stack([np.ones(len(y)), X]) try: coef = np.linalg.lstsq(X, y, rcond=None)[0] betas[permno] = coef[1:] # K betas, skip intercept except np.linalg.LinAlgError: continue return betas def _run_iv_cross_section( self, month_ym: int, betas_iv: Dict[int, np.ndarray], betas_ev: Dict[int, np.ndarray], ) -> Optional[np.ndarray]: """ Run the IV cross-sectional regression for a single month. Implements JNPRW Equation (4): gamma_IV,t = (B_IV @ B_EV')^{-1} @ B_IV @ r_t' Parameters ---------- month_ym : int Year-month for the cross-section. betas_iv : dict Instrumental variable betas (permno -> K-vector). betas_ev : dict Explanatory variable betas (permno -> K-vector). Returns ------- gamma : np.ndarray of shape (K+1,) or None Estimated risk premiums [intercept, gamma_1, ..., gamma_K]. None if insufficient stocks. """ # Get monthly excess returns for this month month_data = self.monthly_merged[ self.monthly_merged['ym'] == month_ym ] # Find stocks present in all three: monthly returns, IV betas, EV betas permnos_month = set(month_data['permno'].values) permnos_iv = set(betas_iv.keys()) permnos_ev = set(betas_ev.keys()) common = sorted(permnos_month & permnos_iv & permnos_ev) N = len(common) if N < self.min_stocks: return None # Build matrices # r_t: 1 x N vector of excess returns ret_map = month_data.set_index('permno')['ret_excess'].to_dict() r_t = np.array([ret_map[p] for p in common]) # B_IV: (K+1) x N matrix (row of ones + K rows of IV betas) B_IV = np.zeros((self.K + 1, N)) B_IV[0, :] = 1.0 for j, p in enumerate(common): B_IV[1:, j] = betas_iv[p] # B_EV: (K+1) x N matrix B_EV = np.zeros((self.K + 1, N)) B_EV[0, :] = 1.0 for j, p in enumerate(common): B_EV[1:, j] = betas_ev[p] # IV estimator: gamma = (B_IV @ B_EV')^{-1} @ B_IV @ r_t' # Dimensions: (K+1 x N) @ (N x K+1) = (K+1 x K+1) try: A = B_IV @ B_EV.T # (K+1) x (K+1) b = B_IV @ r_t # (K+1,) gamma = np.linalg.solve(A, b) # (K+1,) return gamma except np.linalg.LinAlgError: return None def run(self, verbose: bool = True) -> 'JNPRWResults': """ Run the full JNPRW IV estimation procedure. For each month t in the sample: 1. Define the rolling window of `rolling_window_months` months ending at month t. 2. Split the window into odd-indexed and even-indexed months. 3. Estimate betas from daily returns in each subsample. 4. If month t is even-indexed within the window: use odd-month betas as IV and even-month betas as EV. If month t is odd-indexed: use even as IV and odd as EV. 5. Run the IV cross-sectional regression (Eq. 4). 6. Store the estimated risk premiums. After processing all months, compute time-series averages and t-statistics. Returns ------- results : JNPRWResults Object containing estimated risk premiums, t-statistics, and monthly gamma series. """ all_gammas = [] all_ym = [] all_N = [] # Determine which months we can estimate # (need at least `window` months of prior history) eligible_months = self.months[self.window:] if verbose: print(f"\nRunning JNPRW IV estimation over {len(eligible_months)} months...") total = len(eligible_months) checkpoint = max(1, total // 10) for count, target_ym in enumerate(eligible_months): if verbose and count > 0 and count % checkpoint == 0: print(f" Processed {count}/{total} months...") # Step 1: Define rolling window window_months = self._get_rolling_months(target_ym) if len(window_months) < self.window: continue # Step 2: Split into odd and even subsets odd_months, even_months = self._split_odd_even(window_months) # Step 3: Get permnos available this month month_permnos = self.monthly_merged[ self.monthly_merged['ym'] == target_ym ]['permno'].unique() # Step 4: Estimate betas from each subsample betas_odd = self._estimate_betas(month_permnos, odd_months) betas_even = self._estimate_betas(month_permnos, even_months) # Step 5: Determine which set is IV and which is EV # If target month's position in the window is even-indexed, # use odd betas as IV; otherwise use even betas as IV. target_position = window_months.index(target_ym) if target_position % 2 == 0: # Target is at an odd position -> use even as IV betas_iv = betas_even betas_ev = betas_odd else: # Target is at an even position -> use odd as IV betas_iv = betas_odd betas_ev = betas_even # Step 6: Run IV cross-sectional regression gamma = self._run_iv_cross_section(target_ym, betas_iv, betas_ev) if gamma is not None: all_gammas.append(gamma) all_ym.append(target_ym) common_n = len( set(betas_iv.keys()) & set(betas_ev.keys()) & set(self.monthly_merged[ self.monthly_merged['ym'] == target_ym ]['permno'].values) ) all_N.append(common_n) if verbose: print(f" Completed. {len(all_gammas)} months with valid estimates.") # Build results gammas_array = np.array(all_gammas) # T x (K+1) factor_names = ['Intercept'] + list(self.factor_columns) return JNPRWResults( gammas=gammas_array, months=all_ym, n_stocks=all_N, factor_names=factor_names, ) class JNPRWResults: """ Container for JNPRW IV estimation results. Attributes ---------- gammas : np.ndarray, shape (T, K+1) Monthly risk premium estimates. months : list of int Year-months corresponding to each row. n_stocks : list of int Number of stocks in each cross-section. factor_names : list of str Names including 'Intercept'. """ def __init__( self, gammas: np.ndarray, months: List[int], n_stocks: List[int], factor_names: List[str], ): self.gammas = gammas self.months = months self.n_stocks = n_stocks self.factor_names = factor_names self.T = len(months) self.K_plus_1 = len(factor_names) # Compute summary statistics self.mean_gamma = gammas.mean(axis=0) self.std_gamma = gammas.std(axis=0, ddof=1) self.tstat = self.mean_gamma / (self.std_gamma / np.sqrt(self.T)) self.pvalue = 2 * (1 - stats.t.cdf(np.abs(self.tstat), self.T - 1)) self.avg_N = np.mean(n_stocks) def summary(self) -> str: """Return a formatted summary string.""" lines = [] lines.append("=" * 70) lines.append("JNPRW IV Risk Premium Estimates") lines.append("Jegadeesh, Noh, Pukthuanthong, Roll, Wang (2019, JFE)") lines.append("=" * 70) lines.append(f"Sample: {len(self.months)} months") lines.append(f"Avg stocks per cross-section: {self.avg_N:.0f}") lines.append("") lines.append( f"{'Factor':<15} {'Price(%/mo)':>12} {'t-stat':>10} " f"{'p-value':>10} {'Sig 5%':>8}" ) lines.append("-" * 57) for j, name in enumerate(self.factor_names): sig = '*' if self.pvalue[j] < 0.05 else '' lines.append( f"{name:<15} {self.mean_gamma[j]*100:>12.4f} " f"{self.tstat[j]:>10.3f} {self.pvalue[j]:>10.4f} " f"{sig:>8}" ) lines.append("") lines.append("Note: The JNPRW IV t-statistics are valid without") lines.append("Shanken (1992) correction because the IV approach") lines.append("directly addresses the errors-in-variables problem.") return "\n".join(lines) def to_dataframe(self) -> pd.DataFrame: """Return results as a pandas DataFrame.""" return pd.DataFrame({ 'factor': self.factor_names, 'risk_price_monthly': self.mean_gamma, 'risk_price_pct': self.mean_gamma * 100, 'std': self.std_gamma, 'tstat': self.tstat, 'pvalue': self.pvalue, 'significant_5pct': self.pvalue < 0.05, }) def monthly_gammas_df(self) -> pd.DataFrame: """Return the full monthly gamma time series as a DataFrame.""" df = pd.DataFrame(self.gammas, columns=self.factor_names) df['ym'] = self.months df['N_stocks'] = self.n_stocks return df # ============================================================ # CONVENIENCE FUNCTION FOR QUICK USE # ============================================================ def jnprw_iv_test( daily_stock_returns: pd.DataFrame, monthly_stock_returns: pd.DataFrame, factor_returns_daily: pd.DataFrame, factor_returns_monthly: pd.DataFrame, factor_columns: List[str], rf_column: str = 'RF', rolling_window_months: int = 36, min_obs_per_subsample: int = 100, min_stocks_per_month: int = 100, verbose: bool = True, ) -> JNPRWResults: """ One-line convenience function to run the JNPRW IV test. Parameters ---------- daily_stock_returns : DataFrame Columns: date, permno, ret (decimals) monthly_stock_returns : DataFrame Columns: date, permno, ret (decimals) factor_returns_daily : DataFrame Columns: date, factor1, factor2, ..., RF (all decimals) factor_returns_monthly : DataFrame Columns: date, factor1, factor2, ..., RF (all decimals) factor_columns : list of str Factor names to test. rf_column : str Risk-free rate column name. rolling_window_months : int Rolling window in months for beta estimation. Default 36. min_obs_per_subsample : int Min daily obs per stock per subsample. Default 100. min_stocks_per_month : int Min stocks for a valid cross-section. Default 100. verbose : bool Print progress. Default True. Returns ------- JNPRWResults Example ------- >>> results = jnprw_iv_test( ... daily_stock_returns=daily_df, ... monthly_stock_returns=monthly_df, ... factor_returns_daily=ff_daily, ... factor_returns_monthly=ff_monthly, ... factor_columns=['Mkt-RF', 'SMB', 'HML'], ... ) >>> print(results.summary()) >>> results.to_dataframe().to_csv('jnprw_results.csv') """ estimator = JNPRWEstimator( daily_stock_returns=daily_stock_returns, monthly_stock_returns=monthly_stock_returns, factor_returns_daily=factor_returns_daily, factor_returns_monthly=factor_returns_monthly, factor_columns=factor_columns, rf_column=rf_column, rolling_window_months=rolling_window_months, min_obs_per_subsample=min_obs_per_subsample, min_stocks_per_month=min_stocks_per_month, ) return estimator.run(verbose=verbose) # ============================================================ # EXAMPLE USAGE WITH WRDS/CRSP DATA # ============================================================ EXAMPLE_CODE = """ # ============================================================ # EXAMPLE: Testing the Fama-French Three-Factor Model # ============================================================ # # This example shows how to use the JNPRW IV estimator with # CRSP daily and monthly stock returns and Fama-French factors # downloaded from Ken French's data library. # # Prerequisites: # pip install pandas numpy scipy wrds # # Step 1: Load data # ----------------- # Daily stock returns (from CRSP via WRDS or local file) # Columns needed: date, permno, ret import pandas as pd daily_stocks = pd.read_csv('crsp_daily.csv.gz', parse_dates=['date'], dtype={'permno': int, 'ret': float}) # Monthly stock returns monthly_stocks = pd.read_csv('crsp_monthly.csv.gz', parse_dates=['date'], dtype={'permno': int, 'ret': float}) # Daily Fama-French factors (download from French's website) # File: F-F_Research_Data_Factors_daily_CSV.zip ff_daily = pd.read_csv('F-F_Research_Data_Factors_daily.CSV', skiprows=3) ff_daily.columns = ['date', 'Mkt-RF', 'SMB', 'HML', 'RF'] ff_daily['date'] = pd.to_datetime(ff_daily['date'], format='%Y%m%d') for c in ['Mkt-RF', 'SMB', 'HML', 'RF']: ff_daily[c] = ff_daily[c] / 100 # percent to decimal # Monthly Fama-French factors ff_monthly = pd.read_csv('F-F_Research_Data_Factors.CSV', skiprows=3) # ... parse similarly, divide by 100 ... # Step 2: Run the JNPRW IV test # ------------------------------ from jnprw_iv import jnprw_iv_test results = jnprw_iv_test( daily_stock_returns=daily_stocks, monthly_stock_returns=monthly_stocks, factor_returns_daily=ff_daily, factor_returns_monthly=ff_monthly, factor_columns=['Mkt-RF', 'SMB', 'HML'], rf_column='RF', rolling_window_months=36, ) # Step 3: Examine results # ----------------------- print(results.summary()) # Save to CSV results.to_dataframe().to_csv('jnprw_ff3_results.csv', index=False) # Get monthly time series of risk premium estimates monthly_gammas = results.monthly_gammas_df() monthly_gammas.to_csv('jnprw_monthly_gammas.csv', index=False) # Step 4: Compare with standard OLS Fama-MacBeth # ----------------------------------------------- # The JNPRW paper shows that OLS risk premium estimates with # individual stocks are biased toward zero. The IV estimates # correct for this bias. Compare: # - OLS t-stats vs IV t-stats (should be similar asymptotically) # - OLS point estimates vs IV point estimates (OLS biased toward 0) """ if __name__ == '__main__': print(__doc__) print("\nExample usage:") print(EXAMPLE_CODE)