import numpy as np
import seaborn as sns
from scipy import optimize, stats
from scipy.stats import spearmanr
def C12_conditional_u2(u1, u2, rho):
    return stats.multivariate_normal.cdf(
        (stats.norm.ppf(u1) - rho * stats.norm.ppf(u2)) / (np.sqrt(1 - rho**2))
    )


def root(z, u1, u2, rho):
    return C12_conditional_u2(u1=z, u2=u2, rho=rho) - u1
4 Chapter 4: Copulas for panel stochastic frontier models
4.1 Exercise 4.1: Simulating from a Gaussian Copula
This exercise demonstrates how to simulate from a 2-dimensional Gaussian copula using the general Rosenblatt transform method (see, e.g., Joe, 2014, Chapter 6.9.1) and the Cholesky decomposition method (see, e.g., McNeil et al., 2015). The marginal distributions are normal and half-normal. The figures present the distribution plots of the simulated dependent uniform random variables and of the simulated dependent standard normal and half-normal random variables, together with the associated value of Spearman’s rho.
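For reference, the conditional distribution that the root-finding step below inverts has a simple closed form for the bivariate Gaussian copula:

\[
C_{1|2}(u_1 \mid u_2; \rho) \;=\; \Phi\!\left(\frac{\Phi^{-1}(u_1) - \rho\,\Phi^{-1}(u_2)}{\sqrt{1-\rho^{2}}}\right).
\]

The Rosenblatt approach draws independent uniforms \(w_1, w_2\), sets \(u_2 = w_1\), and solves \(C_{1|2}(u_1 \mid u_2;\rho) = w_2\) for \(u_1\). For the Gaussian copula this equation could also be inverted analytically as \(u_1 = \Phi\big(\rho\,\Phi^{-1}(u_2) + \sqrt{1-\rho^{2}}\,\Phi^{-1}(w_2)\big)\); the code uses a numerical root finder instead, which keeps the same recipe applicable to copulas without a closed-form inverse.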
rho = 0.8  # Dependence parameter of the Gaussian copula

# Rosenblatt transform approach
simulated_u1 = []
simulated_u2 = []
for i in range(1000):
    np.random.seed(i)
    uniforms = stats.uniform.rvs(size=2)
    u2 = uniforms[0]
    simulated_u2.append(u2)
    u1 = optimize.root_scalar(
        f=root, args=(uniforms[1], uniforms[0], rho), bracket=(0, 1)
    ).root
    simulated_u1.append(u1)

# Inverse transforms for the marginal distributions
x1 = stats.norm.ppf(simulated_u1)
x2 = stats.halfnorm.ppf(simulated_u2)
plot1 = sns.jointplot(x=simulated_u1, y=simulated_u2)
plot1.ax_joint.set_xlabel(r"$u_{1}$", fontsize=16)
plot1.ax_joint.set_ylabel(r"$u_{2}$", fontsize=16)
plot1.ax_joint.annotate(
    r"$\rho={}$".format(round(spearmanr(simulated_u1, simulated_u2)[0], 3)),
    xy=(0.2, 0.8),
    xycoords="figure fraction",
    fontsize=16,
)

plot2 = sns.jointplot(x=x1, y=x2)
plot2.ax_joint.set_xlabel(r"$x_{1} \sim N(0,1)$", fontsize=16)
plot2.ax_joint.set_ylabel(r"$x_{2} \sim N^{+}(0,1)$", fontsize=16)
plot2.ax_joint.annotate(
    r"$\rho={}$".format(round(spearmanr(x1, x2)[0], 3)),
    xy=(0.25, 0.75),
    xycoords="figure fraction",
    fontsize=16,
)
Text(0.25, 0.75, '$\\rho=0.798$')
# Cholesky decomposition approach
P = np.array([[1, rho], [rho, 1]])
A = np.linalg.cholesky(P)
Z = stats.multivariate_normal.rvs(cov=np.eye(2), size=1000)
J = A @ Z.T
U = stats.norm.cdf(J).T

x1 = stats.norm.ppf(U[:, 0])
x2 = stats.halfnorm.ppf(U[:, 1])
plot1 = sns.jointplot(x=U[:, 0], y=U[:, 1])
plot1.ax_joint.set_xlabel(r"$u_{1}$", fontsize=16)
plot1.ax_joint.set_ylabel(r"$u_{2}$", fontsize=16)
plot1.ax_joint.annotate(
    r"$\rho={}$".format(round(spearmanr(U[:, 0], U[:, 1])[0], 3)),
    xy=(0.2, 0.8),
    xycoords="figure fraction",
    fontsize=16,
)

plot2 = sns.jointplot(x=x1, y=x2)
plot2.ax_joint.set_xlabel(r"$x_{1} \sim N(0,1)$", fontsize=16)
plot2.ax_joint.set_ylabel(r"$x_{2} \sim N^{+}(0,1)$", fontsize=16)
plot2.ax_joint.annotate(
    r"$\rho={}$".format(round(spearmanr(x1, x2)[0], 3)),
    xy=(0.25, 0.75),
    xycoords="figure fraction",
    fontsize=16,
)
Text(0.25, 0.75, '$\\rho=0.783$')
# Remove all previous function and variable definitions before the next exercise
%reset
4.2 Exercise 4.2: QMLE for Indonesian rice production
This exercise implements QMLE for a well-known data set of Indonesian rice farms (see, e.g., Lee and Schmidt, 1993; Horrace and Schmidt, 1996, 2000). It contains data for 171 farmers with six annual observations of rice output (in kg), amount of seeds (in kg), amount of urea (in kg), labor (in hours), land (in hectares), whether pesticides were used, which of three rice varieties was planted, and in which of six regions the farmer’s village is located.
Our results are based on all six cross sections. The robust standard errors we report for the QMLE account for potential correlation between the cross sections over time. They are obtained using the “sandwich” formula, while the first set of standard errors is obtained from the outer product of the gradients.
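Concretely, writing \(\hat s_i\) for the score of observation \(i\)'s log-density at \(\hat\theta\) and \(\hat H\) for the Hessian of the log-likelihood at \(\hat\theta\), the code below reports the square roots of the diagonal elements of

\[
\widehat{\mathrm{Var}}_{\mathrm{OPG}}(\hat\theta) \;=\; \Big(\sum_{i} \hat s_i \hat s_i'\Big)^{-1}
\qquad\text{and}\qquad
\widehat{\mathrm{Var}}_{\mathrm{rob}}(\hat\theta) \;=\; \hat H^{-1}\Big(\sum_{i} \hat s_i \hat s_i'\Big)\hat H^{-1}.
\]

The first uses the outer product of gradients alone; the second is the sandwich estimator, with the Hessian computed numerically via numdifftools and the scores obtained by finite differences of the log-density.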
import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize
def log_density(coefs, y, X):
    # Get parameters
    alpha = coefs[0]
    beta = coefs[1:-2]
    sigma = coefs[-2]
    Lambda = coefs[-1]

    # Composed errors from the production function equation
    eps = y - alpha - X @ beta

    # Compute the log density
    # Note: stats.norm.pdf and stats.norm.cdf use loc=0 and scale=1 by default
    Den = (
        (2 / sigma)
        * stats.norm.pdf(eps / sigma)
        * stats.norm.cdf(-Lambda * eps / sigma)
    )
    logDen = np.log(Den)

    return logDen


def loglikelihood(coefs, y, X):
    # Obtain the negative log-likelihood
    logDen = log_density(coefs, y, X)
    log_likelihood = -np.sum(logDen)

    return log_likelihood
def estimate(y, X):
    """
    Estimate model parameters by maximizing the log-likelihood function and compute standard errors.
    """
    # Starting values for MLE
    alpha = 5.5
    beta = [
        0.14,
        0.1,
        0.07,
        0.2,
        0.5,
        0.006,
        0.15,
        0.1,
        -0.06,
        -0.1,
        -0.1,
        -0.15,
        -0.05,
    ]
    sigma = 0.4
    _lambda = 1.3

    # Initial parameter vector
    theta0 = np.array([alpha] + beta + [sigma, _lambda])

    # Bounds to ensure sigma and lambda are >= 0
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-5, np.inf),
        (1e-5, np.inf),
    ]

    # Minimize the negative log-likelihood using numerical optimization.
    MLE_results = minimize(
        fun=loglikelihood,
        x0=theta0,
        method="L-BFGS-B",
        options={"maxiter": 1000, "maxfun": 10000},
        args=(y, X),
        bounds=bounds,
    )

    theta = MLE_results.x  # Estimated parameter vector
    log_likelihood = MLE_results.fun * -1  # Log-likelihood at the solution

    # Estimate standard errors
    delta = 1e-6
    grad = np.zeros((len(y), len(theta)))
    for i in range(len(theta)):
        theta1 = np.copy(theta)
        theta1[i] += delta
        grad[:, i] = (log_density(theta1, y, X) - log_density(theta, y, X)) / delta

    OPG = grad.T @ grad  # Outer product of gradient
    ster = np.sqrt(np.diag(np.linalg.inv(OPG)))

    # Robust standard errors
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-6)(theta, y, X)
    robust_ster = np.sqrt(
        np.diag(np.linalg.inv(-hessian) @ OPG @ np.linalg.inv(-hessian))
    )

    return theta, ster, robust_ster, log_likelihood
ricefarm = pd.read_csv(r"ricefarm.csv")
ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

theta, sterr, robust_ster, log_likelihood = estimate(y=y, X=X)
# Display the results as a table
results = pd.DataFrame(
    data={"Est": theta, "St.Err": sterr, "Rob.St.Err.": robust_ster},
    index=[
        "CONST",
        "SEED",
        "UREA",
        "TSP",
        "LABOR",
        "LAND",
        "DP",
        "DV1",
        "DV2",
        "DR1",
        "DR2",
        "DR3",
        "DR4",
        "DR5",
        "SIGMA",
        "LAMBDA",
    ],
)
results = results.round(4)
print("\nLL", round(log_likelihood, 3))
display(results)
LL -342.322
| | Est | St.Err | Rob.St.Err. |
|---|---|---|---|
| CONST | 5.5695 | 0.1746 | 0.2625 |
| SEED | 0.1451 | 0.0244 | 0.0327 |
| UREA | 0.1132 | 0.0142 | 0.0215 |
| TSP | 0.0763 | 0.0102 | 0.0122 |
| LABOR | 0.2004 | 0.0273 | 0.0312 |
| LAND | 0.4902 | 0.0237 | 0.0420 |
| DP | 0.0066 | 0.0285 | 0.0275 |
| DV1 | 0.1667 | 0.0387 | 0.0379 |
| DV2 | 0.1213 | 0.0588 | 0.0491 |
| DR1 | -0.0595 | 0.0532 | 0.0580 |
| DR2 | -0.1153 | 0.0532 | 0.0519 |
| DR3 | -0.1339 | 0.0378 | 0.0319 |
| DR4 | -0.1456 | 0.0350 | 0.0343 |
| DR5 | -0.0483 | 0.0420 | 0.0451 |
| SIGMA | 0.4430 | 0.0249 | 0.0309 |
| LAMBDA | 1.3592 | 0.2185 | 0.3871 |
# Save theta for use in exercise 5.3 and 5.5
np.savetxt("exercise_4_2_theta_python.txt", theta, delimiter=",")
# Remove all previous function and variable definitions before the next exercise
%reset
4.3 Exercise 4.3: IQMLE of Indonesian rice production model
This exercise implements QMLE and two versions of IQMLE. The moment-based estimators have some theoretical advantages, such as relatively higher asymptotic efficiency, but in practice they may be infeasible. In this application, for example, we have 16 parameters, 6 time periods and only 171 cross-sectional observations. The vector of moment conditions on which the IQMLE is based is 96×1. Obtaining stable GMM estimates for this problem with only 171 observations proved impossible. Therefore, instead of estimating the full vector of parameters, we considered a simplified problem of estimating only two parameters, the constant and one slope coefficient, while keeping the other parameters constrained to be equal to their QMLE values. We therefore have 12 moment conditions to use in the IQMLE. This is a more appropriate setting for moment-based estimation using a sample of size 171.
IQMLE-1 denotes the estimates obtained by two-step GMM. In the first step, we obtain preliminary parameter estimates using the identity matrix for weighting. In the second step, we estimate the covariance matrix of the moment conditions and update the weighting matrix to the inverse of that covariance matrix. IQMLE-2 denotes the iterated GMM estimates, in which the updating is carried on for 5 more iterations. The iterated GMM estimates are more efficient relative to the QMLE and the two-step GMM, which is evidenced by the slightly smaller standard errors of these estimates. On the other hand, GMM estimators are known to have substantial small-sample biases. The dispersion of the parameter estimates reported in the table suggests that these biases may be an issue here.
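For reference, both IQMLE versions below minimize the usual GMM criterion

\[
\hat\theta \;=\; \arg\min_{\theta}\ \bar g_N(\theta)'\, W\, \bar g_N(\theta),
\qquad
\bar g_N(\theta) \;=\; \frac{1}{N}\sum_{i=1}^{N} g_i(\theta),
\]

with \(W = I\) in the preliminary step and \(W = \hat C^{-1}\), where \(\hat C = N^{-1}\sum_i g_i(\hat\theta)\,g_i(\hat\theta)'\), in every subsequent step. The reported standard errors are square roots of the diagonal of \(\big(\hat G'\,W\,\hat G\big)^{-1}/N\), where \(\hat G\) is a numerical derivative of \(\bar g_N\) with respect to \(\theta\), which is what the code below computes by finite differences.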
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize
def criterion_function(theta, theta_fixed, W, y, X):
    s = np.mean(mom(theta, theta_fixed, y, X), axis=0)
    Qn = s.T @ W @ s

    return Qn


def mom(theta, theta_fixed, y, X):
    deriv = []  # Nx(Tx2) matrix of derivatives
    for i in range(T):
        # gradient of the density function w.r.t alpha and beta1 for cross section i
        deriv.append(score(theta, theta_fixed, y[i], X[i]))
    deriv_matrix = np.concatenate(deriv, axis=1)

    return deriv_matrix


def score(theta, theta_fixed, y, X):
    all_theta = np.concatenate([theta, theta_fixed])

    beta = np.array(all_theta[:-2])
    sigma = all_theta[-2]
    _lambda = all_theta[-1]

    xb = X @ beta
    ind = (y - xb) / sigma

    sc_beta_i = ind / sigma + stats.norm.pdf(ind * _lambda) * _lambda / sigma / (
        1 - stats.norm.cdf(ind * _lambda)
    )
    sc_beta = np.diag(sc_beta_i) @ X[:, :2]

    return sc_beta


def scorederiv(theta, theta_fixed, y, X):
    deriv = []
    for i in range(T):
        # gradient of the density function w.r.t alpha and beta1 for cross section i
        deriv.append(np.mean(score(theta, theta_fixed, y[i], X[i]), axis=0))
    deriv_matrix = np.concatenate(deriv, axis=0)

    return deriv_matrix
def estimate(y, X, theta_fixed):
    """
    Estimate model parameters by minimizing the GMM criterion function and compute standard errors.
    """
    N = 171
    T = 6

    # Starting values for the GMM estimation
    alpha = 5.2
    beta = 0.3

    # Initial parameter vector
    theta0 = [alpha, beta]

    # Initial weighting matrix as the identity matrix
    W_start = np.eye(len(theta0) * T)

    # Minimize the GMM criterion function using numerical optimization.
    MLE_results = minimize(
        fun=criterion_function,
        x0=theta0,
        method="Nelder-Mead",
        options={"maxiter": 1000},
        args=(theta_fixed, W_start, y, X),
    )

    for j in range(5):
        revised_moments = mom(theta=MLE_results["x"], theta_fixed=theta_fixed, y=y, X=X)
        C = (revised_moments.T @ revised_moments) / N
        Wn = np.linalg.inv(C)

        MLE_results = minimize(
            fun=criterion_function,
            x0=theta0,
            method="Nelder-Mead",
            options={"maxiter": 1000},
            args=(theta_fixed, Wn, y, X),
        )
        if j == 0:
            two_step_GMM_results = MLE_results
            two_step_revised_moments = mom(
                theta=MLE_results["x"], theta_fixed=theta_fixed, y=y, X=X
            )
            Wn_two_step_GMM = np.linalg.inv(
                (two_step_revised_moments.T @ two_step_revised_moments) / N
            )

    iterated_GMM_results = MLE_results
    Wn_iterated_GMM = Wn

    two_step_GMM_theta = two_step_GMM_results["x"]
    iterated_GMM_theta = iterated_GMM_results["x"]

    # Estimate standard errors
    delta = 1e-6
    grad_two_step_GMM_results = np.zeros(
        (len(theta0) * T, len(two_step_GMM_results["x"]))
    )
    grad_iterated_GMM_results = np.zeros(
        (len(theta0) * T, len(iterated_GMM_results["x"]))
    )
    for i in range(len(theta0)):
        theta1_iterated_GMM = np.copy(iterated_GMM_results["x"])
        theta1_two_step_GMM = np.copy(two_step_GMM_results["x"])
        theta1_iterated_GMM[i] += delta
        theta1_two_step_GMM[i] += delta
        grad_iterated_GMM_results[:, i] = (
            scorederiv(theta=theta1_iterated_GMM, theta_fixed=theta_fixed, y=y, X=X)
            - scorederiv(
                theta=iterated_GMM_results["x"], theta_fixed=theta_fixed, y=y, X=X
            )
        ) / delta
        grad_two_step_GMM_results[:, i] = (
            scorederiv(theta=theta1_two_step_GMM, theta_fixed=theta_fixed, y=y, X=X)
            - scorederiv(
                theta=two_step_GMM_results["x"], theta_fixed=theta_fixed, y=y, X=X
            )
        ) / delta

    OM_iterated_GMM = (
        np.linalg.inv(
            grad_iterated_GMM_results.T @ Wn_iterated_GMM @ grad_iterated_GMM_results
        )
        / N
    )
    OM_two_step_GMM = (
        np.linalg.inv(
            grad_two_step_GMM_results.T @ Wn_two_step_GMM @ grad_two_step_GMM_results
        )
        / N
    )
    ster_iterated_GMM = np.sqrt(np.diag(OM_iterated_GMM))
    ster_two_step_GMM = np.sqrt(np.diag(OM_two_step_GMM))

    return two_step_GMM_theta, iterated_GMM_theta, ster_two_step_GMM, ster_iterated_GMM
ricefarm = pd.read_csv(r"ricefarm.csv")
# Import theta vector from exercise 5.2
exercise_4_2_theta = np.loadtxt("exercise_4_2_theta_python.txt", delimiter=",")[
    2:
]  # keep everything except alpha and beta1

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

X = np.concatenate([np.ones((len(X), 1)), X], axis=1)

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

two_step_GMM_theta, iterated_GMM_theta, ster_two_step_GMM, ster_iterated_GMM = estimate(
    y=y_t, X=X_t, theta_fixed=exercise_4_2_theta
)

# Display the results as a table
results = pd.DataFrame(
    data={
        "IQMLE-1 Est": two_step_GMM_theta,
        "IQMLE-1 St.Err": ster_two_step_GMM,
        "IQMLE-2 Est": iterated_GMM_theta,
        "IQMLE-2 St.Err": ster_iterated_GMM,
    },
    index=["CONST", "SEED"],
)
results = results.round(4)
display(results)
| | IQMLE-1 Est | IQMLE-1 St.Err | IQMLE-2 Est | IQMLE-2 St.Err |
|---|---|---|---|---|
| CONST | 5.2154 | 0.0378 | 5.4842 | 0.0311 |
| SEED | 0.2871 | 0.0155 | 0.1716 | 0.0123 |
# Remove all previous function and variable definitions before the next exercise
%reset
4.4 Exercise 4.4: Full MLE of the Indonesian rice production model
This exercise implements the FMLE estimator. We use the Gaussian copula to construct a multivariate distribution of the composed errors \((\varepsilon_1, \ldots, \varepsilon_T)\). The Gaussian copula is defined in Example45.1. In the Gaussian copula, the dependence structure between the marginals is represented by the correlation matrix \(R\), so our FMLE results also contain estimates of a 6×6 correlation matrix, which are not reported. We use the correlation matrix parameterization of Archakov and Hansen (2021) in the numerical optimization routines.
NOTE: This code will take a long time to run because of the numerical integration routines required.
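For reference, the copula term evaluated inside log_density below is the log of the Gaussian copula density. With \(q_{it} = \Phi^{-1}\big(F_\varepsilon(\varepsilon_{it})\big)\) and \(q_i = (q_{i1},\ldots,q_{iT})'\),

\[
\ln c\big(F_\varepsilon(\varepsilon_{i1}),\ldots,F_\varepsilon(\varepsilon_{iT});R\big)
\;=\; -\tfrac{1}{2}\ln\lvert R\rvert \;-\; \tfrac{1}{2}\, q_i'\,\big(R^{-1} - I_T\big)\, q_i,
\]

and the observation-level log-density adds the log of the ALS77 marginal densities, \(\ln f(\varepsilon_i) = \ln c(\,\cdot\,;R) + \sum_{t}\ln f_\varepsilon(\varepsilon_{it})\). The marginal CDF \(F_\varepsilon\) has no closed form, which is why the code computes it by numerical integration of the marginal density.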
import multiprocessing
from itertools import product
import numpy as np
import pandas as pd
import scipy.integrate as integrate
from joblib import Parallel, delayed
from scipy import stats
from scipy.linalg import expm, logm, norm
from scipy.optimize import minimize
def direct_mapping_mat(C):
    gamma = []

    try:
        # Check if input is of proper format: C is 2D np.array of suitable dimensions
        # and is positive-definite correlation matrix
        if not isinstance(C, np.ndarray):
            raise ValueError
        if C.ndim != 2:
            raise ValueError
        if C.shape[0] != C.shape[1]:
            raise ValueError
        if not all(
            [
                np.all(np.abs(np.diag(C) - np.ones(C.shape[0])) < 1e-8),
                np.all(np.linalg.eigvals(C) > 0),
                np.all(np.abs(C - C.T) < 1e-8),
            ]
        ):
            raise ValueError

        # Apply matrix log-transformation to C and get off-diagonal elements
        A = logm(C)
        gamma = A[np.triu_indices(C.shape[0], 1)]

    except ValueError:
        print("Error: input is of a wrong format")
    return gamma


def ALS77_epsilon_density(eps, sigma, _lambda):
    return (
        (2 / sigma)
        * stats.norm.pdf(eps / sigma)
        * stats.norm.cdf(-_lambda * eps / sigma)
    )
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C
def integrate_ALS77(t, i, eps_it, sigma, _lambda):
    return (
        t,
        i,
        integrate.quad(
            func=ALS77_epsilon_density, args=(sigma, _lambda), a=-4, b=eps_it
        )[0],
    )
def log_density(coefs, y, X):
    N = 171
    T = 6
    n_corr_terms = int((T**2 - T) / 2)

    # Get parameters
    alpha = coefs[0]
    beta = coefs[1:14]
    gamma = coefs[14 : 14 + n_corr_terms]
    sigma = coefs[-2]
    _lambda = coefs[-1]

    # Reconstruct the Gaussian copula correlation matrix
    Rho = inverse_mapping_vec(gamma=gamma)

    eps = np.zeros((N, T))
    log_den_eps_margins = np.zeros((N, T))  # the log density of the margins
    for t in range(T):
        eps_t = y[t] - alpha - X[t] @ beta
        eps[:, t] = eps_t
        log_den_eps_margins[:, t] = np.log(
            ALS77_epsilon_density(eps=eps_t, sigma=sigma, _lambda=_lambda)
        )

    # Compute F(eps_t) (CDF of epsilon_t) by integrating the density over the interval (-4, eps)
    eps_CDF = np.zeros((N, T))
    jobs = product(range(T), range(N))
    results = Parallel(n_jobs=multiprocessing.cpu_count())(
        delayed(integrate_ALS77)(job[0], job[1], eps[job[1], job[0]], sigma, _lambda)
        for job in jobs
    )
    for result in results:
        eps_CDF[result[1], result[0]] = result[2]

    eps_CDF[eps_CDF < 1e-4] = 1e-4
    eps_CDF[eps_CDF > 0.999] = 0.999

    # Compute the copula density for (eps_1, ..., eps_T)
    log_copula_den = np.zeros(N)
    for i in range(N):
        log_copula_den[i] = np.log(
            (1 / np.sqrt(np.linalg.det(Rho)))
            * np.exp(
                -0.5
                * stats.norm.ppf(eps_CDF[i, :]).T
                @ (np.linalg.inv(Rho) - np.eye(T))
                @ stats.norm.ppf(eps_CDF[i, :])
            )
        )

    # Compute the log density
    logDen = log_copula_den + np.sum(log_den_eps_margins, axis=1)

    return logDen


def loglikelihood(coefs, y, X):
    # Obtain the negative log-likelihood
    logDen = log_density(coefs, y, X)
    log_likelihood = -np.sum(logDen)
    return log_likelihood
def estimate(y, X):
    """
    Estimate model parameters by maximizing the log-likelihood function and compute standard errors.
    """
    N = 171
    T = 6

    # Starting values for MLE
    alpha = 5.5
    beta = [0.1, 0.1, 0.06, 0.2, 0.5, 0.02, 0.1, 0.1, -0.1, -0.15, -0.1, -0.15, -0.05]
    sigma = 0.45
    _lambda = 1.2

    # Initial values for the Gaussian copula correlation matrix
    start_R = np.array(
        [
            [1.0000, 0.7837, 0.7724, 0.7202, 0.5778, 0.4978],
            [0.7837, 1.0000, 0.6474, 0.8265, 0.4693, 0.5305],
            [0.7724, 0.6474, 1.0000, 0.7030, 0.5933, 0.5297],
            [0.7202, 0.8265, 0.7030, 1.0000, 0.5562, 0.5889],
            [0.5778, 0.4693, 0.5933, 0.5562, 1.0000, 0.8475],
            [0.4978, 0.5305, 0.5297, 0.5889, 0.8475, 1.0000],
        ]
    )
    gamma = direct_mapping_mat(C=start_R)

    # Initial parameter vector
    theta0 = np.array([alpha] + beta + gamma.tolist() + [sigma, _lambda])

    # Bounds to ensure sigma and lambda are >= 0
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-5, np.inf),
        (1e-5, np.inf),
    ]

    # Minimize the negative log-likelihood using numerical optimization.
    MLE_results = minimize(
        fun=loglikelihood,
        x0=theta0,
        method="L-BFGS-B",
        options={"maxiter": 1000, "maxfun": 10000},
        args=(y, X),
        bounds=bounds,
    )

    theta = MLE_results.x  # Estimated parameter vector
    log_likelihood = MLE_results.fun * -1  # Log-likelihood at the solution

    # Estimate standard errors
    delta = 1e-6
    grad = np.zeros((N, len(theta)))
    for i in range(len(theta)):
        theta1 = np.copy(theta)
        theta1[i] += delta
        grad[:, i] = (log_density(theta1, y, X) - log_density(theta, y, X)) / delta

    OPG = grad.T @ grad  # Outer product of gradient
    ster = np.sqrt(np.diag(np.linalg.inv(OPG)))

    # Only keep betas, sigma and lambda
    theta = theta[:14].tolist() + theta[-2:].tolist()
    ster = ster[:14].tolist() + ster[-2:].tolist()

    return theta, ster, log_likelihood
ricefarm = pd.read_csv(r"ricefarm.csv")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

theta, ster, log_likelihood = estimate(y=y_t, X=X_t)
# Display the results as a table
results = pd.DataFrame(
    data={"Est": theta, "St.Err": ster},
    index=[
        "CONST",
        "SEED",
        "UREA",
        "TSP",
        "LABOR",
        "LAND",
        "DP",
        "DV1",
        "DV2",
        "DR1",
        "DR2",
        "DR3",
        "DR4",
        "DR5",
        "SIGMA",
        "LAMBDA",
    ],
)
results = results.round(4)
print("\nLL", round(log_likelihood, 3))
display(results)
LL -261.689
| | Est | St.Err |
|---|---|---|
| CONST | 5.4222 | 0.2134 |
| SEED | 0.1213 | 0.0230 |
| UREA | 0.1195 | 0.0166 |
| TSP | 0.0601 | 0.0143 |
| LABOR | 0.2184 | 0.0323 |
| LAND | 0.4909 | 0.0263 |
| DP | 0.0268 | 0.0285 |
| DV1 | 0.1379 | 0.0455 |
| DV2 | 0.1145 | 0.0608 |
| DR1 | -0.1085 | 0.0701 |
| DR2 | -0.1828 | 0.0690 |
| DR3 | -0.1195 | 0.0437 |
| DR4 | -0.1678 | 0.0449 |
| DR5 | -0.0872 | 0.0454 |
| SIGMA | 0.3959 | 0.0511 |
| LAMBDA | 0.7526 | 0.4648 |
# Remove all previous function and variable definitions before the next exercise
%reset
4.5 Exercise 4.5: Copula-based GMM estimation of Indonesian rice production model
This exercise implements the copula-based GMM estimator. We use the correlation matrix parameterization of Archakov and Hansen (2021) in the numerical optimization routines. As mentioned in Exercise 4.3, moment-based estimators have some theoretical advantages, such as relatively higher asymptotic efficiency, but in practice they may be infeasible or unstable. For the GMM estimation in this application, we have 16 parameters, 6 time periods and only 171 cross-sectional observations. The vector of moment conditions on which the GMM estimator is based contains the 96 moment conditions that come from the IQMLE plus the additional vector of copula-based moment conditions employed by the GMM estimator. That additional vector has 15 moment conditions for the parameters of R and 16 moment conditions for the parameters of the marginals. So we have as many as 127 moment conditions for the GMM estimator. Again, obtaining stable GMM estimates for this problem with only 171 observations proved impossible. As in Exercise 4.3, instead of estimating the full vector of parameters, we considered the constrained problem of estimating only two parameters, the constant and one slope coefficient, with the other parameters fixed at their QMLE values. We therefore have 14 moment conditions to use in GMM, which is a more appropriate setting for moment-based estimation using a sample of size 171.
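For reference, the dimension counts quoted above decompose as

\[
\underbrace{16 \times 6}_{\text{IQMLE conditions}} \;+\; \underbrace{15}_{\text{conditions for } R} \;+\; \underbrace{16}_{\text{conditions for the marginal parameters}} \;=\; 127,
\qquad
\underbrace{2 \times 6}_{\text{IQMLE conditions}} \;+\; \underbrace{2}_{\text{copula-based conditions}} \;=\; 14 .
\]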
import multiprocessing
from itertools import product
import numpy as np
import pandas as pd
import scipy.integrate as integrate
from joblib import Parallel, delayed
from scipy import stats
from scipy.linalg import expm, norm
from scipy.optimize import minimize
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C


def ALS77_epsilon_density(eps, sigma, _lambda):
    return (
        (2 / sigma)
        * stats.norm.pdf(eps / sigma)
        * stats.norm.cdf(-_lambda * eps / sigma)
    )


def integrate_ALS77(t, i, eps_it, sigma, _lambda):
    return (
        t,
        i,
        integrate.quad(
            func=ALS77_epsilon_density, args=(sigma, _lambda), a=-4, b=eps_it
        )[0],
    )
def criterion_function(theta, theta_fixed, gamma_fixed, W, y, X):
    s = np.mean(mom(theta, theta_fixed, gamma_fixed, y, X), axis=0)
    Qn = s.T @ W @ s

    return Qn


def mom(theta, theta_fixed, gamma_fixed, y, X):
    N = 171
    T = 6

    # Marginals
    marginal_deriv_matrix = []  # Nx(Tx2) matrix of derivatives
    for i in range(T):
        # gradient of the density function w.r.t alpha and beta1 for cross section i
        marginal_deriv_matrix.append(score(theta, theta_fixed, y[i], X[i]))
    marginal_deriv_matrix = np.concatenate(marginal_deriv_matrix, axis=1)

    all_theta = np.concatenate([theta, theta_fixed])
    beta = np.array(all_theta[:-2])
    sigma = all_theta[-2]
    _lambda = all_theta[-1]

    eps = np.zeros((N, T))
    eps_density = np.zeros((N, T))  # the density of the margins
    for t in range(T):
        eps[:, t] = y[t] - X[t] @ beta
        eps_density[:, t] = ALS77_epsilon_density(
            eps=eps[:, t], sigma=sigma, _lambda=_lambda
        )

    # Compute F(eps_t) (CDF of epsilon_t) by integrating the density over the interval (-4, eps)
    eps_CDF = np.zeros((N, T))
    jobs = product(range(T), range(N))
    results = Parallel(n_jobs=multiprocessing.cpu_count())(
        delayed(integrate_ALS77)(job[0], job[1], eps[job[1], job[0]], sigma, _lambda)
        for job in jobs
    )
    for result in results:
        eps_CDF[result[1], result[0]] = result[2]

    eps_CDF[eps_CDF < 1e-4] = 1e-4
    eps_CDF[eps_CDF > 0.999] = 0.999

    scc1 = scorc(
        eps_CDF=eps_CDF, eps_density=eps_density, i=0, gamma_fixed=gamma_fixed, X=X
    )
    scc2 = scorc(
        eps_CDF=eps_CDF, eps_density=eps_density, i=1, gamma_fixed=gamma_fixed, X=X
    )

    deriv_matrix = np.concatenate([marginal_deriv_matrix, scc1, scc2], axis=1)

    return deriv_matrix
def scorc(eps_CDF, eps_density, i, gamma_fixed, X):
    T = 6

    R = inverse_mapping_vec(gamma_fixed)
    X_var = np.concatenate([X[t][:, i].reshape(-1, 1) for t in range(T)], axis=1)

    if i == 0:
        sc = np.sum(
            1
            / stats.norm.pdf(eps_CDF).T
            * eps_density.T
            * ((np.linalg.inv(R) - np.eye(T)) @ eps_CDF.T),
            0,
        )
    else:
        sc = np.sum(
            1
            / stats.norm.pdf(eps_CDF).T
            * eps_density.T
            * X_var.T
            * ((np.linalg.inv(R) - np.eye(T)) @ eps_CDF.T),
            0,
        )

    return sc.reshape(-1, 1)
def score(theta, theta_fixed, y, X):
    all_theta = np.concatenate([theta, theta_fixed])

    beta = np.array(all_theta[:-2])
    sigma = all_theta[-2]
    _lambda = all_theta[-1]

    xb = X @ beta
    ind = (y - xb) / sigma

    sc_beta_i = ind / sigma + stats.norm.pdf(ind * _lambda) * _lambda / sigma / (
        1 - stats.norm.cdf(ind * _lambda)
    )
    sc_beta = np.diag(sc_beta_i) @ X[:, :2]

    return sc_beta
def scorederiv(theta, theta_fixed, gamma_fixed, y, X):
    marginal_deriv_matrix = []  # Nx(Tx2) matrix of derivatives
    for i in range(T):
        # gradient of the density function w.r.t alpha and beta1 for cross section i
        marginal_deriv_matrix.append(
            np.mean(score(theta, theta_fixed, y[i], X[i]), axis=0)
        )

    marginal_deriv_matrix = np.concatenate(marginal_deriv_matrix, axis=0)

    all_theta = np.concatenate([theta, theta_fixed])
    beta = np.array(all_theta[:-2])
    sigma = all_theta[-2]
    _lambda = all_theta[-1]

    eps = np.zeros((N, T))
    eps_density = np.zeros((N, T))  # the density of the margins
    for t in range(T):
        eps[:, t] = y[t] - X[t] @ beta
        eps_density[:, t] = ALS77_epsilon_density(
            eps=eps[:, t], sigma=sigma, _lambda=_lambda
        )

    # Compute F(eps_t) (CDF of epsilon_t) by integrating the density over the interval (-4, eps)
    eps_CDF = np.zeros((N, T))
    jobs = product(range(T), range(N))
    results = Parallel(n_jobs=multiprocessing.cpu_count())(
        delayed(integrate_ALS77)(job[0], job[1], eps[job[1], job[0]], sigma, _lambda)
        for job in jobs
    )
    for result in results:
        eps_CDF[result[1], result[0]] = result[2]

    eps_CDF[eps_CDF < 1e-4] = 1e-4
    eps_CDF[eps_CDF > 0.999] = 0.999

    scc1 = np.mean(
        scorc(
            eps_CDF=eps_CDF, eps_density=eps_density, i=0, gamma_fixed=gamma_fixed, X=X
        ),
        axis=0,
    )
    scc2 = np.mean(
        scorc(
            eps_CDF=eps_CDF, eps_density=eps_density, i=1, gamma_fixed=gamma_fixed, X=X
        ),
        axis=0,
    )

    deriv_matrix = np.concatenate([marginal_deriv_matrix, scc1, scc2], axis=0)

    return deriv_matrix
def estimate(y, X, theta_fixed, gamma_fixed):
    """
    Estimate model parameters by minimizing the GMM criterion function and compute standard errors.
    """
    N = 171
    T = 6

    # Starting values for the GMM estimation
    alpha = 5.2
    beta = 0.3

    # Initial parameter vector
    theta0 = [alpha, beta]

    # Initial weighting matrix as the identity matrix
    W_start = np.eye(len(theta0) * T + 2)

    # Minimize the GMM criterion function using numerical optimization.
    MLE_results = minimize(
        fun=criterion_function,
        x0=theta0,
        method="Nelder-Mead",
        options={"maxiter": 1000},
        args=(theta_fixed, gamma_fixed, W_start, y, X),
    )

    for j in range(7):
        revised_moments = mom(
            theta=MLE_results["x"],
            theta_fixed=theta_fixed,
            gamma_fixed=gamma_fixed,
            y=y,
            X=X,
        )
        C = (revised_moments.T @ revised_moments) / N
        Wn = np.linalg.inv(C)

        MLE_results = minimize(
            fun=criterion_function,
            x0=theta0,
            method="L-BFGS-B",
            options={"maxiter": 1000},
            args=(theta_fixed, gamma_fixed, Wn, y, X),
        )
        if j == 3:
            two_step_GMM_results = MLE_results
            two_step_revised_moments = mom(
                theta=MLE_results["x"],
                theta_fixed=theta_fixed,
                gamma_fixed=gamma_fixed,
                y=y,
                X=X,
            )
            Wn_two_step_GMM = np.linalg.inv(
                (two_step_revised_moments.T @ two_step_revised_moments) / N
            )

    iterated_GMM_results = MLE_results
    Wn_iterated_GMM = Wn

    two_step_GMM_theta = two_step_GMM_results["x"]
    iterated_GMM_theta = iterated_GMM_results["x"]

    # Estimate standard errors
    delta = 1e-6
    grad_two_step_GMM_results = np.zeros(
        (len(theta0) * T + 2, len(two_step_GMM_results["x"]))
    )
    grad_iterated_GMM_results = np.zeros(
        (len(theta0) * T + 2, len(iterated_GMM_results["x"]))
    )
    for i in range(len(theta0)):
        theta1_iterated_GMM = np.copy(iterated_GMM_results["x"])
        theta1_two_step_GMM = np.copy(two_step_GMM_results["x"])
        theta1_iterated_GMM[i] += delta
        theta1_two_step_GMM[i] += delta
        grad_iterated_GMM_results[:, i] = (
            scorederiv(
                theta=theta1_iterated_GMM,
                theta_fixed=theta_fixed,
                gamma_fixed=gamma_fixed,
                y=y,
                X=X,
            )
            - scorederiv(
                theta=iterated_GMM_results["x"],
                theta_fixed=theta_fixed,
                gamma_fixed=gamma_fixed,
                y=y,
                X=X,
            )
        ) / delta
        grad_two_step_GMM_results[:, i] = (
            scorederiv(
                theta=theta1_two_step_GMM,
                theta_fixed=theta_fixed,
                gamma_fixed=gamma_fixed,
                y=y,
                X=X,
            )
            - scorederiv(
                theta=two_step_GMM_results["x"],
                theta_fixed=theta_fixed,
                gamma_fixed=gamma_fixed,
                y=y,
                X=X,
            )
        ) / delta

    OM_iterated_GMM = (
        np.linalg.inv(
            grad_iterated_GMM_results.T @ Wn_iterated_GMM @ grad_iterated_GMM_results
        )
        / N
    )
    OM_two_step_GMM = (
        np.linalg.inv(
            grad_two_step_GMM_results.T @ Wn_two_step_GMM @ grad_two_step_GMM_results
        )
        / N
    )
    ster_iterated_GMM = np.sqrt(np.diag(OM_iterated_GMM))
    ster_two_step_GMM = np.sqrt(np.diag(OM_two_step_GMM))

    return two_step_GMM_theta, iterated_GMM_theta, ster_two_step_GMM, ster_iterated_GMM
ricefarm = pd.read_csv(r"ricefarm.csv")
exercise_4_3_theta = np.loadtxt("exercise_4_2_theta_python.txt", delimiter=",")[
    2:
]  # keep everything except alpha and beta1
exercise_4_4_gamma = np.loadtxt("exercise_4_4_gamma_python.txt", delimiter=",")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

X = np.concatenate([np.ones((len(X), 1)), X], axis=1)

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

two_step_GMM_theta, iterated_GMM_theta, ster_two_step_GMM, ster_iterated_GMM = estimate(
    y=y_t, X=X_t, theta_fixed=exercise_4_3_theta, gamma_fixed=exercise_4_4_gamma
)

# Display the results as a table
results = pd.DataFrame(
    data={
        "IQMLE-1 Est": two_step_GMM_theta,
        "IQMLE-1 St.Err": ster_two_step_GMM,
        "IQMLE-2 Est": iterated_GMM_theta,
        "IQMLE-2 St.Err": ster_iterated_GMM,
    },
    index=["CONST", "SEED"],
)
results = results.round(4)
display(results)
| | IQMLE-1 Est | IQMLE-1 St.Err | IQMLE-2 Est | IQMLE-2 St.Err |
|---|---|---|---|---|
| CONST | 5.0724 | 0.0432 | 5.2976 | 0.0372 |
| SEED | 0.3673 | 0.0183 | 0.2843 | 0.0157 |
# Remove all previous function and variable definitions before the next exercise
%reset
4.6 Exercise 4.6: Quantiles at desired level of precision
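The code below tabulates, for \(\lambda = 0, 0.1, \ldots, 1\), the quantiles of the normal/half-normal composed-error density \(f_\varepsilon(\varepsilon) = \tfrac{2}{\sigma}\,\phi(\varepsilon/\sigma)\,\Phi(-\lambda\varepsilon/\sigma)\) with \(\sigma = 1\). The CDF is evaluated on a fine grid by numerical integration, and each quantile is taken to be the grid point whose CDF value is closest to the target probability:

\[
q_p(\lambda) \;=\; \operatorname*{arg\,min}_{y \in \mathcal{G}} \;\Big|\, \textstyle\int_{-4}^{y} f_\varepsilon(s;\lambda)\,ds \;-\; p \,\Big|,
\qquad p \in \{0.1, 0.2, \ldots, 0.9\},
\]

where \(\mathcal{G}\) is the evaluation grid, so the precision of the quantiles is controlled by the fineness of the grid (the variable fine below).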
import numpy as np
import pandas as pd
from scipy import stats
from scipy import integrate


def density_function(x, _lambda):
    density = 2 / 1 * stats.norm.pdf(x / 1) * stats.norm.cdf(-x * _lambda / 1)

    return density


lambda_range = np.arange(0, 1.1, 0.1)
lb = -4
ub = 4
fine = 100000

quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
table = np.zeros((len(lambda_range), len(quantiles)))

y = np.arange(-4, 4 + (ub - lb) / fine, (ub - lb) / fine)
for i in range(len(lambda_range)):
    F = np.zeros(len(y))
    for j in range(len(y)):
        F[j] = integrate.quad(density_function, lb, y[j], args=(lambda_range[i]))[0]
    lambda_qs = np.zeros(len(quantiles))
    for q in range(len(quantiles)):
        idx = np.argmin(np.abs(F - quantiles[q]))
        lambda_qs[q] = y[idx]
    table[i, :] = lambda_qs
# FINISH TABLE PLOTTING HERE
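The tabulation step above is left unfinished in the notebook. A minimal sketch of how the computed quantiles could be put into a table, assuming rows indexed by \(\lambda\) and columns by the probability level (the name quantile_table is introduced here only for illustration):

# Illustrative only: arrange the quantile grid-search results into a DataFrame
quantile_table = pd.DataFrame(
    table,
    index=[f"lambda={lam:.1f}" for lam in lambda_range],
    columns=[f"q={q}" for q in quantiles],
)
display(quantile_table.round(3))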
# Remove all previous function and variable definitions before the next exercise
%reset
4.7 Exercise 4.7: Full MSLE of Indonesian rice farm production model
This exercise implements FMSLE, where we use the Gaussian copula to construct and sample from a multivariate distribution of \(u\). We use \(S=1000\) draws in evaluating \(\hat{f}_{\varepsilon}(\varepsilon)\) by simulation. As in FMLE, the results include 15 estimated correlations. Now, these are correlations over \(t\) of \(\Phi^{-1}(F_u(u_t))\), not of \(\Phi^{-1}(F_\varepsilon(\varepsilon_t))\), though the two estimated correlation matrices (not reported here) turn out to be very similar.
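For reference, the simulated density evaluated in log_density below is

\[
\hat f_{\varepsilon}(\varepsilon_i) \;=\; \frac{1}{S}\sum_{s=1}^{S}
\phi_T\!\big(\varepsilon_i + u_i^{(s)};\, 0,\ \sigma_v^{2} I_T\big),
\qquad S = 1000,
\]

where \(\phi_T\) is the \(T\)-variate normal density and the draws \(u_i^{(s)}\) are dependent half-normals obtained by pushing Halton-based standard normal draws through the Cholesky factor of \(R\), then through \(\Phi\) and the half-normal quantile function.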
import numpy as np
import pandas as pd
from scipy import stats
from scipy.linalg import expm, logm, norm
from scipy.optimize import minimize
from statsmodels.tools import sequences
def direct_mapping_mat(C):
    gamma = []

    try:
        # Check if input is of proper format: C is 2D np.array of suitable dimensions
        # and is positive-definite correlation matrix
        if not isinstance(C, np.ndarray):
            raise ValueError
        if C.ndim != 2:
            raise ValueError
        if C.shape[0] != C.shape[1]:
            raise ValueError
        if not all(
            [
                np.all(np.abs(np.diag(C) - np.ones(C.shape[0])) < 1e-8),
                np.all(np.linalg.eigvals(C) > 0),
                np.all(np.abs(C - C.T) < 1e-8),
            ]
        ):
            raise ValueError

        # Apply matrix log-transformation to C and get off-diagonal elements
        A = logm(C)
        gamma = A[np.triu_indices(C.shape[0], 1)]

    except ValueError:
        print("Error: input is of a wrong format")
    return gamma
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C
def log_density(coefs, y, X, FMSLE_us):
    N = 171
    T = 6
    n_corr_terms = int((T**2 - T) / 2)

    # Get parameters
    alpha = coefs[0]
    beta = coefs[1:14]
    gamma = coefs[14 : 14 + n_corr_terms]
    sigma = coefs[-2]
    _lambda = coefs[-1]

    sigma2u = ((_lambda * sigma) / (1 + _lambda)) ** 2
    sigma2v = ((sigma) / (1 + _lambda)) ** 2

    # Reconstruct the Gaussian copula correlation matrix
    Rho = inverse_mapping_vec(gamma=gamma)

    A = np.linalg.cholesky(Rho)

    eps = np.zeros((N, T))
    for t in range(T):
        eps[:, t] = y[t] - alpha - X[t] @ beta

    # Evaluate the integral via simulated MLE (FMSLE)
    FMSLE_densities = np.zeros(N)
    for i in range(N):
        eps_i = eps[i, :]
        rep_eps_i = np.repeat(eps_i.reshape(-1, 1).T, 1000, 0)
        simulated_us_i = np.zeros((1000, T))
        for t in range(T):
            simulated_us_i[:, t] = FMSLE_us[t][:, i]
        # Transform simulated values to half-normal RVs
        CDF_u_i = stats.norm.cdf(
            simulated_us_i @ A
        )  # Dependent uniform RVs from a gaussian copula
        u_i = stats.halfnorm.ppf(
            CDF_u_i, loc=0, scale=np.sqrt(sigma2u)
        )  # Dependent half-normal RVs

        # Joint density of observation i
        den_i = np.mean(
            stats.multivariate_normal.pdf(
                rep_eps_i + u_i, mean=np.zeros(T), cov=np.eye(T) * sigma2v
            )
        )
        if den_i < 1e-8:
            den_i = 1e-8
        FMSLE_densities[i] = den_i

    logDen = np.log(FMSLE_densities)
    return logDen


def loglikelihood(coefs, y, X, FMSLE_us):
    # Obtain the negative log-likelihood
    logDen = log_density(coefs, y, X, FMSLE_us)
    log_likelihood = -np.sum(logDen)
    return log_likelihood
def estimate(y, X):
    """
    Estimate model parameters by maximizing the log-likelihood function and compute standard errors.
    """
    N = 171
    T = 6

    # Starting values for MLE
    alpha = 5.5
    beta = [
        0.15,
        0.1,
        0.07,
        0.2,
        0.5,
        0.006,
        0.2,
        0.1,
        -0.05,
        -0.11,
        -0.14,
        -0.15,
        -0.05,
    ]
    sigma = 0.4
    _lambda = 1.3

    # Initial values for the Gaussian copula correlation matrix
    start_R = np.array(
        [
            [1.0000, 0.7837, 0.7724, 0.7202, 0.5778, 0.4978],
            [0.7837, 1.0000, 0.6474, 0.8265, 0.4693, 0.5305],
            [0.7724, 0.6474, 1.0000, 0.7030, 0.5933, 0.5297],
            [0.7202, 0.8265, 0.7030, 1.0000, 0.5562, 0.5889],
            [0.5778, 0.4693, 0.5933, 0.5562, 1.0000, 0.8475],
            [0.4978, 0.5305, 0.5297, 0.5889, 0.8475, 1.0000],
        ]
    )
    gamma = direct_mapping_mat(C=start_R)

    # Independent uniform random variables for FMSLE - assumes a Gaussian copula
    halton_sequence = sequences.halton(dim=T, n_sample=1000)
    us_ = stats.norm.ppf(halton_sequence)  # transform to standard normal
    FMSLE_us = {}
    for t in range(T):
        us_Sxn = np.repeat(us_[:, t].reshape(-1, 1), N, 1)
        FMSLE_us[t] = us_Sxn

    # Initial parameter vector
    theta0 = np.array([alpha] + beta + gamma.tolist() + [sigma, _lambda])

    # Bounds to ensure sigma and lambda are >= 0
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-5, np.inf),
        (1e-5, np.inf),
    ]

    # Minimize the negative log-likelihood using numerical optimization.
    MLE_results = minimize(
        fun=loglikelihood,
        x0=theta0,
        method="L-BFGS-B",
        options={"maxiter": 1000, "maxfun": 50000},
        args=(y, X, FMSLE_us),
        bounds=bounds,
    )

    theta = MLE_results.x  # Estimated parameter vector
    log_likelihood = MLE_results.fun * -1  # Log-likelihood at the solution

    # Estimate standard errors
    delta = 1e-6
    grad = np.zeros((N, len(theta)))
    for i in range(len(theta)):
        theta1 = np.copy(theta)
        theta1[i] += delta
        grad[:, i] = (
            log_density(theta1, y, X, FMSLE_us) - log_density(theta, y, X, FMSLE_us)
        ) / delta

    OPG = grad.T @ grad  # Outer product of gradient
    Sigma = np.linalg.inv(OPG)
    ster = np.sqrt(np.diag(Sigma))

    n_corr_terms = int((T**2 - T) / 2)
    gamma = theta[14 : 14 + n_corr_terms]

    # Only keep betas, sigma and lambda
    theta = theta[:14].tolist() + theta[-2:].tolist()
    ster = ster[:14].tolist() + ster[-2:].tolist()

    return theta, ster, gamma, log_likelihood
ricefarm = pd.read_csv(r"ricefarm.csv")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

theta, ster, gamma, log_likelihood = estimate(y=y_t, X=X_t)
# Display the results as a table
results = pd.DataFrame(
    data={"Est": theta, "St.Err": ster},
    index=[
        "CONST",
        "SEED",
        "UREA",
        "TSP",
        "LABOR",
        "LAND",
        "DP",
        "DV1",
        "DV2",
        "DR1",
        "DR2",
        "DR3",
        "DR4",
        "DR5",
        "SIGMA",
        "LAMBDA",
    ],
)
results = results.round(4)
print("\nLL", round(log_likelihood, 3))
display(results)
LL -282.943
| | Est | St.Err |
|---|---|---|
| CONST | 5.7630 | 0.1712 |
| SEED | 0.1422 | 0.0244 |
| UREA | 0.1170 | 0.0142 |
| TSP | 0.0720 | 0.0122 |
| LABOR | 0.1851 | 0.0301 |
| LAND | 0.5150 | 0.0235 |
| DP | -0.0191 | 0.0243 |
| DV1 | 0.1561 | 0.0396 |
| DV2 | 0.0996 | 0.0564 |
| DR1 | -0.0355 | 0.0542 |
| DR2 | -0.1337 | 0.0616 |
| DR3 | -0.1617 | 0.0339 |
| DR4 | -0.1609 | 0.0352 |
| DR5 | -0.0293 | 0.0327 |
| SIGMA | 0.6475 | 0.0244 |
| LAMBDA | 2.0451 | 0.1988 |
# Save the model parameters for exercise 5.9
np.savetxt("exercise_4_8_theta_python.txt", theta, delimiter=",")
np.savetxt("exercise_4_8_gamma_python.txt", gamma, delimiter=",")
# Remove all previous function and variable definitions before the next exercise
%reset
4.8 Exercise 4.9: Inefficiency estimation using copulas
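The two sets of scores computed below are the period-by-period JLMS conditional means and a copula-based Nadaraya-Watson estimator that conditions on the entire panel of composed errors:

\[
\hat u_{it}^{\mathrm{JLMS}} \;=\; \frac{\sigma\lambda}{1+\lambda^{2}}\left[\frac{\phi(b_{it})}{1-\Phi(b_{it})} - b_{it}\right],
\qquad b_{it} = \frac{\hat\varepsilon_{it}\,\lambda}{\sigma},
\]

\[
\hat u_{it}^{\mathrm{NW}} \;=\;
\frac{\sum_{s=1}^{S} u_{it}^{(s)} \prod_{r=1}^{T} K\!\big((\varepsilon_{ir}^{(s)}-\hat\varepsilon_{ir})/h\big)}
{\sum_{s=1}^{S} \prod_{r=1}^{T} K\!\big((\varepsilon_{ir}^{(s)}-\hat\varepsilon_{ir})/h\big)},
\]

where \((u^{(s)}, \varepsilon^{(s)})\), \(s = 1, \ldots, S\) with \(S = 10{,}000\), are draws of the inefficiency and composed-error vectors from the estimated copula-based joint distribution, \(K\) is the standard normal kernel, and \(h\) is a rule-of-thumb bandwidth.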
import numpy as np
import pandas as pd
from scipy import stats
from scipy.linalg import expm, logm, norm
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C
def JLMS_panel_technical_inefficiency_scores(theta, y, X):
    N = 171
    T = 6

    alpha = theta[0]
    beta = theta[1:14]
    sigma = theta[-2]
    _lambda = theta[-1]

    sigma2u = ((_lambda * sigma) / (1 + _lambda)) ** 2
    sigma2v = ((sigma) / (1 + _lambda)) ** 2

    u_hat = np.zeros((N, T))
    for t in range(T):
        _lambda = np.sqrt(sigma2u) / np.sqrt(sigma2v)
        sigma = np.sqrt(sigma2u + sigma2v)

        eps_t = y[t] - alpha - X[t] @ beta
        b = (eps_t * _lambda) / sigma
        u_hat[:, t] = ((sigma * _lambda) / (1 + _lambda**2)) * (
            stats.norm.pdf(b) / (1 - stats.norm.cdf(b)) - b
        )

    return u_hat
def NW_panel_technical_inefficiency_scores(theta, y, X, U_hat):
    N = 171
    T = 6

    alpha = theta[0]
    beta = theta[1:14]
    sigma = theta[-2]
    _lambda = theta[-1]

    sigma2u = ((_lambda * sigma) / (1 + _lambda)) ** 2
    sigma2v = ((sigma) / (1 + _lambda)) ** 2

    obs_eps = np.zeros((N, T))
    for t in range(T):
        obs_eps[:, t] = y[t] - alpha - X[t] @ beta

    # Simulated half-normal RVs from the estimated Gaussian copula
    simulated_u = stats.halfnorm.ppf(
        U_hat, loc=np.zeros(T), scale=np.array([np.sqrt(sigma2u) for x in range(T)])
    )
    # Simulated random noise for all T
    simulated_v = stats.multivariate_normal.rvs(
        size=10000, mean=np.zeros(T), cov=np.eye(T) * sigma2v, random_state=123
    )
    simulated_eps = simulated_v - simulated_u

    # Rule of thumb bandwidth
    h_eps = (
        1.06
        * 10000 ** (-1 / 5)
        * (max(np.std(simulated_eps), stats.iqr(simulated_eps) / 1.34))
    )

    u_hat = np.zeros((N, T))
    for i in range(N):
        panel_i_kernel_regression_results = np.zeros(T)
        eps_kernel = np.zeros((10000, T))
        # Construct the kernel distances for all T time periods
        for t in range(T):
            eps_kernel[:, t] = stats.norm.pdf(
                (simulated_eps[:, t] - obs_eps[i, t]) / h_eps
            )
        kernel_product = np.prod(eps_kernel, 1)
        for j in range(T):
            panel_i_kernel_regression_results[j] = np.sum(
                kernel_product * simulated_u[:, j]
            ) / np.sum(kernel_product)
        u_hat[i, :] = panel_i_kernel_regression_results

    return u_hat
ricefarm = pd.read_csv(r"ricefarm.csv")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

# Import the model parameters from exercise 5.8
theta = np.loadtxt("exercise_4_8_theta_python.txt", delimiter=",")
gamma = np.loadtxt("exercise_4_8_gamma_python.txt", delimiter=",")

# Estimation of technical inefficiencies
# Get idx of percentile firms
percetile_firm_idx = np.round(np.arange(0.05, 1, 0.1) * N).astype(int)

# JLMS formula technical inefficiencies
JLMS_u_hat = JLMS_panel_technical_inefficiency_scores(theta=theta, y=y_t, X=X_t)
# Sort technical inefficiency scores by t=1 values
sorted_t1_JLMS_u_hat = JLMS_u_hat[JLMS_u_hat[:, 0].argsort()]
# Mean technical inefficiency for each firm over all t
JLMS_mean = np.mean(JLMS_u_hat, axis=1)
sorted_t1_JLMS_mean = JLMS_mean[JLMS_u_hat[:, 0].argsort()]
# Standard deviation of technical inefficiency for each firm over all t
JLMS_std = np.std(JLMS_u_hat, axis=1)
sorted_t1_JLMS_std = JLMS_std[JLMS_u_hat[:, 0].argsort()]

JLMS_u_hat_for_table = sorted_t1_JLMS_u_hat[percetile_firm_idx, :]
JLMS_std_u_hat_for_table = sorted_t1_JLMS_std[percetile_firm_idx]
JLMS_mean_u_hat_for_table = sorted_t1_JLMS_mean[percetile_firm_idx]

# Estimate the Nadaraya Watson technical inefficiency scores.
Rho = inverse_mapping_vec(gamma=gamma)
Z = stats.multivariate_normal.rvs(
    size=10000, mean=np.zeros(T), cov=np.eye(T), random_state=1234
)
A = np.linalg.cholesky(Rho)
U_hat = stats.norm.cdf(
    Z @ A, loc=np.zeros(T), scale=np.ones(T)
)  # Dependent uniform RVs from a gaussian copula
NW_u_hat = NW_panel_technical_inefficiency_scores(
    theta=theta, y=y_t, X=X_t, U_hat=U_hat
)
# Sort technical inefficiency scores by t=1 values
sorted_t1_NW_u_hat = NW_u_hat[NW_u_hat[:, 0].argsort()]
# Mean technical inefficiency for each firm over all t
NW_mean = np.mean(NW_u_hat, axis=1)
sorted_t1_NW_mean = NW_mean[NW_u_hat[:, 0].argsort()]
# Standard deviation of technical inefficiency for each firm over all t
NW_std = np.std(NW_u_hat, axis=1)
sorted_t1_NW_std = NW_std[NW_u_hat[:, 0].argsort()]

JLMS_u_hat_for_table = sorted_t1_JLMS_u_hat[percetile_firm_idx, :]
JLMS_std_u_hat_for_table = sorted_t1_JLMS_std[percetile_firm_idx]
JLMS_mean_u_hat_for_table = sorted_t1_JLMS_mean[percetile_firm_idx]
NW_u_hat_for_table = sorted_t1_NW_u_hat[percetile_firm_idx, :]
NW_std_u_hat_for_table = sorted_t1_NW_std[percetile_firm_idx]
NW_mean_u_hat_for_table = sorted_t1_NW_mean[percetile_firm_idx]
# Tabulate the technical inefficiency scores
JLMS_technical_inefficiency_results = pd.DataFrame(
    columns=[
        "Rank",
        "Fractile",
        "Std(u)",
        "Mean(u)",
        "t=1",
        "t=2",
        "t=3",
        "t=4",
        "t=5",
        "t=6",
    ]
)
JLMS_technical_inefficiency_results["Rank"] = percetile_firm_idx
JLMS_technical_inefficiency_results["Fractile"] = np.arange(0.05, 1, 0.1)
JLMS_technical_inefficiency_results["Std(u)"] = JLMS_std_u_hat_for_table
JLMS_technical_inefficiency_results["Mean(u)"] = JLMS_mean_u_hat_for_table
JLMS_technical_inefficiency_results.iloc[:, -6:] = JLMS_u_hat_for_table

NW_technical_inefficiency_results = pd.DataFrame(
    columns=[
        "Rank",
        "Fractile",
        "Std(u)",
        "Mean(u)",
        "t=1",
        "t=2",
        "t=3",
        "t=4",
        "t=5",
        "t=6",
    ]
)
NW_technical_inefficiency_results["Rank"] = percetile_firm_idx
NW_technical_inefficiency_results["Fractile"] = np.arange(0.05, 1, 0.1)
NW_technical_inefficiency_results["Std(u)"] = NW_std_u_hat_for_table
NW_technical_inefficiency_results["Mean(u)"] = NW_mean_u_hat_for_table
NW_technical_inefficiency_results.iloc[:, -6:] = NW_u_hat_for_table
# Compute averages
average_JLMS = pd.DataFrame(
    np.concatenate(
        [
            np.mean(JLMS_std_u_hat_for_table).reshape(-1, 1).T,
            np.mean(JLMS_mean_u_hat_for_table).reshape(-1, 1).T,
            np.mean(JLMS_u_hat_for_table, axis=0).reshape(-1, 1).T,
        ],
        axis=1,
    ),
    columns=["Std(u)", "Mean(u)", "t=1", "t=2", "t=3", "t=4", "t=5", "t=6"],
    index=["Average"],
)
average_NW = pd.DataFrame(
    np.concatenate(
        [
            np.mean(NW_std_u_hat_for_table).reshape(-1, 1).T,
            np.mean(NW_mean_u_hat_for_table).reshape(-1, 1).T,
            np.mean(NW_u_hat_for_table, axis=0).reshape(-1, 1).T,
        ],
        axis=1,
    ),
    columns=["Std(u)", "Mean(u)", "t=1", "t=2", "t=3", "t=4", "t=5", "t=6"],
    index=["Average"],
)

print("JLMS Scores")
display(JLMS_technical_inefficiency_results)
display(average_JLMS)
print("Nadaraya Watson Scores")
display(NW_technical_inefficiency_results)
display(average_NW)
JLMS Scores
| | Rank | Fractile | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9 | 0.05 | 0.201200 | 0.279132 | 0.108546 | 0.691882 | 0.219671 | 0.351592 | 0.119252 | 0.183846 |
| 1 | 26 | 0.15 | 0.066929 | 0.217285 | 0.144209 | 0.209552 | 0.315604 | 0.299263 | 0.159164 | 0.175914 |
| 2 | 43 | 0.25 | 0.179223 | 0.298755 | 0.177579 | 0.153856 | 0.246140 | 0.641231 | 0.151389 | 0.422333 |
| 3 | 60 | 0.35 | 0.047762 | 0.227413 | 0.210354 | 0.221302 | 0.157466 | 0.293201 | 0.284465 | 0.197689 |
| 4 | 77 | 0.45 | 0.136598 | 0.386201 | 0.239593 | 0.502968 | 0.299376 | 0.526694 | 0.217342 | 0.531234 |
| 5 | 94 | 0.55 | 0.128254 | 0.306586 | 0.265450 | 0.168300 | 0.172615 | 0.485234 | 0.471267 | 0.276651 |
| 6 | 111 | 0.65 | 0.082763 | 0.301932 | 0.299589 | 0.264676 | 0.324191 | 0.429899 | 0.155199 | 0.338037 |
| 7 | 128 | 0.75 | 0.147324 | 0.317070 | 0.371703 | 0.102898 | 0.557115 | 0.331811 | 0.171988 | 0.366904 |
| 8 | 145 | 0.85 | 0.163346 | 0.446896 | 0.452379 | 0.187891 | 0.501138 | 0.721794 | 0.485270 | 0.332902 |
| 9 | 162 | 0.95 | 0.144646 | 0.484171 | 0.597844 | 0.516857 | 0.394947 | 0.717682 | 0.280458 | 0.397237 |

| | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
|---|---|---|---|---|---|---|---|---|
| Average | 0.129804 | 0.326544 | 0.286725 | 0.302018 | 0.318826 | 0.47984 | 0.249579 | 0.322275 |
Nadaraya Watson Scores
| | Rank | Fractile | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9 | 0.05 | 0.330444 | 0.306770 | 0.026839 | 0.123946 | 1.011691 | 0.347678 | 0.116644 | 0.213822 |
| 1 | 26 | 0.15 | 0.146198 | 0.333706 | 0.053118 | 0.248875 | 0.445100 | 0.492827 | 0.387741 | 0.374573 |
| 2 | 43 | 0.25 | 0.078828 | 0.220370 | 0.104268 | 0.133439 | 0.278481 | 0.325720 | 0.217926 | 0.262387 |
| 3 | 60 | 0.35 | 0.148418 | 0.287170 | 0.159290 | 0.104014 | 0.348222 | 0.563926 | 0.245607 | 0.301963 |
| 4 | 77 | 0.45 | 0.187210 | 0.358527 | 0.195668 | 0.089393 | 0.250601 | 0.535480 | 0.504677 | 0.575341 |
| 5 | 94 | 0.55 | 0.088490 | 0.158540 | 0.231306 | 0.110368 | 0.053070 | 0.056478 | 0.226633 | 0.273389 |
| 6 | 111 | 0.65 | 0.053096 | 0.330868 | 0.291478 | 0.302805 | 0.390932 | 0.418771 | 0.290071 | 0.291153 |
| 7 | 128 | 0.75 | 0.237595 | 0.332493 | 0.342374 | 0.139111 | 0.470355 | 0.778741 | 0.094591 | 0.169790 |
| 8 | 145 | 0.85 | 0.232028 | 0.535362 | 0.496575 | 1.005025 | 0.570154 | 0.515193 | 0.318977 | 0.306248 |
| 9 | 162 | 0.95 | 0.176955 | 0.607612 | 0.711980 | 0.852485 | 0.284710 | 0.686375 | 0.577751 | 0.532371 |

| | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
|---|---|---|---|---|---|---|---|---|
| Average | 0.167926 | 0.347142 | 0.26129 | 0.310946 | 0.410332 | 0.472119 | 0.298062 | 0.330104 |
np.concatenate(
    [
        np.mean(JLMS_std_u_hat_for_table).reshape(-1, 1).T,
        np.mean(JLMS_mean_u_hat_for_table).reshape(-1, 1).T,
        np.mean(JLMS_u_hat_for_table, axis=0).reshape(-1, 1).T,
    ],
    axis=1,
)
array([[0.12980448, 0.32654393, 0.2867246 , 0.30201845, 0.3188263 ,
0.4798401 , 0.2495794 , 0.32227471]])
# Remove all previous function and variable definitions before the next exercise
%reset