import numpy as np
import pandas as pd
from scipy import stats
def JLMS_panel_technical_inefficiency_scores(theta, y, X):
    N = 171
    T = 6

    alpha = theta[0]
    beta = theta[1:14]
    sigma = theta[-2]
    _lambda = theta[-1]

    u_hat = np.zeros((N, T))
    for t in range(T):
        eps_t = y[t] - alpha - X[t] @ beta
        b = (eps_t * _lambda) / sigma
        u_hat[:, t] = ((sigma * _lambda) / (1 + _lambda**2)) * (
            stats.norm.pdf(b) / (1 - stats.norm.cdf(b)) - b
        )
    return u_hat
7 Chapter 7: Predicting inefficiency
7.1 Exercise 7.1: JLMS estimator of inefficiency of rice production
This exercise uses the procedure outlined in Section 7.2 to estimate technical inefficiencies of Indonesian rice farms. The details of the data are provided in Exercise 4.2. The table reports predicted technical inefficiencies in each time period for ten farms, namely those at the 0.05, 0.15, …, 0.95 fractiles of the sample of inefficiencies for \(t = 1\). The first two columns report the rank and the quantile of the selected farms. The third and fourth columns contain the sample standard deviation and sample mean over \(t\), respectively, of the inefficiencies, which are reported in columns 5 through 10. The last row (displayed as a separate one-row table) reports column-wise averages over the ten farms.
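For reference, the predictor implemented in the code below is the standard JLMS conditional mean of \(u_{it}\) given the composed error \(\epsilon_{it}\) under the normal-half-normal model, written in the \((\sigma, \lambda)\) parameterization used in the code (a textbook result, not specific to this exercise):

\[
\hat{u}_{it} = E[u_{it} \mid \epsilon_{it}]
= \frac{\sigma \lambda}{1 + \lambda^{2}}
\left[ \frac{\phi(b_{it})}{1 - \Phi(b_{it})} - b_{it} \right],
\qquad
b_{it} = \frac{\lambda \epsilon_{it}}{\sigma},
\]

where \(\phi\) and \(\Phi\) denote the standard normal pdf and cdf. This is exactly the expression evaluated inside the loop of `JLMS_panel_technical_inefficiency_scores`.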
ricefarm = pd.read_csv(r"ricefarm.csv")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])

# Import the model parameters from exercise 5.8
theta = np.loadtxt("exercise_4_8_theta_python.txt", delimiter=",")

# Estimation of technical inefficiencies
# Get idx of percentile firms
percetile_firm_idx = np.round(np.arange(0.05, 1, 0.1) * N).astype(int)

# JLMS formula technical inefficiencies
JLMS_u_hat = JLMS_panel_technical_inefficiency_scores(theta=theta, y=y_t, X=X_t)
# Sort technical inefficiency scores by t=1 values
sort_idx = JLMS_u_hat[:, 0].argsort()
sorted_t1_JLMS_u_hat = JLMS_u_hat[sort_idx]
# Mean technical inefficiency for each firm over all t
JLMS_mean = np.mean(JLMS_u_hat, axis=1)
sorted_t1_JLMS_mean = JLMS_mean[sort_idx]
# Standard deviation of technical inefficiency for each firm over all t
JLMS_std = np.std(JLMS_u_hat, axis=1)
sorted_t1_JLMS_std = JLMS_std[sort_idx]

JLMS_u_hat_for_table = sorted_t1_JLMS_u_hat[percetile_firm_idx - 1, :]
JLMS_std_u_hat_for_table = sorted_t1_JLMS_std[percetile_firm_idx - 1]
JLMS_mean_u_hat_for_table = sorted_t1_JLMS_mean[percetile_firm_idx - 1]

# Tabulate the technical inefficiency scores
JLMS_technical_inefficiency_results = pd.DataFrame(
    columns=[
        "Rank",
        "Fractile",
        "Std(u)",
        "Mean(u)",
        "t=1",
        "t=2",
        "t=3",
        "t=4",
        "t=5",
        "t=6",
    ]
)
JLMS_technical_inefficiency_results["Rank"] = percetile_firm_idx
JLMS_technical_inefficiency_results["Fractile"] = np.arange(0.05, 1, 0.1)
JLMS_technical_inefficiency_results["Std(u)"] = JLMS_std_u_hat_for_table
JLMS_technical_inefficiency_results["Mean(u)"] = JLMS_mean_u_hat_for_table
JLMS_technical_inefficiency_results.iloc[:, -6:] = JLMS_u_hat_for_table

# Compute averages
average_JLMS = pd.DataFrame(
    np.concatenate(
        [
            np.mean(JLMS_std_u_hat_for_table).reshape(-1, 1).T,
            np.mean(JLMS_mean_u_hat_for_table).reshape(-1, 1).T,
            np.mean(JLMS_u_hat_for_table, axis=0).reshape(-1, 1).T,
        ],
        axis=1,
    ),
    columns=["Std(u)", "Mean(u)", "t=1", "t=2", "t=3", "t=4", "t=5", "t=6"],
    index=["Average"],
)
print("JLMS Scores")
display(JLMS_technical_inefficiency_results)
display(average_JLMS)
JLMS Scores
  | Rank | Fractile | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
---|---|---|---|---|---|---|---|---|---|---|
0 | 9 | 0.05 | 0.199559 | 0.353626 | 0.155185 | 0.334495 | 0.751678 | 0.298441 | 0.168792 | 0.413164 |
1 | 26 | 0.15 | 0.266284 | 0.389258 | 0.194313 | 0.247702 | 0.901049 | 0.577669 | 0.251172 | 0.163642 |
2 | 43 | 0.25 | 0.089351 | 0.285298 | 0.227678 | 0.192656 | 0.460310 | 0.332479 | 0.232645 | 0.266019 |
3 | 60 | 0.35 | 0.139124 | 0.389688 | 0.258642 | 0.210409 | 0.295907 | 0.535117 | 0.477681 | 0.560369 |
4 | 77 | 0.45 | 0.200043 | 0.338445 | 0.283757 | 0.201320 | 0.559250 | 0.664111 | 0.178528 | 0.143705 |
5 | 94 | 0.55 | 0.065599 | 0.362450 | 0.307076 | 0.346067 | 0.296354 | 0.380834 | 0.349237 | 0.495129 |
6 | 111 | 0.65 | 0.108994 | 0.455132 | 0.336327 | 0.376101 | 0.393122 | 0.559206 | 0.642810 | 0.423228 |
7 | 128 | 0.75 | 0.139395 | 0.307457 | 0.393067 | 0.551203 | 0.307025 | 0.291704 | 0.134145 | 0.167600 |
8 | 145 | 0.85 | 0.293376 | 0.600202 | 0.466352 | 0.382522 | 1.180340 | 0.717295 | 0.284017 | 0.570689 |
9 | 162 | 0.95 | 0.138623 | 0.552315 | 0.568924 | 0.719727 | 0.540897 | 0.716119 | 0.425638 | 0.342586 |
  | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
---|---|---|---|---|---|---|---|---|
Average | 0.164035 | 0.403387 | 0.319132 | 0.35622 | 0.568593 | 0.507298 | 0.314467 | 0.354613 |
# Remove all previous function and variable definitions before the next exercise
%reset
7.2 Exercise 7.2: The APS estimator of inefficiency of rice production
This exercise uses the procedure outlined in Section 7.4.2 to estimate technical inefficiencies of Indonesian rice farms. The predictions employ the Gaussian copula to draw \(S = 10,000\) observations from the joint distribution of \(u\) and \(\epsilon\). For the simulated sample, the predictions are based on the Nadaraya-Watson estimator, which uses a Gaussian kernel with a common bandwidth parameter chosen by cross-validation.
The table reports predicted inefficiencies in each time period for ten farms, namely those at the 0.05, 0.15, …, 0.95 fractiles of the sample of inefficiencies at \(t = 1\). The first two columns report the rank and the quantile of the selected farms. The third and fourth columns contain the sample standard deviation and sample mean over \(t\), respectively, of the inefficiency estimates, which are reported in columns 5 through 10. The last row (displayed as a separate one-row table) reports column-wise averages over the ten farms. (The results are somewhat different from the original results of Amsler et al. (2014), who used GAUSS and obtained a smaller average \(\hat{\sigma}\) compared to JLMS.)
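As a reference for the code below, the APS-style predictor replaces the analytic JLMS formula with a simulated conditional expectation: \(S\) dependent half-normal draws \((u_{s1},\dots,u_{sT})\) are generated from the estimated Gaussian copula, paired with independent noise draws to form simulated composed errors \(\epsilon_{s\tau} = v_{s\tau} - u_{s\tau}\), and the prediction for farm \(i\) in period \(t\) is the product-kernel Nadaraya-Watson regression

\[
\hat{E}[u_{it} \mid \epsilon_{i1},\dots,\epsilon_{iT}]
= \frac{\sum_{s=1}^{S} u_{st} \prod_{\tau=1}^{T} \phi\!\left((\epsilon_{s\tau} - \epsilon_{i\tau})/h\right)}
       {\sum_{s=1}^{S} \prod_{\tau=1}^{T} \phi\!\left((\epsilon_{s\tau} - \epsilon_{i\tau})/h\right)},
\]

where \(\phi\) is the Gaussian kernel and \(h\) is a common bandwidth (the implementation below computes \(h\) with a Silverman-type rule-of-thumb formula). This is the computation carried out in `NW_panel_technical_inefficiency_scores`.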
import numpy as np
import pandas as pd
from scipy import stats
from scipy.linalg import expm, logm, norm
def JLMS_panel_technical_inefficiency_scores(theta, y, X):
    N = 171
    T = 6

    alpha = theta[0]
    beta = theta[1:14]
    sigma = theta[-2]
    _lambda = theta[-1]

    u_hat = np.zeros((N, T))
    for t in range(T):
        eps_t = y[t] - alpha - X[t] @ beta
        b = (eps_t * _lambda) / sigma
        u_hat[:, t] = ((sigma * _lambda) / (1 + _lambda**2)) * (
            stats.norm.pdf(b) / (1 - stats.norm.cdf(b)) - b
        )
    return u_hat
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C
def NW_panel_technical_inefficiency_scores(theta, y, X, U_hat):
    N = 171
    T = 6

    alpha = theta[0]
    beta = theta[1:14]
    sigma = theta[-2]
    _lambda = theta[-1]

    sigma2u = ((_lambda * sigma) / (1 + _lambda)) ** 2
    sigma2v = ((sigma) / (1 + _lambda)) ** 2

    obs_eps = np.zeros((N, T))
    for t in range(T):
        obs_eps[:, t] = y[t] - alpha - X[t] @ beta

    # Simulated half normal RVs from the estimated Gaussian copula
    simulated_u = stats.halfnorm.ppf(
        U_hat, loc=np.zeros(T), scale=np.array([np.sqrt(sigma2u) for x in range(T)])
    )
    # Simulated random noise for all T
    simulated_v = stats.multivariate_normal.rvs(
        size=10000, mean=np.zeros(T), cov=np.eye(T) * sigma2v, random_state=123
    )
    simulated_eps = simulated_v - simulated_u

    # Rule of thumb bandwidth
    h_eps = (
        1.06
        * 10000 ** (-1 / 5)
        * (max(np.std(simulated_eps), stats.iqr(simulated_eps) / 1.34))
    )

    u_hat = np.zeros((N, T))
    for i in range(N):
        panel_i_kernel_regression_results = np.zeros(T)
        eps_kernel = np.zeros((10000, T))
        # Construct the kernel distances for all T time periods
        for t in range(T):
            eps_kernel[:, t] = stats.norm.pdf(
                (simulated_eps[:, t] - obs_eps[i, t]) / h_eps
            )
        kernel_product = np.prod(eps_kernel, 1)
        for j in range(T):
            panel_i_kernel_regression_results[j] = np.sum(
                kernel_product * simulated_u[:, j]
            ) / np.sum(kernel_product)
        u_hat[i, :] = panel_i_kernel_regression_results

    return u_hat
ricefarm = pd.read_csv(r"ricefarm.csv")

ricefarm = ricefarm.iloc[:, [0, 1, 3, 5, 6, 7, 8, 14, 16]]

# Create ID dummies
dar = np.round(ricefarm["ID"] / 100000).astype(int)
id_dummies = pd.get_dummies(dar, prefix="DR").astype(int)
ricefarm = pd.concat([ricefarm, id_dummies.iloc[:, :-1]], axis=1)

# Create rice variety dummies
rice_dummies = pd.get_dummies(ricefarm.iloc[:, 2], prefix="VAR").astype(int)
ricefarm = pd.concat([ricefarm, rice_dummies.iloc[:, 1:]], axis=1)

# Recode TSP as logs and zeros
ricefarm[ricefarm.columns[5]] = np.where(
    ricefarm.iloc[:, 5] > 0, np.log(ricefarm.iloc[:, 5]), 0
)

# Convert pesticide usage to a dummy
ricefarm[ricefarm.columns[6]] = (ricefarm.iloc[:, 6] != 0).astype(int)

# Reorder the data
ricefarm = ricefarm.iloc[:, [8, 3, 4, 5, 7, 1, 6, 14, 15, 9, 10, 11, 12, 13]]

y = np.log(ricefarm.iloc[:, 0].values)
X = ricefarm.iloc[:, 1:].values

# Log covariates
X[:, 0:2] = np.log(X[:, 0:2])
X[:, 3] = np.log(X[:, 3])
X[:, 4] = np.log(X[:, 4])

# Cross sections of y and X
N = 171
T = 6
y_t = []
X_t = []
for t in range(T):
    if t == 0:
        y_t.append(y[:N])
        X_t.append(X[:N, :])
    else:
        y_t.append(y[t * N : (t + 1) * N])
        X_t.append(X[t * N : (t + 1) * N, :])
# Import the model parameters from exercise 5.8
theta = np.loadtxt("exercise_4_8_theta_python.txt", delimiter=",")
gamma = np.loadtxt("exercise_4_8_gamma_python.txt", delimiter=",")

# Estimation of technical inefficiencies
# Get idx of percentile firms
percetile_firm_idx = np.round(np.arange(0.05, 1, 0.1) * N).astype(int)

# NW technical inefficiency estimates
Rho_hat = inverse_mapping_vec(gamma)
# Draw T dependent uniform RV's for each s_{1}, ..., 10000
Z = stats.multivariate_normal.rvs(
    size=10000, mean=np.zeros(T), cov=np.eye(T), random_state=1234
)
A = np.linalg.cholesky(Rho_hat)
U_hat = stats.norm.cdf(
    Z @ A, loc=np.zeros(T), scale=np.ones(T)
)  # Dependent uniform RVs from a Gaussian copula
NW_u_hat = NW_panel_technical_inefficiency_scores(
    theta=theta, y=y_t, X=X_t, U_hat=U_hat
)
# Sort technical inefficiency scores by t=1 values
sort_idx = NW_u_hat[:, 0].argsort()
sorted_t1_NW_u_hat = NW_u_hat[sort_idx]
# Mean technical inefficiency for each firm over all t
NW_mean = np.mean(NW_u_hat, axis=1)
sorted_t1_NW_mean = NW_mean[sort_idx]
# Standard deviation of technical inefficiency for each firm over all t
NW_std = np.std(NW_u_hat, axis=1)
sorted_t1_NW_std = NW_std[sort_idx]

NW_u_hat_for_table = sorted_t1_NW_u_hat[percetile_firm_idx - 1, :]
NW_std_u_hat_for_table = sorted_t1_NW_std[percetile_firm_idx - 1]
NW_mean_u_hat_for_table = sorted_t1_NW_mean[percetile_firm_idx - 1]

# Tabulate the technical inefficiency scores
NW_technical_inefficiency_results = pd.DataFrame(
    columns=[
        "Rank",
        "Fractile",
        "Std(u)",
        "Mean(u)",
        "t=1",
        "t=2",
        "t=3",
        "t=4",
        "t=5",
        "t=6",
    ]
)
NW_technical_inefficiency_results["Rank"] = percetile_firm_idx
NW_technical_inefficiency_results["Fractile"] = np.arange(0.05, 1, 0.1)
NW_technical_inefficiency_results["Std(u)"] = NW_std_u_hat_for_table
NW_technical_inefficiency_results["Mean(u)"] = NW_mean_u_hat_for_table
NW_technical_inefficiency_results.iloc[:, -6:] = NW_u_hat_for_table

# Compute averages
average_NW = pd.DataFrame(
    np.concatenate(
        [
            np.mean(NW_std_u_hat_for_table).reshape(-1, 1).T,
            np.mean(NW_mean_u_hat_for_table).reshape(-1, 1).T,
            np.mean(NW_u_hat_for_table, axis=0).reshape(-1, 1).T,
        ],
        axis=1,
    ),
    columns=["Std(u)", "Mean(u)", "t=1", "t=2", "t=3", "t=4", "t=5", "t=6"],
    index=["Average"],
)
print("Nadaraya Watson Scores")
display(NW_technical_inefficiency_results)
display(average_NW)
Nadaraya Watson Scores
  | Rank | Fractile | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
---|---|---|---|---|---|---|---|---|---|---|
0 | 9 | 0.05 | 0.245873 | 0.403594 | 0.024135 | 0.318582 | 0.738054 | 0.688861 | 0.270232 | 0.381701 |
1 | 26 | 0.15 | 0.169122 | 0.239246 | 0.052832 | 0.124861 | 0.509873 | 0.430818 | 0.138350 | 0.178745 |
2 | 43 | 0.25 | 0.172628 | 0.251123 | 0.096750 | 0.066012 | 0.585722 | 0.327563 | 0.202194 | 0.228496 |
3 | 60 | 0.35 | 0.127765 | 0.243374 | 0.158353 | 0.491428 | 0.309240 | 0.098721 | 0.214911 | 0.187592 |
4 | 77 | 0.45 | 0.187637 | 0.358608 | 0.195284 | 0.089106 | 0.250239 | 0.535277 | 0.505213 | 0.576530 |
5 | 94 | 0.55 | 0.145152 | 0.196747 | 0.227607 | 0.125899 | 0.506787 | 0.104959 | 0.108918 | 0.106310 |
6 | 111 | 0.65 | 0.100962 | 0.287877 | 0.290810 | 0.273255 | 0.249796 | 0.501644 | 0.218975 | 0.192783 |
7 | 128 | 0.75 | 0.235708 | 0.332172 | 0.339868 | 0.141119 | 0.475475 | 0.772116 | 0.095174 | 0.169282 |
8 | 145 | 0.85 | 0.225826 | 0.392722 | 0.466092 | 0.489747 | 0.082886 | 0.791562 | 0.298176 | 0.227871 |
9 | 162 | 0.95 | 0.222817 | 0.544130 | 0.667975 | 0.660000 | 0.315315 | 0.917145 | 0.307852 | 0.396496 |
  | Std(u) | Mean(u) | t=1 | t=2 | t=3 | t=4 | t=5 | t=6 |
---|---|---|---|---|---|---|---|---|
Average | 0.183349 | 0.324959 | 0.251971 | 0.278001 | 0.402339 | 0.516867 | 0.235999 | 0.264581 |
# Remove all previous function and variable definitions before the next exercise
%reset
7.3 Exercise 7.3: Improved JLMS estimator for the 72 US utilities
This exercise applies the predictors from Sections 7.4.2 and 7.6.1 to the US electricity generation dataset comprising 72 firms over the period 1986-1999. The data file is steamelectric.xlsx. The output variable is net steam electric power in megawatt-hours, and the values of the three inputs (fuel, labour and maintenance, and capital) are obtained by dividing the respective expenses by the relevant input prices; the data are cleaned following Amsler et al. (2021). The price of the fuel aggregate is a Tornqvist price index of fuels. The aggregate price of labor and maintenance is a cost-share weighted price, where the price of labor is a company-wide average wage rate and the price of maintenance and other supplies is a price index of electrical supplies from the U.S. Bureau of Labor Statistics. The price of capital is the yield of the firm's latest issue of long-term debt adjusted for appreciation and depreciation. These data have recently been used by Amsler et al. (2021) and Lai and Kumbhakar (2019).
The first table reports the average estimated technical inefficiencies by year as well as the average of their estimated standard deviations, which can be viewed as measures of the unexplained variation in \(u\). (We use a different random seed from Amsler et al. (2023), so our estimates differ from theirs.) Table 7.3 contains three predictors based on the panel model, where the model parameters are estimated by MSLE. The JLMS estimator uses only the contemporaneous \(\epsilon_{it}\) in the conditioning set. The Nadaraya-Watson [NW] and Local Linear Forest [LLF] estimators use the expanded conditioning set and the APS14 approach to estimating the conditional expectation, described in Section 7.4.2. The second table contains three estimators based on the model with endogeneity, where the parameters of the model are estimated by MSLE. For this part, we ignore the panel nature of the dataset (by viewing it as a pooled cross-section, i.e. by assuming independence over \(t\) as well as over \(i\)) and focus on the presence of the two types of inefficiency. In this case, JLMS is the estimator that does not use the \(\omega_{j}\)'s, while NW and LLF include them in the conditioning set using the APS16 approach described in Section 7.6.1.
In the second table, the average technical inefficiency score over all firms and years is similar across estimators, regardless of whether or not we condition on the allocative inefficiency terms. Over all years, the average standard deviation of the technical inefficiency scores from the Local Linear Forest is considerably smaller than those from the JLMS or Nadaraya-Watson estimators. In Table 7.3, the mean technical inefficiency scores are always larger, for all estimators and years in the data. Again, we find that the average technical inefficiency score over all firms and years is similar for all estimators. However, there are noticeable differences in the mean technical inefficiency scores of the estimators between years, for example in years 86, 87, 91 and 98. Finally, we note that the mean standard deviations for both non-parametric estimators are markedly smaller than those associated with the JLMS estimator, suggesting that we explain a much larger portion of the unconditional variation in \(u\). The technical inefficiency scores computed from the Local Linear Forest estimator are always associated with the smallest standard deviations.
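As a guide to how the reported standard deviations are constructed in the code below, each predictor produces, for every firm-year, a conditional mean and a conditional variance of \(u\); for the simulation-based NW and LLF predictors the variance is formed from the first two conditional moments, mirroring the `V[u|eps, w1, w2] = E[u^2|...] - (E[u|...])^2` comments in the code:

\[
\widehat{\operatorname{Var}}[u \mid \mathcal{I}]
= \hat{E}[u^{2} \mid \mathcal{I}] - \left( \hat{E}[u \mid \mathcal{I}] \right)^{2},
\]

where \(\mathcal{I}\) denotes the conditioning set (\(\epsilon_{it}\) alone for JLMS; \(\epsilon_{it}\) plus the allocative terms \(\omega_{i2}, \omega_{i3}\) for the APS16 predictors; the full vector \(\epsilon_{i1},\dots,\epsilon_{i,13}\) for the APS14 panel predictors). The "Std Dev" columns are year-averages of the square root of this quantity. The notation \(\mathcal{I}\) is introduced here only for this summary.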
Note: Users should uncomment the line calling the subprocess and change the file path to their own R executable location.
import numpy as np
import pandas as pd
import subprocess
import matplotlib.pyplot as plt
from scipy import stats
from scipy.linalg import expm, logm, norm
from scipy.optimize import minimize
def direct_mapping_mat(C):
    gamma = []

    try:
        # Check if input is of proper format: C is 2D np.array of suitable dimensions
        # and is positive-definite correlation matrix
        if not isinstance(C, np.ndarray):
            raise ValueError
        if C.ndim != 2:
            raise ValueError
        if C.shape[0] != C.shape[1]:
            raise ValueError
        if not all(
            [
                np.all(np.abs(np.diag(C) - np.ones(C.shape[0])) < 1e-8),
                np.all(np.linalg.eigvals(C) > 0),
                np.all(np.abs(C - C.T) < 1e-8),
            ]
        ):
            raise ValueError

        # Apply matrix log-transformation to C and get off-diagonal elements
        A = logm(C)
        gamma = A[np.triu_indices(C.shape[0], 1)]

    except ValueError:
        print("Error: input is of a wrong format")
    return gamma
def inverse_mapping_vec(gamma, tol_value=1e-8):
    C = []
    iter_number = -1

    # Check if input is of proper format: gamma is of suitable length
    if not isinstance(gamma, (np.ndarray, list)):
        raise ValueError
    if isinstance(gamma, np.ndarray):
        if gamma.ndim != 1:
            raise ValueError
    n = 0.5 * (1 + np.sqrt(1 + 8 * len(gamma)))
    if not all([n.is_integer(), n > 1]):
        raise ValueError

    # Check if tolerance value belongs to a proper interval
    # and change it to the default value otherwise
    if not (0 < tol_value <= 1e-4):
        tol_value = 1e-8
        print("Warning: tolerance value has been changed to default")

    # Place elements from gamma into off-diagonal parts
    # and put zeros on the main diagonal of [n x n] symmetric matrix A
    n = int(n)
    A = np.zeros(shape=(n, n))
    A[np.triu_indices(n, 1)] = gamma
    A = A + A.T

    # Read properties of the input matrix
    diag_vec = np.diag(A)  # get current diagonal
    diag_ind = np.diag_indices_from(A)  # get row and column indices of diagonal elements

    # Iterative algorithm to get the proper diagonal vector
    dist = np.sqrt(n)
    while dist > np.sqrt(n) * tol_value:
        diag_delta = np.log(np.diag(expm(A)))
        diag_vec = diag_vec - diag_delta
        A[diag_ind] = diag_vec
        dist = norm(diag_delta)
        iter_number += 1

    # Get a unique reciprocal correlation matrix
    C = expm(A)
    np.fill_diagonal(C, 1)

    return C
def Loglikelihood_Gaussian_copula_cross_sectional_application_SFA(theta, y, X, P, us_Sxn, n_inputs, S):
    n = len(y)

    alpha = np.exp(theta[0])
    beta = np.exp(theta[1:n_inputs+1])
    sigma2_v = theta[1+n_inputs]
    sigma2_u = theta[2+n_inputs]
    sigma2_w = np.exp(theta[3+n_inputs:(3+n_inputs)+(n_inputs-1)])
    mu_W = theta[(3+n_inputs)+(n_inputs-2)+1:(3+n_inputs)+(n_inputs-2)+(n_inputs)]

    rhos_log_form = theta[-3:]

    Rho_hat = inverse_mapping_vec(rhos_log_form)

    # Cobb-Douglas production function
    eps = y - np.log(alpha) - X@beta
    W = (np.tile(X[:, 0].reshape(-1, 1), (1, n_inputs-1)) - X[:, 1:]) - (P[:, 1:] - np.tile(P[:, 0].reshape(-1, 1), (1, n_inputs-1)) + (np.log(beta[0]) - np.log(beta[1:])))
    # Marginal density of allocative inefficiency terms
    Den_W = stats.norm.pdf(W, np.tile(mu_W, (n, 1)), np.tile(np.sqrt(sigma2_w), (n, 1)))
    CDF_W = stats.norm.cdf(W, np.tile(mu_W, (n, 1)), np.tile(np.sqrt(sigma2_w), (n, 1)))

    eps_Sxn = np.repeat(eps.reshape(-1, 1), S, axis=1).T
    us_Sxn_scaled = np.sqrt(sigma2_u)*us_Sxn
    CdfUs = 2*(stats.norm.cdf(np.sqrt(sigma2_u)*us_Sxn, 0, np.sqrt(sigma2_u)) - 0.5)
    eps_plus_us = eps_Sxn + us_Sxn_scaled
    den_eps_plus_us = stats.norm.pdf(eps_plus_us, 0, np.sqrt(sigma2_v))

    # Evaluate the integral via simulation (to integrate out u from eps+u)
    simulated_copula_pdfs = np.zeros((S, n))
    CDF_W_rep = {}
    for i in range(n_inputs-1):
        CDF_W_rep[i] = np.repeat(CDF_W[:, i].reshape(-1, 1), S, axis=1).T

    for j in range(n):
        CDF_w_j = np.zeros((S, n_inputs-1))
        for i in range(n_inputs-1):
            CDF_w_j[:, i] = CDF_W_rep[i][:, j]
        U = np.concatenate([stats.norm.ppf(CdfUs[:, j]).reshape(-1, 1),
                            stats.norm.ppf(CDF_w_j)], axis=1)
        c123 = stats.multivariate_normal.pdf(U,
                                             mean=np.array([0, 0, 0]),
                                             cov=Rho_hat) / np.prod(stats.norm.pdf(U), axis=1)
        simulated_copula_pdfs[:, j] = c123

    Integral = np.mean(simulated_copula_pdfs*den_eps_plus_us,
                       axis=0)  # Evaluation of the integral over S simulated samples. Column-wise mean.
    # Joint density: product of marginal density of w_{i}, i = 1, ..., n_inputs and the joint density f(\epsilon, w)
    prod_Den_W = np.prod(Den_W, 1)
    DenAll = prod_Den_W*Integral
    DenAll[DenAll < 1e-6] = 1e-6
    r = np.log(np.sum(beta))
    logDen = np.log(DenAll) + r
    logL = -np.sum(logDen)

    return logL
def estimate_Jondrow1982_u_hat(theta, n_inputs, n_corr_terms, y, X):
    alpha = theta[0]
    beta = theta[1:n_inputs+1]
    sigma2_v = theta[1+n_inputs]
    sigma2_u = theta[2+n_inputs]

    obs_eps = y - np.log(alpha) - X@beta
    _lambda = np.sqrt(sigma2_u/sigma2_v)
    sigma = np.sqrt(sigma2_u+sigma2_v)
    sig_star = np.sqrt(sigma2_u*sigma2_v/(sigma**2))
    u_hat = sig_star*(((stats.norm.pdf(_lambda*obs_eps/sigma))/(1-stats.norm.cdf(_lambda*obs_eps/sigma))) - ((_lambda*obs_eps)/sigma))
    V_u_hat = sig_star**2*(1+stats.norm.pdf(_lambda*obs_eps/sigma)/(1-stats.norm.cdf(_lambda*obs_eps/sigma))*_lambda*obs_eps/sigma-(stats.norm.pdf(_lambda*obs_eps/sigma)/(1-stats.norm.cdf(_lambda*obs_eps/sigma)))**2)

    return u_hat, V_u_hat
def Estimate_Jondrow1982_u_hat_panel_SFA_application_RS2007(alpha, beta, delta, sigma2_v, y, X, T, N):
    obs_eps = {}
    u_hat = np.zeros((N, T))
    V_u_hat = np.zeros((N, T))
    for t in range(T):
        sigma2_u = np.exp(delta[0] + delta[1]*t)
        _lambda = np.sqrt(sigma2_u)/np.sqrt(sigma2_v)
        sigma = np.sqrt(sigma2_u+sigma2_v)
        sig_star = np.sqrt(sigma2_u*sigma2_v/(sigma**2))

        u_hat_ = np.full(N, np.nan)
        V_u_hat_ = np.full(N, np.nan)
        obs_eps[t] = y[t] - np.log(alpha) - X[t]@beta  # composed errors from the production function equation (i.e residuals from the production function)
        b = (obs_eps[t]*_lambda)/sigma
        u_hat_tmp = ((sigma*_lambda)/(1 + _lambda**2))*(stats.norm.pdf(b)/(1 - stats.norm.cdf(b)) - b)
        V_u_hat_tmp = sig_star**2*(1+stats.norm.pdf(b)/(1-stats.norm.cdf(b))*b-(stats.norm.pdf(b)/(1-stats.norm.cdf(b)))**2)

        u_hat_[:len(u_hat_tmp)] = u_hat_tmp
        V_u_hat_[:len(V_u_hat_tmp)] = V_u_hat_tmp
        u_hat[:, t] = u_hat_
        V_u_hat[:, t] = V_u_hat_

    return u_hat, V_u_hat
def simulate_error_components(Rho, n_inputs, S_kernel, seed):
    chol_of_rho = np.linalg.cholesky(Rho)
    Z = stats.multivariate_normal.rvs(mean=np.zeros(n_inputs),
                                      cov=np.eye(n_inputs),
                                      size=S_kernel,
                                      random_state=seed)
    X = chol_of_rho@Z.T
    U = stats.norm.cdf(X)

    return U.T
def Estimate_NW_u_hat_conditional_eps_panel_SFA_RS2007(theta, y, X, N, T, k, U_hat, S_kernel):
    alpha = theta[0]
    beta = theta[1:n_inputs+1]
    delta = theta[4:6]
    sigma2_v = theta[6]

    # Observed variables
    obs_eps = {}
    for t in range(T):
        eps__ = np.full(N, np.nan)
        tmp_eps = y[t] - np.log(alpha) - X[t]@beta
        eps__[:len(tmp_eps)] = tmp_eps
        obs_eps[t] = eps__

    # Simulated variables
    simulated_v = stats.multivariate_normal.rvs(mean=np.zeros(T),
                                                cov=np.eye(T)*sigma2_v,
                                                size=S_kernel)  # simulate random noise for all T panels
    simulated_u = np.zeros((S_kernel, T))
    simulated_eps = np.zeros((S_kernel, T))
    for t in range(T):
        sigma2_u = np.exp(delta[0] + delta[1]*t)
        simulated_u[:, t] = np.sqrt(sigma2_u)*stats.norm.ppf((U_hat[:, t]+1)/2)
        simulated_eps[:, t] = simulated_v[:, t] - simulated_u[:, t]

    # Bandwidth information for each conditioning variable
    h_eps = np.zeros(T)
    for t in range(T):
        h_eps[t] = 1.06*S_kernel**(-1/5)*(max(np.std(simulated_eps[:, t]), stats.iqr(simulated_eps[:, t])/1.34))

    # kernel estimates for E[u_{t}|eps_{1}, ..., eps_{T}]
    # V[u|eps, w1, w2] = E[u^2|w2, w3, eps] - (E[u|w2, w3, eps])^2
    u_hat = np.zeros((N, T))
    u_hat2 = np.zeros((N, T))
    all_eps = np.concatenate([x.reshape(-1, 1) for x in obs_eps.values()], axis=1)
    for i in range(N):
        obs_eps_i = all_eps[i, :]

        panel_i_kernel_regression_results = np.zeros(T)
        panel_i_kernel_regression_results2 = np.zeros(T)
        eps_kernel = np.zeros((S_kernel, T))
        # Construct the kernel distances for all T time periods
        for t in range(T):
            eps_kernel[:, t] = stats.norm.pdf((simulated_eps[:, t] - obs_eps[t][i])/h_eps[t])

        out = eps_kernel[:, np.all(~np.isnan(eps_kernel), axis=0)]
        kernel_product = np.prod(out, 1)
        for j in range(T):  # NW for each t = 1, .., T observation in each panel i
            if not np.isnan(obs_eps_i[j]):
                panel_i_kernel_regression_results[j] = np.sum(kernel_product*simulated_u[:, j])/np.sum(kernel_product)
                panel_i_kernel_regression_results2[j] = np.sum(kernel_product*simulated_u[:, j]**2)/np.sum(kernel_product)
            else:
                panel_i_kernel_regression_results[j] = np.nan
                panel_i_kernel_regression_results2[j] = np.nan

        u_hat[i, :] = panel_i_kernel_regression_results
        u_hat2[i, :] = panel_i_kernel_regression_results2

    V_u_hat = u_hat2 - (u_hat**2)

    return u_hat, V_u_hat
def Estimate_NW_u_hat_conditional_W_cross_sectional_application(theta, n_inputs, n_corr_terms, y, X, P, U_hat, S_kernel):
    n = len(y)

    alpha = theta[0]
    beta = theta[1:n_inputs+1]
    sigma2_v = theta[1+n_inputs]
    sigma2_u = theta[2+n_inputs]
    sigma2_w = np.exp(theta[3+n_inputs:(3+n_inputs)+(n_inputs-1)])
    mu_W = theta[(3+n_inputs)+(n_inputs-2)+1:(3+n_inputs)+(n_inputs-2)+(n_inputs)]

    # Observed variables
    obs_eps = y - np.log(alpha) - X@beta
    W = (np.tile(X[:, 0].reshape(-1, 1), (1, n_inputs-1)) - X[:, 1:]) - (P[:, 1:] - np.tile(P[:, 0].reshape(-1, 1), (1, n_inputs-1)) + (np.log(beta[0]) - np.log(beta[1:])))
    rep_obs_eps = np.repeat(obs_eps.reshape(-1, 1), S_kernel, axis=1).T
    rep_obs_W = {}
    for i in range(n_inputs-1):
        rep_obs_W[i] = np.repeat(W[:, i].reshape(-1, 1), S_kernel, axis=1).T

    # Simulated variables
    simulated_v = stats.norm.rvs(loc=0, scale=np.sqrt(sigma2_v), size=S_kernel)
    simulated_u = np.sqrt(sigma2_u)*stats.norm.ppf((U_hat[:, 0]+1)/2)  # Simulated half normal rvs
    simulated_W = np.zeros((S_kernel, n_inputs-1))
    for i in range(n_inputs-1):
        simulated_W[:, i] = stats.norm.ppf(U_hat[:, i+1], mu_W[i], np.sqrt(sigma2_w[i]))
    simulated_eps = simulated_v - simulated_u

    # Bandwidth information for each conditioning variable
    h_eps = 1.06*S_kernel**(-1/5)*(max(np.std(simulated_eps), stats.iqr(simulated_eps)/1.34))
    h_W = np.zeros(n_inputs-1)
    for i in range(n_inputs-1):
        h_W[i] = 1.06*S_kernel**(-1/5)*(max(np.std(simulated_W[:, i]), stats.iqr(simulated_W[:, i])/1.34))
    h = np.concatenate([np.array([h_eps]), h_W])

    # Kernel estimates for E[u|eps, w1, w2]
    # V[u|eps, w1, w2] = E[u^2|w2, w3, eps] - (E[u|w2, w3, eps])^2
    kernel_regression_results1 = np.zeros(n)
    kernel_regression_results2 = np.zeros(n)
    for i in range(n):
        eps_kernel = stats.norm.pdf((simulated_eps - rep_obs_eps[:, i])/h[0])
        W_kernel = np.zeros((S_kernel, n_inputs-1))
        for j in range(n_inputs-1):
            W_kernel[:, j] = stats.norm.pdf((simulated_W[:, j] - rep_obs_W[j][:, i])/h[j+1])

        W_kernel_prod = np.prod(W_kernel, 1)
        kernel_product = eps_kernel*W_kernel_prod
        kernel_regression_results1[i] = np.sum(kernel_product*simulated_u)/np.sum(kernel_product)
        kernel_regression_results2[i] = np.sum(kernel_product*(simulated_u**2))/np.sum(kernel_product)

    u_hat = kernel_regression_results1
    V_u_hat = kernel_regression_results2 - (u_hat**2)

    return u_hat, V_u_hat
def Loglikelihood_APS14_dynamic_panel_SFA_u_RS2007(theta, y, X, N, T, k, S, FMSLE_us):
    if np.any(np.isnan(theta)):
        logDen = np.ones((n, 1))*-1e8
        logL = -np.sum(logDen)
    else:
        rhos = theta[7:]
        Rho = inverse_mapping_vec(rhos)  # Gaussian copula correlation matrix

        alpha = np.exp(theta[0])
        beta = 1/(1+np.exp(-theta[1:4]))  # Inverse logit transform of betas
        delta = theta[4:6]
        sigma2_v = theta[6]

        eps = {}
        for t in range(T):
            eps__ = np.full(N, np.nan)
            tmp_eps = y[t] - np.log(alpha) - X[t]@beta
            eps__[:len(tmp_eps)] = tmp_eps
            eps[t] = eps__

        if not np.all(np.linalg.eigvals(Rho) > 0):
            logDen = np.ones((n, 1))*-1e8
            logL = -np.sum(logDen)
        else:
            # Evaluate the integral via simulated MLE (FMSLE)
            A = np.linalg.cholesky(Rho)
            all_eps = np.concatenate([_eps.reshape(-1, 1) for _eps in eps.values()], axis=1)
            FMSLE_densities = np.zeros(N)
            for i in range(N):
                eps_i = all_eps[i, :]
                n_NaNs = len(eps_i[np.isnan(eps_i)])
                eps_i = eps_i[~np.isnan(eps_i)]  # remove any NaN from an unbalanced panel
                rep_eps_i = np.tile(eps_i, (S, 1))
                simulated_us_i = np.zeros((S, T))
                for t in range(T):
                    simulated_us_i[:, t] = FMSLE_us[t][:, i]

                # Transform simulated values to half-normal RV's
                CDF_u_i = stats.norm.cdf(simulated_us_i@A, np.zeros((S, T)), np.ones((S, T)))
                sigma2_u_hat = np.exp(delta[0] + delta[1]*np.arange(1, T+1, 1))
                u_i = stats.halfnorm.ppf(CDF_u_i,
                                         np.zeros((S, T)),
                                         np.tile(np.ones((1, T)) * np.sqrt(sigma2_u_hat), (S, 1)))

                # adjust for possible unbalanced panel
                u_i = u_i[:, :T-n_NaNs]
                # Joint density
                den_i = np.mean(stats.multivariate_normal.pdf(rep_eps_i + u_i,
                                                              mean=np.zeros(T-n_NaNs),
                                                              cov=np.eye(T-n_NaNs)*sigma2_v))  # eq 1 pg. 510 section 5.1 APS14
                if den_i < 1e-10:
                    den_i = 1e-10
                FMSLE_densities[i] = den_i

            logL = -np.sum(np.log(FMSLE_densities))

    return logL
def export_simulation_data_RS2007_electricity_application(theta, n_inputs, y, X, P, U_hat, S_kernel):
    alpha = theta[0]
    beta = theta[1:n_inputs+1]
    sigma2_v = theta[1+n_inputs]
    sigma2_u = theta[2+n_inputs]
    sigma2_w = np.exp(theta[3+n_inputs:(3+n_inputs)+(n_inputs-1)])
    mu_W = theta[(3+n_inputs)+(n_inputs-2)+1:(3+n_inputs)+(n_inputs-2)+(n_inputs)]

    # Observed variables
    obs_eps = y - np.log(alpha) - X@beta
    W = (np.tile(X[:, 0].reshape(-1, 1), (1, n_inputs-1)) - X[:, 1:]) - (P[:, 1:] - np.tile(P[:, 0].reshape(-1, 1), (1, n_inputs-1)) + (np.log(beta[0]) - np.log(beta[1:])))
    rep_obs_W = {}
    for i in range(n_inputs-1):
        rep_obs_W[i] = np.repeat(W[:, i].reshape(-1, 1), S_kernel, axis=1).T

    # Simulated variables
    simulated_v = stats.norm.rvs(loc=0, scale=np.sqrt(sigma2_v), size=S_kernel)
    simulated_u = np.sqrt(sigma2_u)*stats.norm.ppf((U_hat[:, 0]+1)/2)  # Simulated half normal rvs
    simulated_W = np.zeros((S_kernel, n_inputs-1))
    for i in range(n_inputs-1):
        simulated_W[:, i] = stats.norm.ppf(U_hat[:, i+1], mu_W[i], np.sqrt(sigma2_w[i]))
    simulated_eps = simulated_v - simulated_u

    # Export
    NN_train_eps_W = pd.DataFrame(np.concatenate([simulated_eps.reshape(-1, 1), simulated_W],
                                                 axis=1),
                                  columns=['train_simulated_eps']+[f'train_simulated_w{i+1}' for i in range(n_inputs-1)])
    NN_train_u = pd.DataFrame(simulated_u,
                              columns=['train_simulated_u'])
    NN_test_eps_W = pd.DataFrame(np.concatenate([obs_eps.reshape(-1, 1), W],
                                                axis=1),
                                 columns=['test_simulated_eps']+[f'test_simulated_w{i+1}' for i in range(n_inputs-1)])

    NN_train_eps_W.to_csv(r'./cross_sectional_SFA_RS2007_electricity_application_NN_train_eps_W.csv',
                          index=False)
    NN_train_u.to_csv(r'./cross_sectional_SFA_RS2007_electricity_application_NN_train_u.csv',
                      index=False)
    NN_test_eps_W.to_csv(r'./cross_sectional_SFA_RS2007_electricity_application_NN_test_eps_W.csv',
                         index=False)
def export_RS2007_electricity_SFA_panel_data(theta, y, X, N, T, U_hat, S_kernel):
    alpha = theta[0]
    beta = theta[1:4]
    delta = theta[4:6]
    sigma2_v = theta[6]

    # Observed variables
    obs_eps = {}
    for t in range(T):
        eps__ = np.full(N, np.nan)
        tmp_eps = y[t] - np.log(alpha) - X[t]@beta
        eps__[:len(tmp_eps)] = tmp_eps
        obs_eps[t] = eps__

    obs_eps = np.concatenate([x.reshape(-1, 1) for x in obs_eps.values()], axis=1)

    # Simulated variables
    simulated_v = stats.multivariate_normal.rvs(np.zeros(T), np.eye(T)*sigma2_v, S_kernel)  # simulate random noise for all T panels
    simulated_u = np.zeros((S_kernel, T))
    simulated_eps = np.zeros((S_kernel, T))
    for t in range(T):
        sigma2_u = np.exp(delta[0] + delta[1]*t)
        simulated_u[:, t] = np.sqrt(sigma2_u)*stats.norm.ppf((U_hat[:, t]+1)/2)  # simulated half normal rvs
        simulated_eps[:, t] = simulated_v[:, t] - simulated_u[:, t]

    NN_train_eps = pd.DataFrame(simulated_eps,
                                columns=[f'train_eps_{t}' for t in range(T)])

    NN_train_u = pd.DataFrame(simulated_u,
                              columns=[f'train_u_{t}' for t in range(T)])

    NN_test_eps = pd.DataFrame(obs_eps,
                               columns=[f'test_eps_{t}' for t in range(T)])

    NN_train_eps.to_csv(r'./panel_SFA_RS2007_electricty_application_NN_train_eps.csv',
                        index=False)
    NN_train_u.to_csv(r'./panel_SFA_RS2007_electricty_application_NN_train_u.csv',
                      index=False)
    NN_test_eps.to_csv(r'./panel_SFA_RS2007_electricty_application_NN_test_eps.csv',
                       index=False)
data = pd.read_excel('steamelectric.xlsx')

data['fuel_price_index'] = np.nan
data.iloc[:-1, -1] = data.iloc[:-1, 2].values/data.iloc[1:, 2].values
data['LM_price_index'] = np.nan
data.iloc[:-1, -1] = data.iloc[:-1, 3].values/data.iloc[1:, 3].values

data.iloc[13:1007:14, :] = np.nan  # drop last fuel price index for each plant
data = data.dropna()

X1 = (data['Fuel Costs ($1000)']/data['fuel_price_index'])*1000*1e-6  # fuel costs over price index
X2 = data['Operating Costs ($1000)']/data['LM_price_index']*1000*1e-6  # LM costs over price index
X3 = data['Capital ($1000)']/1000  # capital

X = np.log(pd.concat([X1, X2, X3], axis=1))
P = np.log(data[['fuel_price_index', 'LM_price_index', 'User Cost of Capital']])
y = np.log(data['Output (MWhr)']*1e-6)  # output in millions of MWh

# Estimate cross sectional models
n = len(y)
S = 500  # Number of Halton draws used in maximum simulated likelihood
S_kernel = 10000  # Number of simulated draws for evaluation of the conditional expectation
n_inputs = 3
n_corr_terms = (n_inputs**2 - n_inputs)/2  # Number of off-diagonal lower triangular correlation/covariance terms for the Gaussian copula correlation matrix
initial_lalpha = -2.6
initial_beta = np.array([0.5, 0.3, 0.3])
initial_lbeta = np.log(initial_beta)
initial_logit_beta = np.log(initial_beta/(1-initial_beta))
initial_sigma2_v = 0.015
initial_lsigma2_v = np.log(initial_sigma2_v)
initial_sigma2_u = 0.15
initial_lsigma2_u = np.log(initial_sigma2_u)
initial_mu_W = np.array([0.5, 0])

sampler = stats.qmc.Halton(d=1,
                           scramble=True,
                           seed=123)
sample = sampler.random(n=n)
us_ = stats.norm.ppf((sample+1)/2, 0, 1)
us_Sxn = np.reshape(np.repeat(us_[:, np.newaxis], S, axis=1), (S, n))

# Gaussian Copula
initial_Sigma = np.array([[0.2, 0.09],
                          [0.09, 0.2]])
initial_sigma2_w = np.diag(initial_Sigma)
initial_lsigma2_w = np.log(initial_sigma2_w)

eps_ = y - initial_lalpha - X@initial_beta
W_ = (np.tile(X.iloc[:, 0].values.reshape(-1, 1), (1, n_inputs-1)) - X.iloc[:, 1:].values) - (P.iloc[:, 1:].values - np.tile(P.iloc[:, 0].values.reshape(-1, 1), (1, n_inputs-1)) + (np.log(initial_beta[0]) - np.log(initial_beta[1:])))
initial_Rho = np.corrcoef(np.concatenate([eps_.values.reshape(-1, 1), W_], axis=1).T)
initial_lRho = direct_mapping_mat(initial_Rho)

Gaussian_copula_theta0 = np.concatenate([np.array([initial_lalpha]), initial_lbeta, np.array([initial_sigma2_v, initial_sigma2_u]),
                                         initial_lsigma2_w, initial_mu_W, initial_lRho])
# Bounds to ensure sigma2v and sigma2u are >= 0
bounds = [(None, None) for x in range(4)] + [
    (1e-5, np.inf),
    (1e-5, np.inf),
] + [(None, None) for x in range(7)]
# Minimize the negative log-likelihood using numerical optimization.
MLE_results = minimize(
    fun=Loglikelihood_Gaussian_copula_cross_sectional_application_SFA,
    x0=Gaussian_copula_theta0,
    method="L-BFGS-B",
    tol=1e-6,
    options={"ftol": 1e-6, "maxiter": 1000, "maxfun": 6*1000},
    args=(y.values, X.values, P.values, us_Sxn, n_inputs, S),
    bounds=bounds,
)
Gaussian_copula_theta = MLE_results.x
logMLE = MLE_results.fun * -1  # Log-likelihood at the solution

# Transform parameters
Gaussian_copula_theta[:4] = np.exp(Gaussian_copula_theta[:4])  # Transform production system coefficients
Gaussian_copula_theta[6:8] = np.exp(Gaussian_copula_theta[6:8])
rhos_log_form = Gaussian_copula_theta[-3:]
Rho = inverse_mapping_vec(rhos_log_form)
Gaussian_copula_theta[-3:] = direct_mapping_mat(initial_Rho)
Rho_lower_trianglular = Rho[np.tril_indices(Rho.shape[1], 1)]

np.savetxt(r'./cross_sectional_SFA_RS2007_electricity_application_gaussian_copula_correlation_matrix.csv',
           Rho,
           delimiter=',')
# JLMS technical inefficiency scores
(Gaussian_copula_JLMS_u_hat,
 Gaussian_copula_JLMS_V_u_hat) = estimate_Jondrow1982_u_hat(theta=Gaussian_copula_theta,
                                                            n_inputs=n_inputs,
                                                            n_corr_terms=n_corr_terms,
                                                            y=y.values,
                                                            X=X.values)
JLMS_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_JLMS_u_hat.reshape(-1, 1)], axis=1)
JLMS_V_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_JLMS_V_u_hat.reshape(-1, 1)], axis=1)
JLMS_u_hat_year_mean = np.zeros((13, 2))
JLMS_V_u_hat_year_mean = np.zeros((13, 2))
JLMS_std_u_hat_year_mean = np.zeros((13, 2))
for i, t in enumerate(range(86, 99)):
    JLMS_u_hat_year_mean[i, 0] = t
    JLMS_V_u_hat_year_mean[i, 0] = t
    JLMS_std_u_hat_year_mean[i, 0] = t
    JLMS_u_hat_year_mean[i, 1] = np.mean(JLMS_u_hat_matrix[JLMS_u_hat_matrix[:, 0] == t, 1])
    JLMS_V_u_hat_year_mean[i, 1] = np.mean(JLMS_V_u_hat_matrix[JLMS_V_u_hat_matrix[:, 0] == t, 1])
    JLMS_std_u_hat_year_mean[i, 1:] = np.mean(np.sqrt(JLMS_V_u_hat_matrix[JLMS_V_u_hat_matrix[:, 0] == t, 1]))
# Copula Nadaraya Watson Scores
Gaussian_copula_U_hat = simulate_error_components(Rho=Rho,
                                                  n_inputs=n_inputs,
                                                  S_kernel=S_kernel,
                                                  seed=1234)

(Gaussian_copula_NW_conditional_W_u_hat,
 Gaussian_copula_NW_conditional_W_V_u_hat) = Estimate_NW_u_hat_conditional_W_cross_sectional_application(theta=Gaussian_copula_theta,
                                                                                                          n_inputs=n_inputs,
                                                                                                          n_corr_terms=n_corr_terms,
                                                                                                          y=y.values,
                                                                                                          X=X.values,
                                                                                                          P=P.values,
                                                                                                          U_hat=Gaussian_copula_U_hat,
                                                                                                          S_kernel=S_kernel)
NW_conditional_W_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_NW_conditional_W_u_hat.reshape(-1, 1)], axis=1)
NW_conditional_W_V_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_NW_conditional_W_V_u_hat.reshape(-1, 1)], axis=1)
NW_conditional_W_u_hat_year_mean = np.zeros((13, 2))
NW_conditional_W_V_u_hat_year_mean = np.zeros((13, 2))
NW_conditional_W_std_u_hat_year_mean = np.zeros((13, 2))
for i, t in enumerate(range(86, 99)):
    NW_conditional_W_u_hat_year_mean[i, 0] = t
    NW_conditional_W_V_u_hat_year_mean[i, 0] = t
    NW_conditional_W_std_u_hat_year_mean[i, 0] = t
    NW_conditional_W_u_hat_year_mean[i, 1] = np.mean(NW_conditional_W_u_hat_matrix[NW_conditional_W_u_hat_matrix[:, 0] == t, 1])
    NW_conditional_W_V_u_hat_year_mean[i, 1] = np.mean(NW_conditional_W_V_u_hat_matrix[NW_conditional_W_V_u_hat_matrix[:, 0] == t, 1])
    NW_conditional_W_std_u_hat_year_mean[i, 1] = np.mean(np.sqrt(NW_conditional_W_V_u_hat_matrix[NW_conditional_W_V_u_hat_matrix[:, 0] == t, 1]))

# Export simulated training data and compute LLF u hat
export_simulation_data_RS2007_electricity_application(theta=Gaussian_copula_theta,
                                                      n_inputs=n_inputs,
                                                      y=y.values,
                                                      X=X.values,
                                                      P=P.values,
                                                      U_hat=Gaussian_copula_U_hat,
                                                      S_kernel=S_kernel)
# Users should change the first directory to the path where R is installed - use the code depending on your OS
# Windows
# subprocess.call([r'C:\Program Files\R\R-4.2.2\bin\Rscript.exe ./train_LocalLinear_forest_cross_sectional_RS2007_electricty_application.R'],
#                 shell=True)
# Mac
# subprocess.call([r'/usr/local/bin/Rscript', './train_LocalLinear_forest_cross_sectional_RS2007_electricty_application.R'])
Gaussian_copula_LLF_results = pd.read_csv(r'./LLF_Gaussian_copula_u_hat.csv')
Gaussian_copula_LLF_conditional_W_u_hat = Gaussian_copula_LLF_results.iloc[:, 0].values
Gaussian_copula_LLF_conditional_W_V_u_hat = Gaussian_copula_LLF_results.iloc[:, 1].values

LLF_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_LLF_conditional_W_u_hat.reshape(-1, 1)], axis=1)
LLF_V_u_hat_matrix = np.concatenate([data.loc[:, ['YEAR']].values, Gaussian_copula_LLF_conditional_W_V_u_hat.reshape(-1, 1)], axis=1)
LLF_u_hat_year_mean = np.zeros((13, 2))
LLF_V_u_hat_year_mean = np.zeros((13, 2))
LLF_std_u_hat_year_mean = np.zeros((13, 2))
for i, t in enumerate(range(86, 99)):
    LLF_u_hat_year_mean[i, 0] = t
    LLF_V_u_hat_year_mean[i, 0] = t
    LLF_std_u_hat_year_mean[i, 0] = t
    LLF_u_hat_year_mean[i, 1] = np.mean(LLF_u_hat_matrix[LLF_u_hat_matrix[:, 0] == t, 1])
    LLF_V_u_hat_year_mean[i, 1] = np.mean(LLF_V_u_hat_matrix[LLF_V_u_hat_matrix[:, 0] == t, 1])
    LLF_std_u_hat_year_mean[i, 1] = np.mean(np.sqrt(LLF_V_u_hat_matrix[LLF_V_u_hat_matrix[:, 0] == t, 1]))

mean_Gaussian_copula_JLMS_u_hat = np.mean(Gaussian_copula_JLMS_u_hat)
mean_Gaussian_copula_JLMS_std_u_hat = np.mean(JLMS_std_u_hat_year_mean[:, 1])
mean_Gaussian_copula_NW_conditional_W_u_hat = np.mean(Gaussian_copula_NW_conditional_W_u_hat)
mean_Gaussian_copula_NW_conditional_W_std_u_hat = np.mean(NW_conditional_W_std_u_hat_year_mean[:, 1])
mean_Gaussian_copula_LLF_conditional_W_u_hat = np.mean(Gaussian_copula_LLF_conditional_W_u_hat)
mean_Gaussian_copula_LLF_conditional_W_std_u_hat = np.mean(LLF_std_u_hat_year_mean[:, 1])

cross_sectional_results_table = pd.DataFrame(np.concatenate([np.array([x for x in range(86, 99)]).reshape(-1, 1),
                                                             JLMS_u_hat_year_mean[:, [1]], JLMS_std_u_hat_year_mean[:, [1]],
                                                             NW_conditional_W_u_hat_year_mean[:, [1]], NW_conditional_W_std_u_hat_year_mean[:, [1]],
                                                             LLF_u_hat_year_mean[:, [1]], LLF_std_u_hat_year_mean[:, [1]]], axis=1),
                                             columns=['Year', 'JLMS Est.', 'JLMS Std Dev', 'NW Est.', 'NW Std Dev', 'LLF Est.', 'LLF Std Dev'])
cross_sectional_averages = pd.DataFrame(np.array([mean_Gaussian_copula_JLMS_u_hat,
                                                  mean_Gaussian_copula_JLMS_std_u_hat,
                                                  mean_Gaussian_copula_NW_conditional_W_u_hat,
                                                  mean_Gaussian_copula_NW_conditional_W_std_u_hat,
                                                  mean_Gaussian_copula_LLF_conditional_W_u_hat,
                                                  mean_Gaussian_copula_LLF_conditional_W_std_u_hat]).reshape(1, -1),
                                        columns=['JLMS Est.', 'JLMS Std Dev', 'NW Est.', 'NW Std Dev', 'LLF Est.', 'LLF Std Dev'],
                                        index=['Average'])
print('Cross-Sectional Models (APS16 Estimator)')
display(cross_sectional_results_table)
display(cross_sectional_averages)
Cross-Sectional Models (APS16 Estimator)
  | Year | JLMS Est. | JLMS Std Dev | NW Est. | NW Std Dev | LLF Est. | LLF Std Dev |
---|---|---|---|---|---|---|---|
0 | 86.0 | 0.460616 | 0.113611 | 0.453145 | 0.116160 | 0.459684 | 0.006971 |
1 | 87.0 | 0.395511 | 0.110433 | 0.400148 | 0.113094 | 0.399679 | 0.006790 |
2 | 88.0 | 0.389046 | 0.110389 | 0.388944 | 0.112926 | 0.395435 | 0.006410 |
3 | 89.0 | 0.342816 | 0.107974 | 0.345560 | 0.110662 | 0.348193 | 0.006073 |
4 | 90.0 | 0.376541 | 0.108117 | 0.375689 | 0.111443 | 0.382351 | 0.006412 |
5 | 91.0 | 0.382511 | 0.108342 | 0.382901 | 0.111295 | 0.389021 | 0.006524 |
6 | 92.0 | 0.404540 | 0.107777 | 0.403757 | 0.110551 | 0.408993 | 0.005997 |
7 | 93.0 | 0.413886 | 0.108060 | 0.408557 | 0.112749 | 0.417195 | 0.006726 |
8 | 94.0 | 0.391928 | 0.105610 | 0.389851 | 0.105226 | 0.400428 | 0.006062 |
9 | 95.0 | 0.406917 | 0.106064 | 0.411143 | 0.109713 | 0.415814 | 0.006120 |
10 | 96.0 | 0.379603 | 0.102596 | 0.377596 | 0.103694 | 0.387660 | 0.007358 |
11 | 97.0 | 0.348042 | 0.099227 | 0.351766 | 0.103336 | 0.356639 | 0.005932 |
12 | 98.0 | 0.357750 | 0.099781 | 0.358207 | 0.101077 | 0.364057 | 0.006379 |
  | JLMS Est. | JLMS Std Dev | NW Est. | NW Std Dev | LLF Est. | LLF Std Dev |
---|---|---|---|---|---|---|
Average | 0.388515 | 0.106768 | 0.388322 | 0.109379 | 0.394315 | 0.006443 |
# Estimate Panel data models
all_ = pd.concat([data[['Firm No', 'YEAR']], y, X], axis=1)
N = 72
T = len(all_['YEAR'].unique())
panel_y = {}
panel_X = {}
for j, t in enumerate(range(86, 99)):
    y__ = all_.loc[all_['YEAR'] == t, 'Output (MWhr)']
    X__ = all_.loc[all_['YEAR'] == t, [0, 1, 'Capital ($1000)']]
    panel_y[j] = y__.values
    panel_X[j] = X__.values
# Independent uniform random variables for FMSLE - assumes a Gaussian copula
sampler = stats.qmc.Halton(d=T,
                           scramble=True,
                           seed=123)
us_ = sampler.random(n=S)
us_ = stats.norm.ppf(us_)  # transform to standard normal
FMSLE_us = {}
for t in range(T):
    us_Sxn = np.tile(us_[:, t-1][np.newaxis, :], (N, 1)).T
    FMSLE_us[t] = us_Sxn

initial_lalpha = -2.6
initial_beta = np.array([0.5, 0.2, 0.2])
initial_lbeta = np.log(initial_beta)
initial_logit_beta = np.log(initial_beta/(1-initial_beta))
initial_sigma2_v = 0.015
initial_lsigma2_v = np.log(initial_sigma2_v)
initial_sigma2_u = 0.15
initial_lsigma2_u = np.log(initial_sigma2_u)

initial_delta = np.array([initial_lsigma2_u, 0.2])
eps_ = {}
for t in range(T):
    eps__array = np.zeros(N)
    eps__ = panel_y[t] - initial_lalpha - panel_X[t]@initial_beta
    eps__array[:len(eps__)] = eps__
    eps_[t] = eps__array

k = len(initial_beta) + 1
initial_Rho = np.corrcoef(np.concatenate([eps.reshape(-1, 1) for eps in eps_.values()], axis=1).T)
initial_lRho = direct_mapping_mat(initial_Rho)
theta0 = np.concatenate([np.array([initial_lalpha]),
                         initial_logit_beta,
                         initial_delta,
                         np.array([initial_sigma2_v]), initial_lRho])
# Bounds to ensure sigma2v and sigma2u are >= 0
bounds = [(None, None) for x in range(6)] + [
    (1e-5, np.inf),
] + [(None, None) for x in range(len(initial_lRho))]
# Minimize the negative log-likelihood using numerical optimization.
MLE_results = minimize(
    fun=Loglikelihood_APS14_dynamic_panel_SFA_u_RS2007,
    x0=theta0,
    method="L-BFGS-B",
    tol=1e-8,
    options={"ftol": 1e-8, "maxiter": 1000, "maxfun": 35000, "maxcor": 500},
    args=(panel_y, panel_X, N, T, k, S, FMSLE_us),
    bounds=bounds,
)
APS14_theta = MLE_results.x
APS14_theta[0] = np.exp(APS14_theta[0])
APS14_theta[1:4] = 1/(1+np.exp(-APS14_theta[1:4]))  # inverse logit transform of betas
APS14_logMLE = MLE_results.fun * -1

APS14_Rho = inverse_mapping_vec(APS14_theta[7:])
np.savetxt(r'./panel_SFA_RS2007_electricty_application_gaussian_copula_correlation_matrix.csv',
           APS14_Rho,
           delimiter=',')

# Simulated dependent U based upon estimated copula parameters
APS14_U_hat = simulate_error_components(Rho=APS14_Rho,
                                        n_inputs=T,
                                        S_kernel=S_kernel,
                                        seed=10)
# JLMS scores
(APS14_JLMS_u_hat,
 APS14_JLMS_V_u_hat) = Estimate_Jondrow1982_u_hat_panel_SFA_application_RS2007(alpha=APS14_theta[0],
                                                                               beta=APS14_theta[1:4],
                                                                               delta=APS14_theta[4:6],
                                                                               sigma2_v=APS14_theta[6],
                                                                               y=panel_y,
                                                                               X=panel_X,
                                                                               T=T,
                                                                               N=N)
APS14_JLMS_u_hat_year_mean = np.nanmean(APS14_JLMS_u_hat, axis=0)
APS14_JLMS_V_u_hat_year_mean = np.nanmean(APS14_JLMS_V_u_hat, axis=0)
APS14_JLMS_std_u_hat_year_mean = np.nanmean(np.sqrt(APS14_JLMS_V_u_hat), axis=0)
# Copula Nadaraya Watson
(APS14_NW_u_hat_conditional_eps,
 APS14_NW_V_u_hat_conditional_eps) = Estimate_NW_u_hat_conditional_eps_panel_SFA_RS2007(theta=APS14_theta,
                                                                                        y=panel_y,
                                                                                        X=panel_X,
                                                                                        N=N,
                                                                                        T=T,
                                                                                        k=k,
                                                                                        U_hat=APS14_U_hat,
                                                                                        S_kernel=S_kernel)
APS14_NW_u_hat_conditional_eps_year_mean = np.nanmean(APS14_NW_u_hat_conditional_eps, axis=0)
APS14_NW_V_u_hat_conditional_eps_year_mean = np.nanmean(APS14_NW_V_u_hat_conditional_eps, axis=0)
APS14_NW_std_u_hat_conditional_eps_year_mean = np.nanmean(np.sqrt(APS14_NW_V_u_hat_conditional_eps), axis=0)

export_RS2007_electricity_SFA_panel_data(theta=APS14_theta,
                                         y=panel_y,
                                         X=panel_X,
                                         N=N,
                                         T=T,
                                         U_hat=APS14_U_hat,
                                         S_kernel=S_kernel)
# Users should change the first directory to the path where R is installed
# Windows
# subprocess.call(r'C:\Program Files\R\R-4.2.2\bin\Rscript.exe ./train_LocalLinear_forest_panel_RS2007_electricty_application.R',
#                 shell=True)
# Mac
# subprocess.call([r'/usr/local/bin/Rscript', './train_LocalLinear_forest_panel_RS2007_electricty_application.R'])
APS14_LLF_conditional_eps_u_hat = pd.read_csv(r'./RS2007_electricity_LLF_Gaussian_copula_u_hat.csv')
APS14_LLF_conditional_eps_V_u_hat = pd.read_csv(r'./RS2007_electricity_LLF_Gaussian_copula_V_u_hat.csv')
APS14_LLF_conditional_eps_u_hat = APS14_LLF_conditional_eps_u_hat.iloc[:-1, :]
APS14_LLF_conditional_eps_V_u_hat = APS14_LLF_conditional_eps_V_u_hat.iloc[:-1, :]
APS14_LLF_conditional_eps_u_hat_year_mean = np.nanmean(APS14_LLF_conditional_eps_u_hat, axis=0)
APS14_LLF_conditional_eps_V_u_hat_year_mean = np.nanmean(APS14_LLF_conditional_eps_V_u_hat, axis=0)
APS14_LLF_conditional_eps_std_u_hat_year_mean = np.nanmean(np.sqrt(APS14_LLF_conditional_eps_V_u_hat), axis=0)

mean_Gaussian_copula_JLMS_u_hat = np.mean(APS14_JLMS_u_hat_year_mean)
mean_Gaussian_copula_JLMS_std_u_hat = np.mean(APS14_JLMS_std_u_hat_year_mean)
mean_Gaussian_copula_NW_conditional_W_u_hat = np.mean(APS14_NW_u_hat_conditional_eps_year_mean)
mean_Gaussian_copula_NW_conditional_W_std_u_hat = np.mean(APS14_NW_std_u_hat_conditional_eps_year_mean)
mean_Gaussian_copula_LLF_conditional_W_u_hat = np.mean(APS14_LLF_conditional_eps_u_hat_year_mean)
mean_Gaussian_copula_LLF_conditional_W_std_u_hat = np.mean(APS14_LLF_conditional_eps_std_u_hat_year_mean)

panel_results_table = pd.DataFrame(np.concatenate([np.array([x for x in range(86, 99)]).reshape(-1, 1),
                                                   APS14_JLMS_u_hat_year_mean.reshape(-1, 1), APS14_JLMS_std_u_hat_year_mean.reshape(-1, 1),
                                                   APS14_NW_u_hat_conditional_eps_year_mean.reshape(-1, 1), APS14_NW_std_u_hat_conditional_eps_year_mean.reshape(-1, 1),
                                                   APS14_LLF_conditional_eps_u_hat_year_mean.reshape(-1, 1), APS14_LLF_conditional_eps_std_u_hat_year_mean.reshape(-1, 1)], axis=1),
                                   columns=['Year', 'JLMS Est.', 'JLMS Std Dev', 'NW Est.', 'NW Std Dev', 'LLF Est.', 'LLF Std Dev'])
panel_averages = pd.DataFrame(np.array([mean_Gaussian_copula_JLMS_u_hat,
                                        mean_Gaussian_copula_JLMS_std_u_hat,
                                        mean_Gaussian_copula_NW_conditional_W_u_hat,
                                        mean_Gaussian_copula_NW_conditional_W_std_u_hat,
                                        mean_Gaussian_copula_LLF_conditional_W_u_hat,
                                        mean_Gaussian_copula_LLF_conditional_W_std_u_hat]).reshape(1, -1),
                              columns=['JLMS Est.', 'JLMS Std Dev', 'NW Est.', 'NW Std Dev', 'LLF Est.', 'LLF Std Dev'],
                              index=['Average'])
print('Panel Data Models (APS14 Estimator)')
display(panel_results_table)
display(panel_averages)
Panel Data Models (APS14 Estimator)
  | Year | JLMS Est. | JLMS Std Dev | NW Est. | NW Std Dev | LLF Est. | LLF Std Dev |
---|---|---|---|---|---|---|---|
0 | 86.0 | 0.511809 | 0.088466 | 0.408943 | 0.028140 | 0.424587 | 0.004156 |
1 | 87.0 | 0.404085 | 0.087483 | 0.418386 | 0.030822 | 0.432794 | 0.004808 |
2 | 88.0 | 0.438372 | 0.088059 | 0.433766 | 0.024779 | 0.429139 | 0.003546 |
3 | 89.0 | 0.416660 | 0.088250 | 0.440535 | 0.028039 | 0.434090 | 0.004797 |
4 | 90.0 | 0.404666 | 0.086727 | 0.457208 | 0.023650 | 0.460818 | 0.002694 |
5 | 91.0 | 0.409640 | 0.087197 | 0.465823 | 0.030365 | 0.470895 | 0.003679 |
6 | 92.0 | 0.407005 | 0.086893 | 0.387987 | 0.039109 | 0.388921 | 0.007091 |
7 | 93.0 | 0.412600 | 0.086621 | 0.415747 | 0.046351 | 0.432685 | 0.007462 |
8 | 94.0 | 0.385891 | 0.085183 | 0.404131 | 0.036672 | 0.410006 | 0.003713 |
9 | 95.0 | 0.414157 | 0.086411 | 0.396447 | 0.038572 | 0.401456 | 0.005797 |
10 | 96.0 | 0.374778 | 0.082006 | 0.366305 | 0.037658 | 0.368819 | 0.005441 |
11 | 97.0 | 0.332448 | 0.077521 | 0.323574 | 0.041057 | 0.331670 | 0.007950 |
12 | 98.0 | 0.381271 | 0.083361 | 0.382061 | 0.053896 | 0.386111 | 0.004549 |
  | JLMS Est. | JLMS Std Dev | NW Est. | NW Std Dev | LLF Est. | LLF Std Dev |
---|---|---|---|---|---|---|
Average | 0.407183 | 0.085706 | 0.407763 | 0.035316 | 0.41323 | 0.005053 |
# density plots
kde = stats.gaussian_kde(Gaussian_copula_JLMS_u_hat)
Gaussian_copula_JLMS_TE_kernel_x = np.linspace(Gaussian_copula_JLMS_u_hat.min(), Gaussian_copula_JLMS_u_hat.max(), 100)
Gaussian_copula_JLMS_TE_kernel = kde(Gaussian_copula_JLMS_TE_kernel_x)

kde = stats.gaussian_kde(Gaussian_copula_NW_conditional_W_u_hat)
Gaussian_copula_NW_TE_kernel_x = np.linspace(Gaussian_copula_JLMS_u_hat.min(), Gaussian_copula_JLMS_u_hat.max(), 100)
Gaussian_copula_NW_TE_kernel = kde(Gaussian_copula_NW_TE_kernel_x)

kde = stats.gaussian_kde(Gaussian_copula_LLF_conditional_W_u_hat)
Gaussian_copula_LLF_TE_kernel_x = np.linspace(Gaussian_copula_JLMS_u_hat.min(), Gaussian_copula_JLMS_u_hat.max(), 100)
Gaussian_copula_LLF_TE_kernel = kde(Gaussian_copula_LLF_TE_kernel_x)

figure = plt.Figure(figsize=(10, 10))

plt.plot(Gaussian_copula_JLMS_TE_kernel_x,
         Gaussian_copula_JLMS_TE_kernel,
         color='k',
         ls='-',
         label=r'JLMS $E[u_{i}|\varepsilon_{i}]$')

plt.plot(Gaussian_copula_NW_TE_kernel_x,
         Gaussian_copula_NW_TE_kernel,
         ls=':',
         color='k',
         label=r'NW $E[u_{i}|\varepsilon_{i}, \omega_{i2}, \omega_{i3}]$')

plt.plot(Gaussian_copula_LLF_TE_kernel_x,
         Gaussian_copula_LLF_TE_kernel,
         ls='-.',
         color='k',
         label=r'LLF $E[u_{i}|\varepsilon_{i}, \omega_{i2}, \omega_{i3}]$')
plt.xlabel(r'$u_{i}$', fontsize=14)
plt.ylabel('Kernel Density', fontsize=14)
plt.title('JLMS & APS16 Estimator', fontsize=14)
plt.legend()
plt.show()
APS14_JLMS_u_hat_flat = APS14_JLMS_u_hat.flatten()
APS14_JLMS_u_hat_flat = APS14_JLMS_u_hat_flat[~np.isnan(APS14_JLMS_u_hat_flat)]
APS14_NW_u_hat_conditional_eps_flat = APS14_NW_u_hat_conditional_eps.flatten()
APS14_NW_u_hat_conditional_eps_flat = APS14_NW_u_hat_conditional_eps_flat[~np.isnan(APS14_NW_u_hat_conditional_eps_flat)]
APS14_LLF_conditional_eps_u_hat_flat = APS14_LLF_conditional_eps_u_hat.values.flatten()
APS14_LLF_conditional_eps_u_hat_flat = APS14_LLF_conditional_eps_u_hat_flat[~np.isnan(APS14_LLF_conditional_eps_u_hat_flat)]

kde = stats.gaussian_kde(APS14_JLMS_u_hat_flat)
APS14_JLMS_TE_kernel_x = np.linspace(APS14_JLMS_u_hat_flat.min(),
                                     APS14_JLMS_u_hat_flat.max(), 100)
APS14_JLMS_TE_kernel = kde(APS14_JLMS_TE_kernel_x)

kde = stats.gaussian_kde(APS14_NW_u_hat_conditional_eps_flat)
APS14_NW_TE_kernel_x = np.linspace(APS14_NW_u_hat_conditional_eps_flat.min(),
                                   APS14_NW_u_hat_conditional_eps_flat.max(), 100)
APS14_NW_TE_kernel = kde(APS14_NW_TE_kernel_x)

kde = stats.gaussian_kde(APS14_LLF_conditional_eps_u_hat_flat)
APS14_LLF_TE_kernel_x = np.linspace(APS14_LLF_conditional_eps_u_hat_flat.min(),
                                    APS14_LLF_conditional_eps_u_hat_flat.max(), 100)
APS14_LLF_TE_kernel = kde(APS14_LLF_TE_kernel_x)

figure = plt.Figure(figsize=(10, 10))

plt.plot(APS14_JLMS_TE_kernel_x,
         APS14_JLMS_TE_kernel,
         color='k',
         ls='-',
         label=r'JLMS $E[u_{it}|\varepsilon_{it}]$')

plt.plot(APS14_NW_TE_kernel_x,
         APS14_NW_TE_kernel,
         ls=':',
         color='k',
         label=r'NW $E[u_{it}|\varepsilon_{i1}, \dots, \varepsilon_{i,13}]$')

plt.plot(APS14_LLF_TE_kernel_x,
         APS14_LLF_TE_kernel,
         ls='-.',
         color='k',
         label=r'LLF $E[u_{it}|\varepsilon_{i1}, \dots, \varepsilon_{i,13}]$')
plt.xlabel(r'$u_{i}$', fontsize=14)
plt.ylabel('Kernel Density', fontsize=14)
plt.title('JLMS & APS14 Estimator', fontsize=14)
plt.legend()
plt.show()
# Remove all previous function and variable definitions before the next exercise
%reset