import pandas as pd
import numpy as np
import statsmodels.api as sm
5 Chapter 5: Modelling Endogeneity in Stochastic Frontier Models
5.1 Exercise 5.1: C2SLS of Spanish diary farm production model.
This exercise implements the C2SLS estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
# load data
= pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
dairyfarms 'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
= dairyfarms.astype(float)
dairyfarms
# Drop farms with zero roughage
= dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data =True)
data.reset_index(inplace'ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
data[
# scaling data
= np.log(data['MILK']) - np.log(np.mean(data['MILK']))
y = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.log(np.mean(X))
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
X
# create indicator matrix D
= pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
D
# create Z matrix
'COOP'] = data['COOP'].astype(int)
data[= pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
COOP_dummy
= pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
Z 'LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
X[[=1)
COOP_dummy], axis
= pd.concat([Z, D], axis=1)
Z
# compute regression coefficients and standard errors
= sm.OLS(X[['LABOR']].values, Z.values)
OLS1 = OLS1.fit()
results1 = results1.params
p1 = np.sqrt(np.diag(results1.cov_params()))
se1 = Z@p1
x1h
= sm.OLS(X[['COWS']].values, Z.values)
OLS2 = OLS2.fit()
results2 = results2.params
p2 = np.sqrt(np.diag(results2.cov_params()))
se2 = Z@p2
x2h
= sm.OLS(X[['FEED']].values, Z.values)
OLS3 = OLS3.fit()
results3 = results3.params
p3 = np.sqrt(np.diag(results3.cov_params()))
se3 = Z@p3
x3h
= sm.OLS(X[['LAND']].values, Z.values)
OLS4 = OLS4.fit()
results4 = results4.params
p4 = np.sqrt(np.diag(results4.cov_params()))
se4 = Z@p4
x4h
= sm.OLS(X[['ROUGHAGE']].values, Z.values)
OLS5 = OLS5.fit()
results5 = results5.params
p5 = np.sqrt(np.diag(results5.cov_params()))
se5 = Z@p5
x5h
= pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
Xh = sm.OLS(y.values, Xh.values)
OLS_Xh = OLS_Xh.fit()
results_Xh = results_Xh.params
ls2 = np.sqrt(np.diag(results_Xh.cov_params()))
se2s = y.values - (pd.concat([X,D], axis=1)@ls2).values
uh2s
# under homoskedastity
= 1/(len(y)-Xh.shape[1])*np.sum(uh2s**2)*np.linalg.inv((Xh.T@Xh))
v1 # under heteroskedasticity
= np.zeros((Xh.shape[1], Xh.shape[1]))
vv for i in range(len(y)):
= uh2s[i]**2
uh2si += uh2si*Xh.values[i,:].reshape(-1,1)@Xh.values[i,:].reshape(-1,1).T
vv = len(y)/(len(y)-Xh.shape[1])*np.linalg.inv(Xh.values.T@Xh.values)@vv@np.linalg.inv(Xh.values.T@Xh.values)
v2 = np.sqrt(np.diag(v1))
correct_se2s_NCH = np.sqrt(np.diag(v2))
correct_se2s_noNCH
= np.mean(uh2s**3)
mu3h2SLS = np.mean(uh2s**2)
sige2h2SLS = (np.pi/(np.pi - 4)*np.sqrt(np.pi/2)*mu3h2SLS)**(2/3)*(mu3h2SLS<0)
sigu2h2SLS = ls2[0] + np.sqrt(2/np.pi*sigu2h2SLS)
alphah2SLS = sige2h2SLS-(np.pi-2)/np.pi*sigu2h2SLS
sigv2h2SLS = np.sqrt(sigu2h2SLS)
siguh2SLS = np.sqrt(sigv2h2SLS)
sigvh2SLS = siguh2SLS/sigvh2SLS
lamh2SLS = np.sqrt(sigu2h2SLS+sigv2h2SLS)
sigh2SLS
= np.concatenate(([alphah2SLS], ls2[1:]))
C2SLS_beta0 = sigh2SLS
C2SLS_sig0 = lamh2SLS
C2SLS_lam0 = sigu2h2SLS
C2SLS_sigu = sigv2h2SLS
C2SLS_sigv = correct_se2s_NCH
C2SLS_ster_beta0 = np.sqrt((2 / np.pi) * sigu2h2SLS)
C2SLS_Eu
#Display the results as a table
= pd.DataFrame(data = {'Est': np.concatenate([C2SLS_beta0, np.array([C2SLS_sig0, C2SLS_lam0, C2SLS_sigu, C2SLS_sigv, C2SLS_Eu])], axis = 0),
results 'St.Err':np.concatenate([C2SLS_ster_beta0, np.array([np.nan, np.nan, np.nan, np.nan, np.nan])], axis = 0)},
= ["Const", "Labor", "Cows", "Feed",
index "Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
= results.round(4)
results display(results)
/Users/jleu0010/Documents/GitHub/EPA-Copula-SFA-website/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'
return mean(axis=axis, dtype=dtype, out=out, **kwargs)
Est | St.Err | |
---|---|---|
Const | 0.0679 | 0.0255 |
Labor | 0.1246 | 0.0628 |
Cows | 0.9704 | 0.0827 |
Feed | 0.0889 | 0.0472 |
Land | -0.1136 | 0.0177 |
Rough | 0.1108 | 0.0260 |
D00 | 0.0093 | 0.0202 |
D01 | 0.0256 | 0.0221 |
D02 | 0.0406 | 0.0244 |
D03 | 0.0163 | 0.0250 |
D04 | 0.0430 | 0.0256 |
D05 | 0.1005 | 0.0371 |
D06 | 0.1320 | 0.0401 |
D07 | 0.1294 | 0.0412 |
D08 | 0.1135 | 0.0403 |
D09 | 0.1031 | 0.0419 |
D10 | 0.1194 | 0.0405 |
Sigma | 0.2167 | NaN |
Lambda | 1.7735 | NaN |
Sigma2_u | 0.0356 | NaN |
Sigma2_v | 0.0113 | NaN |
E(u) | 0.1506 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset
5.2 Exercise 5.2 GMM of Spanish diary farm production model
This exercise implements the GMM estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimize
def AppGMM(coefs, y, X, Z):
= X.shape[1]
nX = coefs[:nX]
beta = coefs[-2]
sig = coefs[-1]
lam
= y - X@beta
er = -1 +1/sig**2*er**2;
m1
= stats.norm.pdf(lam/sig*er)
pdf = stats.norm.cdf(lam/sig*er)
cdf = er*pdf/(1-cdf)
m2
= np.zeros((len(y), Z.shape[1]))
m3 for i in range(Z.shape[1]):
= 1/sig*Z[:, i]*er + lam*Z[:, i]*pdf/(1-cdf)
m3[:, i]
= np.concatenate([np.array([np.mean(m1)]),
mbar
np.array([np.mean(m2)]), =0)])
np.mean(m3, axis= np.concatenate([m1.reshape(-1,1), m2.reshape(-1,1), m3], axis=1)
mom
= mom.T@mom/len(y)
C = np.linalg.inv(C)
W = mbar@W@mbar.T
Q
return Q
def m(coefs, y, X, Z):
= X.shape[1]
nX = coefs[:nX]
beta = coefs[-2]
sig = coefs[-1]
lam
= y - X@beta
er = -1 +1/sig**2*er**2;
m1
= stats.norm.pdf(lam/sig*er)
pdf = stats.norm.cdf(lam/sig*er)
cdf = er*pdf/(1-cdf)
m2
= np.zeros((len(y), Z.shape[1]))
m3 for i in range(Z.shape[1]):
= 1/sig*Z[:, i]*er + lam*Z[:, i]*pdf/(1-cdf)
m3[:, i]
= np.concatenate([np.array([np.mean(m1)]),
m
np.array([np.mean(m2)]), =0)])
np.mean(m3, axis
return m
def AppEstimate_GMM(y, X, Xh, Z):
= [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
beta0 0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
= 0.21
sig0 = 1.81
lam0
= np.array(beta0 + [sig0, lam0])
theta0
= [(None, None) for x in range(len(theta0) - 2)] + [
bounds 1e-6, np.inf),
(1e-6, np.inf),
(
]
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
# L-BFGS-B method obtaines a better solution (smaller values of the objective). However, this solution is different to te MATLAB solution
= minimize(
MLE_results =AppGMM,
fun=theta0,
x0="nelder-mead",
method= 1e-6,
tol ={"maxiter": 1000},
options=(y.values, X.values, Z.values),
args=bounds,
bounds
)= MLE_results.x
theta
= Xh.shape[1]
nX = theta[:nX]
beta = theta[-2]
sig = theta[-1]
lam
= y.values - X.values@beta
er = -1 +1/sig**2*er**2;
m1
= stats.norm.pdf(lam/sig*er)
pdf = stats.norm.cdf(lam/sig*er);
cdf = er*pdf/(1-cdf)
m2
= np.zeros((len(y), Z.shape[1]))
m3 for i in range(Z.shape[1]):
= 1/sig*Z.values[:, i]*er + lam*Z.values[:, i]*pdf/(1-cdf)
m3[:, i]
= np.concatenate([m1.reshape(-1,1), m2.reshape(-1,1), m3], axis=1)
mom = mom.T@mom/len(y)
C = np.linalg.inv(C)
W
# standard errors
= 1e-1
delta = np.zeros((25, len(theta)))
grad for i in range(len(theta)):
= np.copy(theta)
theta1 += delta
theta1[i] = (m(theta1, y.values, X.values, Z.values) - m(theta, y.values, X.values, Z.values))/delta
grad[:,i] = grad.T@W@grad
OPG = np.diag(np.linalg.inv(OPG)/len(y))
Vmle
return theta, Vmle
# load data
= pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
dairyfarms 'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
= dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data =True)
data.reset_index(inplace'ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
data[
#scaling to have unit means:
# scaling data
= np.log(data['MILK']) - np.log(np.mean(data['MILK']))
y = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
X
# create indicator matrix D
= pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
D
# create Z matrix
'COOP'] = data['COOP'].astype(int)
data[= pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
COOP_dummy
= pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
Z 'LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
X[[=1)
COOP_dummy], axis
= pd.concat([Z, D], axis=1)
Z
# compute regression coefficients and standard errors
= sm.OLS(X[['LABOR']].values, Z.values)
OLS1 = OLS1.fit()
results1 = results1.params
p1 = np.sqrt(np.diag(results1.cov_params()))
se1 = Z@p1
x1h
= sm.OLS(X[['COWS']].values, Z.values)
OLS2 = OLS2.fit()
results2 = results2.params
p2 = np.sqrt(np.diag(results2.cov_params()))
se2 = Z@p2
x2h
= sm.OLS(X[['FEED']].values, Z.values)
OLS3 = OLS3.fit()
results3 = results3.params
p3 = np.sqrt(np.diag(results3.cov_params()))
se3 = Z@p3
x3h
= sm.OLS(X[['LAND']].values, Z.values)
OLS4 = OLS4.fit()
results4 = results4.params
p4 = np.sqrt(np.diag(results4.cov_params()))
se4 = Z@p4
x4h
= sm.OLS(X[['ROUGHAGE']].values, Z.values)
OLS5 = OLS5.fit()
results5 = results5.params
p5 = np.sqrt(np.diag(results5.cov_params()))
se5 = Z@p5
x5h
= pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
Xh
= pd.concat([X, D], axis=1)
X
= AppEstimate_GMM(y, X, Xh, Z)
[GMM_coefs, GMM_vars] = GMM_coefs[:-2]
GMM_beta0 = GMM_coefs[-2]
GMM_sig0 = GMM_coefs[-1]
GMM_lam0 = (GMM_sig0/np.sqrt(1+GMM_lam0**2))**2
GMM_sigv = GMM_sig0**2-GMM_sigv
GMM_sigu = np.sqrt((2/np.pi)*GMM_sigu)
GMM_Eu = np.sqrt(GMM_vars)
GMM_ster = GMM_ster[:-2]
GMM_ster_beta0 = GMM_ster[-2]
GMM_sig0_ster = GMM_ster[-1]
GMM_lam0_ster
#Display the results as a table
= pd.DataFrame(data = {'Est': np.concatenate([GMM_beta0,
results
np.array([GMM_sig0, GMM_lam0, GMM_sigu, GMM_sigv, GMM_Eu])], = 0),
axis 'St.Err':np.concatenate([GMM_ster_beta0, np.array([GMM_sig0_ster, GMM_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
= ["Const", "Labor", "Cows", "Feed",
index "Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
= results.round(4)
results display(results)
Est | St.Err | |
---|---|---|
Const | 0.0485 | 0.0249 |
Labor | 0.2545 | 0.0528 |
Cows | 0.8541 | 0.0698 |
Feed | 0.0506 | 0.0401 |
Land | -0.1031 | 0.0163 |
Rough | 0.1336 | 0.0212 |
D00 | 0.0015 | 0.0225 |
D01 | -0.0152 | 0.0223 |
D02 | 0.0284 | 0.0228 |
D03 | 0.0127 | 0.0243 |
D04 | 0.0219 | 0.0257 |
D05 | 0.1292 | 0.0346 |
D06 | 0.1670 | 0.0374 |
D07 | 0.2048 | 0.0379 |
D08 | 0.1640 | 0.0377 |
D09 | 0.1584 | 0.0386 |
D10 | 0.1732 | 0.0367 |
Sigma | 0.2200 | 0.0320 |
Lambda | 1.7594 | 0.1766 |
Sigma2_u | 0.0366 | NaN |
Sigma2_v | 0.0118 | NaN |
E(u) | 0.1526 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset
5.3 Exercise 5.3: LIML-1 of Spanish diary farm production model
This exercise implements the LIML-1 estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimize
def fill_lower_triangular_columnwise(matrix, values):
= 0
val_index for j in range(matrix.shape[1]):
for i in range(j, matrix.shape[0]):
if val_index < len(values):
= values[val_index]
matrix[i, j] += 1
val_index return matrix
def lnL1_4_2(coefs, y, X, etas):
= X.shape[1]
nX = coefs[:nX]
beta = coefs[-2]
sig = coefs[-1]
lam = coefs[nX:-2]
sigve = 1/len(y)*(etas.T@etas)
sigee
= (np.linalg.lstsq(sigee, sigve.T, rcond=None)[0])@sigve.T
correction = lam/np.sqrt(1-(1+lam**2)/sig**2*correction)
lamc = np.sqrt(sig**2-correction)
sigc = (np.linalg.lstsq(sigee, sigve.T, rcond=None)[0])@etas.T
muci = y - X@beta - muci
er = np.log(stats.norm.cdf(-lamc/sigc*er))
l3
= -0.5*np.log(sigc**2)-0.5*(1/(sigc**2)*(er**2))+l3
lnL1
return lnL1
def lnL2_4_2(sigee, etas):
= np.zeros(len(etas))
L2 for i in range(len(etas)):
=None)[0]
np.linalg.lstsq(sigee, etas[i,:], rcond= -0.5*np.log(np.linalg.det(sigee))-0.5*np.linalg.lstsq(sigee, etas[i,:], rcond=None)[0]@etas[i,:].T
L2[i]
return L2
def AppLogDen_LIML(coefs, y, X, Z):
= X.shape[1]
nX = Z.shape[1]
nZ
= coefs[:nX]
beta = coefs[nX:nX+nZ]
p1 = coefs[nX+nZ:nX+2*nZ]
p2 = coefs[nX+2*nZ:nX+3*nZ]
p3 = coefs[nX+3*nZ:nX+4*nZ]
p5
= coefs[(nX+4*nZ):(nX+4*nZ+4)]
sigve = np.zeros((4, 4))
sigee = fill_lower_triangular_columnwise(matrix=sigee,
sigee =coefs[(nX+4*nZ+4):-2])
values= coefs[-2]
sig = coefs[-1]
lam
= sigee + sigee.T-np.diag(np.diag(sigee),0)
sigee
= np.concatenate([(Z@p1).reshape(-1,1), (Z@p2).reshape(-1,1),
zz @p3).reshape(-1,1), (Z@p5).reshape(-1,1)],
(Z=1)
axis= X[:,[1, 2, 3, 5]] - zz
etas
= np.concatenate([beta, sigve, np.array([sig]), np.array([lam])])
lnL1_4_2_input
= lnL1_4_2(lnL1_4_2_input, y, X, etas)
logL1 = lnL2_4_2(sigee, etas)
logL2
= logL1+logL2
den
return den
def AppLoglikelihood_LIML(coefs, y, X, Z):
= AppLogDen_LIML(coefs, y, X, Z)
den
= -np.sum(den)
logL
return logL
def AppEstimate_LIML(y, X, Xh, Z):
= [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
beta0 0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
=[-2.4855, 0.3611, 5.8886, 0.0099, -0.0004, 0.3254, 0.2840, 0.3871,
pi0 0.2345, 0.4246, 0.2559, -0.0259, 0.0354, -0.1946, 0.0931, 0.1653,
0.0441, -0.0607, -0.0133, -0.3113, -0.4158, -0.0276, 0.0979, -3.2824,
0.5151, 7.2669, -0.3733, -0.0010, 0.2991, 0.2543, 0.2994, 0.3148,
0.3958, 0.1797, 0.1222, 0.0900, -0.0557, 0.3615, 0.4272, 0.3492,
0.4799, 0.6147, 0.2772, 0.1449, 0.6324, 0.7493, -4.5369, 0.5496,
11.5406, -2.3140, -0.0002, 0.1831, 0.2777, 0.2762, 0.2423, 0.3778,
0.0484, -0.0042, 0.1155, -0.0888, 0.5038, 0.5813, 0.4908, 0.6293,
0.8405, 0.3581, 0.2583, 0.9450, 1.0859, -6.6371, 0.7891, 15.9134,
-0.5840, -0.0031, 0.0151, 0.1536, 0.1794, 0.2626, 0.0726, -0.0861,
-0.4394, 0.1696, -0.1905, 0.6295, 0.6861, 0.6209, 0.7861, 1.0791,
0.3271, -0.0266, 1.0086, 1.2396]
= [0.1171, 0.0641, 0.0736, 0.0832, 0.0815, 0.0921, 0.1065, 0.1664,
sigee0 0.1441, 0.3662]
= [-0.0198, -0.0163, -0.0032, -0.0134]
sigve0
= 0.19
sig0 = 1.27
lam0
= np.array(beta0 + pi0 + sigve0 + sigee0 + [sig0, lam0])
theta0
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
= minimize(
MLE_results =AppLoglikelihood_LIML,
fun=theta0,
x0="Nelder-Mead",
method= 1e-6,
tol ={"maxiter": 10000},
options=(y.values, X.values, Z.values),
args
)= MLE_results.x
theta
#standard errors
= 1e-10
delta = np.zeros((len(y), len(theta)))
grad for i in range(len(theta)):
= np.copy(theta)
theta1 = theta[i] + delta
theta1[i] = (AppLogDen_LIML(theta1, y.values, X.values, Z.values) - AppLogDen_LIML(theta, y.values, X.values, Z.values))/delta
grad[:,i]
= grad.T@grad
OPG
= np.diag(np.linalg.inv(OPG))
Vmle
return theta, Vmle
# load data
= pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
dairyfarms 'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
= dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data =True)
data.reset_index(inplace'ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
data[
#scaling to have unit means:
# scaling data
= np.log(data['MILK']) - np.log(np.mean(data['MILK']))
y = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
X
# create indicator matrix D
= pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
D
# create Z matrix
'COOP'] = data['COOP'].astype(int)
data[= pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
COOP_dummy
= pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
Z 'LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
X[[=1)
COOP_dummy], axis
= pd.concat([Z, D], axis=1)
Z
# compute regression coefficients and standard errors
= sm.OLS(X[['LABOR']].values, Z.values)
OLS1 = OLS1.fit()
results1 = results1.params
p1 = np.sqrt(np.diag(results1.cov_params()))
se1 = Z@p1
x1h
= sm.OLS(X[['COWS']].values, Z.values)
OLS2 = OLS2.fit()
results2 = results2.params
p2 = np.sqrt(np.diag(results2.cov_params()))
se2 = Z@p2
x2h
= sm.OLS(X[['FEED']].values, Z.values)
OLS3 = OLS3.fit()
results3 = results3.params
p3 = np.sqrt(np.diag(results3.cov_params()))
se3 = Z@p3
x3h
= sm.OLS(X[['LAND']].values, Z.values)
OLS4 = OLS4.fit()
results4 = results4.params
p4 = np.sqrt(np.diag(results4.cov_params()))
se4 = Z@p4
x4h
= sm.OLS(X[['ROUGHAGE']].values, Z.values)
OLS5 = OLS5.fit()
results5 = results5.params
p5 = np.sqrt(np.diag(results5.cov_params()))
se5 = Z@p5
x5h
= pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
Xh
= pd.concat([X, D], axis=1)
X
= AppEstimate_LIML(y, X, Xh, Z)
(LIML_coefs, LIML_vars)
= LIML_coefs[:17]
LIML_beta0 = LIML_coefs[-2]
LIML_sig0 = LIML_coefs[-1]
LIML_lam0 = (LIML_sig0/np.sqrt(1+LIML_lam0**2))**2
LIML_sigv = LIML_sig0**2-LIML_sigv
LIML_sigu = np.sqrt((2/np.pi)*LIML_sigu)
LIML_Eu = np.sqrt(LIML_vars)
LIML_ster = LIML_ster[:17]
LIML_ster_beta0 = LIML_ster[-2]
LIML_sig0_ster = LIML_ster[-1]
LIML_lam0_ster
#Display the results as a table
= pd.DataFrame(data = {'Est': np.concatenate([LIML_beta0,
results
np.array([LIML_sig0, LIML_lam0, LIML_sigu, LIML_sigv, LIML_Eu])], = 0),
axis 'St.Err':np.concatenate([LIML_ster_beta0, np.array([LIML_sig0_ster, LIML_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
= ["Const", "Labor", "Cows", "Feed",
index "Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
= results.round(4)
results display(results)
Est | St.Err | |
---|---|---|
Const | 0.0334 | 0.0268 |
Labor | 0.1269 | 0.0631 |
Cows | 0.9747 | 0.0811 |
Feed | 0.1066 | 0.0571 |
Land | -0.1136 | 0.0183 |
Rough | 0.0944 | 0.0279 |
D00 | 0.0118 | 0.0220 |
D01 | 0.0204 | 0.0239 |
D02 | 0.0346 | 0.0264 |
D03 | 0.0130 | 0.0268 |
D04 | 0.0384 | 0.0261 |
D05 | 0.0926 | 0.0379 |
D06 | 0.1326 | 0.0402 |
D07 | 0.1329 | 0.0411 |
D08 | 0.1147 | 0.0402 |
D09 | 0.1010 | 0.0430 |
D10 | 0.1270 | 0.0425 |
Sigma | 0.1895 | 0.0077 |
Lambda | 1.1973 | 0.1476 |
Sigma2_u | 0.0212 | NaN |
Sigma2_v | 0.0148 | NaN |
E(u) | 0.1161 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset
5.4 Exercise 5.4: LIML-2 of Spanish diary farm production model
This exercise implements the LIML-2 estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimize
def AppLogDen_LIML2(coefs, y, X, etas):
= X.shape[1]
nX
= coefs[:nX]
beta = coefs[-2]
sig = coefs[-1]
lam
= coefs[nX:-2]
sigve = 1/len(y)*(etas.T@etas)
sigee = (np.linalg.lstsq(sigee, sigve, rcond=None)[0])@sigve.T
correction
= lam/np.sqrt(1-(1+lam**2)/sig**2*correction)
lamc = np.sqrt(sig**2-correction)
sigc = (np.linalg.lstsq(sigee, sigve, rcond=None)[0])@etas.T
muci
= y - X@beta - muci.T
er = np.log(stats.norm.cdf(-lamc/sigc*er));
l3 = -0.5*np.log(sigc**2)-0.5*(1/(sigc**2)*(er**2))+l3
lnL1
return lnL1
def AppLoglikelihood_LIML2(coefs, y, X, etas):
= AppLogDen_LIML2(coefs, y, X, etas)
den
= -np.sum(den)
logL
return logL
def AppEstimate_LIML2(y, X, Xh, etas):
= [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
beta0 0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
= [0.0, 0.0, 0.0, 0.0]
sigve0
= 0.17
sig0 = 1.7
lam0
= np.array(beta0 + sigve0 + [sig0, lam0])
theta0
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
= minimize(
MLE_results =AppLoglikelihood_LIML2,
fun=theta0,
x0="Nelder-Mead",
method= 1e-6,
tol ={"maxiter": 5000},
options=(y.values, X.values, etas.values),
args
)= MLE_results.x
theta
#standard errors
= 1e-10
delta = np.zeros((len(y), len(theta)))
grad for i in range(len(theta)):
= np.copy(theta)
theta1 = theta[i] + delta
theta1[i] = (AppLogDen_LIML2(theta1, y.values, X.values, etas.values) - AppLogDen_LIML2(theta, y.values, X.values, etas.values))/delta
grad[:,i]
= grad.T@grad
OPG
= np.diag(np.linalg.inv(OPG))
Vmle
return theta, Vmle
# load data
= pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
dairyfarms 'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
= dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data =True)
data.reset_index(inplace'ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
data[
#scaling to have unit means:
# scaling data
= np.log(data['MILK']) - np.log(np.mean(data['MILK']))
y = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
X
# create indicator matrix D
= pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
D
# create Z matrix
'COOP'] = data['COOP'].astype(int)
data[= pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
COOP_dummy
= pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
Z 'LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
X[[=1)
COOP_dummy], axis
= pd.concat([Z, D], axis=1)
Z
# compute regression coefficients and standard errors
= sm.OLS(X[['LABOR']].values, Z.values)
OLS1 = OLS1.fit()
results1 = results1.params
p1 = np.sqrt(np.diag(results1.cov_params()))
se1 = Z@p1
x1h = X['LABOR'] - x1h
etah1
= sm.OLS(X[['COWS']].values, Z.values)
OLS2 = OLS2.fit()
results2 = results2.params
p2 = np.sqrt(np.diag(results2.cov_params()))
se2 = Z@p2
x2h = X['COWS'] - x2h
etah2
= sm.OLS(X[['FEED']].values, Z.values)
OLS3 = OLS3.fit()
results3 = results3.params
p3 = np.sqrt(np.diag(results3.cov_params()))
se3 = Z@p3
x3h = X['FEED'] - x3h
etah3
= sm.OLS(X[['LAND']].values, Z.values)
OLS4 = OLS4.fit()
results4 = results4.params
p4 = np.sqrt(np.diag(results4.cov_params()))
se4 = Z@p4
x4h = X['LAND'] - x4h
etah4
= sm.OLS(X[['ROUGHAGE']].values, Z.values)
OLS5 = OLS5.fit()
results5 = results5.params
p5 = np.sqrt(np.diag(results5.cov_params()))
se5 = Z@p5
x5h = X['ROUGHAGE'] - x5h
etah5
= pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
Xh
= pd.concat([X, D], axis=1)
X
= pd.concat([etah1, etah2, etah3, etah5], axis=1)
etas
= AppEstimate_LIML2(y, X, Xh, etas)
(LIML2_coefs, LIML2_vars)
= LIML2_coefs[:17]
LIML2_beta0 = LIML2_coefs[-2]
LIML2_sig0 = LIML2_coefs[-1]
LIML2_lam0 = (LIML2_sig0/np.sqrt(1+LIML2_lam0**2))**2
LIML2_sigv = LIML2_sig0**2-LIML2_sigv
LIML2_sigu = np.sqrt((2/np.pi)*LIML2_sigu)
LIML2_Eu = np.sqrt(LIML2_vars)
LIML2_ster = LIML2_ster[:17]
LIML2_ster_beta0 = LIML2_ster[-2]
LIML2_sig0_ster = LIML2_ster[-1]
LIML2_lam0_ster
#Display the results as a table
= pd.DataFrame(data = {'Est': np.concatenate([LIML2_beta0,
results
np.array([LIML2_sig0, LIML2_lam0, LIML2_sigu, LIML2_sigv, LIML2_Eu])], = 0),
axis 'St.Err':np.concatenate([LIML2_ster_beta0, np.array([LIML2_sig0_ster, LIML2_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
= ["Const", "Labor", "Cows", "Feed",
index "Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
= results.round(4)
results display(results)
Est | St.Err | |
---|---|---|
Const | 0.0212 | 0.0192 |
Labor | 0.1392 | 0.0451 |
Cows | 0.8295 | 0.0615 |
Feed | 0.1731 | 0.0242 |
Land | -0.0652 | 0.0128 |
Rough | 0.0883 | 0.0182 |
D00 | -0.0037 | 0.0156 |
D01 | 0.0365 | 0.0174 |
D02 | 0.0445 | 0.0190 |
D03 | 0.0733 | 0.0206 |
D04 | 0.0636 | 0.0196 |
D05 | 0.1210 | 0.0277 |
D06 | 0.1564 | 0.0292 |
D07 | 0.1546 | 0.0294 |
D08 | 0.1377 | 0.0284 |
D09 | 0.1225 | 0.0306 |
D10 | 0.1470 | 0.0298 |
Sigma | 0.1848 | 0.0057 |
Lambda | 1.4775 | 0.1424 |
Sigma2_u | 0.0234 | NaN |
Sigma2_v | 0.0107 | NaN |
E(u) | 0.1221 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset