import pandas as pd
import numpy as np
import statsmodels.api as sm5 Chapter 5: Modelling Endogeneity in Stochastic Frontier Models
5.1 Exercise 5.1: C2SLS of Spanish diary farm production model.
This exercise implements the C2SLS estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
# load data
dairyfarms = pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
dairyfarms = dairyfarms.astype(float)
# Drop farms with zero roughage
data = dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data.reset_index(inplace=True)
data['ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
# scaling data
y = np.log(data['MILK']) - np.log(np.mean(data['MILK']))
X = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.log(np.mean(X))
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
# create indicator matrix D
D = pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
# create Z matrix
data['COOP'] = data['COOP'].astype(int)
COOP_dummy = pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
Z = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
X[['LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
COOP_dummy], axis=1)
Z = pd.concat([Z, D], axis=1)
# compute regression coefficients and standard errors
OLS1 = sm.OLS(X[['LABOR']].values, Z.values)
results1 = OLS1.fit()
p1 = results1.params
se1 = np.sqrt(np.diag(results1.cov_params()))
x1h = Z@p1
OLS2 = sm.OLS(X[['COWS']].values, Z.values)
results2 = OLS2.fit()
p2 = results2.params
se2 = np.sqrt(np.diag(results2.cov_params()))
x2h = Z@p2
OLS3 = sm.OLS(X[['FEED']].values, Z.values)
results3 = OLS3.fit()
p3 = results3.params
se3 = np.sqrt(np.diag(results3.cov_params()))
x3h = Z@p3
OLS4 = sm.OLS(X[['LAND']].values, Z.values)
results4 = OLS4.fit()
p4 = results4.params
se4 = np.sqrt(np.diag(results4.cov_params()))
x4h = Z@p4
OLS5 = sm.OLS(X[['ROUGHAGE']].values, Z.values)
results5 = OLS5.fit()
p5 = results5.params
se5 = np.sqrt(np.diag(results5.cov_params()))
x5h = Z@p5
Xh = pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
OLS_Xh = sm.OLS(y.values, Xh.values)
results_Xh = OLS_Xh.fit()
ls2 = results_Xh.params
se2s = np.sqrt(np.diag(results_Xh.cov_params()))
uh2s = y.values - (pd.concat([X,D], axis=1)@ls2).values
# under homoskedastity
v1 = 1/(len(y)-Xh.shape[1])*np.sum(uh2s**2)*np.linalg.inv((Xh.T@Xh))
# under heteroskedasticity
vv = np.zeros((Xh.shape[1], Xh.shape[1]))
for i in range(len(y)):
uh2si = uh2s[i]**2
vv += uh2si*Xh.values[i,:].reshape(-1,1)@Xh.values[i,:].reshape(-1,1).T
v2 = len(y)/(len(y)-Xh.shape[1])*np.linalg.inv(Xh.values.T@Xh.values)@vv@np.linalg.inv(Xh.values.T@Xh.values)
correct_se2s_NCH = np.sqrt(np.diag(v1))
correct_se2s_noNCH = np.sqrt(np.diag(v2))
mu3h2SLS = np.mean(uh2s**3)
sige2h2SLS = np.mean(uh2s**2)
sigu2h2SLS = (np.pi/(np.pi - 4)*np.sqrt(np.pi/2)*mu3h2SLS)**(2/3)*(mu3h2SLS<0)
alphah2SLS = ls2[0] + np.sqrt(2/np.pi*sigu2h2SLS)
sigv2h2SLS = sige2h2SLS-(np.pi-2)/np.pi*sigu2h2SLS
siguh2SLS = np.sqrt(sigu2h2SLS)
sigvh2SLS = np.sqrt(sigv2h2SLS)
lamh2SLS = siguh2SLS/sigvh2SLS
sigh2SLS = np.sqrt(sigu2h2SLS+sigv2h2SLS)
C2SLS_beta0 = np.concatenate(([alphah2SLS], ls2[1:]))
C2SLS_sig0 = sigh2SLS
C2SLS_lam0 = lamh2SLS
C2SLS_sigu = sigu2h2SLS
C2SLS_sigv = sigv2h2SLS
C2SLS_ster_beta0 = correct_se2s_NCH
C2SLS_Eu = np.sqrt((2 / np.pi) * sigu2h2SLS)
#Display the results as a table
results = pd.DataFrame(data = {'Est': np.concatenate([C2SLS_beta0, np.array([C2SLS_sig0, C2SLS_lam0, C2SLS_sigu, C2SLS_sigv, C2SLS_Eu])], axis = 0),
'St.Err':np.concatenate([C2SLS_ster_beta0, np.array([np.nan, np.nan, np.nan, np.nan, np.nan])], axis = 0)},
index = ["Const", "Labor", "Cows", "Feed",
"Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
results = results.round(4)
display(results)/Users/jleu0010/Documents/GitHub/EPA-Copula-SFA-website/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'
return mean(axis=axis, dtype=dtype, out=out, **kwargs)
| Est | St.Err | |
|---|---|---|
| Const | 0.0679 | 0.0255 |
| Labor | 0.1246 | 0.0628 |
| Cows | 0.9704 | 0.0827 |
| Feed | 0.0889 | 0.0472 |
| Land | -0.1136 | 0.0177 |
| Rough | 0.1108 | 0.0260 |
| D00 | 0.0093 | 0.0202 |
| D01 | 0.0256 | 0.0221 |
| D02 | 0.0406 | 0.0244 |
| D03 | 0.0163 | 0.0250 |
| D04 | 0.0430 | 0.0256 |
| D05 | 0.1005 | 0.0371 |
| D06 | 0.1320 | 0.0401 |
| D07 | 0.1294 | 0.0412 |
| D08 | 0.1135 | 0.0403 |
| D09 | 0.1031 | 0.0419 |
| D10 | 0.1194 | 0.0405 |
| Sigma | 0.2167 | NaN |
| Lambda | 1.7735 | NaN |
| Sigma2_u | 0.0356 | NaN |
| Sigma2_v | 0.0113 | NaN |
| E(u) | 0.1506 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset5.2 Exercise 5.2 GMM of Spanish diary farm production model
This exercise implements the GMM estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimizedef AppGMM(coefs, y, X, Z):
nX = X.shape[1]
beta = coefs[:nX]
sig = coefs[-2]
lam = coefs[-1]
er = y - X@beta
m1 = -1 +1/sig**2*er**2;
pdf = stats.norm.pdf(lam/sig*er)
cdf = stats.norm.cdf(lam/sig*er)
m2 = er*pdf/(1-cdf)
m3 = np.zeros((len(y), Z.shape[1]))
for i in range(Z.shape[1]):
m3[:, i] = 1/sig*Z[:, i]*er + lam*Z[:, i]*pdf/(1-cdf)
mbar = np.concatenate([np.array([np.mean(m1)]),
np.array([np.mean(m2)]),
np.mean(m3, axis=0)])
mom = np.concatenate([m1.reshape(-1,1), m2.reshape(-1,1), m3], axis=1)
C = mom.T@mom/len(y)
W = np.linalg.inv(C)
Q = mbar@W@mbar.T
return Q
def m(coefs, y, X, Z):
nX = X.shape[1]
beta = coefs[:nX]
sig = coefs[-2]
lam = coefs[-1]
er = y - X@beta
m1 = -1 +1/sig**2*er**2;
pdf = stats.norm.pdf(lam/sig*er)
cdf = stats.norm.cdf(lam/sig*er)
m2 = er*pdf/(1-cdf)
m3 = np.zeros((len(y), Z.shape[1]))
for i in range(Z.shape[1]):
m3[:, i] = 1/sig*Z[:, i]*er + lam*Z[:, i]*pdf/(1-cdf)
m = np.concatenate([np.array([np.mean(m1)]),
np.array([np.mean(m2)]),
np.mean(m3, axis=0)])
return m
def AppEstimate_GMM(y, X, Xh, Z):
beta0 = [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
sig0 = 0.21
lam0 = 1.81
theta0 = np.array(beta0 + [sig0, lam0])
bounds = [(None, None) for x in range(len(theta0) - 2)] + [
(1e-6, np.inf),
(1e-6, np.inf),
]
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
# L-BFGS-B method obtaines a better solution (smaller values of the objective). However, this solution is different to te MATLAB solution
MLE_results = minimize(
fun=AppGMM,
x0=theta0,
method="nelder-mead",
tol = 1e-6,
options={"maxiter": 1000},
args=(y.values, X.values, Z.values),
bounds=bounds,
)
theta = MLE_results.x
nX = Xh.shape[1]
beta = theta[:nX]
sig = theta[-2]
lam = theta[-1]
er = y.values - X.values@beta
m1 = -1 +1/sig**2*er**2;
pdf = stats.norm.pdf(lam/sig*er)
cdf = stats.norm.cdf(lam/sig*er);
m2 = er*pdf/(1-cdf)
m3 = np.zeros((len(y), Z.shape[1]))
for i in range(Z.shape[1]):
m3[:, i] = 1/sig*Z.values[:, i]*er + lam*Z.values[:, i]*pdf/(1-cdf)
mom = np.concatenate([m1.reshape(-1,1), m2.reshape(-1,1), m3], axis=1)
C = mom.T@mom/len(y)
W = np.linalg.inv(C)
# standard errors
delta = 1e-1
grad = np.zeros((25, len(theta)))
for i in range(len(theta)):
theta1 = np.copy(theta)
theta1[i] += delta
grad[:,i] = (m(theta1, y.values, X.values, Z.values) - m(theta, y.values, X.values, Z.values))/delta
OPG = grad.T@W@grad
Vmle = np.diag(np.linalg.inv(OPG)/len(y))
return theta, Vmle# load data
dairyfarms = pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
data = dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data.reset_index(inplace=True)
data['ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
#scaling to have unit means:
# scaling data
y = np.log(data['MILK']) - np.log(np.mean(data['MILK']))
X = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
# create indicator matrix D
D = pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
# create Z matrix
data['COOP'] = data['COOP'].astype(int)
COOP_dummy = pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
Z = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
X[['LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
COOP_dummy], axis=1)
Z = pd.concat([Z, D], axis=1)
# compute regression coefficients and standard errors
OLS1 = sm.OLS(X[['LABOR']].values, Z.values)
results1 = OLS1.fit()
p1 = results1.params
se1 = np.sqrt(np.diag(results1.cov_params()))
x1h = Z@p1
OLS2 = sm.OLS(X[['COWS']].values, Z.values)
results2 = OLS2.fit()
p2 = results2.params
se2 = np.sqrt(np.diag(results2.cov_params()))
x2h = Z@p2
OLS3 = sm.OLS(X[['FEED']].values, Z.values)
results3 = OLS3.fit()
p3 = results3.params
se3 = np.sqrt(np.diag(results3.cov_params()))
x3h = Z@p3
OLS4 = sm.OLS(X[['LAND']].values, Z.values)
results4 = OLS4.fit()
p4 = results4.params
se4 = np.sqrt(np.diag(results4.cov_params()))
x4h = Z@p4
OLS5 = sm.OLS(X[['ROUGHAGE']].values, Z.values)
results5 = OLS5.fit()
p5 = results5.params
se5 = np.sqrt(np.diag(results5.cov_params()))
x5h = Z@p5
Xh = pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
X = pd.concat([X, D], axis=1)
[GMM_coefs, GMM_vars] = AppEstimate_GMM(y, X, Xh, Z)
GMM_beta0 = GMM_coefs[:-2]
GMM_sig0 = GMM_coefs[-2]
GMM_lam0 = GMM_coefs[-1]
GMM_sigv = (GMM_sig0/np.sqrt(1+GMM_lam0**2))**2
GMM_sigu = GMM_sig0**2-GMM_sigv
GMM_Eu = np.sqrt((2/np.pi)*GMM_sigu)
GMM_ster = np.sqrt(GMM_vars)
GMM_ster_beta0 = GMM_ster[:-2]
GMM_sig0_ster = GMM_ster[-2]
GMM_lam0_ster = GMM_ster[-1]
#Display the results as a table
results = pd.DataFrame(data = {'Est': np.concatenate([GMM_beta0,
np.array([GMM_sig0, GMM_lam0, GMM_sigu, GMM_sigv, GMM_Eu])],
axis = 0),
'St.Err':np.concatenate([GMM_ster_beta0, np.array([GMM_sig0_ster, GMM_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
index = ["Const", "Labor", "Cows", "Feed",
"Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
results = results.round(4)
display(results)| Est | St.Err | |
|---|---|---|
| Const | 0.0485 | 0.0249 |
| Labor | 0.2545 | 0.0528 |
| Cows | 0.8541 | 0.0698 |
| Feed | 0.0506 | 0.0401 |
| Land | -0.1031 | 0.0163 |
| Rough | 0.1336 | 0.0212 |
| D00 | 0.0015 | 0.0225 |
| D01 | -0.0152 | 0.0223 |
| D02 | 0.0284 | 0.0228 |
| D03 | 0.0127 | 0.0243 |
| D04 | 0.0219 | 0.0257 |
| D05 | 0.1292 | 0.0346 |
| D06 | 0.1670 | 0.0374 |
| D07 | 0.2048 | 0.0379 |
| D08 | 0.1640 | 0.0377 |
| D09 | 0.1584 | 0.0386 |
| D10 | 0.1732 | 0.0367 |
| Sigma | 0.2200 | 0.0320 |
| Lambda | 1.7594 | 0.1766 |
| Sigma2_u | 0.0366 | NaN |
| Sigma2_v | 0.0118 | NaN |
| E(u) | 0.1526 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset5.3 Exercise 5.3: LIML-1 of Spanish diary farm production model
This exercise implements the LIML-1 estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimizedef fill_lower_triangular_columnwise(matrix, values):
val_index = 0
for j in range(matrix.shape[1]):
for i in range(j, matrix.shape[0]):
if val_index < len(values):
matrix[i, j] = values[val_index]
val_index += 1
return matrix
def lnL1_4_2(coefs, y, X, etas):
nX = X.shape[1]
beta = coefs[:nX]
sig = coefs[-2]
lam = coefs[-1]
sigve = coefs[nX:-2]
sigee = 1/len(y)*(etas.T@etas)
correction = (np.linalg.lstsq(sigee, sigve.T, rcond=None)[0])@sigve.T
lamc = lam/np.sqrt(1-(1+lam**2)/sig**2*correction)
sigc = np.sqrt(sig**2-correction)
muci = (np.linalg.lstsq(sigee, sigve.T, rcond=None)[0])@etas.T
er = y - X@beta - muci
l3 = np.log(stats.norm.cdf(-lamc/sigc*er))
lnL1 = -0.5*np.log(sigc**2)-0.5*(1/(sigc**2)*(er**2))+l3
return lnL1
def lnL2_4_2(sigee, etas):
L2 = np.zeros(len(etas))
for i in range(len(etas)):
np.linalg.lstsq(sigee, etas[i,:], rcond=None)[0]
L2[i] = -0.5*np.log(np.linalg.det(sigee))-0.5*np.linalg.lstsq(sigee, etas[i,:], rcond=None)[0]@etas[i,:].T
return L2
def AppLogDen_LIML(coefs, y, X, Z):
nX = X.shape[1]
nZ = Z.shape[1]
beta = coefs[:nX]
p1 = coefs[nX:nX+nZ]
p2 = coefs[nX+nZ:nX+2*nZ]
p3 = coefs[nX+2*nZ:nX+3*nZ]
p5 = coefs[nX+3*nZ:nX+4*nZ]
sigve = coefs[(nX+4*nZ):(nX+4*nZ+4)]
sigee = np.zeros((4, 4))
sigee = fill_lower_triangular_columnwise(matrix=sigee,
values=coefs[(nX+4*nZ+4):-2])
sig = coefs[-2]
lam = coefs[-1]
sigee = sigee + sigee.T-np.diag(np.diag(sigee),0)
zz = np.concatenate([(Z@p1).reshape(-1,1), (Z@p2).reshape(-1,1),
(Z@p3).reshape(-1,1), (Z@p5).reshape(-1,1)],
axis=1)
etas = X[:,[1, 2, 3, 5]] - zz
lnL1_4_2_input = np.concatenate([beta, sigve, np.array([sig]), np.array([lam])])
logL1 = lnL1_4_2(lnL1_4_2_input, y, X, etas)
logL2 = lnL2_4_2(sigee, etas)
den = logL1+logL2
return den
def AppLoglikelihood_LIML(coefs, y, X, Z):
den = AppLogDen_LIML(coefs, y, X, Z)
logL = -np.sum(den)
return logL
def AppEstimate_LIML(y, X, Xh, Z):
beta0 = [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
pi0 =[-2.4855, 0.3611, 5.8886, 0.0099, -0.0004, 0.3254, 0.2840, 0.3871,
0.2345, 0.4246, 0.2559, -0.0259, 0.0354, -0.1946, 0.0931, 0.1653,
0.0441, -0.0607, -0.0133, -0.3113, -0.4158, -0.0276, 0.0979, -3.2824,
0.5151, 7.2669, -0.3733, -0.0010, 0.2991, 0.2543, 0.2994, 0.3148,
0.3958, 0.1797, 0.1222, 0.0900, -0.0557, 0.3615, 0.4272, 0.3492,
0.4799, 0.6147, 0.2772, 0.1449, 0.6324, 0.7493, -4.5369, 0.5496,
11.5406, -2.3140, -0.0002, 0.1831, 0.2777, 0.2762, 0.2423, 0.3778,
0.0484, -0.0042, 0.1155, -0.0888, 0.5038, 0.5813, 0.4908, 0.6293,
0.8405, 0.3581, 0.2583, 0.9450, 1.0859, -6.6371, 0.7891, 15.9134,
-0.5840, -0.0031, 0.0151, 0.1536, 0.1794, 0.2626, 0.0726, -0.0861,
-0.4394, 0.1696, -0.1905, 0.6295, 0.6861, 0.6209, 0.7861, 1.0791,
0.3271, -0.0266, 1.0086, 1.2396]
sigee0 = [0.1171, 0.0641, 0.0736, 0.0832, 0.0815, 0.0921, 0.1065, 0.1664,
0.1441, 0.3662]
sigve0 = [-0.0198, -0.0163, -0.0032, -0.0134]
sig0 = 0.19
lam0 = 1.27
theta0 = np.array(beta0 + pi0 + sigve0 + sigee0 + [sig0, lam0])
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
MLE_results = minimize(
fun=AppLoglikelihood_LIML,
x0=theta0,
method="Nelder-Mead",
tol = 1e-6,
options={"maxiter": 10000},
args=(y.values, X.values, Z.values),
)
theta = MLE_results.x
#standard errors
delta = 1e-10
grad = np.zeros((len(y), len(theta)))
for i in range(len(theta)):
theta1 = np.copy(theta)
theta1[i] = theta[i] + delta
grad[:,i] = (AppLogDen_LIML(theta1, y.values, X.values, Z.values) - AppLogDen_LIML(theta, y.values, X.values, Z.values))/delta
OPG = grad.T@grad
Vmle = np.diag(np.linalg.inv(OPG))
return theta, Vmle# load data
dairyfarms = pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
data = dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data.reset_index(inplace=True)
data['ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
#scaling to have unit means:
# scaling data
y = np.log(data['MILK']) - np.log(np.mean(data['MILK']))
X = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
# create indicator matrix D
D = pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
# create Z matrix
data['COOP'] = data['COOP'].astype(int)
COOP_dummy = pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
Z = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
X[['LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
COOP_dummy], axis=1)
Z = pd.concat([Z, D], axis=1)
# compute regression coefficients and standard errors
OLS1 = sm.OLS(X[['LABOR']].values, Z.values)
results1 = OLS1.fit()
p1 = results1.params
se1 = np.sqrt(np.diag(results1.cov_params()))
x1h = Z@p1
OLS2 = sm.OLS(X[['COWS']].values, Z.values)
results2 = OLS2.fit()
p2 = results2.params
se2 = np.sqrt(np.diag(results2.cov_params()))
x2h = Z@p2
OLS3 = sm.OLS(X[['FEED']].values, Z.values)
results3 = OLS3.fit()
p3 = results3.params
se3 = np.sqrt(np.diag(results3.cov_params()))
x3h = Z@p3
OLS4 = sm.OLS(X[['LAND']].values, Z.values)
results4 = OLS4.fit()
p4 = results4.params
se4 = np.sqrt(np.diag(results4.cov_params()))
x4h = Z@p4
OLS5 = sm.OLS(X[['ROUGHAGE']].values, Z.values)
results5 = OLS5.fit()
p5 = results5.params
se5 = np.sqrt(np.diag(results5.cov_params()))
x5h = Z@p5
Xh = pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
X = pd.concat([X, D], axis=1)
(LIML_coefs, LIML_vars) = AppEstimate_LIML(y, X, Xh, Z)
LIML_beta0 = LIML_coefs[:17]
LIML_sig0 = LIML_coefs[-2]
LIML_lam0 = LIML_coefs[-1]
LIML_sigv = (LIML_sig0/np.sqrt(1+LIML_lam0**2))**2
LIML_sigu = LIML_sig0**2-LIML_sigv
LIML_Eu = np.sqrt((2/np.pi)*LIML_sigu)
LIML_ster = np.sqrt(LIML_vars)
LIML_ster_beta0 = LIML_ster[:17]
LIML_sig0_ster = LIML_ster[-2]
LIML_lam0_ster = LIML_ster[-1]
#Display the results as a table
results = pd.DataFrame(data = {'Est': np.concatenate([LIML_beta0,
np.array([LIML_sig0, LIML_lam0, LIML_sigu, LIML_sigv, LIML_Eu])],
axis = 0),
'St.Err':np.concatenate([LIML_ster_beta0, np.array([LIML_sig0_ster, LIML_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
index = ["Const", "Labor", "Cows", "Feed",
"Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
results = results.round(4)
display(results)| Est | St.Err | |
|---|---|---|
| Const | 0.0334 | 0.0268 |
| Labor | 0.1269 | 0.0631 |
| Cows | 0.9747 | 0.0811 |
| Feed | 0.1066 | 0.0571 |
| Land | -0.1136 | 0.0183 |
| Rough | 0.0944 | 0.0279 |
| D00 | 0.0118 | 0.0220 |
| D01 | 0.0204 | 0.0239 |
| D02 | 0.0346 | 0.0264 |
| D03 | 0.0130 | 0.0268 |
| D04 | 0.0384 | 0.0261 |
| D05 | 0.0926 | 0.0379 |
| D06 | 0.1326 | 0.0402 |
| D07 | 0.1329 | 0.0411 |
| D08 | 0.1147 | 0.0402 |
| D09 | 0.1010 | 0.0430 |
| D10 | 0.1270 | 0.0425 |
| Sigma | 0.1895 | 0.0077 |
| Lambda | 1.1973 | 0.1476 |
| Sigma2_u | 0.0212 | NaN |
| Sigma2_v | 0.0148 | NaN |
| E(u) | 0.1161 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset5.4 Exercise 5.4: LIML-2 of Spanish diary farm production model
This exercise implements the LIML-2 estimator for a sample of 137 milk farms in Northern Spain during 1999-2010.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimizedef AppLogDen_LIML2(coefs, y, X, etas):
nX = X.shape[1]
beta = coefs[:nX]
sig = coefs[-2]
lam = coefs[-1]
sigve = coefs[nX:-2]
sigee = 1/len(y)*(etas.T@etas)
correction = (np.linalg.lstsq(sigee, sigve, rcond=None)[0])@sigve.T
lamc = lam/np.sqrt(1-(1+lam**2)/sig**2*correction)
sigc = np.sqrt(sig**2-correction)
muci = (np.linalg.lstsq(sigee, sigve, rcond=None)[0])@etas.T
er = y - X@beta - muci.T
l3 = np.log(stats.norm.cdf(-lamc/sigc*er));
lnL1 = -0.5*np.log(sigc**2)-0.5*(1/(sigc**2)*(er**2))+l3
return lnL1
def AppLoglikelihood_LIML2(coefs, y, X, etas):
den = AppLogDen_LIML2(coefs, y, X, etas)
logL = -np.sum(den)
return logL
def AppEstimate_LIML2(y, X, Xh, etas):
beta0 = [0.0368, 0.1169, 0.9685, 0.1156, -0.1047,
0.0911, 0.0096, 0.0244, 0.0354,
0.0132, 0.0423, 0.0933, 0.1305, 0.1302,
0.1151, 0.1025, 0.1222]
sigve0 = [0.0, 0.0, 0.0, 0.0]
sig0 = 0.17
lam0 = 1.7
theta0 = np.array(beta0 + sigve0 + [sig0, lam0])
# Minimize the negative log-likelihood using numerical optimization.
# nelder-mead method obtains a solution closer to the MATLAB solution
MLE_results = minimize(
fun=AppLoglikelihood_LIML2,
x0=theta0,
method="Nelder-Mead",
tol = 1e-6,
options={"maxiter": 5000},
args=(y.values, X.values, etas.values),
)
theta = MLE_results.x
#standard errors
delta = 1e-10
grad = np.zeros((len(y), len(theta)))
for i in range(len(theta)):
theta1 = np.copy(theta)
theta1[i] = theta[i] + delta
grad[:,i] = (AppLogDen_LIML2(theta1, y.values, X.values, etas.values) - AppLogDen_LIML2(theta, y.values, X.values, etas.values))/delta
OPG = grad.T@grad
Vmle = np.diag(np.linalg.inv(OPG))
return theta, Vmle# load data
dairyfarms = pd.read_excel('spanishdairy.xls', usecols=['DEPREC', 'FARMEXP', 'MILK',
'LABOR', 'COWS', 'FEED', 'LAND',
'YEAR', 'COOP', 'PFEED', 'PMILK', 'LANDOWN'])
# Drop farms with zero roughage
data = dairyfarms[(dairyfarms['DEPREC'] != 0) & (dairyfarms['FARMEXP'] != 0)].copy()
data.reset_index(inplace=True)
data['ROUGHAGE'] = data['FARMEXP'] + data['DEPREC']
#scaling to have unit means:
# scaling data
y = np.log(data['MILK']) - np.log(np.mean(data['MILK']))
X = data[['LABOR', 'COWS', 'FEED', 'LAND', 'ROUGHAGE']]
X = np.log(X) - np.repeat(np.log(np.mean(X.values, axis=0)).reshape(1,-1), len(y), axis=0)
X = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}), X], axis=1)
# create indicator matrix D
D = pd.get_dummies(data['YEAR'], prefix='YEAR', drop_first=True, dtype=int)
# create Z matrix
data['COOP'] = data['COOP'].astype(int)
COOP_dummy = pd.get_dummies(data['COOP'], prefix='COOP', drop_first=False, dtype=int)
COOP_dummy = COOP_dummy[['COOP_303', 'COOP_304', 'COOP_306', 'COOP_313', 'COOP_307', 'COOP_308', 'COOP_310']]
Z = pd.concat([pd.DataFrame(data={'CONST': np.ones((len(X)))}),
X[['LAND']], data[['PMILK', 'PFEED', 'LANDOWN']],
COOP_dummy], axis=1)
Z = pd.concat([Z, D], axis=1)
# compute regression coefficients and standard errors
OLS1 = sm.OLS(X[['LABOR']].values, Z.values)
results1 = OLS1.fit()
p1 = results1.params
se1 = np.sqrt(np.diag(results1.cov_params()))
x1h = Z@p1
etah1 = X['LABOR'] - x1h
OLS2 = sm.OLS(X[['COWS']].values, Z.values)
results2 = OLS2.fit()
p2 = results2.params
se2 = np.sqrt(np.diag(results2.cov_params()))
x2h = Z@p2
etah2 = X['COWS'] - x2h
OLS3 = sm.OLS(X[['FEED']].values, Z.values)
results3 = OLS3.fit()
p3 = results3.params
se3 = np.sqrt(np.diag(results3.cov_params()))
x3h = Z@p3
etah3 = X['FEED'] - x3h
OLS4 = sm.OLS(X[['LAND']].values, Z.values)
results4 = OLS4.fit()
p4 = results4.params
se4 = np.sqrt(np.diag(results4.cov_params()))
x4h = Z@p4
etah4 = X['LAND'] - x4h
OLS5 = sm.OLS(X[['ROUGHAGE']].values, Z.values)
results5 = OLS5.fit()
p5 = results5.params
se5 = np.sqrt(np.diag(results5.cov_params()))
x5h = Z@p5
etah5 = X['ROUGHAGE'] - x5h
Xh = pd.concat([X['CONST'], x1h, x2h, x3h, x4h, x5h, D], axis = 1)
X = pd.concat([X, D], axis=1)
etas = pd.concat([etah1, etah2, etah3, etah5], axis=1)
(LIML2_coefs, LIML2_vars) = AppEstimate_LIML2(y, X, Xh, etas)
LIML2_beta0 = LIML2_coefs[:17]
LIML2_sig0 = LIML2_coefs[-2]
LIML2_lam0 = LIML2_coefs[-1]
LIML2_sigv = (LIML2_sig0/np.sqrt(1+LIML2_lam0**2))**2
LIML2_sigu = LIML2_sig0**2-LIML2_sigv
LIML2_Eu = np.sqrt((2/np.pi)*LIML2_sigu)
LIML2_ster = np.sqrt(LIML2_vars)
LIML2_ster_beta0 = LIML2_ster[:17]
LIML2_sig0_ster = LIML2_ster[-2]
LIML2_lam0_ster = LIML2_ster[-1]
#Display the results as a table
results = pd.DataFrame(data = {'Est': np.concatenate([LIML2_beta0,
np.array([LIML2_sig0, LIML2_lam0, LIML2_sigu, LIML2_sigv, LIML2_Eu])],
axis = 0),
'St.Err':np.concatenate([LIML2_ster_beta0, np.array([LIML2_sig0_ster, LIML2_lam0_ster]), np.array([np.nan, np.nan, np.nan])], axis = 0)},
index = ["Const", "Labor", "Cows", "Feed",
"Land", "Rough", "D00", "D01", "D02",
"D03", "D04", "D05", "D06", "D07",
"D08", "D09", "D10", "Sigma",
"Lambda", "Sigma2_u", "Sigma2_v", "E(u)"])
results = results.round(4)
display(results)| Est | St.Err | |
|---|---|---|
| Const | 0.0212 | 0.0192 |
| Labor | 0.1392 | 0.0451 |
| Cows | 0.8295 | 0.0615 |
| Feed | 0.1731 | 0.0242 |
| Land | -0.0652 | 0.0128 |
| Rough | 0.0883 | 0.0182 |
| D00 | -0.0037 | 0.0156 |
| D01 | 0.0365 | 0.0174 |
| D02 | 0.0445 | 0.0190 |
| D03 | 0.0733 | 0.0206 |
| D04 | 0.0636 | 0.0196 |
| D05 | 0.1210 | 0.0277 |
| D06 | 0.1564 | 0.0292 |
| D07 | 0.1546 | 0.0294 |
| D08 | 0.1377 | 0.0284 |
| D09 | 0.1225 | 0.0306 |
| D10 | 0.1470 | 0.0298 |
| Sigma | 0.1848 | 0.0057 |
| Lambda | 1.4775 | 0.1424 |
| Sigma2_u | 0.0234 | NaN |
| Sigma2_v | 0.0107 | NaN |
| E(u) | 0.1221 | NaN |
# Remove all previous function and variable definitions before the next exercise
%reset