3  Chapter 3: Basics of stochastic frontier analysis for panel data

3.1 Exercise 3.1: Within and Between Summary Statistics

This exercise reports summary statistics for the overall, between, and within variation of log GDP from the WBpanel.csv data, as well as the correlations of log GDP with its first, second, and third lags. The between variation is that of the country means of log GDP; the within variation is that of the deviations from those means, with the overall mean added back so the within series stays centered on the overall mean.

import numpy as np
import pandas as pd

WBpanel = pd.read_csv("WBpanel.csv")

overall_stats = [
    np.mean(WBpanel["lny"]),
    np.std(WBpanel["lny"]),
    np.min(WBpanel["lny"]),
    np.max(WBpanel["lny"]),
]

# Within and between variation for log GDP
group_means = WBpanel.groupby(["country"], as_index=False)["lny"].mean()
group_means.rename(columns={"lny": "lny_bar"}, inplace=True)
WBpanel = WBpanel.merge(group_means, how="left", on=["country"])
within_lny_data = WBpanel["lny"] - WBpanel["lny_bar"] + np.mean(WBpanel["lny"])
within_stats = [
    None,
    np.std(within_lny_data),
    np.min(within_lny_data),
    np.max(within_lny_data),
]

between_stats = [
    None,
    np.std(group_means["lny_bar"]),
    np.min(group_means["lny_bar"]),
    np.max(group_means["lny_bar"]),
]

all_stats = np.array([overall_stats, between_stats, within_stats])
panel_summary_statistics = pd.DataFrame(
    all_stats,
    columns=["Mean", "Std. Dev", "Min", "Max"],
    index=["Overall", "Between", "Within"],
)
display(panel_summary_statistics)
             Mean  Std. Dev        Min       Max
Overall  2.778852  1.921035   -1.31903  8.401306
Between      None  1.885078  -0.844032  8.009964
Within       None  0.369944    1.67231  3.942549
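
As a quick consistency check, the total sum of squares decomposes exactly into its between and within parts, so the overall variance should equal the between variance plus the within variance. A minimal sketch, reusing WBpanel and the lny_bar column built above:

# Overall variance = between variance + within variance (population variances,
# matching np.std's default ddof=0 used in the table above)
overall_var = np.var(WBpanel["lny"])
between_var = np.var(WBpanel["lny_bar"])
within_var = np.var(WBpanel["lny"] - WBpanel["lny_bar"])
print(overall_var, between_var + within_var)  # the two numbers agree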
# Create lags of GDP
lag_WBpanel = []
for country, group in WBpanel.groupby("country"):
    group = group.copy()  # work on a copy to avoid SettingWithCopyWarning
    for i in range(1, 4):
        group[f"lny_lag{i}"] = group["lny"].shift(i)
    lag_WBpanel.append(group)
lag_WBpanel = pd.concat(lag_WBpanel, axis=0)
serial_correlation_matrix = lag_WBpanel[
    ["lny", "lny_lag1", "lny_lag2", "lny_lag3"]
].corr()

display(serial_correlation_matrix)
               lny  lny_lag1  lny_lag2  lny_lag3
lny       1.000000  0.999576  0.999006  0.998384
lny_lag1  0.999576  1.000000  0.999566  0.998988
lny_lag2  0.999006  0.999566  1.000000  0.999558
lny_lag3  0.998384  0.998988  0.999558  1.000000
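
The per-country loop above makes the panel structure explicit; an equivalent, more idiomatic construction (a sketch using the same column names) lets groupby handle the alignment:

# Equivalent one-liner per lag: shift within each country group
for i in range(1, 4):
    WBpanel[f"lny_lag{i}"] = WBpanel.groupby("country")["lny"].shift(i)
display(WBpanel[["lny", "lny_lag1", "lny_lag2", "lny_lag3"]].corr())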
# Remove all previous function and variable definitions before the next exercise
%reset

3.2 Exercise 3.2: Fixed Effects Regression

This exercise implements a fixed effects (within) panel regression model for the WBpanel.csv data. We report cluster-robust standard errors.

import numpy as np
import pandas as pd
from linearmodels import PanelOLS
from scipy import stats

# Fixed effects model using the linearmodels python package
regression_data = pd.read_csv("WBpanel.csv")
regression_data = regression_data.set_index(keys=["country", "yr"], drop=False)
regression_data["intercept"] = 1

mod = PanelOLS(
    regression_data["lny"],
    regression_data[["intercept", "lnk", "lnl", "yr"]],
    entity_effects=True,
)
res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=True)
print(res)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    lny   R-squared:                        0.8934
Estimator:                   PanelOLS   R-squared (Between):              0.8380
No. Observations:                2296   R-squared (Within):               0.8934
Date:                Sun, Mar 31 2024   R-squared (Overall):              0.8401
Time:                        16:22:48   Log-likelihood                    1594.7
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6173.6
Entities:                          82   P-value                           0.0000
Avg Obs:                       28.000   Distribution:                  F(3,2211)
Min Obs:                       28.000                                           
Max Obs:                       28.000   F-statistic (robust):             514.70
                                        P-value                           0.0000
Time periods:                      28   Distribution:                  F(3,2211)
Avg Obs:                       82.000                                           
Min Obs:                       82.000                                           
Max Obs:                       82.000                                           
                                                                                
                             Parameter Estimates                              
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
intercept      0.5099     0.1793     2.8443     0.0045      0.1583      0.8614
lnk            0.5315     0.0514     10.341     0.0000      0.4307      0.6323
lnl            0.1336     0.1094     1.2222     0.2218     -0.0808      0.3481
yr             0.0075     0.0029     2.5386     0.0112      0.0017      0.0132
==============================================================================

F-test for Poolability: 211.15
P-value: 0.0000
Distribution: F(81,2211)

Included effects: Entity
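
The entity_effects option implements the within transformation. As a cross-check, a minimal sketch of the same slope estimates obtained by demeaning each variable by its country mean and running OLS through the origin (only the point estimates are comparable; the standard errors would need the fixed effects degrees-of-freedom correction):

import numpy as np
import pandas as pd

check_data = pd.read_csv("WBpanel.csv")
cols = ["lny", "lnk", "lnl", "yr"]
# Subtract country means from every variable (the within transformation)
demeaned = check_data[cols] - check_data.groupby("country")[cols].transform("mean")
beta = np.linalg.lstsq(
    demeaned[["lnk", "lnl", "yr"]].to_numpy(), demeaned["lny"].to_numpy(), rcond=None
)[0]
print(dict(zip(["lnk", "lnl", "yr"], beta)))  # should match the PanelOLS slopes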
# Remove all previous function and variable definitions before the next exercise
%reset

3.3 Exercise 3.3: Random Effects Regression

This exercise implements a random effects panel regression model for the WBpanel.csv data. We report cluster-robust standard errors.

import numpy as np
import pandas as pd
from linearmodels import RandomEffects
from scipy import stats

regression_data = pd.read_csv("WBpanel.csv")
regression_data = regression_data.set_index(keys=["country", "yr"], drop=False)
regression_data["intercept"] = 1

re_model = RandomEffects(
    regression_data["lny"], regression_data[["intercept", "lnk", "lnl", "yr"]]
)
results = re_model.fit(cov_type="clustered", cluster_entity=True, debiased=True)
print(results)
print(results.variance_decomposition)
                        RandomEffects Estimation Summary                        
================================================================================
Dep. Variable:                    lny   R-squared:                        0.8926
Estimator:              RandomEffects   R-squared (Between):              0.9125
No. Observations:                2296   R-squared (Within):               0.8902
Date:                Sun, Mar 31 2024   R-squared (Overall):              0.9117
Time:                        16:22:48   Log-likelihood                    1452.3
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6351.5
Entities:                          82   P-value                           0.0000
Avg Obs:                       28.000   Distribution:                  F(3,2292)
Min Obs:                       28.000                                           
Max Obs:                       28.000   F-statistic (robust):             424.86
                                        P-value                           0.0000
Time periods:                      28   Distribution:                  F(3,2292)
Avg Obs:                       82.000                                           
Min Obs:                       82.000                                           
Max Obs:                       82.000                                           
                                                                                
                             Parameter Estimates                              
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
intercept      0.1170     0.1304     0.8970     0.3698     -0.1388      0.3728
lnk            0.6069     0.0436     13.905     0.0000      0.5213      0.6925
lnl            0.2515     0.0581     4.3314     0.0000      0.1376      0.3654
yr             0.0006     0.0023     0.2458     0.8059     -0.0040      0.0051
==============================================================================
Effects                   0.112335
Residual                  0.015157
Percent due to Effects    0.881115
Name: Variance Decomposition, dtype: float64
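
The variance decomposition pins down the RE quasi-demeaning weight \(\theta = 1 - \sqrt{\sigma^2_v/(\sigma^2_v + T\sigma^2_u)}\): RE subtracts the fraction \(\theta\) of each country mean rather than the full mean. A sketch using the components reported above:

import numpy as np

sigma2_u, sigma2_v, T = 0.112335, 0.015157, 28  # Effects, Residual, periods per country
theta = 1 - np.sqrt(sigma2_v / (sigma2_v + T * sigma2_u))
print(theta)  # roughly 0.93: close to 1, so RE is close to the within (FE) transformation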
# Remove all previous function and variable definitions before the next exercise
%reset

3.4 Exercise 3.4: CSS Model Estimation

This exercise estimates the stochastic frontier model with time-varying inefficiency of Cornwell, Schmidt and Sickles (1990) via the Least Squares Dummy Variable (LSDV) approach: the country dummies are interacted with a linear and a quadratic time trend.

import numpy as np
import pandas as pd
import statsmodels.api as sm

regression_data = pd.read_csv("WBpanel.csv")
regression_data["intercept"] = 1

regression_data["yr2"] = regression_data["yr"] ** 2
country_dummies = pd.get_dummies(
    regression_data["country"], prefix="cntrydum", drop_first=True
)
regression_data = pd.concat([regression_data, country_dummies], axis=1)

yr_df = pd.DataFrame(
    columns=["yr_{}".format(i) for i in regression_data["country"].unique()[1:]]
)
yr2_df = pd.DataFrame(
    columns=["yr2_{}".format(i) for i in regression_data["country"].unique()[1:]]
)
for i in regression_data["country"].unique()[1:]:
    yr_df["yr_{}".format(i)] = (
        regression_data["yr"] * regression_data["cntrydum_{}".format(i)]
    )
    yr2_df["yr2_{}".format(i)] = (
        regression_data["yr"] * regression_data["cntrydum_{}".format(i)]
    )
regression_data = pd.concat([regression_data, yr_df, yr2_df], axis=1)

y = regression_data["lny"]
X = regression_data[
    ["intercept", "lnk", "lnl"]
    + [x for x in regression_data.columns if "cntrydum" in x]
    + [x for x in regression_data.columns if "yr_" in x]
    + [x for x in regression_data.columns if "yr2_" in x]
]

model = sm.OLS(y, X)
result = model.fit()
print(result.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    lny   R-squared:                       0.998
Model:                            OLS   Adj. R-squared:                  0.998
Method:                 Least Squares   F-statistic:                     7999.
Date:                Sun, 31 Mar 2024   Prob (F-statistic):               0.00
Time:                        16:22:49   Log-Likelihood:                 2618.2
No. Observations:                2296   AIC:                            -4906.
Df Residuals:                    2131   BIC:                            -3959.
Df Model:                         164                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
intercept                 1.0822      0.145      7.466      0.000       0.798       1.366
lnk                       0.5983      0.018     32.510      0.000       0.562       0.634
lnl                       0.0944      0.061      1.537      0.125      -0.026       0.215
cntrydum_algeria         -0.8270      0.054    -15.384      0.000      -0.932      -0.722
cntrydum_argentina       -0.1227      0.052     -2.371      0.018      -0.224      -0.021
cntrydum_austria         -0.0832      0.053     -1.573      0.116      -0.187       0.021
cntrydum_bangladesh      -0.9975      0.113     -8.866      0.000      -1.218      -0.777
cntrydum_belgium         -0.1146      0.042     -2.699      0.007      -0.198      -0.031
cntrydum_bolivia         -1.4566      0.108    -13.445      0.000      -1.669      -1.244
cntrydum_brazil          -0.2562      0.107     -2.399      0.017      -0.466      -0.047
cntrydum_cameroon        -0.7823      0.103     -7.591      0.000      -0.984      -0.580
cntrydum_canada           0.1854      0.038      4.833      0.000       0.110       0.261
cntrydum_chile           -0.8604      0.067    -12.888      0.000      -0.991      -0.730
cntrydum_china           -0.9796      0.239     -4.093      0.000      -1.449      -0.510
cntrydum_colombia        -0.8670      0.062    -14.038      0.000      -0.988      -0.746
cntrydum_costa rica      -1.1206      0.167     -6.704      0.000      -1.448      -0.793
cntrydum_cote d'ivoir    -0.9441      0.113     -8.347      0.000      -1.166      -0.722
cntrydum_cyprus          -1.8042      0.197     -9.151      0.000      -2.191      -1.418
cntrydum_denmark         -0.0120      0.071     -0.170      0.865      -0.151       0.127
cntrydum_ecuador         -1.5541      0.099    -15.672      0.000      -1.749      -1.360
cntrydum_egypt           -0.6610      0.096     -6.897      0.000      -0.849      -0.473
cntrydum_el salvador     -0.7825      0.128     -6.105      0.000      -1.034      -0.531
cntrydum_ethiopia        -0.6929      0.115     -6.049      0.000      -0.918      -0.468
cntrydum_finland         -0.4464      0.073     -6.108      0.000      -0.590      -0.303
cntrydum_france           0.5969      0.081      7.398      0.000       0.439       0.755
cntrydum_germany          0.5796      0.090      6.417      0.000       0.402       0.757
cntrydum_ghana           -0.9426      0.088    -10.691      0.000      -1.115      -0.770
cntrydum_greece          -0.5257      0.061     -8.677      0.000      -0.644      -0.407
cntrydum_guatamala       -1.0904      0.108    -10.073      0.000      -1.303      -0.878
cntrydum_haiti           -0.8120      0.125     -6.472      0.000      -1.058      -0.566
cntrydum_honduras        -1.2707      0.144     -8.817      0.000      -1.553      -0.988
cntrydum_iceland         -1.2606      0.262     -4.804      0.000      -1.775      -0.746
cntrydum_india           -0.2132      0.213     -1.000      0.317      -0.631       0.205
cntrydum_indonesia       -0.3087      0.145     -2.123      0.034      -0.594      -0.024
cntrydum_iran             0.5936      0.063      9.375      0.000       0.469       0.718
cntrydum_iraq             0.4977      0.078      6.417      0.000       0.346       0.650
cntrydum_ireland         -0.5610      0.108     -5.184      0.000      -0.773      -0.349
cntrydum_israel          -0.8595      0.116     -7.380      0.000      -1.088      -0.631
cntrydum_italy            0.2893      0.088      3.285      0.001       0.117       0.462
cntrydum_jamaica         -1.5468      0.146    -10.627      0.000      -1.832      -1.261
cntrydum_japan            0.9007      0.126      7.177      0.000       0.655       1.147
cntrydum_jordan          -0.8722      0.153     -5.709      0.000      -1.172      -0.573
cntrydum_kenya           -2.1457      0.077    -27.689      0.000      -2.298      -1.994
cntrydum_korea           -0.1300      0.092     -1.412      0.158      -0.311       0.051
cntrydum_madagascar      -1.0681      0.103    -10.325      0.000      -1.271      -0.865
cntrydum_malawi          -1.7015      0.138    -12.333      0.000      -1.972      -1.431
cntrydum_malaysia        -0.5980      0.085     -7.059      0.000      -0.764      -0.432
cntrydum_mali            -1.8251      0.111    -16.405      0.000      -2.043      -1.607
cntrydum_mauritius       -2.1201      0.196    -10.799      0.000      -2.505      -1.735
cntrydum_mexico          -0.2121      0.077     -2.771      0.006      -0.362      -0.062
cntrydum_morocco         -0.8885      0.077    -11.510      0.000      -1.040      -0.737
cntrydum_mozambique      -1.5586      0.094    -16.627      0.000      -1.742      -1.375
cntrydum_myanmar         -1.1300      0.090    -12.509      0.000      -1.307      -0.953
cntrydum_netherlands      0.0151      0.037      0.411      0.681      -0.057       0.087
cntrydum_new zealand     -0.4821      0.111     -4.345      0.000      -0.700      -0.264
cntrydum_nigeria         -0.3550      0.111     -3.210      0.001      -0.572      -0.138
cntrydum_norway          -0.6310      0.085     -7.462      0.000      -0.797      -0.465
cntrydum_pakistan        -0.8620      0.108     -7.962      0.000      -1.074      -0.650
cntrydum_panama          -1.1381      0.169     -6.733      0.000      -1.470      -0.807
cntrydum_paraguay        -1.2202      0.158     -7.726      0.000      -1.530      -0.910
cntrydum_peru            -0.8454      0.061    -13.936      0.000      -0.964      -0.726
cntrydum_philippines     -0.5339      0.082     -6.492      0.000      -0.695      -0.373
cntrydum_portugal        -0.7992      0.059    -13.485      0.000      -0.915      -0.683
cntrydum_rwanda          -0.9509      0.147     -6.490      0.000      -1.238      -0.664
cntrydum_senegal         -1.1770      0.110    -10.654      0.000      -1.394      -0.960
cntrydum_sierra leone    -1.8211      0.150    -12.170      0.000      -2.114      -1.528
cntrydum_singapore       -0.6369      0.150     -4.243      0.000      -0.931      -0.343
cntrydum_spain            0.2257      0.067      3.366      0.001       0.094       0.357
cntrydum_sri lanka       -1.1863      0.095    -12.530      0.000      -1.372      -1.001
cntrydum_sudan           -0.0276      0.087     -0.319      0.750      -0.198       0.142
cntrydum_sweden           0.0533      0.046      1.150      0.250      -0.038       0.144
cntrydum_switzerland      0.2416      0.060      4.030      0.000       0.124       0.359
cntrydum_tanzania        -1.7139      0.090    -19.002      0.000      -1.891      -1.537
cntrydum_thailand        -0.5939      0.090     -6.628      0.000      -0.770      -0.418
cntrydum_tunisia         -1.4549      0.105    -13.863      0.000      -1.661      -1.249
cntrydum_turkey          -0.6384      0.073     -8.705      0.000      -0.782      -0.495
cntrydum_uganda          -0.3710      0.088     -4.239      0.000      -0.543      -0.199
cntrydum_uk               0.5916      0.091      6.525      0.000       0.414       0.769
cntrydum_uruguay         -1.1736      0.106    -11.032      0.000      -1.382      -0.965
cntrydum_us               0.9525      0.153      6.213      0.000       0.652       1.253
cntrydum_venezuala       -0.4419      0.064     -6.954      0.000      -0.567      -0.317
cntrydum_zaire           -0.7014      0.087     -8.102      0.000      -0.871      -0.532
cntrydum_zambia          -2.1445      0.112    -19.208      0.000      -2.363      -1.926
cntrydum_zimbabwe        -1.7038      0.109    -15.643      0.000      -1.917      -1.490
yr_algeria                0.0078      0.001      6.037      0.000       0.005       0.010
yr_argentina             -0.0005      0.001     -0.473      0.636      -0.003       0.002
yr_austria                0.0007      0.001      0.646      0.519      -0.001       0.003
yr_bangladesh             0.0043      0.001      3.733      0.000       0.002       0.007
yr_belgium                0.0046      0.001      4.631      0.000       0.003       0.007
yr_bolivia                0.0010      0.001      0.811      0.417      -0.001       0.003
yr_brazil                 0.0083      0.001      6.185      0.000       0.006       0.011
yr_cameroon               0.0027      0.001      2.154      0.031       0.000       0.005
yr_canada                 0.0069      0.001      6.055      0.000       0.005       0.009
yr_chile                  0.0032      0.001      2.722      0.007       0.001       0.005
yr_china                  0.0150      0.001     12.156      0.000       0.013       0.017
yr_colombia               0.0092      0.001      7.008      0.000       0.007       0.012
yr_costa rica             0.0023      0.001      1.579      0.115      -0.001       0.005
yr_cote d'ivoir           0.0030      0.002      1.959      0.050   -3.32e-06       0.006
yr_cyprus                 0.0112      0.001     10.567      0.000       0.009       0.013
yr_denmark               -0.0006      0.001     -0.553      0.580      -0.003       0.001
yr_ecuador                0.0128      0.001      9.396      0.000       0.010       0.015
yr_egypt                  0.0070      0.001      5.359      0.000       0.004       0.010
yr_el salvador           -0.0038      0.001     -3.151      0.002      -0.006      -0.001
yr_ethiopia              -0.0024      0.001     -1.915      0.056      -0.005    5.71e-05
yr_finland                0.0052      0.001      5.150      0.000       0.003       0.007
yr_france                 0.0011      0.001      0.991      0.322      -0.001       0.003
yr_germany                0.0018      0.001      1.745      0.081      -0.000       0.004
yr_ghana                 -0.0043      0.001     -3.721      0.000      -0.007      -0.002
yr_greece                 0.0034      0.001      3.034      0.002       0.001       0.006
yr_guatamala              0.0047      0.001      3.689      0.000       0.002       0.007
yr_haiti                 -0.0070      0.001     -6.185      0.000      -0.009      -0.005
yr_honduras               0.0043      0.001      3.246      0.001       0.002       0.007
yr_iceland                0.0080      0.001      7.173      0.000       0.006       0.010
yr_india                  0.0031      0.001      2.626      0.009       0.001       0.005
yr_indonesia              0.0029      0.001      2.252      0.024       0.000       0.005
yr_iran                  -0.0088      0.002     -5.705      0.000      -0.012      -0.006
yr_iraq                  -0.0105      0.001     -7.290      0.000      -0.013      -0.008
yr_ireland                0.0030      0.001      2.793      0.005       0.001       0.005
yr_israel                 0.0107      0.001      8.557      0.000       0.008       0.013
yr_italy                  0.0053      0.001      5.145      0.000       0.003       0.007
yr_jamaica               -0.0029      0.001     -2.656      0.008      -0.005      -0.001
yr_japan                  0.0003      0.001      0.220      0.826      -0.002       0.003
yr_jordan                -0.0019      0.001     -1.388      0.165      -0.004       0.001
yr_kenya                  0.0175      0.001     12.920      0.000       0.015       0.020
yr_korea                  0.0034      0.002      2.282      0.023       0.000       0.006
yr_madagascar            -0.0032      0.001     -2.798      0.005      -0.006      -0.001
yr_malawi                -0.0035      0.001     -2.522      0.012      -0.006      -0.001
yr_malaysia               0.0021      0.001      1.427      0.154      -0.001       0.005
yr_mali                   0.0069      0.001      6.344      0.000       0.005       0.009
yr_mauritius              0.0109      0.001      8.839      0.000       0.008       0.013
yr_mexico                 0.0041      0.001      2.893      0.004       0.001       0.007
yr_morocco                0.0034      0.001      2.597      0.009       0.001       0.006
yr_mozambique            -0.0093      0.001     -8.055      0.000      -0.012      -0.007
yr_myanmar                0.0074      0.001      6.289      0.000       0.005       0.010
yr_netherlands            0.0025      0.001      2.312      0.021       0.000       0.005
yr_new zealand            0.0012      0.001      1.136      0.256      -0.001       0.003
yr_nigeria               -0.0129      0.001     -9.678      0.000      -0.016      -0.010
yr_norway                 0.0073      0.001      7.194      0.000       0.005       0.009
yr_pakistan               0.0060      0.001      4.403      0.000       0.003       0.009
yr_panama                 0.0021      0.001      1.557      0.120      -0.001       0.005
yr_paraguay               0.0018      0.001      1.206      0.228      -0.001       0.005
yr_peru                   0.0038      0.001      2.957      0.003       0.001       0.006
yr_philippines            0.0002      0.001      0.160      0.873      -0.002       0.003
yr_portugal               0.0047      0.001      4.338      0.000       0.003       0.007
yr_rwanda                 0.0018      0.001      1.396      0.163      -0.001       0.004
yr_senegal                0.0026      0.001      2.299      0.022       0.000       0.005
yr_sierra leone          -0.0002      0.001     -0.167      0.867      -0.002       0.002
yr_singapore             -0.0017      0.002     -1.035      0.301      -0.005       0.002
yr_spain                  0.0005      0.001      0.414      0.679      -0.002       0.003
yr_sri lanka              0.0034      0.001      2.753      0.006       0.001       0.006
yr_sudan                 -0.0084      0.001     -6.497      0.000      -0.011      -0.006
yr_sweden                 0.0017      0.001      1.657      0.098      -0.000       0.004
yr_switzerland           -0.0028      0.001     -2.735      0.006      -0.005      -0.001
yr_tanzania               0.0037      0.001      2.857      0.004       0.001       0.006
yr_thailand               0.0028      0.001      1.856      0.064      -0.000       0.006
yr_tunisia                0.0084      0.001      6.613      0.000       0.006       0.011
yr_turkey                 0.0055      0.001      4.305      0.000       0.003       0.008
yr_uganda                -0.0186      0.001    -15.046      0.000      -0.021      -0.016
yr_uk                    -0.0006      0.001     -0.601      0.548      -0.003       0.001
yr_uruguay                0.0026      0.001      2.738      0.006       0.001       0.004
yr_us                     0.0046      0.001      4.321      0.000       0.002       0.007
yr_venezuala             -0.0010      0.001     -0.712      0.476      -0.004       0.002
yr_zaire                 -0.0011      0.001     -0.918      0.359      -0.004       0.001
yr_zambia                 0.0022      0.001      1.773      0.076      -0.000       0.005
yr_zimbabwe               0.0090      0.001      6.588      0.000       0.006       0.012
yr2_algeria               0.0078      0.001      6.037      0.000       0.005       0.010
yr2_argentina            -0.0005      0.001     -0.473      0.636      -0.003       0.002
yr2_austria               0.0007      0.001      0.646      0.519      -0.001       0.003
yr2_bangladesh            0.0043      0.001      3.733      0.000       0.002       0.007
yr2_belgium               0.0046      0.001      4.631      0.000       0.003       0.007
yr2_bolivia               0.0010      0.001      0.811      0.417      -0.001       0.003
yr2_brazil                0.0083      0.001      6.185      0.000       0.006       0.011
yr2_cameroon              0.0027      0.001      2.154      0.031       0.000       0.005
yr2_canada                0.0069      0.001      6.055      0.000       0.005       0.009
yr2_chile                 0.0032      0.001      2.722      0.007       0.001       0.005
yr2_china                 0.0150      0.001     12.156      0.000       0.013       0.017
yr2_colombia              0.0092      0.001      7.008      0.000       0.007       0.012
yr2_costa rica            0.0023      0.001      1.579      0.115      -0.001       0.005
yr2_cote d'ivoir          0.0030      0.002      1.959      0.050   -3.32e-06       0.006
yr2_cyprus                0.0112      0.001     10.567      0.000       0.009       0.013
yr2_denmark              -0.0006      0.001     -0.553      0.580      -0.003       0.001
yr2_ecuador               0.0128      0.001      9.396      0.000       0.010       0.015
yr2_egypt                 0.0070      0.001      5.359      0.000       0.004       0.010
yr2_el salvador          -0.0038      0.001     -3.151      0.002      -0.006      -0.001
yr2_ethiopia             -0.0024      0.001     -1.915      0.056      -0.005    5.71e-05
yr2_finland               0.0052      0.001      5.150      0.000       0.003       0.007
yr2_france                0.0011      0.001      0.991      0.322      -0.001       0.003
yr2_germany               0.0018      0.001      1.745      0.081      -0.000       0.004
yr2_ghana                -0.0043      0.001     -3.721      0.000      -0.007      -0.002
yr2_greece                0.0034      0.001      3.034      0.002       0.001       0.006
yr2_guatamala             0.0047      0.001      3.689      0.000       0.002       0.007
yr2_haiti                -0.0070      0.001     -6.185      0.000      -0.009      -0.005
yr2_honduras              0.0043      0.001      3.246      0.001       0.002       0.007
yr2_iceland               0.0080      0.001      7.173      0.000       0.006       0.010
yr2_india                 0.0031      0.001      2.626      0.009       0.001       0.005
yr2_indonesia             0.0029      0.001      2.252      0.024       0.000       0.005
yr2_iran                 -0.0088      0.002     -5.705      0.000      -0.012      -0.006
yr2_iraq                 -0.0105      0.001     -7.290      0.000      -0.013      -0.008
yr2_ireland               0.0030      0.001      2.793      0.005       0.001       0.005
yr2_israel                0.0107      0.001      8.557      0.000       0.008       0.013
yr2_italy                 0.0053      0.001      5.145      0.000       0.003       0.007
yr2_jamaica              -0.0029      0.001     -2.656      0.008      -0.005      -0.001
yr2_japan                 0.0003      0.001      0.220      0.826      -0.002       0.003
yr2_jordan               -0.0019      0.001     -1.388      0.165      -0.004       0.001
yr2_kenya                 0.0175      0.001     12.920      0.000       0.015       0.020
yr2_korea                 0.0034      0.002      2.282      0.023       0.000       0.006
yr2_madagascar           -0.0032      0.001     -2.798      0.005      -0.006      -0.001
yr2_malawi               -0.0035      0.001     -2.522      0.012      -0.006      -0.001
yr2_malaysia              0.0021      0.001      1.427      0.154      -0.001       0.005
yr2_mali                  0.0069      0.001      6.344      0.000       0.005       0.009
yr2_mauritius             0.0109      0.001      8.839      0.000       0.008       0.013
yr2_mexico                0.0041      0.001      2.893      0.004       0.001       0.007
yr2_morocco               0.0034      0.001      2.597      0.009       0.001       0.006
yr2_mozambique           -0.0093      0.001     -8.055      0.000      -0.012      -0.007
yr2_myanmar               0.0074      0.001      6.289      0.000       0.005       0.010
yr2_netherlands           0.0025      0.001      2.312      0.021       0.000       0.005
yr2_new zealand           0.0012      0.001      1.136      0.256      -0.001       0.003
yr2_nigeria              -0.0129      0.001     -9.678      0.000      -0.016      -0.010
yr2_norway                0.0073      0.001      7.194      0.000       0.005       0.009
yr2_pakistan              0.0060      0.001      4.403      0.000       0.003       0.009
yr2_panama                0.0021      0.001      1.557      0.120      -0.001       0.005
yr2_paraguay              0.0018      0.001      1.206      0.228      -0.001       0.005
yr2_peru                  0.0038      0.001      2.957      0.003       0.001       0.006
yr2_philippines           0.0002      0.001      0.160      0.873      -0.002       0.003
yr2_portugal              0.0047      0.001      4.338      0.000       0.003       0.007
yr2_rwanda                0.0018      0.001      1.396      0.163      -0.001       0.004
yr2_senegal               0.0026      0.001      2.299      0.022       0.000       0.005
yr2_sierra leone         -0.0002      0.001     -0.167      0.867      -0.002       0.002
yr2_singapore            -0.0017      0.002     -1.035      0.301      -0.005       0.002
yr2_spain                 0.0005      0.001      0.414      0.679      -0.002       0.003
yr2_sri lanka             0.0034      0.001      2.753      0.006       0.001       0.006
yr2_sudan                -0.0084      0.001     -6.497      0.000      -0.011      -0.006
yr2_sweden                0.0017      0.001      1.657      0.098      -0.000       0.004
yr2_switzerland          -0.0028      0.001     -2.735      0.006      -0.005      -0.001
yr2_tanzania              0.0037      0.001      2.857      0.004       0.001       0.006
yr2_thailand              0.0028      0.001      1.856      0.064      -0.000       0.006
yr2_tunisia               0.0084      0.001      6.613      0.000       0.006       0.011
yr2_turkey                0.0055      0.001      4.305      0.000       0.003       0.008
yr2_uganda               -0.0186      0.001    -15.046      0.000      -0.021      -0.016
yr2_uk                   -0.0006      0.001     -0.601      0.548      -0.003       0.001
yr2_uruguay               0.0026      0.001      2.738      0.006       0.001       0.004
yr2_us                    0.0046      0.001      4.321      0.000       0.002       0.007
yr2_venezuala            -0.0010      0.001     -0.712      0.476      -0.004       0.002
yr2_zaire                -0.0011      0.001     -0.918      0.359      -0.004       0.001
yr2_zambia                0.0022      0.001      1.773      0.076      -0.000       0.005
yr2_zimbabwe              0.0090      0.001      6.588      0.000       0.006       0.012
==============================================================================
Omnibus:                      335.765   Durbin-Watson:                   0.529
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             4840.746
Skew:                          -0.075   Prob(JB):                         0.00
Kurtosis:                      10.112   Cond. No.                     7.88e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.04e-29. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
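
In the CSS specification the country effects are \(\alpha_{it} = \theta_{1i} + \theta_{2i}t + \theta_{3i}t^2\), and time-varying inefficiency is the gap to the per-period frontier, \(\hat{u}_{it} = \max_j \hat{\alpha}_{jt} - \hat{\alpha}_{it}\). A minimal sketch of this second step, assuming result and regression_data from above are still in scope and that yr runs from 1 to 28:

params = result.params
t = np.arange(1, 29)
alpha_it = {}
for c in regression_data["country"].unique():
    # The omitted baseline country has no dummy or interaction terms, so .get returns 0
    a0 = params["intercept"] + params.get("cntrydum_{}".format(c), 0.0)
    a1 = params.get("yr_{}".format(c), 0.0)
    a2 = params.get("yr2_{}".format(c), 0.0)
    alpha_it[c] = a0 + a1 * t + a2 * t**2
alpha_it = pd.DataFrame(alpha_it, index=t)
u_it = alpha_it.max(axis=1).to_numpy()[:, None] - alpha_it  # gap to the frontier
efficiency_it = np.exp(-u_it)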
# Remove all previous function and variable definitions before the next exercise
%reset

3.5 Exercise 3.5: LS Model Estimation

This exercise estimates the stochastic frontier model with time-varying inefficiency of Lee and Schmidt (1993) via the Least Squares Dummy Variable (LSDV) approach.

import numpy as np
import pandas as pd
import statsmodels.api as sm

regression_data = pd.read_csv("WBpanel.csv")
regression_data["intercept"] = 1

country_dummies = pd.get_dummies(
    regression_data["country"], prefix="cntrydum", drop_first=True
)
year_dummies = pd.get_dummies(regression_data["yr"], prefix="yrdum", drop_first=True)
regression_data = pd.concat([regression_data, country_dummies, year_dummies], axis=1)

y = regression_data["lny"]
X = regression_data[
    ["intercept", "lnk", "lnl"]
    + [x for x in regression_data.columns if "cntrydum" in x]
    + [x for x in regression_data.columns if "yrdum" in x]
]

model = sm.OLS(y, X)
result = model.fit()
print(result.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    lny   R-squared:                       0.996
Model:                            OLS   Adj. R-squared:                  0.996
Method:                 Least Squares   F-statistic:                     5350.
Date:                Sun, 31 Mar 2024   Prob (F-statistic):               0.00
Time:                        16:22:49   Log-Likelihood:                 1671.5
No. Observations:                2296   AIC:                            -3121.
Df Residuals:                    2185   BIC:                            -2484.
Df Model:                         110                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
intercept                 1.2958      0.078     16.518      0.000       1.142       1.450
lnk                       0.5149      0.012     42.011      0.000       0.491       0.539
lnl                       0.1609      0.033      4.913      0.000       0.097       0.225
cntrydum_algeria         -0.6913      0.035    -19.857      0.000      -0.760      -0.623
cntrydum_argentina       -0.2229      0.040     -5.604      0.000      -0.301      -0.145
cntrydum_austria         -0.0755      0.037     -2.053      0.040      -0.148      -0.003
cntrydum_bangladesh      -1.2143      0.076    -16.061      0.000      -1.363      -1.066
cntrydum_belgium          0.0046      0.033      0.138      0.891      -0.061       0.070
cntrydum_bolivia         -1.6379      0.058    -28.093      0.000      -1.752      -1.524
cntrydum_brazil          -0.1534      0.071     -2.154      0.031      -0.293      -0.014
cntrydum_cameroon        -0.9801      0.057    -17.182      0.000      -1.092      -0.868
cntrydum_canada           0.3789      0.036     10.585      0.000       0.309       0.449
cntrydum_chile           -0.9467      0.043    -21.986      0.000      -1.031      -0.862
cntrydum_china           -0.8476      0.139     -6.093      0.000      -1.120      -0.575
cntrydum_colombia        -0.8074      0.045    -17.756      0.000      -0.897      -0.718
cntrydum_costa rica      -1.2801      0.081    -15.731      0.000      -1.440      -1.121
cntrydum_cote d'ivoir    -1.0915      0.056    -19.489      0.000      -1.201      -0.982
cntrydum_cyprus          -1.6283      0.105    -15.541      0.000      -1.834      -1.423
cntrydum_denmark         -0.0286      0.044     -0.651      0.515      -0.115       0.058
cntrydum_ecuador         -1.3823      0.052    -26.773      0.000      -1.484      -1.281
cntrydum_egypt           -0.7620      0.061    -12.510      0.000      -0.881      -0.643
cntrydum_el salvador     -1.1519      0.068    -16.925      0.000      -1.285      -1.018
cntrydum_ethiopia        -1.2025      0.075    -15.973      0.000      -1.350      -1.055
cntrydum_finland         -0.2907      0.045     -6.505      0.000      -0.378      -0.203
cntrydum_france           0.6467      0.053     12.141      0.000       0.542       0.751
cntrydum_germany          0.6721      0.060     11.245      0.000       0.555       0.789
cntrydum_ghana           -1.3528      0.055    -24.395      0.000      -1.462      -1.244
cntrydum_greece          -0.5470      0.038    -14.303      0.000      -0.622      -0.472
cntrydum_guatamala       -1.1986      0.058    -20.544      0.000      -1.313      -1.084
cntrydum_haiti           -1.3816      0.073    -19.005      0.000      -1.524      -1.239
cntrydum_honduras        -1.4039      0.074    -19.077      0.000      -1.548      -1.260
cntrydum_iceland         -1.0848      0.135     -8.008      0.000      -1.350      -0.819
cntrydum_india           -0.3807      0.126     -3.032      0.002      -0.627      -0.135
cntrydum_indonesia       -0.5458      0.088     -6.233      0.000      -0.718      -0.374
cntrydum_iran             0.2011      0.043      4.672      0.000       0.117       0.286
cntrydum_iraq             0.0548      0.040      1.359      0.174      -0.024       0.134
cntrydum_ireland         -0.5572      0.059     -9.456      0.000      -0.673      -0.442
cntrydum_israel          -0.6169      0.057    -10.731      0.000      -0.730      -0.504
cntrydum_italy            0.4504      0.055      8.126      0.000       0.342       0.559
cntrydum_jamaica         -1.7758      0.077    -23.177      0.000      -1.926      -1.626
cntrydum_japan            0.9255      0.075     12.313      0.000       0.778       1.073
cntrydum_jordan          -1.1820      0.079    -14.980      0.000      -1.337      -1.027
cntrydum_kenya           -1.8759      0.048    -38.885      0.000      -1.970      -1.781
cntrydum_korea           -0.2502      0.052     -4.837      0.000      -0.352      -0.149
cntrydum_madagascar      -1.5019      0.064    -23.486      0.000      -1.627      -1.376
cntrydum_malawi          -2.1826      0.074    -29.308      0.000      -2.329      -2.037
cntrydum_malaysia        -0.7484      0.045    -16.454      0.000      -0.838      -0.659
cntrydum_mali            -1.9573      0.067    -29.225      0.000      -2.089      -1.826
cntrydum_mauritius       -2.0097      0.100    -20.159      0.000      -2.205      -1.814
cntrydum_mexico          -0.2480      0.056     -4.462      0.000      -0.357      -0.139
cntrydum_morocco         -1.0342      0.048    -21.525      0.000      -1.128      -0.940
cntrydum_mozambique      -2.1575      0.059    -36.324      0.000      -2.274      -2.041
cntrydum_myanmar         -1.2569      0.063    -20.018      0.000      -1.380      -1.134
cntrydum_netherlands      0.0963      0.032      3.003      0.003       0.033       0.159
cntrydum_new zealand     -0.4834      0.058     -8.271      0.000      -0.598      -0.369
cntrydum_nigeria         -0.9962      0.068    -14.579      0.000      -1.130      -0.862
cntrydum_norway          -0.3984      0.050     -7.971      0.000      -0.496      -0.300
cntrydum_pakistan        -1.0048      0.072    -13.962      0.000      -1.146      -0.864
cntrydum_panama          -1.2794      0.083    -15.389      0.000      -1.442      -1.116
cntrydum_paraguay        -1.4493      0.078    -18.620      0.000      -1.602      -1.297
cntrydum_peru            -0.9027      0.040    -22.303      0.000      -0.982      -0.823
cntrydum_philippines     -0.7771      0.057    -13.654      0.000      -0.889      -0.666
cntrydum_portugal        -0.7885      0.038    -20.496      0.000      -0.864      -0.713
cntrydum_rwanda          -1.3095      0.081    -16.090      0.000      -1.469      -1.150
cntrydum_senegal         -1.3665      0.063    -21.706      0.000      -1.490      -1.243
cntrydum_sierra leone    -2.2342      0.086    -25.912      0.000      -2.403      -2.065
cntrydum_singapore       -0.8430      0.070    -12.071      0.000      -0.980      -0.706
cntrydum_spain            0.1821      0.045      4.065      0.000       0.094       0.270
cntrydum_sri lanka       -1.4234      0.059    -24.192      0.000      -1.539      -1.308
cntrydum_sudan           -0.5627      0.053    -10.522      0.000      -0.668      -0.458
cntrydum_sweden           0.1107      0.035      3.138      0.002       0.042       0.180
cntrydum_switzerland      0.1979      0.040      5.002      0.000       0.120       0.275
cntrydum_tanzania        -1.9387      0.058    -33.230      0.000      -2.053      -1.824
cntrydum_thailand        -0.7739      0.057    -13.505      0.000      -0.886      -0.662
cntrydum_tunisia         -1.4176      0.055    -25.675      0.000      -1.526      -1.309
cntrydum_turkey          -0.6720      0.052    -12.974      0.000      -0.774      -0.570
cntrydum_uganda          -1.1946      0.055    -21.795      0.000      -1.302      -1.087
cntrydum_uk               0.5680      0.055     10.250      0.000       0.459       0.677
cntrydum_uruguay         -1.2512      0.063    -19.929      0.000      -1.374      -1.128
cntrydum_us               1.1515      0.093     12.355      0.000       0.969       1.334
cntrydum_venezuala       -0.5627      0.036    -15.747      0.000      -0.633      -0.493
cntrydum_zaire           -1.0831      0.061    -17.694      0.000      -1.203      -0.963
cntrydum_zambia          -2.2912      0.060    -38.259      0.000      -2.409      -2.174
cntrydum_zimbabwe        -1.6633      0.057    -29.181      0.000      -1.775      -1.552
yrdum_2                   0.0012      0.019      0.065      0.948      -0.035       0.038
yrdum_3                   0.0146      0.019      0.776      0.438      -0.022       0.051
yrdum_4                   0.0385      0.019      2.041      0.041       0.002       0.076
yrdum_5                   0.0561      0.019      2.955      0.003       0.019       0.093
yrdum_6                   0.0725      0.019      3.785      0.000       0.035       0.110
yrdum_7                   0.0828      0.019      4.283      0.000       0.045       0.121
yrdum_8                   0.0826      0.020      4.218      0.000       0.044       0.121
yrdum_9                   0.1017      0.020      5.127      0.000       0.063       0.141
yrdum_10                  0.1282      0.020      6.366      0.000       0.089       0.168
yrdum_11                  0.1491      0.020      7.284      0.000       0.109       0.189
yrdum_12                  0.1599      0.021      7.665      0.000       0.119       0.201
yrdum_13                  0.1764      0.021      8.289      0.000       0.135       0.218
yrdum_14                  0.1890      0.022      8.704      0.000       0.146       0.232
yrdum_15                  0.1920      0.022      8.659      0.000       0.149       0.236
yrdum_16                  0.1772      0.023      7.822      0.000       0.133       0.222
yrdum_17                  0.1937      0.023      8.351      0.000       0.148       0.239
yrdum_18                  0.2014      0.024      8.483      0.000       0.155       0.248
yrdum_19                  0.2056      0.024      8.461      0.000       0.158       0.253
yrdum_20                  0.2212      0.025      8.896      0.000       0.172       0.270
yrdum_21                  0.2133      0.025      8.393      0.000       0.163       0.263
yrdum_22                  0.2021      0.026      7.777      0.000       0.151       0.253
yrdum_23                  0.1876      0.027      7.075      0.000       0.136       0.240
yrdum_24                  0.1822      0.027      6.744      0.000       0.129       0.235
yrdum_25                  0.1891      0.027      6.880      0.000       0.135       0.243
yrdum_26                  0.1928      0.028      6.868      0.000       0.138       0.248
yrdum_27                  0.2034      0.029      7.113      0.000       0.147       0.260
yrdum_28                  0.2172      0.029      7.457      0.000       0.160       0.274
==============================================================================
Omnibus:                      392.679   Durbin-Watson:                   0.332
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2722.020
Skew:                          -0.619   Prob(JB):                         0.00
Kurtosis:                       8.188   Cond. No.                         835.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
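
From the fitted dummies, time-invariant relative inefficiency can be recovered in the spirit of Schmidt and Sickles (1984) as \(\hat{u}_i = \max_j \hat{\alpha}_j - \hat{\alpha}_i\). A sketch, assuming result from the fit above is still in scope (the baseline country dropped by get_dummies carries the intercept alone):

alpha_i = result.params.filter(like="cntrydum_") + result.params["intercept"]
alpha_i["baseline"] = result.params["intercept"]  # the country dropped by drop_first
u_i = alpha_i.max() - alpha_i
efficiency_i = np.exp(-u_i).sort_values(ascending=False)
print(efficiency_i.head())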
# Remove all previous function and variable definitions before the next exercise
%reset

3.6 Exercise 3.6: Random Effects with Time-Invariant Inefficiency via MLE

This exercise estimates a random effects stochastic frontier model with time-invariant inefficiency via MLE.

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize


def estimate(production_data):
    # Starting values for MLE
    alpha = 1.3
    beta1 = 0.6
    beta2 = 0.2
    sigma2u = 0.3
    sigma2v = 0.01
    mu = 1.1

    # Initial parameter vector
    theta0 = np.array([alpha, beta1, beta2, mu, sigma2u, sigma2v])
    # Bounds to ensure Sigma2u and Sigma2v are positive
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-3, np.inf),
        (1e-3, np.inf),
    ]

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 10000, "maxfun": 20000, "maxcor": 100},
        args=(production_data,),
        bounds=bounds,
    )
    theta = MLE_results.x
    logMLE = MLE_results.fun * -1

    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-6)(
        theta, production_data
    )

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, production_data):
    # Obtain the log likelihood
    logDen = log_density(coefs, production_data)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, production_data):
    alpha = coefs[0]
    beta1 = coefs[1]
    beta2 = coefs[2]
    mu = coefs[3]
    sigma2u = coefs[4]
    sigma2v = coefs[5]

    panels = np.unique(production_data[:, 0])
    logDen_is = np.zeros(len(panels))
    for j in range(len(panels)):
        i = panels[j]
        production_data_i = production_data[production_data[:, 0] == i, :]
        y = production_data_i[:, 1]
        x1 = production_data_i[:, 2]
        x2 = production_data_i[:, 3]
        T = len(production_data_i)
        eps_i = y - alpha - x1 * beta1 - x2 * beta2

        sigma2_star = (sigma2v * sigma2u) / (sigma2v + T * sigma2u)
        mu_i_star = (sigma2v * mu - sigma2u * sum(eps_i)) / (sigma2v + T * sigma2u)
        normdcdf_1 = stats.norm.cdf(mu_i_star / np.sqrt(sigma2_star))
        if normdcdf_1 < 1e-6:
            normdcdf_1 = 1e-6
        logDen_i = (
            0.5 * np.log(sigma2_star)
            + np.log(normdcdf_1)
            - 0.5
            * (
                np.sum(eps_i**2) / sigma2v
                + (mu / np.sqrt(sigma2u)) ** 2
                - (mu_i_star / np.sqrt(sigma2_star)) ** 2
            )
            - T * np.log(np.sqrt(sigma2v))
            - np.log(np.sqrt(sigma2u))
            - np.log(stats.norm.cdf(mu / np.sqrt(sigma2u)))
        )
        logDen_is[j] = logDen_i

    return logDen_is

production_data = pd.read_csv("WBpanel.csv")
production_data = production_data[["code", "lny", "lnk", "lnl"]].values

# Estimate coefficients via MLE
theta, sterr, logMLE = estimate(production_data)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "Std.Err", "z", "p>|z|", "[95%Conf.", "Interv]"],
    index=[
        r"$$\beta_{0}$$",
        r"$$\beta_{1}$$",
        r"$$\beta_{2}$$",
        " ",
        r"$$\mu$$",
        " ",
        r"$$\sigma^{2}_{u}$$",
        r"$$\sigma^{2}_{v}$$",
    ],
)

theta = np.round(theta.reshape(-1, 1), 5)
sterr = np.round(sterr.reshape(-1, 1), 5)
Zscores = np.round(Zscores.reshape(-1, 1), 5)
Pvalues = np.round(Pvalues.reshape(-1, 1), 5)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 5)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 5)

frontier = np.concatenate(
    [
        theta[:3],
        sterr[:3],
        Zscores[:3],
        Pvalues[:3],
        lower_ConfInt95[:3],
        upper_ConfInt95[:3],
    ],
    axis=1,
)

mu = np.concatenate(
    [theta[3], sterr[3], Zscores[3], Pvalues[3], lower_ConfInt95[3], upper_ConfInt95[3]]
).T
sigmas = np.concatenate(
    [
        theta[4:],
        sterr[4:],
        Zscores[4:],
        Pvalues[4:],
        lower_ConfInt95[4:],
        upper_ConfInt95[4:],
    ],
    axis=1,
)

print("\nLL", round(logMLE, 3))
results.iloc[0:3, :] = frontier
results.iloc[3, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[4, :] = mu
results.iloc[5, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[6:, :] = sigmas
display(results)

LL 3370.215
                        Est  Std.Err         z  p>|z|  [95%Conf.  Interv]
$$\beta_{0}$$       1.29718  0.08278  15.67037    0.0    1.13493  1.45942
$$\beta_{1}$$       0.59481  0.01011   58.8148    0.0    0.57499  0.61463
$$\beta_{2}$$       0.28374  0.02589  10.96053    0.0      0.233  0.33447
$$\mu$$             1.14806  0.10538  10.89418    0.0    0.94152  1.35461
$$\sigma^{2}_{u}$$    0.366  0.07153   5.11667    0.0     0.2258   0.5062
$$\sigma^{2}_{v}$$  0.01559  0.00047  33.25028    0.0    0.01467  0.01651
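
Given the MLE, country-level inefficiency can be predicted from the posterior of \(u_i\), which is \(N(\mu^{*}_{i}, \sigma^{2}_{*})\) truncated at zero, so that \(E[u_i \mid \varepsilon_i] = \mu^{*}_{i} + \sigma_{*}\,\phi(\mu^{*}_{i}/\sigma_{*})/\Phi(\mu^{*}_{i}/\sigma_{*})\). A minimal sketch, reusing the imports above and the (rounded) theta:

data = pd.read_csv("WBpanel.csv")
alpha, beta1, beta2, mu_hat, sigma2u, sigma2v = np.asarray(theta).ravel()
efficiency = {}
for code, g in data.groupby("code"):
    eps = g["lny"] - alpha - beta1 * g["lnk"] - beta2 * g["lnl"]
    T = len(g)
    s_star = np.sqrt(sigma2u * sigma2v / (sigma2v + T * sigma2u))
    mu_star = (sigma2v * mu_hat - sigma2u * eps.sum()) / (sigma2v + T * sigma2u)
    ratio = mu_star / s_star
    # Mean of the truncated normal posterior of u_i
    Eu = mu_star + s_star * stats.norm.pdf(ratio) / stats.norm.cdf(ratio)
    efficiency[code] = np.exp(-Eu)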
# Remove all previous function and variable definitions before the next exercise
%reset

3.7 Exercise 3.7: BC92 “time-varying decay” model

This exercise estimates the time-varying decay model of Battese and Coelli (1992) via MLE.

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize


def estimate(production_data):
    # Starting values for MLE
    alpha = 6.3
    beta1 = 0.5
    beta2 = 0.17
    sigma2u = 0.4
    sigma2v = 0.015
    mu = 5.8
    gamma = 0.0007

    # Initial parameter vector
    theta0 = np.array([alpha, beta1, beta2, mu, gamma, sigma2u, sigma2v])

    # Bounds to ensure sigma2u and sigma2v are positive
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-3, np.inf),
        (1e-3, np.inf),
    ]

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 10000, "maxfun": 20000, "maxcor": 100},
        args=(production_data,),
        bounds=bounds,
    )
    theta = MLE_results.x
    logMLE = MLE_results.fun * -1

    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-3)(
        theta, production_data
    )

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, production_data):
    # Obtain the log likelihood
    logDen = log_density(coefs, production_data)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, production_data):
    alpha = coefs[0]
    beta1 = coefs[1]
    beta2 = coefs[2]
    mu = coefs[3]
    gamma = coefs[4]
    sigma2u = coefs[5]
    sigma2v = coefs[6]

    panels = np.unique(production_data[:, 0])
    logDen_is = np.zeros(len(panels))
    for j in range(len(panels)):
        i = panels[j]
        production_data_i = production_data[production_data[:, 0] == i, :]

        y = production_data_i[:, 2]
        x1 = production_data_i[:, 3]
        x2 = production_data_i[:, 4]

        t = production_data_i[:, 1]
        T = len(production_data_i)
        G_t = np.exp(gamma * (t - T))

        eps_i = y - alpha - x1 * beta1 - x2 * beta2
        sigma2_star = (sigma2v * sigma2u) / (sigma2v + sigma2u * np.sum(G_t**2))
        mu_i_star = (sigma2v * mu - sigma2u * np.sum(G_t * eps_i)) / (
            sigma2v + sigma2u * np.sum(G_t**2)
        )
        normdcdf_1 = stats.norm.cdf(mu_i_star / np.sqrt(sigma2_star))
        logDen_i = (
            0.5 * np.log(sigma2_star)
            + np.log(normdcdf_1)
            - 0.5
            * (
                np.sum(eps_i**2) / sigma2v
                + (mu / np.sqrt(sigma2u)) ** 2
                - (mu_i_star / np.sqrt(sigma2_star)) ** 2
            )
            - T * np.log(np.sqrt(sigma2v))
            - np.log(np.sqrt(sigma2u))
            - np.log(stats.norm.cdf(mu / np.sqrt(sigma2u)))
        )
        logDen_is[j] = logDen_i

    return logDen_is
production_data = pd.read_csv(r"WBpanel.csv")
production_data = production_data[["code", "yr", "lny", "lnk", "lnl"]].values

# Estimate coefficients via MLE
theta, sterr, logMLE = estimate(production_data)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "Std.Err", "z", "p>|z|", "[95%Conf.", "Interv]"],
    index=[
        r"$$\beta_{0}$$",
        r"lnk",
        r"lnl",
        " ",
        r"$$\mu$$",
        " ",
        r"$$\gamma$$",
        r"$$\sigma^{2}_{u}$$",
        r"$$\sigma^{2}_{v}$$",
    ],
)

theta = np.round(theta.reshape(-1, 1), 5)
sterr = np.round(sterr.reshape(-1, 1), 5)
Zscores = np.round(Zscores.reshape(-1, 1), 5)
Pvalues = np.round(Pvalues.reshape(-1, 1), 5)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 5)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 5)

frontier = np.concatenate(
    [
        theta[:3],
        sterr[:3],
        Zscores[:3],
        Pvalues[:3],
        lower_ConfInt95[:3],
        upper_ConfInt95[:3],
    ],
    axis=1,
)

mu = np.concatenate(
    [theta[3], sterr[3], Zscores[3], Pvalues[3], lower_ConfInt95[3], upper_ConfInt95[3]]
).T
sigmas = np.concatenate(
    [
        theta[4:],
        sterr[4:],
        Zscores[4:],
        Pvalues[4:],
        lower_ConfInt95[4:],
        upper_ConfInt95[4:],
    ],
    axis=1,
)

print("\nLL", round(logMLE, 3))
results.iloc[0:3, :] = frontier
results.iloc[3, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[4, :] = mu
results.iloc[5, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[6:, :] = sigmas
display(results)

LL 3382.773
                         Est  Std.Err         z    p>|z|  [95%Conf.   Interv]
$$\beta_{0}$$         6.3039   4.2127    1.4964  0.13455   -1.95283  14.56063
lnk                  0.56151  0.01422  39.48886      0.0    0.53364   0.58938
lnl                   0.1856  0.03198    5.8043      0.0    0.12293   0.24827
$$\mu$$              5.79624  4.18993   1.38338  0.16655   -2.41586  14.00834
$$\gamma$$          -0.00079  0.00052  -1.50828  0.13148   -0.00181   0.00024
$$\sigma^{2}_{u}$$   0.40464  0.06632    6.1014      0.0    0.27465   0.53462
$$\sigma^{2}_{v}$$    0.0152  0.00045  33.55504      0.0    0.01431   0.01609
# Remove all previous function and variable definitions before the next exercise
%reset

3.8 Exercise 3.8: BC95 with non-constant \(\mu\)

This exercise estimates the model of Battese and Coelli (1995) with non-constant \(\mu\) via MLE.
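A sketch of the model as implemented in the code below, in the code's notation (\(z_{1i}\) is iniStat, the initial log capital-labor ratio, and \(z_{2t}\) is yrT):

\[
\ln y_{it} = \alpha + \beta_{1}\ln k_{it} + \beta_{2}\ln l_{it} + \beta_{3}t + v_{it} - G(t)u_{i}, \qquad G(t) = e^{\gamma z_{2t}},
\]
\[
u_{i} \sim N^{+}(\mu_{i}, \sigma^{2}_{u}), \qquad \mu_{i} = \delta_{0} + \delta_{1}z_{1i}, \qquad v_{it} \sim N(0, \sigma^{2}_{v}).
\]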

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize


def estimate(production_data):
    # Starting values for MLE
    alpha = 1.7  # cons
    beta1 = 0.5  # lnk
    beta2 = 0.3  # lnl
    beta3 = 0.004  # yr
    sigma2u = 0.23
    sigma2v = 0.015
    delta0 = 1.8
    delta1 = -0.3
    gamma = 0.0005

    # Initial parameter vector
    theta0 = np.array(
        [alpha, beta1, beta2, beta3, delta0, delta1, gamma, sigma2u, sigma2v]
    )

    # Bounds to ensure sigma2u and sigma2v are positive
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-3, np.inf),
        (1e-3, np.inf),
    ]

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 1000, "maxfun": 10000, "maxcor": 100},
        args=(production_data,),
        bounds=bounds,
    )
    theta = MLE_results.x
    logMLE = MLE_results.fun * -1

    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-3)(
        theta, production_data
    )

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, production_data):
    # Obtain the log likelihood
    logDen = log_density(coefs, production_data)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, production_data):
    alpha = coefs[0]
    beta1 = coefs[1]
    beta2 = coefs[2]
    beta3 = coefs[3]
    delta0 = coefs[4]
    delta1 = coefs[5]
    gamma = coefs[6]
    sigma2u = coefs[7]
    sigma2v = coefs[8]

    panels = np.unique(production_data[:, 0])
    logDen_is = np.zeros(len(panels))
    for j in range(len(panels)):
        i = panels[j]
        production_data_i = production_data[production_data[:, 0] == i, :]

        y = production_data_i[:, 2]
        x1 = production_data_i[:, 3]  # lnk
        x2 = production_data_i[:, 4]  # lnl
        x3 = production_data_i[:, 1]  # yr

        z1 = np.unique(production_data_i[:, 6])[0]  # iniStat
        z2 = production_data_i[:, 5]  # yrT

        T = len(production_data_i)
        G_t = np.exp(gamma * z2)

        mu = delta0 + delta1 * z1

        eps_i = y - alpha - x1 * beta1 - x2 * beta2 - x3 * beta3
        sigma2_star = (sigma2v * sigma2u) / (sigma2v + sigma2u * np.sum(G_t**2))
        mu_i_star = (sigma2v * mu - sigma2u * np.sum(G_t * eps_i)) / (
            sigma2v + sigma2u * np.sum(G_t**2)
        )
        normdcdf_1 = stats.norm.cdf(mu_i_star / np.sqrt(sigma2_star))
        logDen_i = (
            0.5 * np.log(sigma2_star)
            + np.log(normdcdf_1)
            - 0.5
            * (
                np.sum(eps_i**2) / sigma2v
                + (mu / np.sqrt(sigma2u)) ** 2
                - (mu_i_star / np.sqrt(sigma2_star)) ** 2
            )
            - T * np.log(np.sqrt(sigma2v))
            - np.log(np.sqrt(sigma2u))
            - np.log(stats.norm.cdf(mu / np.sqrt(sigma2u)))
        )
        logDen_is[j] = logDen_i

    return logDen_is
production_data = pd.read_csv(r"WBpanel.csv")

# create the yrT and iniStat variables
production_data = production_data.sort_values(by=["code", "yr"])
bigT = np.max(production_data["yr"])
production_data["yrT"] = production_data["yr"] - bigT
production_data["ic1"] = np.where(
    production_data["yr"] == 1, production_data["lnk"] - production_data["lnl"], np.nan
)
iniStat_data = production_data.groupby(["code"], as_index=False)["ic1"].mean()
iniStat_data.rename(columns={"ic1": "iniStat"}, inplace=True)
production_data = production_data.merge(iniStat_data, how="left", on=["code"])

production_data = production_data[
    ["code", "yr", "lny", "lnk", "lnl", "yrT", "iniStat"]
].values

# Estimate coefficients via MLE
theta, sterr, logMLE = estimate(production_data)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "Std.Err", "z", "p>|z|", "[95%Conf.", "Interv]"],
    index=[
        r"$$\beta_{0}$$",
        "lnk",
        "lnl",
        "yr",
        " ",
        "$$\gamma_{0}$$",
        "iniStat",
        " ",
        "yrT",
        " ",
        r"$$\sigma^{2}_{u}$$",
        r"$$\sigma^{2}_{v}$$",
    ],
)

theta = np.round(theta.reshape(-1, 1), 5)
sterr = np.round(sterr.reshape(-1, 1), 5)
Zscores = np.round(Zscores.reshape(-1, 1), 5)
Pvalues = np.round(Pvalues.reshape(-1, 1), 5)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 5)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 5)

frontier = np.concatenate(
    [
        theta[:4],
        sterr[:4],
        Zscores[:4],
        Pvalues[:4],
        lower_ConfInt95[:4],
        upper_ConfInt95[:4],
    ],
    axis=1,
)

mu = np.concatenate(
    [
        theta[4:6],
        sterr[4:6],
        Zscores[4:6],
        Pvalues[4:6],
        lower_ConfInt95[4:6],
        upper_ConfInt95[4:6],
    ],
    axis=1,
)

gamma = np.concatenate(
    [theta[6], sterr[6], Zscores[6], Pvalues[6], lower_ConfInt95[6], upper_ConfInt95[6]]
).T

sigmas = np.concatenate(
    [
        theta[7:],
        sterr[7:],
        Zscores[7:],
        Pvalues[7:],
        lower_ConfInt95[7:],
        upper_ConfInt95[7:],
    ],
    axis=1,
)

print("\nLL", round(logMLE, 3))
results.iloc[0:4, :] = frontier
results.iloc[4, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[5:7, :] = mu
results.iloc[7, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[8, :] = gamma
results.iloc[9, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[10:, :] = sigmas
display(results)

LL 3404.673
Est Std.Err z p>|z| [95%Conf. Interv]
$$\beta_{0}$$ 1.68368 0.1714 9.8229 0.0 1.34774 2.01963
lnk 0.53576 0.01524 35.14506 0.0 0.50588 0.56563
lnl 0.30183 0.03914 7.7124 0.0 0.22512 0.37853
yr 0.00434 0.00137 3.15733 0.00159 0.00165 0.00703
$$\gamma_{0}$$ 1.8314 0.1368 13.38734 0.0 1.56328 2.09953
iniStat -0.31098 0.0427 -7.28313 0.0 -0.39466 -0.22729
yrT 0.00056 0.0007 0.80208 0.42251 -0.00081 0.00193
$$\sigma^{2}_{u}$$ 0.23165 0.04501 5.14646 0.0 0.14343 0.31988
$$\sigma^{2}_{v}$$ 0.01532 0.00046 33.34028 0.0 0.01442 0.01622
# Remove all previous function and variable definitions before the next exercise
%reset

3.9 Exercise 3.9: TFE-G model

This exercise estimates the Fixed Effects SFM of Greene (2005a,b) using MLE for the TaiwaneseManufacturing.csv data.
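A sketch of the specification as implemented below: the firm effects \(\alpha_{i}\) enter through dummy variables, and the variance of the half-normal inefficiency term depends on \(z_{1}\):

\[
y_{it} = \alpha_{i} + \beta_{1}x_{1it} + \beta_{2}x_{2it} + v_{it} - u_{it}, \qquad u_{it} \sim N^{+}\!\left(0, \sigma^{2}_{u,it}\right), \qquad \sigma^{2}_{u,it} = e^{\omega_{0} + \omega_{1}z_{1it}}, \qquad v_{it} \sim N(0, \sigma^{2}_{v}).
\]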

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize


def estimate(y, X, z1):
    np.random.seed(10)
    N, p = X.shape
    # Starting values for MLE
    alpha = 7.5
    beta1 = 0.1
    beta2 = 0.15
    beta3_101 = stats.uniform.rvs(size=p - 2)
    omega0 = -3.6
    omega1 = 1.6  # Coefficient for z1
    sigma2v = 0.085

    # Initial parameter vector
    theta0 = np.concatenate(
        [
            np.array([alpha, beta1, beta2]),
            beta3_101,
            np.array([omega0, omega1, sigma2v]),
        ]
    )
    # Bound to ensure sigma2v is positive
    bounds = [(None, None) for x in range(len(theta0) - 1)] + [(1e-3, np.inf)]

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 10000, "maxfun": 20000, "maxcor": 100},
        args=(y, X, z1),
        bounds=bounds,
    )

    theta = MLE_results.x
    logMLE = MLE_results.fun * -1
    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-6)(theta, y, X, z1)

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, y, X, z1):
    # Obtain the log-likelihood
    logDen = log_density(coefs, y, X, z1)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, y, X, z1):
    N, p = X.shape
    alpha = coefs[0]
    beta1 = coefs[1]
    beta2 = coefs[2]
    beta3_101 = coefs[3 : 3 + (p - 2)]  # firm dummy coefficients
    omega0 = coefs[-3]
    omega1 = coefs[-2]
    sigma2v = coefs[-1]

    sigma2u = np.exp(omega0 + omega1 * z1)
    lambda_ = np.sqrt(sigma2u / sigma2v)
    sigma2 = sigma2u + sigma2v
    sigma = np.sqrt(sigma2)
    eps = y - alpha - X[:, 0] * beta1 - X[:, 1] * beta2 - X[:, 2:] @ beta3_101

    Den = (
        2
        / sigma
        * stats.norm.pdf(eps / sigma, 0, 1)
        * stats.norm.cdf(-lambda_ * eps / sigma, 0, 1)
    )
    Den = np.where(
        np.abs(Den) < 1e-5, 1e-5, Den
    )  # Adjust small densities for numerical precision
    logDen = np.log(Den)  # Log density

    return logDen
production_data = pd.read_csv(r"TaiwaneseManufacturing.csv")
id_dummies = pd.get_dummies(production_data["id"], prefix="dum", drop_first=True)

y = production_data["y"].values
X = np.concatenate([production_data[["x1", "x2"]].values, id_dummies.values], axis=1)
z1 = production_data["z1"].values

# Estimate coefficients via MLE
theta, sterr, logMLE = estimate(y, X, z1)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "StEr", "t-stat", "p-val", "[95%Conf.", "Interv]"],
    index=[
        r"$$\beta_{0}$$",
        "x1",
        "x2",
        " ",
        r"$$\omega_{0}$$",
        r"z1",
        r"$$\sigma^{2}_{v}$$",
    ],
)

theta = np.round(theta.reshape(-1, 1), 3)
sterr = np.round(sterr.reshape(-1, 1), 3)
Zscores = np.round(Zscores.reshape(-1, 1), 3)
Pvalues = np.round(Pvalues.reshape(-1, 1), 3)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 3)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 3)

frontier = np.concatenate(
    [
        theta[:3],
        sterr[:3],
        Zscores[:3],
        Pvalues[:3],
        lower_ConfInt95[:3],
        upper_ConfInt95[:3],
    ],
    axis=1,
)

Omegas = np.concatenate(
    [
        theta[-3:-1],
        sterr[-3:-1],
        Zscores[-3:-1],
        Pvalues[-3:-1],
        lower_ConfInt95[-3:-1],
        upper_ConfInt95[-3:-1],
    ],
    axis=1,
)
sigmas = np.array(
    [
        theta[-1],
        sterr[-1],
        Zscores[-1],
        Pvalues[-1],
        lower_ConfInt95[-1],
        upper_ConfInt95[-1],
    ]
).T

print("\nLL", round(logMLE, 3))
results.iloc[0:3, :] = frontier
results.iloc[3, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[4:6, :] = Omegas
results.iloc[6:, :] = sigmas
display(results)

LL -183.842
Est StEr t-stat p-val [95%Conf. Interv]
$$\beta_{0}$$ 0.776 0.135 5.744 0.0 0.511 1.04
x1 0.523 0.014 36.22 0.0 0.494 0.551
x2 0.677 0.014 47.728 0.0 0.649 0.704
$$\omega_{0}$$ -3.603 0.525 -6.869 0.0 -4.631 -2.575
z1 1.659 0.277 5.988 0.0 1.116 2.202
$$\sigma^{2}_{v}$$ 0.087 0.008 11.285 0.0 0.072 0.102
# Remove all previous function and variable definitions before the next exercise
%reset

3.10 Exercise 3.10: TRE-G model

This exercise estimates the Random Effects SFM of Greene (2005a,b) using MLE for the TaiwaneseManufacturing.csv data.
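A sketch of the specification as implemented below: a random firm intercept \(w_{i}\) is added to the frontier and integrated out by simulation over Halton draws, giving a maximum simulated likelihood estimator:

\[
y_{it} = (\alpha + w_{i}) + \beta_{1}x_{1it} + \beta_{2}x_{2it} + v_{it} - u_{it}, \qquad w_{i} \sim N(0, \sigma^{2}_{w}),
\]
\[
u_{it} \sim N^{+}\!\left(0, \sigma^{2}_{u,it}\right), \qquad \sigma^{2}_{u,it} = e^{\gamma_{0} + \gamma_{1}z_{1it}}, \qquad v_{it} \sim N(0, \sigma^{2}_{v}).
\]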

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize
from statsmodels.tools import sequences


def estimate(y, X):
    np.random.seed(10)

    # Starting values for MLE
    alpha = 0.4
    beta1 = 0.5
    beta2 = 0.7
    gamma0 = -3.1
    gamma1 = 1.5
    sigma2w = 0.4
    sigma2v = 0.1

    # Initial parameter vector
    theta0 = np.array([alpha, beta1, beta2, gamma0, gamma1, sigma2w, sigma2v])
    # Bounds to ensure sigma2w and sigma2v are positive
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-3, np.inf),
        (1e-3, np.inf),
    ]

    # Halton draws (quasi-random uniforms) for the simulated likelihood
    halton_sequence = sequences.halton(dim=6, n_sample=100).T

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 10000, "maxfun": 20000, "maxcor": 100},
        args=(y, X, halton_sequence),
        bounds=bounds,
    )
    theta = MLE_results.x
    logMLE = MLE_results.fun * -1

    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-6)(
        theta, y, X, halton_sequence
    )

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, y, X, halton_sequence):
    # Obtain the log likelihood
    logDen = log_density(coefs, y, X, halton_sequence)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, y, X, halton_sequence):
    alpha = coefs[0]
    beta1 = coefs[1]
    beta2 = coefs[2]
    gamma0 = coefs[3]
    gamma1 = coefs[4]
    sigma2w = coefs[5]
    sigma2v = coefs[6]

    beta = np.array([beta1, beta2])
    w = stats.norm.ppf(
        halton_sequence, loc=0, scale=np.sqrt(sigma2w)
    )  # simulated draws of the random intercept, w ~ N(0, sigma2w)

    panels = np.unique(X[:, 0])
    logDen_is = np.zeros(len(panels))
    for j in range(len(panels)):
        i = panels[j]
        y_i = y[y[:, 0] == i, -1]
        X_i = X[X[:, 0] == i, 1:3]
        z_i = X[X[:, 0] == i, -1]

        sigma2u = np.exp(gamma0 + gamma1 * z_i)
        sigma2 = sigma2u + sigma2v
        lam = np.sqrt(sigma2u) / np.sqrt(sigma2v)

        X_i_beta = X_i @ beta
        # Replicate the data across the 100 Halton draws (columns)
        y_i_rep = np.repeat(y_i.reshape(-1, 1), 100, axis=1)
        X_i_beta_rep = np.repeat(X_i_beta.reshape(-1, 1), 100, axis=1)
        sigma2_rep = np.repeat(sigma2.reshape(-1, 1), 100, axis=1)
        lam_rep = np.repeat(lam.reshape(-1, 1), 100, axis=1)

        simulated_component_1 = np.mean(
            np.log(
                stats.norm.pdf(
                    (y_i_rep - (alpha + w) - X_i_beta_rep) / np.sqrt(sigma2_rep)
                )
            ),
            axis=1,
        )
        # Phi(-lambda * eps / sigma), as in the half-normal SF density; the cdf
        # is floored to avoid taking log(0)
        simulated_component_2 = np.mean(
            np.log(
                np.maximum(
                    stats.norm.cdf(
                        (-lam_rep * (y_i_rep - (alpha + w) - X_i_beta_rep))
                        / np.sqrt(sigma2_rep)
                    ),
                    1e-300,
                )
            ),
            axis=1,
        )

        LL_i = (
            np.log(2 / np.sqrt(sigma2))
            + simulated_component_1
            + simulated_component_2
        )
        logDen_is[j] = np.sum(LL_i)

    return logDen_is
production_data = pd.read_csv(r"TaiwaneseManufacturing.csv")
y = production_data[["id", "y"]].values
X = production_data[["id", "x1", "x2", "z1"]].values

# Estimate coefficients via MLE
theta, sterr, logMLE = estimate(y, X)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "StEr", "t-stat", "p-val", "[95%Conf.", "Interv]"],
    index=[
        r"$$\beta_{0}$$",
        "x1",
        "x2",
        "$$\sigma^{2}_{u}$$",
        "$$\gamma_{0}$$",
        "z1",
        " ",
        r"$$\sigma^{2}_{v}$$",
        r"$$\sigma^{2}_{w}$$",
    ],
)
theta = np.round(theta.reshape(-1, 1), 3)
sterr = np.round(sterr.reshape(-1, 1), 3)
Zscores = np.round(Zscores.reshape(-1, 1), 3)
Pvalues = np.round(Pvalues.reshape(-1, 1), 3)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 3)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 3)

frontier = np.concatenate(
    [
        theta[:3],
        sterr[:3],
        Zscores[:3],
        Pvalues[:3],
        lower_ConfInt95[:3],
        upper_ConfInt95[:3],
    ],
    axis=1,
)

Gammas = np.concatenate(
    [
        theta[3:5],
        sterr[3:5],
        Zscores[3:5],
        Pvalues[3:5],
        lower_ConfInt95[3:5],
        upper_ConfInt95[3:5],
    ],
    axis=1,
)
sigmas = np.concatenate(
    [
        theta[-2:],
        sterr[-2:],
        Zscores[-2:],
        Pvalues[-2:],
        lower_ConfInt95[-2:],
        upper_ConfInt95[-2:],
    ],
    axis=1,
)

print("\nLL", round(logMLE, 3))
results.iloc[0:3, :] = frontier
results.iloc[3, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[4:6, :] = Gammas
results.iloc[6, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[7:, :] = sigmas
display(results)

LL -337.505
Est StEr t-stat p-val [95%Conf. Interv]
$$\beta_{0}$$ 0.068 0.033 2.082 0.037 0.004 0.132
x1 0.502 0.027 18.666 0.0 0.45 0.555
x2 0.681 0.024 27.983 0.0 0.633 0.728
$$\sigma^{2}_{u}$$
$$\gamma_{0}$$ -3.199 0.251 -12.763 0.0 -3.69 -2.708
z1 1.386 0.235 5.908 0.0 0.927 1.846
$$\sigma^{2}_{w}$$ 0.001 0.026 0.039 0.969 -0.05 0.052
$$\sigma^{2}_{v}$$ 0.354 0.06 5.902 0.0 0.236 0.472
# Remove all previous function and variable definitions before the next exercise
%reset

3.11 Exercise 3.11: “True” Fixed Effects model of Wang and Ho (2010)

This exercise estimates the fixed effects stochastic frontier model of Wang and Ho (2010) using MLE for the TaiwaneseManufacturing.csv data.
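A sketch of the model as implemented below. The inefficiency term is multiplicatively scaled, \(u_{it} = h_{it}u^{*}_{i}\) with \(h_{it} = \gamma z_{1it}\) here, so first-differencing removes the fixed effects \(\alpha_{i}\) without affecting the distribution of \(u^{*}_{i}\):

\[
y_{it} = \alpha_{i} + \beta_{1}x_{1it} + \beta_{2}x_{2it} + v_{it} - h_{it}u^{*}_{i}, \qquad u^{*}_{i} \sim N^{+}(0, \sigma^{2}_{u}),
\]
\[
\Delta y_{it} = \Delta x_{it}'\beta + \Delta v_{it} - \Delta h_{it}u^{*}_{i}, \qquad \Delta v_{i} \sim N(0, \Sigma),
\]

where \(\Sigma\) is the \((T-1)\times(T-1)\) tridiagonal matrix with \(2\sigma^{2}_{v}\) on the diagonal and \(-\sigma^{2}_{v}\) on the off-diagonals.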

import numdifftools as nd
import numpy as np
import pandas as pd
from scipy import stats
from scipy.optimize import minimize


def estimate(y, X):
    np.random.seed(10)
    N, p = X.shape

    # Starting values for MLE
    beta1 = 0.53
    beta2 = 0.68
    gamma = 0.53  # Coefficient for z1
    sigma2u = 0.15
    sigma2v = 0.1

    # Initial parameter vector
    theta0 = np.array([beta1, beta2, gamma, sigma2u, sigma2v])
    # Bounds to ensure sigma2u and sigma2v are positive
    bounds = [(None, None) for x in range(len(theta0) - 2)] + [
        (1e-3, np.inf),
        (1e-3, np.inf),
    ]

    MLE_results = minimize(
        loglikelihood,
        theta0,
        method="L-BFGS-B",
        options={"maxiter": 1000, "maxfun": 10000, "maxcor": 100},
        args=(y, X),
        bounds=bounds,
    )

    theta = MLE_results.x
    logMLE = MLE_results.fun * -1

    # Estimate the hessian
    hessian = nd.Hessian(f=loglikelihood, method="central", step=1e-5)(theta, y, X)

    # Estimation of standard errors
    ster = np.sqrt(np.diag(np.linalg.inv(hessian)))

    return theta, ster, logMLE


def loglikelihood(coefs, y, X):
    # Obtain the log-likelihood
    logDen = log_density(coefs, y, X)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, y, X):
    beta1 = coefs[0]
    beta2 = coefs[1]
    gamma = coefs[2]
    sigma2_u = coefs[3]
    sigma2_v = coefs[4]

    beta = np.array([beta1, beta2])
    # Var(delta v_i): (T-1)x(T-1) tridiagonal matrix with 2*sigma2_v on the
    # diagonal and -sigma2_v off it (T - 1 = 5 for this balanced panel)
    Sigma = np.zeros((5, 5))
    for i in range(5):
        for j in range(5):
            if j == i - 1:
                Sigma[i, j] = -sigma2_v
            elif i == j:
                Sigma[i, j] = 2 * sigma2_v
            elif j == i + 1:
                Sigma[i, j] = -sigma2_v

    panels = np.unique(X[:, 0])
    logDen_is = np.zeros(len(panels))
    for j in range(len(panels)):
        i = panels[j]
        y_i = y[y[:, 0] == i, -1]
        X_i = X[X[:, 0] == i, 1:3]
        z_i = X[X[:, 0] == i, -1]
        T = len(X_i)

        delta_y_i = np.diff(y_i)
        delta_X_i = np.diff(X_i, axis=0)
        delta_eps_i = delta_y_i - delta_X_i @ beta
        h_i = z_i * gamma
        delta_h_i = np.diff(h_i)
        sigma2_stari = sigma2_u / (
            sigma2_u * delta_h_i.T @ np.linalg.inv(Sigma) @ delta_h_i + 1
        )
        mu_i_star = 0 - (sigma2_u * delta_eps_i @ np.linalg.inv(Sigma) @ delta_h_i) / (
            sigma2_u * delta_h_i.T @ np.linalg.inv(Sigma) @ delta_h_i + 1
        )

        LL_i = (
            np.log(
                np.sqrt(sigma2_stari)
                * stats.norm.cdf(mu_i_star / np.sqrt(sigma2_stari))
            )
            - 0.5
            * (
                delta_eps_i @ np.linalg.inv(Sigma) @ delta_eps_i
                + (0 / np.sqrt(sigma2_u)) ** 2
                - (mu_i_star / np.sqrt(sigma2_stari)) ** 2
            )
            - (T - 1) * np.log(np.sqrt(sigma2_v))
            - np.log(np.sqrt(sigma2_u) * stats.norm.cdf(0 / np.sqrt(sigma2_u)))
        )
        logDen_is[j] = LL_i

    return logDen_is
production_data = pd.read_csv(r"TaiwaneseManufacturing.csv")

y = production_data[["id", "y"]].values
X = production_data[["id", "x1", "x2", "z1"]].values

theta, sterr, logMLE = estimate(y, X)

Zscores = theta / sterr
Pvalues = 2 * (1 - stats.norm.cdf(np.abs(Zscores)))
lower_ConfInt95 = stats.norm.ppf(0.025, loc=theta, scale=sterr)
upper_ConfInt95 = stats.norm.ppf(0.975, loc=theta, scale=sterr)

# Display the results as a table
results = pd.DataFrame(
    columns=["Est", "StEr", "z-stat", "p-val", "[95%Conf.", "Interv]"],
    index=[
        "x1",
        "x2",
        " ",
        "z1",
        r"$$\sigma^{2}_{v}$$",
        r"$$\sigma^{2}_{w}$$",
    ],
)

theta = np.round(theta.reshape(-1, 1), 3)
sterr = np.round(sterr.reshape(-1, 1), 3)
Zscores = np.round(Zscores.reshape(-1, 1), 3)
Pvalues = np.round(Pvalues.reshape(-1, 1), 3)
lower_ConfInt95 = np.round(lower_ConfInt95.reshape(-1, 1), 3)
upper_ConfInt95 = np.round(upper_ConfInt95.reshape(-1, 1), 3)

frontier = np.concatenate(
    [
        theta[:2],
        sterr[:2],
        Zscores[:2],
        Pvalues[:2],
        lower_ConfInt95[:2],
        upper_ConfInt95[:2],
    ],
    axis=1,
)

deltas = np.concatenate(
    [
        theta[2],
        sterr[2],
        Zscores[2],
        Pvalues[2],
        lower_ConfInt95[2],
        upper_ConfInt95[2],
    ]
)
sigmas = np.concatenate(
    [
        theta[-2:],
        sterr[-2:],
        Zscores[-2:],
        Pvalues[-2:],
        lower_ConfInt95[-2:],
        upper_ConfInt95[-2:],
    ],
    axis=1,
)

print("\nLL", round(logMLE, 3))
results.iloc[0:2, :] = frontier
results.iloc[2, :] = np.full(shape=(1, 6), fill_value=" ")
results.iloc[3:4, :] = deltas
results.iloc[4:, :] = sigmas
display(results)

LL 265.28
Est StEr z-stat p-val [95%Conf. Interv]
x1 0.521 0.015 34.112 0.0 0.491 0.551
x2 0.691 0.015 46.183 0.0 0.662 0.72
z1 0.56 27.901 0.02 0.984 -54.125 55.246
$$\sigma^{2}_{u}$$ 0.2 19.894 0.01 0.992 -38.792 39.191
$$\sigma^{2}_{v}$$ 0.111 0.008 14.727 0.0 0.096 0.126

3.12 Exercise 3.12: KLH model of Kumbhakar et al. (2014)

This exercise estimates the model of Kumbhakar et al. (2014), which allows for both persistent and time-varying inefficiency, using the TaiwaneseManufacturing.csv data.
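A sketch of the error decomposition and the multi-step estimator implemented below:

\[
y_{it} = \alpha_{0} + x_{it}'\beta + \mu_{i} - \eta_{i} + v_{it} - u_{it},
\]

where \(\eta_{i}\) is persistent inefficiency and \(u_{it}\) is time-varying inefficiency. Step 1 fits a random effects regression to separate the time-invariant component \(\mu_{i} - \eta_{i}\) from the time-varying residual \(v_{it} - u_{it}\); steps 2 and 3 fit intercept-only half-normal stochastic frontier models to each component, with point efficiencies obtained from the conditional expectation \(E[e^{-u}\mid\varepsilon]\) (the BC98_TE function). Overall efficiency is the product of the persistent and time-varying parts.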

import numdifftools as nd
import numpy as np
import pandas as pd
# To install the linearmodels package run: pip install linearmodels
from linearmodels import RandomEffects
from scipy import stats
from scipy.optimize import minimize


def loglikelihood(coefs, y, x1):
    # Obtain the log likelihood
    logDen = log_density(coefs, y, x1)
    log_likelihood = -np.sum(logDen)

    return log_likelihood


def log_density(coefs, y, x1):
    # Get parameters
    beta1 = coefs[0]
    sigma2u = coefs[1]
    sigma2v = coefs[2]
    Lambda = np.sqrt(sigma2u / sigma2v)
    sigma2 = sigma2u + sigma2v
    sigma = np.sqrt(sigma2)

    # Composed errors from the production function equation
    eps = y - x1 * beta1

    # Compute the log density
    Den = (
        (2 / sigma)
        * stats.norm.pdf(eps / sigma)
        * stats.norm.cdf(-Lambda * eps / sigma)
    )
    logDen = np.log(Den)

    return logDen


def BC98_TE(theta, y, x1):
    beta1 = theta[0]
    sigma2u = theta[1]
    sigma2v = theta[2]

    sigma2 = sigma2u + sigma2v
    epsilon = y - x1 * beta1
    u_star = (-sigma2u * epsilon) / (sigma2)
    sigma2_star = (sigma2v * sigma2u) / (sigma2v + sigma2u)

    TE = np.exp(-u_star + 0.5 * sigma2_star) * (
        stats.norm.cdf((u_star / np.sqrt(sigma2_star)) - np.sqrt(sigma2_star))
        / stats.norm.cdf(u_star / np.sqrt(sigma2_star))
    )

    return TE
production_data = pd.read_csv(r"TaiwaneseManufacturing.csv")

# Estimate the random effects regression model
production_data = production_data.set_index(keys=["id", "time"], drop=False)
production_data["intercept"] = 1
onee = np.ones(len(production_data))  # constant regressor for the intercept-only SFMs below

RE_model = RandomEffects(
    production_data["y"], production_data[["intercept", "x1", "x2"]]
)
RE_result = RE_model.fit()

# Estimated individual effects (time-invariant component of the error)
alphai = production_data["y"].values - (
    production_data[["intercept", "x1", "x2"]].values @ RE_result.params.values
    + RE_result.resids.values.flatten()
)
error = RE_result.resids.values.flatten()  # time-varying residuals
combined_error = error + alphai

# First step MLE
beta1 = 0.36
sigma2u = 0.21
sigma2v = 0.073

# Initial parameter vector
theta0 = np.array([beta1, sigma2u, sigma2v])

# Bounds to ensure sigma2u and sigma2v are positive
bounds = [(None, None) for x in range(len(theta0) - 2)] + [
    (1e-3, np.inf),
    (1e-3, np.inf),
]

x1 = onee
MLE_results1 = minimize(
    loglikelihood,
    theta0,
    method="L-BFGS-B",
    options={"maxiter": 1000, "maxfun": 10000, "maxcor": 100},
    args=(error, x1),
    bounds=bounds,
)
theta1 = MLE_results1.x
logMLE1 = MLE_results1.fun

tv_TE = BC98_TE(theta=theta1, y=error, x1=x1)

# Second step MLE
beta1 = 0.36
sigma2u = 0.2
sigma2v = 0.06
# Initial parameter vector
theta0 = np.array([beta1, sigma2u, sigma2v])

# Bounds to ensure sigma2u and sigma2v are positive
bounds = [(None, None) for x in range(len(theta0) - 2)] + [
    (1e-3, np.inf),
    (1e-3, np.inf),
]

MLE_results2 = minimize(
    loglikelihood,
    theta0,
    method="L-BFGS-B",
    options={"maxiter": 1000, "maxfun": 10000, "maxcor": 100},
    args=(alphai, x1),
    bounds=bounds,
)
theta2 = MLE_results2.x
logMLE2 = MLE_results2.fun

pers_TE = BC98_TE(theta=theta2, y=alphai, x1=x1)

o_TE = pers_TE * tv_TE

TE_matrix = pd.DataFrame(data={"o_TE": o_TE, "pers_TE": pers_TE, "tv_TE": tv_TE})

display(TE_matrix.describe(percentiles=[]).T)
count mean std min 50% max
o_TE 600.0 0.548567 0.146164 0.032409 0.557264 0.842603
pers_TE 600.0 0.785849 0.102432 0.427305 0.801106 0.927208
tv_TE 600.0 0.690615 0.138213 0.058205 0.712146 0.910178

3.13 Exercise 3.13: Matrix form of FE and RE estimators

This exercise implements the FE and RE regressions by hand using the matrix forms outlined in Appendix 3.B. It uses the World Bank growth data for 82 countries between 1960 and 1987, contained in the file WBpanel.csv.
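In the notation of Appendix 3.B, with \(e_{T}\) a \(T\)-vector of ones, the between and within projection matrices are

\[
P = I_{N} \otimes \tfrac{1}{T}e_{T}e_{T}', \qquad Q = I_{NT} - P,
\]

so the FE (within) estimator is \(\hat{\beta}_{FE} = (X'QX)^{-1}X'Qy\), and the RE estimator is OLS on the quasi-demeaned data \(\tilde{y} = y - (1-\hat{\rho})Py\) and \(\tilde{X} = X - (1-\hat{\rho})PX\), with \(\hat{\rho} = \sqrt{\sigma^{2}_{\varepsilon}/(\sigma^{2}_{\varepsilon} + T\sigma^{2}_{u})}\).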

import numpy as np
import pandas as pd
from scipy import stats

# Fixed Effects
WBpanel_data = pd.read_csv('WBpanel.csv')
C = np.unique(WBpanel_data['country'])

N = len(C)  # Number of cross-sectional units
n, p = WBpanel_data.shape
T = 28  # Observations per country (balanced panel)

X = WBpanel_data[['lnk', 'lnl', 'yr']].values
y = WBpanel_data[['lny']].values

e_T = np.ones((T,1))
P_star = 1/T*(e_T@e_T.T)
Q_star = np.eye(T) - P_star
P = np.kron(np.eye(N), P_star)
Q = np.kron(np.eye(N), Q_star)

beta_fe = np.linalg.inv(X.T@Q@X)@X.T@Q@y
k = len(beta_fe)
y_bar = np.mean(y).reshape(-1,1)
X_bar = np.mean(X, axis = 0).reshape(-1,1)
alpha_hat = (y_bar - X_bar.T@beta_fe)
fe_coef = np.concatenate([alpha_hat, beta_fe])

# We follow the definition of r-squared used in STATA, based on squared correlations
y_hat_overall = alpha_hat + X@beta_fe
R_squared_overall_corr = np.corrcoef(y.flatten(), y_hat_overall.flatten())[0,1]**2

y_hat_between = alpha_hat + (P@X)@beta_fe
R_squared_between_corr = np.corrcoef((P@y).flatten(), y_hat_between.flatten())[0,1]**2

y_hat_within = (Q@X)@beta_fe
R_squared_within_corr = np.corrcoef((Q@y).flatten(), y_hat_within.flatten())[0,1]**2

# Standard error estimation
y_dot = Q@y
X_dot = Q@X
epsilon = y_dot - X_dot@beta_fe
sigma2_epsilon = (1/(n - N - k))*epsilon.T@epsilon

bread = np.linalg.inv(X_dot.T@X_dot)
filling = sum([X_dot[(i*T):((i+1)*T),:].T@epsilon[(i*T):((i+1)*T)]
               @epsilon[(i*T):((i+1)*T)].T@X_dot[(i*T):((i+1)*T),:]
               for i in range(N)])
cluster_robust_Sigma = (N/(N-1))*(bread)@(filling)@(bread)
robust_std_err = np.sqrt(np.diag(cluster_robust_Sigma)).reshape(-1,1)

# Standard error for the intercept (mean value of the fixed effects)
std_err_alpha_hat = np.sqrt((sigma2_epsilon/T)+(X_bar.T@cluster_robust_Sigma@X_bar))
std_errs = np.concatenate([std_err_alpha_hat, robust_std_err])

# T-statistics and P-values
Tstat = fe_coef/std_errs
Pvalues = 2*(1 - stats.t.cdf(np.abs(Tstat), df = n - N - k))
lower_ConfInt95 = stats.t.ppf(0.025, df = n - N - k, 
                              loc = fe_coef, 
                              scale = std_errs)
upper_ConfInt95 = stats.t.ppf(0.975, df = n - N - k, 
                              loc = fe_coef, 
                              scale = std_errs)
# fe_coef and std_errs order the intercept first, so the index must match
display(pd.DataFrame(data = {'Coef.':fe_coef.ravel(),
                           'Robust Std. Err.':std_errs.ravel(),
                           'T-Stat': Tstat.ravel(),
                           'P-value': Pvalues.ravel(),
                            'Lower CI':lower_ConfInt95.ravel(),
                            'Upper CI':upper_ConfInt95.ravel()},
                     index = ['_cons', 'lnk', 'lnl', 'yr']))
print('R-squared within ', R_squared_within_corr)
print('R-squared between ', R_squared_between_corr)
print('R-squared overall ', R_squared_overall_corr)
Coef. Robust Std. Err. T-Stat P-value Lower CI Upper CI
_cons 0.509883 0.181705 2.806108 0.005058 0.153553 0.866212
lnk 0.531544 0.051674 10.286444 0.000000 0.430209 0.632879
lnl 0.133646 0.109927 1.215769 0.224203 -0.081925 0.349217
yr 0.007467 0.002957 2.525304 0.011629 0.001668 0.013266
R-squared within  0.8933515713213597
R-squared between  0.9674700339386045
R-squared overall  0.9545972995058635
# Random Effects
beta_hat_within = np.linalg.pinv(X_dot.T@X_dot)@X_dot.T@y_dot #OLS applied to the de-meaned data
Y_bar = np.mean(y).reshape(-1,1)
X_bar = np.mean(X, axis = 0).reshape(-1,1)
alpha_hat_within = (Y_bar - X_bar.T@beta_hat_within)
params = np.concatenate([beta_hat_within, alpha_hat_within])
k = len(beta_hat_within)

epsilon = y_dot - X_dot@beta_hat_within
sigma2_epsilon = (1/(n - N - k))*epsilon.T@epsilon

cross_sectional_means_regression_data =  WBpanel_data.groupby(['country'])[['lny', 'lnk', 'lnl', 'yr']].mean()

Y_bar = cross_sectional_means_regression_data[['lny']].values
all_X_bar = cross_sectional_means_regression_data[['lnk', 'lnl', 'yr']].values
X_bar = all_X_bar[:,:-1] #drop the yr variable
X_bar = np.column_stack([np.ones((N,1)), X_bar]) #add intercept

beta_between = np.linalg.pinv(X_bar.T@X_bar)@X_bar.T@Y_bar #OLS applied to the cross sectional mean data
epsilon_between = Y_bar - X_bar@beta_between
sigma2_b = 1/(N - k)*epsilon_between.T@epsilon_between

sigma2_u = max(0, sigma2_b - (sigma2_epsilon/T))
rho_hat = np.sqrt(sigma2_epsilon)/(np.sqrt(sigma2_epsilon + T*sigma2_u))

#Compute feasible transformations
y_all_cross_sectional_means = P@y
X_all_cross_sectional_means = P@X
X_all_cross_sectional_means = np.column_stack([np.ones((n,1)),X_all_cross_sectional_means]) #add intercept
Y_tilde = y - (1 - rho_hat)*y_all_cross_sectional_means
X_tilde = np.column_stack([np.ones((n,1)),X]) - (1 - rho_hat)*X_all_cross_sectional_means

beta_random_effects = np.linalg.pinv(X_tilde.T@X_tilde)@X_tilde.T@Y_tilde #OLS applied to the de-meaned data

# We follow the definition of r-squared used in STATA, based on squared correlations
y_hat_overall = beta_random_effects[0] + X@beta_random_effects[1:]
R_squared_overall_corr = np.corrcoef(y.flatten(), y_hat_overall.flatten())[0,1]**2

y_hat_between = beta_random_effects[0] + (P@X)@beta_random_effects[1:]
R_squared_between_corr = np.corrcoef((P@y).flatten(), y_hat_between.flatten())[0,1]**2

y_hat_within = (Q@X)@beta_random_effects[1:]
R_squared_within_corr = np.corrcoef((Q@y).flatten(), y_hat_within.flatten())[0,1]**2

epsilon_random_effects = Y_tilde - X_tilde@beta_random_effects
Omega_inv = (1/sigma2_epsilon)*(Q + (rho_hat**2)*P)
Omega_star_inv = np.linalg.inv(sigma2_epsilon*Q_star + (sigma2_epsilon + T*sigma2_b)*P_star)

X = np.column_stack([np.ones((n,1)), X])
bread = np.linalg.inv(X.T@Omega_inv@X)
filling = sum([X[(i*T):((i+1)*T),:].T@Omega_star_inv@epsilon_random_effects[(i*T):((i+1)*T)]
               @epsilon_random_effects[(i*T):((i+1)*T)].T@Omega_star_inv@X[(i*T):((i+1)*T),:]
               for i in range(N)])
cluster_robust_Sigma = bread@filling@bread
robust_std_err = np.sqrt(np.diag(cluster_robust_Sigma))

# Z-statistics and P-values
Zstat = beta_random_effects.flatten()/robust_std_err
Pvalues = 2*(1 - stats.norm.cdf(np.abs(Zstat)))
lower_ConfInt95 = stats.norm.ppf(0.025, 
                              loc = beta_random_effects.flatten(), 
                              scale = robust_std_err)
upper_ConfInt95 = stats.norm.ppf(0.975, 
                              loc = beta_random_effects.flatten(), 
                              scale = robust_std_err)
display(pd.DataFrame(data = {'Coef.':beta_random_effects.ravel(), 
                           'Robust Std. Err.':robust_std_err.ravel(), 
                           'Z-Stat': Zstat.ravel(), 
                           'P-value': Pvalues.ravel(), 
                            'Lower CI':lower_ConfInt95.ravel(), 
                            'Upper CI':upper_ConfInt95.ravel()}, 
                     index = ['_cons', 'lnk', 'lnl', 'yr']))

print('R-squared within ', R_squared_within_corr)
print('R-squared between ', R_squared_between_corr)
print('R-squared overall ', R_squared_overall_corr)
Coef. Robust Std. Err. Z-Stat P-value Lower CI Upper CI
_cons 0.114532 0.101448 1.128974 0.258909 -0.084302 0.313366
lnk 0.607688 0.043080 14.106006 0.000000 0.523253 0.692124
lnl 0.251772 0.053337 4.720446 0.000002 0.147235 0.356310
yr 0.000518 0.002191 0.236569 0.812992 -0.003777 0.004814
R-squared within  0.890544528855278
R-squared between  0.9565239715120661
R-squared overall  0.9512782824088507