-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathscpi_illustration.py
177 lines (144 loc) · 6.5 KB
/
scpi_illustration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
###############################################################################
# SCPI Python Package
# Script for Empirical Illustration - Single Treated Unit
# Authors: Matias D. Cattaneo, Yingjie Feng, Filippo Palomba and Rocio Titiunik
###############################################################################
########################################
# Load SCPI_PKG package
import pandas
import numpy
import os
from copy import deepcopy
from warnings import filterwarnings
from scpi_pkg.scdata import scdata
from scpi_pkg.scdataMulti import scdataMulti
from scpi_pkg.scest import scest
from scpi_pkg.scpi import scpi
from scpi_pkg.scplot import scplot
from scpi_pkg.scplotMulti import scplotMulti
########################################
# Load database
os.chdir("YOUR_PATH_HERE")
data = pandas.read_csv("scpi_germany.csv")
filterwarnings("ignore")
##############################################################################
# SINGLE TREATED UNIT
##############################################################################
########################################
# Set options for data preparation
id_var = 'country'
outcome_var = 'gdp'
time_var = 'year'
features = None
cov_adj = None
period_pre = numpy.arange(1960, 1991)
period_post = numpy.arange(1991, 2004)
unit_tr = 'West Germany'
unit_co = list(set(data[id_var].to_list()))
unit_co = [cou for cou in unit_co if cou != 'West Germany']
constant = False
cointegrated_data = True
data_prep = scdata(df=data, id_var=id_var, time_var=time_var,
outcome_var=outcome_var, period_pre=period_pre,
period_post=period_post, unit_tr=unit_tr,
unit_co=unit_co, features=features, cov_adj=cov_adj,
cointegrated_data=cointegrated_data, constant=constant)
####################################
# SC - point estimation with simplex
est_si = scest(data_prep, w_constr={'name': "simplex"})
print(est_si)
est_si2 = scest(data_prep, w_constr={'p': 'L1', 'dir': '==', 'Q': 1, 'lb': 0})
print(est_si2)
####################################
# SC - plot results
plot = scplot(est_si)
####################################
# SC - point estimation with lasso
est_lasso = scest(data_prep, w_constr={'name': "lasso"})
print(est_lasso)
est_lasso2 = scest(data_prep, w_constr={'p': 'L1', 'dir': '<=', 'Q': 1, 'lb': -numpy.inf})
print(est_lasso2)
####################################
# SC - point estimation with ridge
est_ridge = scest(data_prep, w_constr={'name': "ridge"})
print(est_ridge)
Q_est = est_ridge.w_constr['West Germany']['Q']
est_ridge2 = scest(data_prep, w_constr={'p': 'L2', 'dir': '<=', 'Q': Q_est, 'lb': -numpy.inf})
print(est_ridge2)
####################################
# SC - point estimation with ols
est_ls = scest(data_prep, w_constr={'name': "ols"})
print(est_ls)
est_ls2 = scest(data_prep, w_constr={'p': 'no norm', 'dir': None, 'Q': None, 'lb': -numpy.inf})
print(est_ls2)
####################################
# Set options for inference
w_constr = {'name': 'simplex', 'Q': 1}
u_missp = True
u_sigma = "HC1"
u_order = 1
u_lags = 0
e_method = "gaussian"
e_order = 1
e_lags = 0
e_alpha = 0.05
u_alpha = 0.05
sims = 200
cores = 1
numpy.random.seed(8894)
pi_si = scpi(data_prep, sims=sims, w_constr=w_constr, u_order=u_order, u_lags=u_lags,
e_order=e_order, e_lags=e_lags, e_method=e_method, u_missp=u_missp,
u_sigma=u_sigma, cores=cores, e_alpha=e_alpha, u_alpha=u_alpha)
print(pi_si)
####################################
# SC - plot results
plot = scplot(pi_si)
################################################################################
# Other examples of data preparation
# multiple features
data_prep = scdata(df=data, id_var=id_var, time_var=time_var,
outcome_var=outcome_var, period_pre=period_pre,
period_post=period_post, unit_tr=unit_tr,
unit_co=unit_co, features=['gdp', 'trade'], cov_adj=cov_adj,
cointegrated_data=cointegrated_data, constant=constant)
# multiple features and feature-specific covariate adjustment
data_prep = scdata(df=data, id_var=id_var, time_var=time_var,
outcome_var=outcome_var, period_pre=period_pre,
period_post=period_post, unit_tr=unit_tr,
unit_co=unit_co, features=['gdp', 'trade'], cov_adj=[['constant', 'trend'], []],
cointegrated_data=cointegrated_data, constant=constant)
################################################################################
# Specifying features for different pre-treatment periods or just use pre-treatment averages
# I) we want to include "trade" just for some selected periods, i.e., 1960, 1970, 1980, 1990
tradeUnselPeriods = [t for t in range(1960, 1991) if t not in [1960, 1970, 1980, 1990]]
data['tradeAux'] = data['trade']
data.loc[data['year'].isin(tradeUnselPeriods), 'tradeAux'] = numpy.nan
data_prep = scdata(df=data, id_var=id_var, time_var=time_var,
outcome_var=outcome_var, period_pre=period_pre,
period_post=period_post, unit_tr=unit_tr,
unit_co=unit_co, features=['gdp', 'tradeAux'],
cointegrated_data=cointegrated_data, constant=constant)
data_prep.B
# II) we want to include just the pre-treatment average of "infrate"
dataAux = data[data["year"] <= 1990].groupby(['country'])[['infrate']].mean()
dataAux.rename(columns={"infrate": "infrateAvg"}, inplace=True)
data = data.merge(dataAux, on="country")
data.loc[data['year'] != 1990, "infrateAvg"] = numpy.nan
data_prep = scdata(df=data, id_var=id_var, time_var=time_var,
outcome_var=outcome_var, period_pre=period_pre,
period_post=period_post, unit_tr=unit_tr,
unit_co=unit_co, features=['gdp', 'infrateAvg'],
cointegrated_data=cointegrated_data, constant=constant)
data_prep.B
################################################################################
# Unbalanced panels
# suppose we had an unbalanced panel structure
data.drop(25, inplace=True)
# If we run scdata() now we will get back an error. This is because scdata() requires
# a balanced panel structure with missing values. To get it we can run
data.set_index(['country', 'year'], append=False, drop=False, inplace=True)
df_balanced = (data.reindex(pandas.MultiIndex.from_product([data.index.unique('country'),
data.index.unique('year')],
names=['country', 'year']))
.sort_index())
# and then run scdata()