* 4th Nov 2022
* 06:03 am
* (The date/time header must be written as "*" comments: a line starting with
*  "-" is parsed as a command and stops the do-file with an error.)
* initialize: empty the data in memory and disable the --more-- pager
clear
set more off
* NOTE(review): -set mem- is only meaningful in Stata 11 and earlier; later
* releases manage memory automatically -- confirm the target Stata version.
set mem 2g
* set working directory
cd /* enter the path here */
*--------------------------
* Q1. Patent application
*--------------------------
* import data
use "chem_patents_maindataset.dta"
* 1(a) range of grant years in the data (min and max of grntyr)
summarize grntyr
* Comment: The data covers the sample period from 1875 to 1939
* NOTE(review): the original comment read "1939 to 1975", which cannot contain
* the 1919 licensing year reported in 1(b) and is too short for the 65 year
* dummies (td1-td65) discussed in part 6 -- confirm against the summarize output.
* 1(b) one-way table of grant years; subpop(treat) excludes observations with treat == 0
tabulate grntyr, subpop(treat)
* Comment: The firms started to receive licensing in year 1919
* 2
summarize count_usa
* Comment: maximum = 68, minimum = 0, mean = 0.349
* 3(a) histogram of count_cl for grant year 1919 only
* An -if- qualifier is used instead of -drop if grntyr != 1919- so the other
* years stay in memory: parts 4-7 need every grant year to build the year
* dummies and run the regressions.
histogram count_cl if grntyr == 1919
* Recorded output: (bin=38, start=0, width=.39473684)
* Shape of distribution is nearly normal
* 3(b) same histogram with frequencies (counts) on the vertical axis
histogram count_cl if grntyr == 1919, freq
* Recorded output: (bin=38, start=0, width=.39473684)
* there are 38 bins of non zero classes
* 4 create dummy variables from grntyr (one indicator td1, td2, ... per distinct grant year)
tab grntyr, gen(td)
* 5 finish the command below
reg count_usa treat, robust /* equation 1 */
reg count_usa count_cl, robust /* equation 2 */
* 6 finish the command below.
reg count_usa treat td1-td64, robust /* equation 3 */
reg count_usa count_cl td1-td64, robust /* equation 4 */
* Reason why we don't include all the dummies (from td1 to td65): the full set
* sums to one for every observation, so it is perfectly collinear with the
* constant term (the dummy-variable trap) and the coefficients cannot be
* separately identified. With an intercept we therefore always include (n-1)
* dummy variables for n categories.
* 7(a) We assume that the instrument count_cl_itt is uncorrelated with the error
* term and correlated with count_cl, so we use ivregress to get a consistent
* estimate of the beta parameter.
* Also, we allow the errors to be heteroskedastic (robust option) and assume
* they are independent across observations.
* 7(b) finish the command below. It should look like "ivregress 2sls ... (... = ...), robust"
ivregress 2sls count_usa treat (count_cl = count_cl_itt), robust
* The IV estimate is helpful for estimating beta when the regressor is
* correlated with the error term, in which case OLS is biased.
* 7(c) Run two-stage least squares manually.
* First stage: regress the endogenous regressor on the instrument.
reg count_cl count_cl_itt, robust /* equation 5 */
* Store the first-stage linear prediction (fitted value) in count_cl_hat.
predict double count_cl_hat, xb
* Second stage: regress the outcome on the FITTED value, not on count_cl itself
* (using count_cl here would simply repeat equation 2 and ignore the first stage).
reg count_usa count_cl_hat, robust /* equation 6 */
* NOTE(review): the standard errors reported by this manual second stage are not
* valid 2SLS standard errors; use the ivregress output for inference.
* NOTE(review): the ivregress command in 7(b) also includes treat as an exogenous
* regressor, while equations 5 and 6 do not; the manual point estimate will only
* match ivregress once both use the same specification -- confirm which is intended.
* We have now got beta of 0.16 whereas earlier we had 0.94, i.e. a larger effect
* was shown before than now.
* NOTE(review): these figures were recorded before the second stage used
* count_cl_hat -- re-run and update them.
*---------------------------------
* Q2. Monte Carlo simulations
* N = 1000
* beta0 = 1, beta1 = 3, gamma = 2
*---------------------------------
* Design: x = e + 2u and y = 1 + 3x + u, so the regressor is correlated with
* the error term u. Each replication stores the OLS slope from regressing y on x.

* start from an empty session
clear
set more off

* fix the seed so that the same random draws are produced on every run
set seed 1234

* open the results file: close any leftover handle called tempid, then declare
* one variable (beta_hat) to be written to ols_estimate.dta, overwriting it
capture postclose tempid
postfile tempid beta_hat using ols_estimate, replace

* 10000 replications of the experiment
forvalues rep = 1/10000 {
    * discard the previous replication's variables
    drop _all
    quietly {
        * sample size N
        set obs 1000
        * two independent standard normal draws (e is drawn before u)
        generate e = rnormal()
        generate u = rnormal()
        * regressor x = e + 2u
        generate x = e + (2 * u)
        * outcome y = 1 + 3x + u
        generate y = 1 + (3 * x) + u
        * OLS of y on x, output suppressed
        regress y x
    }
    * append this replication's slope estimate to the results file
    post tempid (_b[x])
}
postclose tempid

* replace the data in memory with the stored estimates
clear
use ols_estimate

* mean of beta_hat across replications (left in r(mean) by summarize)
quietly summarize beta_hat
display r(mean)
* Interpretation: because x contains 2u, cov(x,u) = 2 and var(x) = 5, so the
* mean of beta_hat should sit near 3 + 2/5 = 3.4 rather than the true value 3.
* The sampling distribution is bell-shaped (approximately normal).

* plot the sampling distribution
twoway histogram beta_hat, title("sampling distribution of beta_hat")
*---------------------------------
* Q2. Monte Carlo simulations
* N = 1000
* beta0 = 1, beta1 = 3, gamma = 0
*---------------------------------
* Design: x = e and y = 1 + 3x + u, with e and u drawn independently, so the
* regressor is unrelated to the error term. Each replication stores the OLS
* slope from regressing y on x.

* start from an empty session
clear
set more off

* fix the seed so that the same random draws are produced on every run
set seed 1234

* open the results file: close any leftover handle called tempid, then declare
* one variable (beta_hat) to be written to ols_estimate.dta, overwriting it
capture postclose tempid
postfile tempid beta_hat using ols_estimate, replace

* 10000 replications of the experiment
forvalues rep = 1/10000 {
    * discard the previous replication's variables
    drop _all
    quietly {
        * sample size N
        set obs 1000
        * two independent standard normal draws (e is drawn before u)
        generate e = rnormal()
        generate u = rnormal()
        * regressor x = e
        generate x = e
        * outcome y = 1 + 3x + u
        generate y = 1 + (3 * x) + u
        * OLS of y on x, output suppressed
        regress y x
    }
    * append this replication's slope estimate to the results file
    post tempid (_b[x])
}
postclose tempid

* replace the data in memory with the stored estimates
clear
use ols_estimate

* mean of beta_hat across replications (left in r(mean) by summarize)
quietly summarize beta_hat
display r(mean)
* Interpretation: unbiasedness is judged from this mean being close to the true
* value 3, not from the shape of the histogram. The sampling distribution is
* also bell-shaped (approximately normal).

* plot the sampling distribution
twoway histogram beta_hat, title("sampling distribution of beta_hat")
*---------------------------------
* Q2. Monte Carlo simulations
* N = 10
* beta0 = 1, beta1 = 3, gamma = 0
*---------------------------------
* Design: same as the previous experiment (x = e, y = 1 + 3x + u) but with only
* 10 observations per replication. Each replication stores the OLS slope from
* regressing y on x.

* start from an empty session
clear
set more off

* fix the seed so that the same random draws are produced on every run
set seed 1234

* open the results file: close any leftover handle called tempid, then declare
* one variable (beta_hat) to be written to ols_estimate.dta, overwriting it
capture postclose tempid
postfile tempid beta_hat using ols_estimate, replace

* 10000 replications of the experiment
forvalues rep = 1/10000 {
    * discard the previous replication's variables
    drop _all
    quietly {
        * sample size N
        set obs 10
        * two independent standard normal draws (e is drawn before u)
        generate e = rnormal()
        generate u = rnormal()
        * regressor x = e
        generate x = e
        * outcome y = 1 + 3x + u
        generate y = 1 + (3 * x) + u
        * OLS of y on x, output suppressed
        regress y x
    }
    * append this replication's slope estimate to the results file
    post tempid (_b[x])
}
postclose tempid

* replace the data in memory with the stored estimates
clear
use ols_estimate

* mean of beta_hat across replications (left in r(mean) by summarize)
quietly summarize beta_hat
display r(mean) /* 2.99 is mean value from stata output*/

* plot the sampling distribution
twoway histogram beta_hat, title("sampling distribution of beta_hat")
* The sampling distribution follows a normal (bell-shaped) distribution.