6 Elementary Data Analysis
6.1 Computing correlations
6.1.1 Computing correlations between two variables in a dataset
# Tests the correlation between the "trade" and "cgexp" variables; prints the
# estimated coefficient along with its t-statistic, p-value and 95% confidence
# interval. cor.test() drops incomplete pairs on its own and has no `use`
# argument (that belongs to cor()), so none is passed here.
cor.test(pt_copy$trade, pt_copy$cgexp)
##
## Pearson's product-moment correlation
##
## data: pt_copy$trade and pt_copy$cgexp
## t = 1.8131, df = 80, p-value = 0.07356
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01915884 0.39850057
## sample estimates:
## cor
## 0.1986743
6.1.2 Generating and exporting a correlation matrix
# Extracts variables for which we want a correlation matrix
desired_variables <- pt_copy %>%
  select(trade, cgexp, cgrev, catho80)
# Creates correlation matrix from "desired_variables" object and assigns to
# object named "cor_matrix"; use = "complete.obs" restricts the calculation to
# rows with no missing values in any of the selected variables
cor_matrix <- cor(desired_variables, use = "complete.obs")
# Prints contents of "cor_matrix"
cor_matrix
## trade cgexp cgrev catho80
## trade 1.00000000 0.1792884 0.3458730 -0.08442666
## cgexp 0.17928838 1.0000000 0.9094998 -0.07010910
## cgrev 0.34587298 0.9094998 1.0000000 -0.05923500
## catho80 -0.08442666 -0.0701091 -0.0592350 1.00000000
# Uses stargazer to show the "cor_matrix" correlation table as text (rounded
# to three digits) and to save it to the file "corr_table.html"
stargazer(
  cor_matrix,
  type = "text",
  title = "Correlation Matrix",
  digits = 3,
  out = "corr_table.html"
)
##
## Correlation Matrix
## ====================================
## trade cgexp cgrev catho80
## ------------------------------------
## trade 1 0.179 0.346 -0.084
## cgexp 0.179 1 0.909 -0.070
## cgrev 0.346 0.909 1 -0.059
## catho80 -0.084 -0.070 -0.059 1
## ------------------------------------
6.2 Basic regression analysis
6.2.1 Running a regression model
# Implements regression with "cgexp" as DV, and assigns to object named
# "regression1"
regression1 <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal + oecd,
  data = pt_copy
)
# Prints regression table
summary(regression1)
##
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 +
## federal + oecd, data = pt_copy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9861 -4.6981 -0.5521 4.4482 16.1124
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.17290 16.08572 3.243 0.00179 **
## gastil -2.15202 1.10609 -1.946 0.05561 .
## lyp -2.04441 2.00721 -1.019 0.31184
## trade 0.04978 0.01924 2.587 0.01170 *
## prop1564 -0.28482 0.26686 -1.067 0.28939
## prop65 1.58627 0.33548 4.728 1.09e-05 ***
## federal -4.58101 2.38015 -1.925 0.05822 .
## oecd 0.96969 2.97171 0.326 0.74514
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.064 on 72 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.5865, Adjusted R-squared: 0.5463
## F-statistic: 14.59 on 7 and 72 DF, p-value: 1.137e-11
6.2.2 Working with categorical variables in a regression model
Working with categorical variables by using factors
# Sets the "continent" variable in pt_copy as a factor (overwrites the column)
pt_copy$continent <- as.factor(pt_copy$continent)
# Examines levels of factor variable
levels(pt_copy$continent)
## [1] "africa" "asiae" "laam" "other"
# Relevels factor variable to set "other" as reference category
pt_copy$continent <- relevel(pt_copy$continent, ref = "other")
# Runs regression with the "continent" factor variable and assigns result to
# object named "regression2"
regression2 <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal + continent +
    col_espa + col_uka + col_otha + oecd,
  data = pt_copy
)
# Prints regression table for "regression2"
summary(regression2)
##
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 +
## federal + continent + col_espa + col_uka + col_otha + oecd,
## data = pt_copy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3617 -3.9886 -0.3921 4.6050 17.3752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.88833 17.56879 2.555 0.0129 *
## gastil -2.06438 1.13670 -1.816 0.0739 .
## lyp -0.12414 2.08305 -0.060 0.9527
## trade 0.03018 0.02069 1.459 0.1494
## prop1564 -0.25399 0.27421 -0.926 0.3577
## prop65 0.98675 0.45822 2.153 0.0349 *
## federal -4.73466 2.34235 -2.021 0.0473 *
## continentafrica -3.42365 4.58573 -0.747 0.4580
## continentasiae -7.72223 4.17322 -1.850 0.0687 .
## continentlaam -9.03522 4.25535 -2.123 0.0375 *
## col_espa 0.58034 8.05720 0.072 0.9428
## col_uka 2.68929 3.22769 0.833 0.4077
## col_otha -0.80223 3.02997 -0.265 0.7920
## oecd -2.37769 3.33814 -0.712 0.4788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.789 on 66 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.6499, Adjusted R-squared: 0.5809
## F-statistic: 9.424 on 13 and 66 DF, p-value: 1.21e-10
Working with categorical variables by creating dummy variables
# Uses the "continent" field to make continent dummy variables, and stores the
# result back in pt_copy
pt_copy <- pt_copy %>%
  dummy_cols("continent")
# Runs regression with continent dummies and assigns result to object named
# "regression2_alt"; the "other" category is excluded from the formula
regression2_alt <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal +
    continent_africa + continent_asiae + continent_laam +
    col_espa + col_uka + col_otha + oecd,
  data = pt_copy
)
# Prints "regression2_alt" regression table
summary(regression2_alt)
##
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 +
## federal + continent_africa + continent_asiae + continent_laam +
## col_espa + col_uka + col_otha + oecd, data = pt_copy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3617 -3.9886 -0.3921 4.6050 17.3752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.88833 17.56879 2.555 0.0129 *
## gastil -2.06438 1.13670 -1.816 0.0739 .
## lyp -0.12414 2.08305 -0.060 0.9527
## trade 0.03018 0.02069 1.459 0.1494
## prop1564 -0.25399 0.27421 -0.926 0.3577
## prop65 0.98675 0.45822 2.153 0.0349 *
## federal -4.73466 2.34235 -2.021 0.0473 *
## continent_africa -3.42365 4.58573 -0.747 0.4580
## continent_asiae -7.72223 4.17322 -1.850 0.0687 .
## continent_laam -9.03522 4.25535 -2.123 0.0375 *
## col_espa 0.58034 8.05720 0.072 0.9428
## col_uka 2.68929 3.22769 0.833 0.4077
## col_otha -0.80223 3.02997 -0.265 0.7920
## oecd -2.37769 3.33814 -0.712 0.4788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.789 on 66 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.6499, Adjusted R-squared: 0.5809
## F-statistic: 9.424 on 13 and 66 DF, p-value: 1.21e-10
6.2.3 Working with interaction terms in regression models
# Runs regression with interaction term between "trade" and "federal";
# trade * federal expands to both main effects plus the trade:federal term
trade_federal_interaction <- lm(cgexp ~ trade * federal, data = pt_copy)
# Prints "trade_federal_interaction" regression table
summary(trade_federal_interaction)
##
## Call:
## lm(formula = cgexp ~ trade * federal, data = pt_copy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.0774 -8.1325 0.5782 7.7004 21.0072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.708234 2.517787 10.608 <2e-16 ***
## trade 0.034512 0.026410 1.307 0.195
## federal -4.695595 5.512752 -0.852 0.397
## trade:federal 0.009965 0.076991 0.129 0.897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.42 on 77 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.05761, Adjusted R-squared: 0.02089
## F-statistic: 1.569 on 3 and 77 DF, p-value: 0.2037
Plotting marginal effects
# Computes the average of the "trade" column in pt_copy
mean(pt_copy[["trade"]])
## [1] 78.7659
# Calculates predicted values of "cgexp" for each value of "federal", with
# "trade" held at 78.7659 (the mean of pt_copy$trade, entered by hand)
marginal_effect_federalism <- ggpredict(
  trade_federal_interaction,
  terms = "federal",
  condition = c(trade = 78.7659)
)
# Prints marginal effects table
marginal_effect_federalism
## # Predicted values of cgexp
##
## federal | Predicted | 95% CI
## ------------------------------------
## 0 | 29.43 | [26.94, 31.91]
## 1 | 25.52 | [18.91, 32.12]
# Draws the predicted values of "cgexp" at each value of "federal" as points,
# with error bars running from conf.low to conf.high
ggpredict(trade_federal_interaction, terms = "federal") %>%
  ggplot(aes(x = x, y = predicted)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.05) +
  scale_x_continuous(breaks = c(0, 1)) +
  labs(
    title = "Predicted Effects of Federalism on Gov't Expenditure\n(with trade set to mean)",
    y = "Predicted Expenditure",
    x = "Federalism"
  )
6.2.4 Creating and exporting regression tables
# Puts the regression models wanted in the regression table into a list
model_list <- list(regression1, regression2)
# Exports regression table as text file
stargazer(model_list, type = "text", out = "cgexp_regressions.txt")
# Exports regression table as html file
stargazer(model_list, type = "html", out = "cgexp_regressions.html")