Largest Active and Reporting Education Public Charities by Expenses
9.13.2018
More from this project:
Largest Active and Reporting Education Public Charities by Expenses
library(tidyverse)
library(knitr)
library(stringr)
library(scales)
library(httr)
source('https://raw.githubusercontent.com/UrbanInstitute/urban_R_theme/master/urban_theme_windows.R')
#NTEE grouping categories.
#Each vector holds the NTEE code prefixes that belong to one reporting group.
#One-character entries are compared against the first character of a code;
#the two- and three-character entries (highered, hospitals) are compared
#against longer prefixes in NTEEclassify.
arts       <- "A"
othered    <- "B"
highered   <- c("B4", "B5")
envanimals <- c("C", "D")
otherhlth  <- c("E", "F", "G", "H")
#NOTE(review): "F31" is the only non-E code in this list - confirm it is
#intended to count as a hospital / primary care facility.
hospitals  <- c("E20", "E21", "E22", "E23", "E24", "F31", "E30", "E31", "E32")
humanserv  <- c("I", "J", "K", "L", "M", "N", "O", "P")
intl       <- "Q"
pubben     <- c("R", "S", "T", "U", "V", "W", "Y", "Z")
relig      <- "X"
#Load the reduced NCCS Data Archive lookup table from the local Data folder.
#NTEEclassify joins this table onto its input by EIN.
nteedocalleins <- read.csv(file = "Data/nteedocalleins.csv")
#Upper-case every column name so the columns can be referenced as EIN and
#NTEEFINAL regardless of the capitalisation used in the CSV header.
colnames(nteedocalleins) <- toupper(colnames(nteedocalleins))
#Apply the most common NTEE grouping categories to a data set.
#
#dataset: a data frame with an EIN column.
#Returns dataset left-joined to the global nteedocalleins lookup (by EIN),
#with an added NTEEGRP column naming the group of each row's NTEEFINAL code.
#Rows whose NTEEFINAL is missing are labelled "Other Public and social
#benefit"; rows whose code matches no rule keep the blank placeholder " ".
NTEEclassify <- function(dataset) {
  #Merge in the master NTEE lookup file
  dataset <- left_join(dataset, nteedocalleins, by = "EIN")

  #Classification rules: compare the first `width` characters of NTEEFINAL
  #against `codes`. Rules run top to bottom and a later match overwrites an
  #earlier one, which is why the narrow groups (higher education, hospitals)
  #come after the broad one-letter groups they refine.
  rules <- list(
    list(width = 1, codes = arts,       label = "Arts"),
    list(width = 1, codes = othered,    label = "Education: Other"),
    list(width = 2, codes = highered,   label = "Education: Higher"),
    list(width = 1, codes = envanimals, label = "Environment and Animals"),
    list(width = 1, codes = otherhlth,  label = "Health Care: Other"),
    list(width = 3, codes = hospitals,
         label = "Health Care: Hospitals and primary care facilities"),
    list(width = 1, codes = humanserv,  label = "Human Services"),
    list(width = 1, codes = intl,       label = "International"),
    list(width = 1, codes = pubben,     label = "Other Public and social benefit"),
    list(width = 1, codes = relig,      label = "Religion related")
  )

  #Start every row on the blank placeholder, then apply each rule in turn
  dataset$NTEEGRP <- " "
  for (rule in rules) {
    matched <- str_sub(dataset$NTEEFINAL, 1, rule$width) %in% rule$codes
    dataset$NTEEGRP[matched] <- rule$label
  }

  #Organizations with no NTEE code fall into the catch-all group
  dataset$NTEEGRP[is.na(dataset$NTEEFINAL)] <- "Other Public and social benefit"

  dataset
}
#Import a reduced NCCS Core File.
#
#corefilepath: path of the Core File CSV to read.
#Returns the parsed file restricted to the columns listed below, with all
#column names converted to upper case.
prepcorepcfile <- function(corefilepath) {
  #Only these columns are read, each with an explicit type; identifier-like
  #fields (EIN, ZIP, FIPS, ...) are kept as text.
  #OUTNCCS is listed in two capitalisations - presumably the header differs
  #between files (TODO confirm); either spelling ends up as OUTNCCS once the
  #names are upper-cased.
  wanted_columns <- cols_only(
    EIN      = col_character(),
    FISYR    = col_integer(),
    NAME     = col_character(),
    STATE    = col_character(),
    ADDRESS  = col_character(),
    CITY     = col_character(),
    ZIP      = col_character(),
    MSA_NECH = col_character(),
    FIPS     = col_character(),
    PMSA     = col_character(),
    STYEAR   = col_double(),
    TAXPER   = col_integer(),
    OUTNCCS  = col_character(),
    OutNCCS  = col_character(),
    SUBSECCD = col_character(),
    RULEDATE = col_character(),
    FNDNCD   = col_character(),
    FRCD     = col_character(),
    TOTREV   = col_double(),
    EXPS     = col_double(),
    ASS_EOY  = col_double(),
    GRREC    = col_double()
  )

  core <- read_csv(corefilepath, col_types = wanted_columns)

  #Normalise the header so later code can use upper-case names throughout
  names(core) <- toupper(names(core))
  core
}
#Import NCCS Core File for given year
corefile <- prepcorepcfile(paste0("Data/core", "2015", "pc.csv"))

#Add NTEE Classifications to the Core File
corefile <- NTEEclassify(corefile)

#Filter out of scope organizations: keep only records that are not flagged
#"OUT", whose FNDNCD is none of 02/03/04, and that fall in one of the two
#education groups. The != comparisons are kept on purpose: filter() drops
#rows where a comparison is NA, so records with a missing OUTNCCS or FNDNCD
#are excluded exactly as before.
corefile <- corefile %>%
  filter(
    OUTNCCS != "OUT",
    FNDNCD != "02" & FNDNCD != "03" & FNDNCD != "04",
    NTEEGRP %in% c("Education: Other", "Education: Higher")
  )

#Sort in descending order by expenses, limit the list to 10, and keep only
#the display columns under their presentation names.
#head() returns at most 10 rows; the previous [1:10, ] indexing padded the
#table with all-NA rows whenever fewer than 10 organizations remained.
LargestExpenses <- corefile %>%
  arrange(desc(EXPS)) %>%
  head(10) %>%
  select(
    EIN,
    `NTEE Code` = NTEEFINAL,
    `NTEE Group` = NTEEGRP,
    Name = NAME,
    Expenses = EXPS
  )

#display table
kable(LargestExpenses, format.args = list(decimal.mark = ".", big.mark = ","))
EIN | NTEE Code | NTEE Group | Name | Expenses |
---|---|---|---|---|
135562308 | B43 | Education: Higher | NEW YORK UNIVERSITY | 5,420,536,806 |
520595110 | B43 | Education: Higher | JOHNS HOPKINS UNIVERSITY | 5,413,130,000 |
231352685 | B43 | Education: Higher | TRUSTEES OF THE UNIVERSITY OF PENNSYLVANIA | 5,365,056,000 |
941156365 | B43 | Education: Higher | THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY | 5,050,927,315 |
042103580 | B43 | Education: Higher | PRESIDENT AND FELLOWS OF HARVARD COLLEGE | 4,775,458,754 |
951642394 | B43 | Education: Higher | UNIVERSITY OF SOUTHERN CALIFORNIA | 4,257,472,819 |
620476822 | B43 | Education: Higher | VANDERBILT UNIVERSITY | 4,189,634,110 |
135598093 | B43 | Education: Higher | TRUSTEES OF COLUMBIA UNIVERSITY IN THE CITY OF NEW YORK | 4,139,274,346 |
150532082 | B43 | Education: Higher | CORNELL UNIVERSITY TAX DEPT CORNELL UNIVERSITY | 3,951,934,873 |
060646973 | B43 | Education: Higher | YALE UNIVERSITY | 3,513,798,862 |
Source: NCCS 501(c)(3) Public Charities Core File 2015