Registered Public Charities by Type

8.27.2018
Brice McKeever

More from this project:

Number of Registered 501(c)(3) Public Charities by NTEE Group

  1. library(tidyverse)
  2. library(httr)
  3. library(stringr)
  4. library(knitr)
  5. library(reshape2)
  6. library(extrafont)
  7. source('https://raw.githubusercontent.com/UrbanInstitute/urban_R_theme/master/urban_theme_windows.R')
  8.  
  # NTEE grouping categories ----
  # Each vector holds the NTEE code prefixes that make up one reporting group.
  # One-letter entries are matched against the first character of the code;
  # the two- and three-character entries are matched against longer prefixes.
  arts <- "A"

  # Education: the specific "B4"/"B5" prefixes are split out from the rest of "B".
  othered <- "B"
  highered <- c("B4", "B5")

  envanimals <- c("C", "D")

  # Health: specific three-character codes are split out from the broader
  # E/F/G/H letters.
  # NOTE(review): "F31" sits in the middle of the E-codes -- confirm it is
  # intended rather than a typo.
  otherhlth <- c("E", "F", "G", "H")
  hospitals <- c(
    "E20", "E21", "E22", "E23", "E24",
    "F31",
    "E30", "E31", "E32"
  )

  humanserv <- c("I", "J", "K", "L", "M", "N", "O", "P")
  intl <- "Q"
  pubben <- c("R", "S", "T", "U", "V", "W", "Y", "Z")
  relig <- "X"
  20.  
  # Download the master NTEE lookup file from the NCCS Data Archive.
  nteedoc <- GET("http://nccs-data.urban.org/data/misc/nccs.nteedocAllEins.csv")

  # Fail fast on an HTTP error status instead of trying to parse an error
  # page as CSV further down.
  stop_for_status(nteedoc)

  # Parse the response body as CSV, keeping only the columns used below
  # (EIN, NteeCC, NteeFinal), all read as character.
  nteedocalleins <- content(
    nteedoc,
    type = "text/csv",
    col_types = cols_only(
      EIN = col_character(),
      NteeCC = col_character(),
      NteeFinal = col_character()
    )
  )

  # Convert variable names to upper case (EIN, NTEECC, NTEEFINAL) so they match
  # the upper-case names used for the Business Master File columns.
  names(nteedocalleins) <- toupper(names(nteedocalleins))
  32.  
  33.  
  # Read a Business Master File (BMF) CSV, filtering out unnecessary columns.
  #
  # bmffilepath: file path handed to read_csv().
  # Returns: the parsed data restricted to the columns listed in the column
  #   specification below, with column names converted to upper case.
  prepbmffile <- function(bmffilepath) {
    # Identifier/classification fields are read as character; the two dollar
    # amounts (CTOTREV, CASSETS) are read as doubles.
    bmf_columns <- cols_only(
      EIN = col_character(),
      NTEECC = col_character(),
      STATE = col_character(),
      OUTNCCS = col_character(),
      SUBSECCD = col_character(),
      FNDNCD = col_character(),
      CFILER = col_character(),
      CZFILER = col_character(),
      CTAXPER = col_character(),
      CTOTREV = col_double(),
      LEVEL4 = col_character(),
      CASSETS = col_double()
    )

    bmf <- read_csv(bmffilepath, col_types = bmf_columns)
    names(bmf) <- toupper(names(bmf))
    bmf
  }
  53.  
  # Apply the most common NTEE grouping categories to a dataset.
  #
  # The NTEE lookup is left-joined onto `dataset` by EIN, then an NTEEGRP
  # column is derived from the joined NTEEFINAL code.
  #
  # dataset: data frame with an EIN column.
  # lookup:  data frame with EIN and NTEEFINAL columns. Defaults to the
  #          script-level `nteedocalleins`, so existing one-argument calls
  #          behave as before.
  # Returns: `dataset` with the lookup columns joined on and a character
  #          NTEEGRP column added.
  #
  # Relies on the script-level prefix vectors (arts, highered, othered,
  # envanimals, hospitals, otherhlth, humanserv, intl, pubben, relig).
  NTEEclassify <- function(dataset, lookup = nteedocalleins) {
    # Merge in the master NTEE lookup file.
    dataset <- left_join(dataset, lookup, by = "EIN")

    # Without this check a missing (or .x/.y-suffixed) NTEEFINAL column would
    # silently leave every row in the blank group.
    if (!"NTEEFINAL" %in% names(dataset)) {
      stop(
        "NTEEclassify(): no NTEEFINAL column after joining the NTEE lookup.",
        call. = FALSE
      )
    }

    code <- dataset$NTEEFINAL
    major <- str_sub(code, 1, 1)

    # case_when() takes the first matching rule, so the missing-code rule and
    # the specific three-/two-character prefixes come before the one-letter
    # groups they refine. Codes matching no rule keep the blank placeholder.
    dataset$NTEEGRP <- case_when(
      is.na(code) ~ "Other Public and social benefit",
      str_sub(code, 1, 3) %in% hospitals ~
        "Health Care: Hospitals and primary care facilities",
      str_sub(code, 1, 2) %in% highered ~ "Education: Higher",
      major %in% arts ~ "Arts",
      major %in% othered ~ "Education: Other",
      major %in% envanimals ~ "Environment and Animals",
      major %in% otherhlth ~ "Health Care: Other",
      major %in% humanserv ~ "Human Services",
      major %in% intl ~ "International",
      major %in% pubben ~ "Other Public and social benefit",
      major %in% relig ~ "Religion related",
      TRUE ~ "  "
    )

    dataset
  }
  74.  
  # Run the functions on the BMF.
  bmf2016 <- prepbmffile("Data/bm1608.csv")
  bmf2016 <- NTEEclassify(bmf2016)

  # Restrict the BMF to in-scope 501(c)(3) organizations. This filter is
  # shared by both counts below, so it is computed once.
  # Note: filter() drops rows where a comparison is NA, so records with a
  # missing OUTNCCS, FNDNCD or SUBSECCD are excluded as well.
  inscope2016 <- bmf2016 %>%
    # Filter out out-of-scope orgs
    filter(OUTNCCS != "OUT") %>%
    filter(FNDNCD != "02" & FNDNCD != "03" & FNDNCD != "04") %>%
    # Filter out non-501(c)(3)s
    filter(SUBSECCD == "03")

  # Number of actively filing organizations by NTEE group.
  output2 <- inscope2016 %>%
    # Filter out orgs that haven't filed taxes in the last 2 years
    filter(CFILER == "Y") %>%
    # (disabled in the original analysis) filter(CZFILER == "N") %>%
    count(NTEEGRP, name = "TotalActive")

  # Number of organizations by NTEE group.
  output <- inscope2016 %>%
    count(NTEEGRP, name = "TotalOrgs")

  # Join the two counts into a single table. A group with no active filers has
  # no row in output2, so report it as 0 rather than NA, and label the columns
  # by name rather than by position.
  final <- output %>%
    left_join(output2, by = "NTEEGRP") %>%
    replace_na(list(TotalActive = 0L)) %>%
    rename(
      `Organization Type` = NTEEGRP,
      `Number of Organizations` = TotalOrgs,
      `Number Filing Annually` = TotalActive
    )

  # Display table
  kable(final, format.args = list(decimal.mark = ".", big.mark = ","))
Organization Type Number of Organizations Number Filing Annually
Arts 103,926 91,939
Education: Higher 5,212 3,962
Education: Other 157,775 132,287
Environment and Animals 55,170 41,185
Health Care: Hospitals and primary care facilities 10,615 9,297
Health Care: Other 72,137 62,323
Human Services 284,329 241,584
International 18,802 16,216
Other Public and social benefit 125,366 101,825
Religion related 275,320 56,648

Source: Internal Revenue Service, Exempt Organizations Business Master File (August 2016).