#Code to download and prepare the IRS Business Master Files, Exempt Organizations
#As accessible on the NCCS Data Archive (http://nccs-data.urban.org)
#Brice McKeever
#Center on Nonprofits and Philanthropy
#The Urban Institute
#3/9/18
#For information on the National Center for Charitable Statistics, see: http://nccs.urban.org/

#Load relevant libraries
library(tidyverse)
library(httr)
library(stringr)

#The following function will download and prepare an NCCS BMF file for analysis.
#The function requires the following arguments:
#  bmfyear:  four digit year of the data (e.g., "2016"); a number such as 2016
#            is also accepted
#  bmfmonth: month of the data, normally two digits enclosed by quotes
#            (e.g., "08" for August); a value such as 8 or "8" is zero-padded
#            to two digits before the file name is built
#
#The function returns the parsed BMF data with all column names converted to
#uppercase. As a side effect it also writes the same data to
#"bmYYMM.csv" in the current working directory.
#
#The function stops with an error if either argument is malformed or if the
#server does not answer the download request with HTTP status 200.
#
#For a full list of available year/month combinations, see:
#http://nccs-data.urban.org/data.php?ds=bmf

#Create function to download BMF files
getbmffile <- function(bmfyear, bmfmonth) {
  #normalise both arguments to trimmed character strings so that numeric and
  #character input are handled the same way
  yearchr <- trimws(as.character(bmfyear))
  monthchr <- trimws(as.character(bmfmonth))

  #the file name uses characters 3-4 of the year, so exactly four digits are required
  if (length(yearchr) != 1 || is.na(yearchr) || !grepl("^[0-9]{4}$", yearchr)) {
    stop("bmfyear must be a single four digit year, e.g. \"2016\"", call. = FALSE)
  }

  #the month must be a calendar month; it is zero-padded below so 8 becomes "08"
  if (length(monthchr) != 1 || is.na(monthchr) ||
      !grepl("^[0-9]{1,2}$", monthchr) ||
      !(as.integer(monthchr) %in% 1:12)) {
    stop("bmfmonth must be a single month between \"01\" and \"12\", e.g. \"08\"",
         call. = FALSE)
  }
  monthchr <- sprintf("%02d", as.integer(monthchr))

  #file stem shared by the remote file name and the local copy, e.g. "bm1608"
  filestem <- paste0("bm", str_sub(yearchr, 3, 4), monthchr)

  #create URL for download based on date of file
  URL <- paste0("http://nccs-data.urban.org/data/bmf/", yearchr,
                "/bmf.", filestem, ".csv")

  #get raw data
  rawbmf <- GET(URL)

  #anything other than HTTP 200 is treated as "file not available"
  if (status_code(rawbmf) != 200) {
    stop(paste0("Please check database: it is possible that the BMF year/month ",
                "combination you entered does not exist. See ",
                "http://nccs-data.urban.org/data.php?ds=bmf for list of all ",
                "available datasets, and enter a valid year/month combination ",
                "(requested ", URL, ", HTTP status ", status_code(rawbmf), ")"),
         call. = FALSE)
  }

  #read data as CSV file format, with proper formatting
  #Note 1: the code below reads in most fields from the relevant BMF file. However,
  #users should consider commenting out lines that they do not require. By default,
  #fields used in NCCS validation processes are not included in output file.
  #Note 2: The code below assumes column names for most recent years of the BMF
  #files. Older editions of the BMF Files may have other column names. Please
  #consult the relevant data dictionaries
  #(at http://nccs-data.urban.org/data-dictionaries.php) for more information,
  #and change as necessary.

  #parse BMF; cols_only() keeps just the columns listed here
  bmffile <- content(
    rawbmf,
    type = "text/csv",
    col_types = cols_only(
      EIN = col_character(),
      FIPS = col_character(),
      NTEECC = col_character(),
      FILER = col_character(),
      ZFILER = col_character(),
      NAME = col_character(),
      ADDRESS = col_character(),
      CITY = col_character(),
      STATE = col_character(),
      ZIP5 = col_character(),
      GEN = col_character(),
      SUBSECCD = col_character(),
      RULEDATE = col_character(),
      FNDNCD = col_character(),
      TAXPER = col_character(),
      FRCD = col_character(),
      PFFRCD = col_character(),
      ACCPER = col_character(),
      ASSETS = col_double(),
      INCOME = col_double(),
      SEC_NAME = col_character(),
      NTEE1 = col_character(),
      LEVEL1 = col_character(),
      LEVEL2 = col_character(),
      LEVEL3 = col_character(),
      LEVEL4 = col_character(),
      MAJGRPB = col_character(),
      OUTNCCS = col_character(),
      OUTREAS = col_character(),
      NTEESRC = col_character(),
      ntmaj10 = col_character(),
      ntmaj12 = col_character(),
      ntmaj5 = col_character(),
      nteeFinal = col_character(),
      nteeFinal1 = col_character(),
      RandNum = col_double(),
      nteeConf = col_character(),
      MSA_NECH = col_character(),
      PMSA = col_character(),
      cFiler = col_character(),
      czFiler = col_character(),
      cTaxPer = col_character(),
      cAssets = col_double(),
      cTotRev = col_double(),
      cFinSrc = col_character(),
      EPOST = col_character(),
      FISYR_IMAGE = col_character(),
      IRS990n = col_character(),
      NAICS = col_character()
    )
  )

  #convert all variable names to uppercase
  names(bmffile) <- toupper(names(bmffile))

  #write output to local drive
  #Note: Users should STRONGLY consider saving the data locally to avoid repeated
  #downloads of the data
  #(write.csv is called with its defaults, so the file also gets a leading
  #row-name column)
  write.csv(bmffile, file = paste0(filestem, ".csv"))

  #return output to R for immediate exploration
  bmffile
}

#Examples using function defined above:
#Download August 2016 IRS Business Master File:
#bm1608 <- getbmffile("2016", "08")

#Examples of flawed code:
#Will not run if no December file is available for the requested year
#(note: the call below requests 2015, not 2016)
#bmftest <- getbmffile("2015", "12")
#Will not run, as 2045 is not a valid year for BMF data
#bmftest2 <- getbmffile("2045", "08")