Skip to contents

Setup

Let’s start by loading the package with library(openFDA).

Get brand names and EPCs from generic BNF terms

First, we’ll use openFDA() to retrieve brand names for a set of drugs we are interested in from the British National Formulary (BNF). We start by initialising a vector of generic drug names from the BNF.

# Generic drug names taken from the BNF, grouped by drug class. The class
# labels only organise the list for the reader: `use.names = FALSE` drops them,
# so `bnf` is a plain, unnamed character vector.
bnf <- unlist(
  list(
    ace_inhibitors = c(
      "captopril", "enalapril", "fosinopril", "imidapril", "lisinopril",
      "perindopril", "quinapril", "ramipril", "trandolapril"
    ),
    calcium_channel_blockers = c(
      "amlodipine", "diltiazem", "felodipine", "lacidipine", "lercanidipine",
      "nicardipine", "nifedipine", "nimodipine", "verapamil"
    ),
    angiotensin_receptor_blockers = c(
      "azilsartan", "candesartan", "eprosartan", "irbesartan",
      "losartan", "olmesartan", "telmisartan", "valsartan"
    ),
    diuretics = c(
      "bumetanide", "bendroflumethiazide", "chlortalidone",
      "hydrochlorothiazide", "indapamide"
    ),
    statins = c(
      "atorvastatin", "fluvastatin",
      "pravastatin", "rosuvastatin", "simvastatin"
    ),
    cholesterol_absorption_inhibitors = "ezetimibe",
    pcsk9_inhibitors = c("alirocumab", "evolocumab"),
    biguanides = "metformin",
    sulfonylureas = c("gliclazide", "glimepiride", "tolbutamide", "glipizide"),
    dpp4_inhibitors = c(
      "alogliptin", "linagliptin", "saxagliptin", "sitagliptin",
      "vildagliptin"
    ),
    sglt2_inhibitors = c(
      "canagliflozin", "dapagliflozin", "empagliflozin", "ertugliflozin"
    ),
    glp1_receptor_agonists = c(
      "dulaglutide", "exenatide", "liraglutide", "lixisenatide",
      "semaglutide"
    ),
    thiazolidinediones = "pioglitazone"
  ),
  use.names = FALSE
)

We can then paste these names into a single search string, and use it to search against the "openfda.generic_name" field in the Drugs@FDA endpoint.

# Join the BNF names with "+" so they form one multi-term value for the
# generic-name field, then format it as a (non-exact) openFDA search term
bnf_search_term <- format_search_term(
  c("openfda.generic_name" = paste(bnf, collapse = "+")),
  exact = FALSE
)

# Run the query against the Drugs@FDA endpoint
bnf_search <- openFDA(
  search = bnf_search_term,
  endpoint = "drug-drugsfda"
)

# Parse the response body into nested R lists
bnf_search_json <- bnf_search |>
  httr2::resp_body_json()

# Show the first three entries of the fourth element of the first result; each
# entry includes, among other fields, a brand name and its active ingredients
bnf_search_json$results[[1]][4][[1]][1:3]
#> [[1]]
#> [[1]]$product_number
#> [1] "003"
#> 
#> [[1]]$reference_drug
#> [1] "No"
#> 
#> [[1]]$brand_name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[1]]$active_ingredients
#> [[1]]$active_ingredients[[1]]
#> [[1]]$active_ingredients[[1]]$name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[1]]$active_ingredients[[1]]$strength
#> [1] "40MG"
#> 
#> 
#> 
#> [[1]]$reference_standard
#> [1] "No"
#> 
#> [[1]]$dosage_form
#> [1] "TABLET"
#> 
#> [[1]]$route
#> [1] "ORAL"
#> 
#> [[1]]$marketing_status
#> [1] "Discontinued"
#> 
#> 
#> [[2]]
#> [[2]]$product_number
#> [1] "001"
#> 
#> [[2]]$reference_drug
#> [1] "No"
#> 
#> [[2]]$brand_name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[2]]$active_ingredients
#> [[2]]$active_ingredients[[1]]
#> [[2]]$active_ingredients[[1]]$name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[2]]$active_ingredients[[1]]$strength
#> [1] "10MG"
#> 
#> 
#> 
#> [[2]]$reference_standard
#> [1] "No"
#> 
#> [[2]]$dosage_form
#> [1] "TABLET"
#> 
#> [[2]]$route
#> [1] "ORAL"
#> 
#> [[2]]$marketing_status
#> [1] "Discontinued"
#> 
#> 
#> [[3]]
#> [[3]]$product_number
#> [1] "002"
#> 
#> [[3]]$reference_drug
#> [1] "No"
#> 
#> [[3]]$brand_name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[3]]$active_ingredients
#> [[3]]$active_ingredients[[1]]
#> [[3]]$active_ingredients[[1]]$name
#> [1] "FOSINOPRIL SODIUM"
#> 
#> [[3]]$active_ingredients[[1]]$strength
#> [1] "20MG"
#> 
#> 
#> 
#> [[3]]$reference_standard
#> [1] "No"
#> 
#> [[3]]$dosage_form
#> [1] "TABLET"
#> 
#> [[3]]$route
#> [1] "ORAL"
#> 
#> [[3]]$marketing_status
#> [1] "Discontinued"

Extracting openFDA fields programmatically

The JSON data from openFDA() is useful, and can be easily parsed with tools from purrr and vctrs. The following function dips into the JSON data from openFDA() and pulls out data from specific fields. It extracts all occurrences of data within the requested field, then condenses these down into a single character vector.

#' Extract openFDA field data from openFDA JSON objects
#'
#' Looks inside the `openfda` entry of every element of `json$results`, pulls
#' out the requested field, and pools the values found across all results.
#'
#' @param json JSON data generated by running [httr2::resp_body_json] on the
#'   output from `openFDA::openFDA()`.
#' @param openfda_field A single string naming the field to extract from each
#'   result's `openfda` entry. Give the name without the `openfda.` prefix,
#'   e.g. `"generic_name"` or `"pharm_class_moa"`, because the `openfda` level
#'   is already traversed by this function.
#' @returns A character vector with all unique values of `openfda_field` for
#'   your given search. If `json` has no `results`, or no result carries the
#'   field, a zero-length character vector is returned.
extract_openFDA_field <- function(json, openfda_field) {
  values <- json |>
    purrr::pluck("results") |>
    purrr::map(.f = \(result) purrr::pluck(result, "openfda", openfda_field)) |>
    # Results lacking the field come back as NULL; discard them
    vctrs::list_drop_empty() |>
    unlist(use.names = FALSE)

  # `unlist()` yields NULL when nothing was found; coerce so callers always
  # receive the documented character vector
  unique(as.character(values))
}

Brand names

Now that we have our function for data extraction, we can pull out the brand names associated with the generic drugs we queried against the API above.

# Collect every distinct brand name returned for the BNF generics
openFDA_brand_names <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "brand_name")

# Peek at a handful of entries from the middle of the vector
print(openFDA_brand_names[seq(25, 30)])
#> [1] "ENALAPRIL MALEATE"          "VASOTEC"                   
#> [3] "TELMISARTAN AND AMLODIPINE" "NORVASC"                   
#> [5] "AMLODIPINE BESYLATE"        "FELODIPINE"

Established pharmacological classes

We’ll also extract the established pharmacological classes (EPCs) of our drugs. We will use these EPCs as their own query term below, to find generic drugs which were not within our BNF drug list.

# Collect every distinct established pharmacological class from the search
openfda_EPCs <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "pharm_class_epc")

# How many distinct brand names and EPCs did the search return?
c(
  brand_names = length(openFDA_brand_names),
  EPCs = length(openfda_EPCs)
)
#> brand_names        EPCs 
#>         172          20

Query openFDA with EPCs

We can use EPCs to run queries against the openFDA API. We do this by removing the "[EPC]" tags from our retrieved data, then supplying them in the search strategy.

# Strip the trailing bracketed tag (e.g. " [EPC]") from each class name
openfda_EPCs <- openfda_EPCs |>
  stringr::str_remove(pattern = " \\[.*\\]")
print(openfda_EPCs)
#>  [1] "Thiazide Diuretic"                             
#>  [2] "HMG-CoA Reductase Inhibitor"                   
#>  [3] "Angiotensin 2 Receptor Blocker"                
#>  [4] "Dietary Cholesterol Absorption Inhibitor"      
#>  [5] "Loop Diuretic"                                 
#>  [6] "Dihydropyridine Calcium Channel Blocker"       
#>  [7] "Dipeptidyl Peptidase 4 Inhibitor"              
#>  [8] "Thiazide-like Diuretic"                        
#>  [9] "Angiotensin Converting Enzyme Inhibitor"       
#> [10] "GLP-1 Receptor Agonist"                        
#> [11] "Sodium-Glucose Cotransporter 2 Inhibitor"      
#> [12] "Sulfonylurea"                                  
#> [13] "Potassium-sparing Diuretic"                    
#> [14] "Calcium Channel Blocker"                       
#> [15] "Aldosterone Antagonist"                        
#> [16] "Peroxisome Proliferator Receptor alpha Agonist"
#> [17] "Peroxisome Proliferator Receptor gamma Agonist"
#> [18] "Thiazolidinedione"                             
#> [19] "Insulin Analog"                                
#> [20] "Adenosine Triphosphate-Citrate Lyase Inhibitor"

Query openFDA with our EPCs

Before these strings are sent to openFDA as search terms, each one needs to be surrounded with double-quotation marks ("). openFDA() adds these quotes itself when the search is supplied as a named character vector, as in the code below, so we do not add them by hand. The quotes ensure openFDA considers each term as a single string, instead of a set of strings. Without the quotes, a term such as "Thiazide Diuretic" will be treated as a search for any drugs with either “Thiazide” or “Diuretic” in the EPC, instead of a search for specifically thiazide diuretics.

# Run one Drugs@FDA query per EPC and keep each parsed JSON response body
epc_search <- purrr::map(openfda_EPCs, \(epc_term) {
  response <- openFDA(
    search = c("openfda.pharm_class_epc" = epc_term),
    endpoint = "drug-drugsfda"
  )
  httr2::resp_body_json(response)
})

# Pull the generic names out of every EPC search result and pool them into one
# de-duplicated character vector. `purrr::list_c()` is used in place of
# `purrr::flatten_chr()`, which is deprecated as of purrr 1.0.0; the explicit
# `ptype` keeps the result a character vector even when no search yields a
# generic name.
epc_generics <- epc_search |>
  purrr::map(
    .f = \(json) extract_openFDA_field(json, openfda_field = "generic_name")
  ) |>
  purrr::list_c(ptype = character()) |>
  unique()

Compare new generic names to BNF terms

Using some pattern matching, we can see which new generic drug terms we have retrieved by searching with EPCs.


# Build one alternation pattern covering every BNF name, then show only the
# lower-cased EPC generics that match none of them
bnf_regex <- sprintf("(%s)", paste(bnf, collapse = "|"))
epc_generics |>
  tolower() |>
  (\(generics) generics[!grepl(pattern = bnf_regex, x = generics)])()
#>  [1] "chlorothiazide"                     "pitavastatin magnesium"            
#>  [3] "lovastatin"                         "pitavastatin"                      
#>  [5] "sparsentan"                         "torsemide"                         
#>  [7] "furosemide"                         "ethacrynic acid"                   
#>  [9] "furosemide injection 80 mg/ 10 ml"  "isradipine"                        
#> [11] "nisoldipine"                        "clevidipine"                       
#> [13] "clevipidine"                        "chlorthalidone"                    
#> [15] "metolazone"                         "atenolol and chlorthalidone"       
#> [17] "atenolol and chlorthalidone tablet" "tirzepatide"                       
#> [19] "bexagliflozin"                      "sotagliflozin"                     
#> [21] "glyburide"                          "triamterene capsules"              
#> [23] "triamterene"                        "spironolactone"                    
#> [25] "carospir"                           "eplerenone"                        
#> [27] "fenofibrate"                        "fenofibric acid"                   
#> [29] "gemfibrozil"                        "insulin aspart injection"          
#> [31] "insulin aspart"                     "insulin glargine"                  
#> [33] "insulin lispro"                     "insulin detemir"                   
#> [35] "insulin lispro-aabc"                "insulin glulisine"                 
#> [37] "water injection"                    "insulin glargine-yfgn"             
#> [39] "insulin glargine-aglr"              "diluent"                           
#> [41] "insulin aspart-szjj"                "insulin degludec"                  
#> [43] "bempedoic acid"

Of these drugs, the following are of interest:

# Generic names picked out of the EPC search results for follow-up queries.
# The order is significant: it fixes the element order of anything mapped
# over this vector.
fda_generics <- c(
  "pitavastatin",
  "lovastatin",
  "chlorothiazide",
  "sparsentan",
  "nisoldipine",
  "isradipine",
  "clevidipine",
  "glyburide",
  "glibenclamide",
  "tirzepatide",
  "bexagliflozin",
  "sotagliflozin",
  "chlorthalidone",
  "metolazone",
  "spironolactone",
  "eplerenone",
  "bempedoic acid",
  "fenofibrate",
  "fenofibric acid",
  "gemfibrozil"
)

We can use these generic names of interest to get even more brand names:

# Look up the brand names recorded against each additional generic name and
# return them as a list named by generic.
#
# The bare generic name is passed as the search value. openFDA() already wraps
# the value of a named search vector in double quotes, so adding quotes by
# hand sent a doubly-quoted term (e.g. `""bempedoic acid""`), which risks the
# term no longer being read as a single phrase.
more_brand_names <- purrr::map(
  .x = fda_generics,
  .f = \(generic) {
    openFDA(c("openfda.generic_name" = generic)) |>
      httr2::resp_body_json() |>
      extract_openFDA_field(openfda_field = "brand_name") |>
      # Lower-case so brand names compare cleanly with the lower-case generics
      tolower()
  }
) |>
  setNames(fda_generics)
#> Warning in openFDA(c(openfda.generic_name = paste0("\"", generic, "\""))): The openFDA API returned a 404 error.
#> ! This indicates that openFDA had no results to return.
#> This can be due to:
#>  A restrictive search term. Yours was
#>   "openfda.generic_name:\"\"glibenclamide\"\"".

# Preview the brand names found for the first three generics
more_brand_names[seq_len(3)]
#> $pitavastatin
#> [1] "zypitamag"            "pitavastatin calcium" "nikita"              
#> [4] "livalo"               "pitavastatin"        
#> 
#> $lovastatin
#> [1] "lovastatin" "altoprev"  
#> 
#> $chlorothiazide
#> [1] "chlorothiazide"        "chlorothiazide sodium" "diuril"

Queries about ANGPTL3 inhibitors

Finally, let’s collect some data on angiopoietin-like 3 (ANGPTL3) inhibitors, which can be used in homozygous familial hypercholesterolaemia in the UK and US.

# Query Drugs@FDA for evinacumab by generic name
evinacumab_search <- openFDA(
  search = c("openfda.generic_name" = "evinacumab"),
  endpoint = "drug-drugsfda"
)

# Read its EPC from the response and strip the trailing bracketed tag
evinacumab_epc <- stringr::str_remove(
  extract_openFDA_field(
    httr2::resp_body_json(evinacumab_search),
    "pharm_class_epc"
  ),
  pattern = " \\[.*\\]"
)

# Search Drugs@FDA for every product that shares evinacumab's EPC.
# (Variable names keep their original `agplt3` spelling; only the printed
# labels are corrected to ANGPTL3.)
agplt3_inhibitors_search <- openFDA(
  search = c("openfda.pharm_class_epc" = evinacumab_epc),
  endpoint = "drug-drugsfda"
)

# Parse the response body once and reuse it for both extractions
agplt3_inhibitors_json <- httr2::resp_body_json(agplt3_inhibitors_search)

agplt3_inhibitors_brands <- extract_openFDA_field(
  agplt3_inhibitors_json,
  "brand_name"
)

agplt3_inhibitors_generics <- extract_openFDA_field(
  agplt3_inhibitors_json,
  "generic_name"
)

c("ANGPTL3-I brands" = agplt3_inhibitors_brands,
  "ANGPTL3-I generics" = agplt3_inhibitors_generics)
#>   ANGPTL3-I brands ANGPTL3-I generics 
#>          "EVKEEZA"       "EVINACUMAB"

Session info

# Record the R version, platform and package versions used for this run
utils::sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] openFDA_0.1.0.9000
#> 
#> loaded via a namespace (and not attached):
#>  [1] vctrs_0.6.5       cli_3.6.5         knitr_1.50        rlang_1.1.6      
#>  [5] xfun_0.52         stringi_1.8.7     purrr_1.0.4       textshaping_1.0.1
#>  [9] jsonlite_2.0.0    glue_1.8.0        backports_1.5.0   htmltools_0.5.8.1
#> [13] ragg_1.4.0        sass_0.4.10       rappdirs_0.3.3    rmarkdown_2.29   
#> [17] evaluate_1.0.4    jquerylib_0.1.4   fastmap_1.2.0     yaml_2.3.10      
#> [21] lifecycle_1.0.4   httr2_1.1.2       stringr_1.5.1     compiler_4.5.1   
#> [25] fs_1.6.6          systemfonts_1.2.3 digest_0.6.37     R6_2.6.1         
#> [29] curl_6.4.0        magrittr_2.0.3    bslib_0.9.0       checkmate_2.3.2  
#> [33] tools_4.5.1       pkgdown_2.1.3     cachem_1.1.0      desc_1.4.3