Get brand names and EPCs from generic BNF terms
First, we’ll use openFDA()
to retrieve brand names for
the drugs we are interested in from the British National Formulary
(BNF), a reference book for prescribers used in the United Kingdom. We
first initialise a vector of drug names from the BNF:
# Generic (non-proprietary) drug names taken from the BNF. The class labels
# below are reviewer annotations to aid reading - confirm against the BNF.
bnf <- c(
  # ACE inhibitors
  "captopril", "enalapril", "fosinopril", "imidapril", "lisinopril",
  "perindopril", "quinapril", "ramipril", "trandolapril",
  # Calcium channel blockers
  "amlodipine", "diltiazem", "felodipine", "lacidipine", "lercanidipine",
  "nicardipine", "nifedipine", "nimodipine", "verapamil",
  # Angiotensin II receptor blockers
  "azilsartan", "candesartan", "eprosartan", "irbesartan",
  "losartan", "olmesartan", "telmisartan", "valsartan",
  # Diuretics
  "bumetanide", "bendroflumethiazide", "chlortalidone",
  "hydrochlorothiazide", "indapamide",
  # Statins
  "atorvastatin", "fluvastatin",
  "pravastatin", "rosuvastatin", "simvastatin",
  # Cholesterol absorption inhibitor
  "ezetimibe",
  # PCSK9 inhibitors
  "alirocumab", "evolocumab",
  # Biguanide
  "metformin",
  # Sulfonylureas
  "gliclazide", "glimepiride", "tolbutamide", "glipizide",
  # DPP-4 inhibitors
  "alogliptin", "linagliptin", "saxagliptin", "sitagliptin",
  "vildagliptin",
  # SGLT2 inhibitors
  "canagliflozin", "dapagliflozin", "empagliflozin", "ertugliflozin",
  # GLP-1 receptor agonists
  "dulaglutide", "exenatide", "liraglutide", "lixisenatide",
  "semaglutide",
  # Thiazolidinedione
  "pioglitazone"
)
We can then paste these into a single search string, and use it to
search against the "openfda.generic_name"
field in the Drugs@FDA
endpoint.
# Join the BNF names with "+" to form one search string for the
# "openfda.generic_name" field. `exact = FALSE` is passed explicitly here,
# presumably so the combined string is not quoted as one exact phrase.
bnf_search_term <- format_search_term(
  c("openfda.generic_name" = paste0(bnf, collapse = "+")),
  exact = FALSE
)
bnf_search <- openFDA(search = bnf_search_term, endpoint = "drug-drugsfda")
bnf_search_json <- httr2::resp_body_json(bnf_search)
# The JSON object contains information including the generic and brand names.
# The first result's products are selected by name rather than by position,
# so the example keeps working if the API returns its fields in another order;
# `head()` avoids padding with NULL when fewer than three products exist.
bnf_search_json |>
  purrr::pluck("results", 1, "products") |>
  head(n = 3)
#> [[1]]
#> [[1]]$product_number
#> [1] "003"
#>
#> [[1]]$reference_drug
#> [1] "No"
#>
#> [[1]]$brand_name
#> [1] "TELMISARTAN AND HYDROCHLOROTHIAZIDE"
#>
#> [[1]]$active_ingredients
#> [[1]]$active_ingredients[[1]]
#> [[1]]$active_ingredients[[1]]$name
#> [1] "HYDROCHLOROTHIAZIDE"
#>
#> [[1]]$active_ingredients[[1]]$strength
#> [1] "25MG"
#>
#>
#> [[1]]$active_ingredients[[2]]
#> [[1]]$active_ingredients[[2]]$name
#> [1] "TELMISARTAN"
#>
#> [[1]]$active_ingredients[[2]]$strength
#> [1] "80MG"
#>
#>
#>
#> [[1]]$reference_standard
#> [1] "No"
#>
#> [[1]]$dosage_form
#> [1] "TABLET"
#>
#> [[1]]$route
#> [1] "ORAL"
#>
#> [[1]]$marketing_status
#> [1] "Prescription"
#>
#> [[1]]$te_code
#> [1] "AB"
#>
#>
#> [[2]]
#> [[2]]$product_number
#> [1] "001"
#>
#> [[2]]$reference_drug
#> [1] "No"
#>
#> [[2]]$brand_name
#> [1] "TELMISARTAN AND HYDROCHLOROTHIAZIDE"
#>
#> [[2]]$active_ingredients
#> [[2]]$active_ingredients[[1]]
#> [[2]]$active_ingredients[[1]]$name
#> [1] "HYDROCHLOROTHIAZIDE"
#>
#> [[2]]$active_ingredients[[1]]$strength
#> [1] "12.5MG"
#>
#>
#> [[2]]$active_ingredients[[2]]
#> [[2]]$active_ingredients[[2]]$name
#> [1] "TELMISARTAN"
#>
#> [[2]]$active_ingredients[[2]]$strength
#> [1] "40MG"
#>
#>
#>
#> [[2]]$reference_standard
#> [1] "No"
#>
#> [[2]]$dosage_form
#> [1] "TABLET"
#>
#> [[2]]$route
#> [1] "ORAL"
#>
#> [[2]]$marketing_status
#> [1] "Prescription"
#>
#> [[2]]$te_code
#> [1] "AB"
#>
#>
#> [[3]]
#> [[3]]$product_number
#> [1] "002"
#>
#> [[3]]$reference_drug
#> [1] "No"
#>
#> [[3]]$brand_name
#> [1] "TELMISARTAN AND HYDROCHLOROTHIAZIDE"
#>
#> [[3]]$active_ingredients
#> [[3]]$active_ingredients[[1]]
#> [[3]]$active_ingredients[[1]]$name
#> [1] "HYDROCHLOROTHIAZIDE"
#>
#> [[3]]$active_ingredients[[1]]$strength
#> [1] "12.5MG"
#>
#>
#> [[3]]$active_ingredients[[2]]
#> [[3]]$active_ingredients[[2]]$name
#> [1] "TELMISARTAN"
#>
#> [[3]]$active_ingredients[[2]]$strength
#> [1] "80MG"
#>
#>
#>
#> [[3]]$reference_standard
#> [1] "No"
#>
#> [[3]]$dosage_form
#> [1] "TABLET"
#>
#> [[3]]$route
#> [1] "ORAL"
#>
#> [[3]]$marketing_status
#> [1] "Prescription"
#>
#> [[3]]$te_code
#> [1] "AB"
Extracting openFDA fields programmatically
The JSON data from openFDA()
is useful, and can be
easily parsed with tools from purrr and
vctrs. The following function dips into the JSON data
from openFDA()
and pulls out data from specific fields. It
extracts all occurrences of data within the requested field, then
condenses these down into a single character vector.
#' Extract openFDA field data from openFDA JSON objects
#'
#' Looks inside the `openfda` element of every entry in `json$results`, pulls
#' out the requested field, and condenses everything found into a single
#' vector of unique values.
#'
#' @param json JSON data generated by running [httr2::resp_body_json] on the
#'   output from `openFDA::openFDA()`.
#' @param openfda_field A single string naming the field to extract from each
#'   result's `openfda` element. Supply the name without the `"openfda."`
#'   prefix, e.g. `"generic_name"` or `"pharm_class_moa"`.
#' @returns A character vector with all unique values of `openfda_field` for
#'   your given search, or `character(0)` when no result contains the field.
extract_openFDA_field <- function(json, openfda_field) {
  values <- json |>
    purrr::pluck("results") |>
    purrr::map(.f = \(x) purrr::pluck(x, "openfda", openfda_field)) |>
    vctrs::list_drop_empty() |>
    unlist()
  # `unlist()` yields NULL when nothing was found; return an empty character
  # vector instead so the result always matches the documented type.
  if (is.null(values)) {
    return(character(0))
  }
  unique(values)
}
Brand names
Now that we have our function for data extraction, we can pull out the brand names associated with the generic drugs we queried against the API above.
# Collect every brand name attached to the BNF search results, then print a
# small slice of them.
openFDA_brand_names <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "brand_name")
print(openFDA_brand_names[seq(from = 25, to = 30)])
#> [1] "IRBESARTAN"
#> [2] "NIMODIPINE"
#> [3] "TELMISARTAN AND HYDROCHLOROTHIAZIDE"
#> [4] "DILTIAZEM HYDROCHLORIDE IN SODIUM CHLORIDE"
#> [5] "AMLODIPINE BESYLATE"
#> [6] "PIOGLITAZONE HYDROCHLORIDE"
Query openFDA with EPCs
We can use established pharmacologic classes (EPCs) to run queries against
the openFDA API. We do this by removing the "[EPC]"
tags from our retrieved data, then
supplying the cleaned class names in the search
strategy.
# Pull the EPCs out of the BNF search results before cleaning them, so that
# `openfda_EPCs` is defined at this point. The pattern strips a trailing
# bracketed tag such as " [EPC]" from each value, leaving the bare class name.
openfda_EPCs <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "pharm_class_epc") |>
  stringr::str_remove(pattern = " \\[.*\\]")
openfda_EPCs
#> [1] "Loop Diuretic"
#> [2] "HMG-CoA Reductase Inhibitor"
#> [3] "Dietary Cholesterol Absorption Inhibitor"
#> [4] "Angiotensin Converting Enzyme Inhibitor"
#> [5] "Thiazide Diuretic"
#> [6] "Angiotensin 2 Receptor Blocker"
#> [7] "GLP-1 Receptor Agonist"
#> [8] "Sulfonylurea"
#> [9] "Thiazide-like Diuretic"
#> [10] "Adenosine Triphosphate-Citrate Lyase Inhibitor"
#> [11] "Dihydropyridine Calcium Channel Blocker"
#> [12] "Sodium-Glucose Cotransporter 2 Inhibitor"
#> [13] "Dipeptidyl Peptidase 4 Inhibitor"
#> [14] "Potassium-sparing Diuretic"
#> [15] "Calcium Channel Blocker"
#> [16] "Aldosterone Antagonist"
#> [17] "Peroxisome Proliferator Receptor alpha Agonist"
#> [18] "Peroxisome Proliferator Receptor gamma Agonist"
#> [19] "Thiazolidinedione"
#> [20] "Insulin Analog"
Query openFDA with our EPCs
Before these strings are sent to openFDA as search terms, they must be
surrounded with double-quotation marks ("
). The code below does not add them by hand, because openFDA()
quotes each search term by default. Quoting ensures
openFDA considers each term as a single string, instead of a set of
strings. Without the quotes, a term such as
"Thiazide Diuretic"
will be treated as a search for any
drugs with either “Thiazide” or “Diuretic” in the EPC, instead of a
search for specifically thiazide diuretics.
# Run one Drugs@FDA query per EPC and parse each response body as JSON.
epc_search <- purrr::map(
  .x = openfda_EPCs,
  .f = \(epc) {
    openFDA(search = c("openfda.pharm_class_epc" = epc),
            endpoint = "drug-drugsfda") |>
      httr2::resp_body_json()
  })
# Gather the generic names found for every EPC into one de-duplicated
# character vector. `purrr::list_c()` replaces the superseded
# `purrr::flatten_chr()`; `ptype = character()` keeps the result a character
# vector even when no query returned a generic name.
epc_generics <- purrr::map(
  .x = epc_search,
  .f = \(json) extract_openFDA_field(json, openfda_field = "generic_name")) |>
  purrr::list_c(ptype = character()) |>
  unique()
Compare new generic names to BNF terms
Using some pattern matching, we can see which new generic drug terms we have retrieved by searching with EPCs.
# One alternation pattern that matches any of the BNF drug names.
bnf_regex <- sprintf("(%s)", paste(bnf, collapse = "|"))
# Lower-case the EPC-derived generics and keep those that match no BNF name.
epc_generics |>
  tolower() |>
  (\(generics) generics[!grepl(pattern = bnf_regex, x = generics)])()
#> [1] "torsemide" "furosemide"
#> [3] "ethacrynic acid" "furosemide injection 80 mg/ 10 ml"
#> [5] "lovastatin" "pitavastatin"
#> [7] "pitavastatin magnesium" "chlorothiazide"
#> [9] "sparsentan" "tirzepatide"
#> [11] "glyburide" "chlorthalidone"
#> [13] "metolazone" "atenolol and chlorthalidone"
#> [15] "atenolol and chlorthalidone tablet" "bempedoic acid"
#> [17] "nisoldipine" "isradipine"
#> [19] "clevidipine" "clevipidine"
#> [21] "sotagliflozin" "bexagliflozin"
#> [23] "triamterene" "triamterene capsules"
#> [25] "eplerenone" "spironolactone"
#> [27] "carospir" "fenofibric acid"
#> [29] "fenofibrate" "gemfibrozil"
#> [31] "insulin lispro" "insulin glargine"
#> [33] "insulin aspart" "insulin detemir"
#> [35] "insulin lispro-aabc" "insulin glulisine"
#> [37] "water injection" "diluent"
#> [39] "insulin glargine-yfgn" "insulin glargine-aglr"
#> [41] "insulin degludec" "insulin aspart-szjj"
#> [43] "insulin aspart injection"
Of these drugs, the following are of interest:
# Generic names picked out for follow-up brand-name searches. Both
# "glyburide" and "glibenclamide" are listed.
fda_generics <- c(
  "pitavastatin", "lovastatin",
  "chlorothiazide",
  "sparsentan",
  "nisoldipine", "isradipine", "clevidipine",
  "glyburide", "glibenclamide",
  "tirzepatide",
  "bexagliflozin", "sotagliflozin",
  "chlorthalidone", "metolazone",
  "spironolactone", "eplerenone",
  "bempedoic acid", "fenofibrate", "fenofibric acid",
  "gemfibrozil"
)
We can use these generic names of interest to get even more brand names:
# Look up the brand names for each generic of interest, giving a list named
# by generic. The generic is passed as-is: `openFDA()` already wraps the
# search term in double quotes, so quoting it by hand as well sent doubled
# quotes (e.g. `""glibenclamide""`) to the API.
more_brand_names <- purrr::map(
  .x = fda_generics,
  .f = \(generic) {
    openFDA(search = c("openfda.generic_name" = generic)) |>
      httr2::resp_body_json() |>
      extract_openFDA_field(openfda_field = "brand_name") |>
      tolower()
  }) |>
  setNames(fda_generics)
#> Warning in openFDA(c(openfda.generic_name = paste0("\"", generic, "\""))): The openFDA API returned a 404 error.
#> ! This indicates that openFDA had no results to return.
#> This can be due to:
#> • A restrictive search term. Yours was
#> "openfda.generic_name:\"\"glibenclamide\"\"".
# Show the brand names found for the first three generics.
more_brand_names[seq_len(3)]
#> $pitavastatin
#> [1] "nikita" "livalo" "pitavastatin"
#> [4] "pitavastatin calcium" "zypitamag"
#>
#> $lovastatin
#> [1] "lovastatin" "altoprev"
#>
#> $chlorothiazide
#> [1] "chlorothiazide sodium" "chlorothiazide" "diuril"
Queries about ANGPTL3 inhibitors
Finally, let’s collect some data on angiopoietin-like 3 (ANGPTL3) inhibitors, which can be used to treat homozygous familial hypercholesterolaemia in the UK and US.
# Find the EPC(s) recorded for evinacumab, stripping the trailing bracketed
# tag (e.g. " [EPC]") so the bare class name can be used as a search term.
evinacumab_search <- openFDA(search = c("openfda.generic_name" = "evinacumab"),
                             endpoint = "drug-drugsfda")
evinacumab_epc <- evinacumab_search |>
  httr2::resp_body_json() |>
  extract_openFDA_field("pharm_class_epc") |>
  stringr::str_remove(pattern = " \\[.*\\]")
# Search Drugs@FDA for every drug that shares that EPC.
agplt3_inhibitors_search <- openFDA(
  search = c("openfda.pharm_class_epc" = evinacumab_epc),
  endpoint = "drug-drugsfda"
)
# Parse the response body once and reuse it for both extractions, rather than
# decoding the same response twice.
agplt3_inhibitors_json <- httr2::resp_body_json(agplt3_inhibitors_search)
agplt3_inhibitors_brands <- extract_openFDA_field(
  agplt3_inhibitors_json, "brand_name"
)
agplt3_inhibitors_generics <- extract_openFDA_field(
  agplt3_inhibitors_json, "generic_name"
)
c("AGPLT3-I brands" = agplt3_inhibitors_brands,
  "AGPLT3-I generics" = agplt3_inhibitors_generics)
#> AGPLT3-I brands AGPLT3-I generics
#> "EVKEEZA" "EVINACUMAB"
Session info
# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] openFDA_0.1.0.9001
#>
#> loaded via a namespace (and not attached):
#> [1] vctrs_0.6.5 cli_3.6.5 knitr_1.50 rlang_1.1.6
#> [5] xfun_0.52 stringi_1.8.7 purrr_1.0.4 textshaping_1.0.1
#> [9] jsonlite_2.0.0 glue_1.8.0 openssl_2.3.3 backports_1.5.0
#> [13] askpass_1.2.1 htmltools_0.5.8.1 ragg_1.4.0 sass_0.4.10
#> [17] rappdirs_0.3.3 rmarkdown_2.29 evaluate_1.0.4 jquerylib_0.1.4
#> [21] fastmap_1.2.0 yaml_2.3.10 lifecycle_1.0.4 httr2_1.1.2
#> [25] stringr_1.5.1 compiler_4.5.1 fs_1.6.6 systemfonts_1.2.3
#> [29] digest_0.6.37 R6_2.6.1 curl_6.4.0 magrittr_2.0.3
#> [33] bslib_0.9.0 checkmate_2.3.2 tools_4.5.1 pkgdown_2.1.3
#> [37] cachem_1.1.0 desc_1.4.3