Get brand names and EPCs from generic BNF terms
First, we’ll use openFDA()
to retrieve brand names for
the drugs we are interested in from the British National Formulary (BNF).
We start by initialising a vector of generic drug names from the BNF.
# Generic drug names taken from the BNF, grouped by drug class.
bnf <- c(
  # ACE inhibitors
  "captopril", "enalapril", "fosinopril", "imidapril", "lisinopril",
  "perindopril", "quinapril", "ramipril", "trandolapril",
  # Calcium channel blockers
  "amlodipine", "diltiazem", "felodipine", "lacidipine", "lercanidipine",
  "nicardipine", "nifedipine", "nimodipine", "verapamil",
  # Angiotensin II receptor blockers
  "azilsartan", "candesartan", "eprosartan", "irbesartan",
  "losartan", "olmesartan", "telmisartan", "valsartan",
  # Diuretics
  "bumetanide", "bendroflumethiazide", "chlortalidone",
  "hydrochlorothiazide", "indapamide",
  # Statins
  "atorvastatin", "fluvastatin",
  "pravastatin", "rosuvastatin", "simvastatin",
  # Cholesterol absorption inhibitor
  "ezetimibe",
  # PCSK9 inhibitors
  "alirocumab", "evolocumab",
  # Biguanide
  "metformin",
  # Sulfonylureas
  "gliclazide", "glimepiride", "tolbutamide", "glipizide",
  # DPP-4 inhibitors
  "alogliptin", "linagliptin", "saxagliptin", "sitagliptin",
  "vildagliptin",
  # SGLT2 inhibitors
  "canagliflozin", "dapagliflozin", "empagliflozin", "ertugliflozin",
  # GLP-1 receptor agonists
  "dulaglutide", "exenatide", "liraglutide", "lixisenatide",
  "semaglutide",
  # Thiazolidinedione
  "pioglitazone"
)
We can then paste these into a single search string, and use it to
search against the "openfda.generic_name"
field in the Drugs@FDA
endpoint.
# Build one search term for the `openfda.generic_name` field by joining every
# BNF name with "+".
bnf_search_term <- format_search_term(
  c("openfda.generic_name" = paste0(bnf, collapse = "+")),
  exact = FALSE
)

# Query the Drugs@FDA endpoint and parse the response body as JSON.
bnf_search <- openFDA(search = bnf_search_term, endpoint = "drug-drugsfda")
bnf_search_json <- httr2::resp_body_json(bnf_search)

# The JSON object contains information including the generic and brand names.
# The product list is looked up by name: positional indexing (`[4]`) silently
# picks a different field whenever a record's fields are ordered differently.
# NOTE(review): assumes the element shown in the output below is the record's
# `products` field - confirm.
bnf_search_json$results[[1]][["products"]][1:3]
#> [[1]]
#> [[1]]$product_number
#> [1] "001"
#>
#> [[1]]$reference_drug
#> [1] "No"
#>
#> [[1]]$brand_name
#> [1] "HYDROCHLOROTHIAZIDE"
#>
#> [[1]]$active_ingredients
#> [[1]]$active_ingredients[[1]]
#> [[1]]$active_ingredients[[1]]$name
#> [1] "HYDROCHLOROTHIAZIDE"
#>
#> [[1]]$active_ingredients[[1]]$strength
#> [1] "50MG"
#>
#>
#>
#> [[1]]$reference_standard
#> [1] "No"
#>
#> [[1]]$dosage_form
#> [1] "TABLET"
#>
#> [[1]]$route
#> [1] "ORAL"
#>
#> [[1]]$marketing_status
#> [1] "Discontinued"
#>
#>
#> [[2]]
#> NULL
#>
#> [[3]]
#> NULL
Extracting openFDA fields programmatically
The JSON data from openFDA()
is useful, and can be
easily parsed with tools from purrr and
vctrs. The following function dips into the JSON data
from openFDA()
and pulls out data from specific fields. It
extracts all occurrences of data within the requested field, then
condenses these down into a single character vector.
#' Extract openFDA field data from openFDA JSON objects
#'
#' Looks inside the `openfda` element of every record in `json$results`,
#' gathers the values stored under `openfda_field`, and returns them
#' de-duplicated.
#'
#' @param json A list holding a parsed openFDA response body, e.g. the output
#'   of `httr2::resp_body_json()`. Records are read from its `results`
#'   element.
#' @param openfda_field A single string naming the field to extract from each
#'   record's `openfda` element, e.g. `"generic_name"` or
#'   `"pharm_class_epc"`. Do not include an `"openfda."` prefix.
#' @returns A character vector with all unique values of `openfda_field` for
#'   your given search, or `character(0)` if no record holds that field.
extract_openFDA_field <- function(json, openfda_field) {
  values <- json |>
    purrr::pluck("results") |>
    purrr::map(.f = \(record) purrr::pluck(record, "openfda", openfda_field)) |>
    # Records without the requested field yield NULL; drop them.
    vctrs::list_drop_empty() |>
    unlist()

  # `unlist()` gives NULL when nothing was found; return the documented type.
  if (is.null(values)) {
    return(character(0))
  }
  unique(values)
}
Brand names
Now that we have our function for data extraction, we can pull out the brand names associated with the generic drugs we queried against the API above.
# Collect every unique brand name found in the BNF search results, then show
# a small sample of them.
openFDA_brand_names <- extract_openFDA_field(
  json = bnf_search_json,
  openfda_field = "brand_name"
)
print(openFDA_brand_names[25:30])
#> [1] "FELOPDIPINE" "ROSUVASTATIN CALCIUM"
#> [3] "DILTIAZEM HYDROCHLORIDE" "CARDIZEM"
#> [5] "AMLODIPINE AND VALSARTAN" "QUINAPRIL"
Query openFDA with EPCs
We can use established pharmacologic classes (EPCs) to run queries against the openFDA API. We do this by
removing the "[EPC]"
tags from our retrieved data, then
supplying the cleaned class names in the search
strategy.
# Pull the EPC values out of the BNF search results. `openfda_EPCs` must be
# assigned here, otherwise it does not exist when the tag-stripping call below
# runs.
openfda_EPCs <- extract_openFDA_field(bnf_search_json, "pharm_class_epc")

# Strip the bracketed tag (e.g. " [EPC]") from each value.
openfda_EPCs <- stringr::str_remove(openfda_EPCs, pattern = " \\[.*\\]")
openfda_EPCs
#> [1] "HMG-CoA Reductase Inhibitor"
#> [2] "PCSK9 Inhibitor"
#> [3] "Thiazide Diuretic"
#> [4] "Angiotensin 2 Receptor Blocker"
#> [5] "Dipeptidyl Peptidase 4 Inhibitor"
#> [6] "Sulfonylurea"
#> [7] "Dihydropyridine Calcium Channel Blocker"
#> [8] "Dietary Cholesterol Absorption Inhibitor"
#> [9] "Thiazide-like Diuretic"
#> [10] "Aldosterone Antagonist"
#> [11] "GLP-1 Receptor Agonist"
#> [12] "Loop Diuretic"
#> [13] "Angiotensin Converting Enzyme Inhibitor"
#> [14] "Sodium-Glucose Cotransporter 2 Inhibitor"
#> [15] "Potassium-sparing Diuretic"
#> [16] "Calcium Channel Blocker"
#> [17] "Adenosine Triphosphate-Citrate Lyase Inhibitor"
Query openFDA with our EPCs
Before supplying these strings as search terms to openFDA, we
surround them with double-quotation marks ("
). This ensures
openFDA considers each term as a single string, instead of a set of
strings. Without the quotes, a term such as
"Thiazide Diuretic"
will be treated as a search for any
drugs with either “Thiazide” or “Diuretic” in the EPC, instead of a
search for specifically thiazide diuretics.
# Run one Drugs@FDA search per EPC and keep the parsed JSON of each response.
epc_search <- purrr::map(
  .x = openfda_EPCs,
  .f = \(epc) {
    openFDA(
      search = c("openfda.pharm_class_epc" = epc),
      endpoint = "drug-drugsfda"
    ) |>
      httr2::resp_body_json()
  }
)

# Gather the generic names from every response into one de-duplicated
# character vector. `list_c()` replaces the superseded `flatten_chr()`;
# `ptype = character()` pins the result type to character.
epc_generics <- purrr::map(
  .x = epc_search,
  .f = \(json) extract_openFDA_field(json, openfda_field = "generic_name")
) |>
  purrr::list_c(ptype = character()) |>
  unique()
Compare new generic names to BNF terms
Using some pattern matching, we can see which new generic drug terms we have retrieved by searching with EPCs.
# Alternation pattern that matches any BNF name anywhere inside a string.
bnf_regex <- paste0("(", paste(bnf, collapse = "|"), ")")

# Show the lower-cased EPC-derived generic names that contain no BNF name.
grep(bnf_regex, tolower(epc_generics), value = TRUE, invert = TRUE)
#> [1] "pitavastatin magnesium" "lovastatin"
#> [3] "pitavastatin" "chlorothiazide"
#> [5] "sparsentan" "glyburide"
#> [7] "nisoldipine" "isradipine"
#> [9] "clevidipine" "clevipidine"
#> [11] "chlorthalidone" "atenolol and chlorthalidone"
#> [13] "metolazone" "atenolol and chlorthalidone tablet"
#> [15] "spironolactone" "carospir"
#> [17] "eplerenone" "tirzepatide"
#> [19] "furosemide" "ethacrynic acid"
#> [21] "torsemide" "furosemide injection 80 mg/ 10 ml"
#> [23] "sotagliflozin" "bexagliflozin"
#> [25] "triamterene capsules" "triamterene"
#> [27] "bempedoic acid"
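Of these drugs, the following are of interest. A few related generic names that the EPC searches did not return (glibenclamide, fenofibrate, fenofibric acid and gemfibrozil) are added by hand: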
# Additional generic names of interest, used below to look up brand names.
fda_generics <- c(
  # Statins
  "pitavastatin", "lovastatin",
  # Thiazide diuretic
  "chlorothiazide",
  "sparsentan",
  # Dihydropyridine calcium channel blockers
  "nisoldipine", "isradipine", "clevidipine",
  # Sulfonylureas
  "glyburide", "glibenclamide",
  "tirzepatide",
  # SGLT inhibitors
  "bexagliflozin", "sotagliflozin",
  # Thiazide-like diuretics
  "chlorthalidone", "metolazone",
  # Aldosterone antagonists
  "spironolactone", "eplerenone",
  # Other lipid-lowering drugs
  "bempedoic acid", "fenofibrate", "fenofibric acid",
  "gemfibrozil"
)
We can use these generic names of interest to get even more brand names:
# For each generic of interest, look up its brand names and lower-case them;
# the result is a list named by generic.
# NOTE(review): the search term echoed in the warning below carries doubled
# quotes, which suggests openFDA() may already quote terms itself - confirm
# whether the manual quoting here is needed.
more_brand_names <- fda_generics |>
  setNames(fda_generics) |>
  purrr::map(\(generic) {
    response <- openFDA(
      c("openfda.generic_name" = paste0("\"", generic, "\""))
    )
    brands <- extract_openFDA_field(
      httr2::resp_body_json(response),
      openfda_field = "brand_name"
    )
    tolower(brands)
  })
#> Warning in openFDA(c(openfda.generic_name = paste0("\"", generic, "\""))): The openFDA API returned a 404 error.
#> ! This indicates that openFDA had no results to return.
#> This can be due to:
#> • A restrictive search term. Yours was
#> "openfda.generic_name:\"\"glibenclamide\"\"".
more_brand_names[1:3]
#> $pitavastatin
#> [1] "zypitamag" "pitavastatin" "pitavastatin calcium"
#> [4] "livalo" "nikita"
#>
#> $lovastatin
#> [1] "lovastatin" "altoprev"
#>
#> $chlorothiazide
#> [1] "chlorothiazide sodium" "chlorothiazide" "diuril"
Queries about ANGPTL3 inhibitors
Finally, let’s collect some data on angiopoietin-like 3 (ANGPTL3) inhibitors, which can be used in homozygous familial hypercholesterolaemia in the UK and US.
# Look evinacumab up in Drugs@FDA by generic name.
evinacumab_search <- openFDA(
  search = c("openfda.generic_name" = "evinacumab"),
  endpoint = "drug-drugsfda"
)

# Take evinacumab's EPC and strip the bracketed tag (e.g. " [EPC]").
evinacumab_epc <- evinacumab_search |>
  httr2::resp_body_json() |>
  extract_openFDA_field("pharm_class_epc") |>
  stringr::str_remove(pattern = " \\[.*\\]")

# Search for every drug that shares that EPC.
agplt3_inhibitors_search <- openFDA(
  search = c("openfda.pharm_class_epc" = evinacumab_epc),
  endpoint = "drug-drugsfda"
)

# Parse the response once and reuse it for both extractions.
agplt3_inhibitors_json <- httr2::resp_body_json(agplt3_inhibitors_search)
agplt3_inhibitors_brands <- extract_openFDA_field(
  agplt3_inhibitors_json, "brand_name"
)
agplt3_inhibitors_generics <- extract_openFDA_field(
  agplt3_inhibitors_json, "generic_name"
)

# The printed labels use the correct abbreviation, ANGPTL3 (angiopoietin-like
# 3). The variable names keep their original spelling so that any existing
# references to them still resolve.
c("ANGPTL3-I brands" = agplt3_inhibitors_brands,
  "ANGPTL3-I generics" = agplt3_inhibitors_generics)
#> ANGPTL3-I brands ANGPTL3-I generics
#> "EVKEEZA" "EVINACUMAB"
Session info
# Print the R version, platform and package versions in use.
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] openFDA_0.1.0.9000
#>
#> loaded via a namespace (and not attached):
#> [1] vctrs_0.6.5 cli_3.6.3 knitr_1.48 rlang_1.1.4
#> [5] xfun_0.48 stringi_1.8.4 purrr_1.0.2 textshaping_0.4.0
#> [9] jsonlite_1.8.9 glue_1.8.0 backports_1.5.0 htmltools_0.5.8.1
#> [13] ragg_1.3.3 sass_0.4.9 rappdirs_0.3.3 rmarkdown_2.28
#> [17] evaluate_1.0.1 jquerylib_0.1.4 fastmap_1.2.0 yaml_2.3.10
#> [21] lifecycle_1.0.4 httr2_1.0.5 stringr_1.5.1 compiler_4.4.1
#> [25] fs_1.6.4 systemfonts_1.1.0 digest_0.6.37 R6_2.5.1
#> [29] curl_5.2.3 magrittr_2.0.3 bslib_0.8.0 checkmate_2.3.2
#> [33] tools_4.4.1 pkgdown_2.1.1 cachem_1.1.0 desc_1.4.3