Skip to contents

Setup

Let’s start by loading the package.

library(openFDA)

Get brand names and EPCs from generic BNF terms

First, we’ll use openFDA() to retrieve brand names for the drugs we are interested in. We start by initialising a vector of generic drug names taken from the British National Formulary (BNF).

# Generic drug names taken from the BNF, grouped by drug class.
bnf <- c(
  # ACE inhibitors
  "captopril", "enalapril", "fosinopril", "imidapril", "lisinopril",
  "perindopril", "quinapril", "ramipril", "trandolapril",
  # Calcium channel blockers
  "amlodipine", "diltiazem", "felodipine", "lacidipine", "lercanidipine",
  "nicardipine", "nifedipine", "nimodipine", "verapamil",
  # Angiotensin II receptor blockers
  "azilsartan", "candesartan", "eprosartan", "irbesartan",
  "losartan", "olmesartan", "telmisartan", "valsartan",
  # Diuretics
  "bumetanide", "bendroflumethiazide", "chlortalidone",
  "hydrochlorothiazide", "indapamide",
  # Statins
  "atorvastatin", "fluvastatin",
  "pravastatin", "rosuvastatin", "simvastatin",
  # Cholesterol absorption inhibitor
  "ezetimibe",
  # PCSK9 inhibitors
  "alirocumab", "evolocumab",
  # Biguanide
  "metformin",
  # Sulfonylureas
  "gliclazide", "glimepiride", "tolbutamide", "glipizide",
  # DPP-4 inhibitors
  "alogliptin", "linagliptin", "saxagliptin", "sitagliptin",
  "vildagliptin",
  # SGLT2 inhibitors
  "canagliflozin", "dapagliflozin", "empagliflozin", "ertugliflozin",
  # GLP-1 receptor agonists
  "dulaglutide", "exenatide", "liraglutide", "lixisenatide",
  "semaglutide",
  # Thiazolidinedione
  "pioglitazone"
)

We can then paste these into a single search string and use it to search against the "openfda.generic_name" field in the Drugs@FDA endpoint.

# Join the BNF names with "+" and format them as a search term against the
# generic-name field, with `exact = FALSE`.
bnf_search_term <- format_search_term(
  c("openfda.generic_name" = paste(bnf, collapse = "+")),
  exact = FALSE
)

# Query the Drugs@FDA endpoint, then parse the response body as JSON.
bnf_search <- openFDA(search = bnf_search_term, endpoint = "drug-drugsfda")
bnf_search_json <- bnf_search |>
  httr2::resp_body_json()

# The JSON object contains information including the generic and brand names 
bnf_search_json$results[[1]][4][[1]][1:3]
#> [[1]]
#> [[1]]$product_number
#> [1] "001"
#> 
#> [[1]]$reference_drug
#> [1] "No"
#> 
#> [[1]]$brand_name
#> [1] "HYDROCHLOROTHIAZIDE"
#> 
#> [[1]]$active_ingredients
#> [[1]]$active_ingredients[[1]]
#> [[1]]$active_ingredients[[1]]$name
#> [1] "HYDROCHLOROTHIAZIDE"
#> 
#> [[1]]$active_ingredients[[1]]$strength
#> [1] "50MG"
#> 
#> 
#> 
#> [[1]]$reference_standard
#> [1] "No"
#> 
#> [[1]]$dosage_form
#> [1] "TABLET"
#> 
#> [[1]]$route
#> [1] "ORAL"
#> 
#> [[1]]$marketing_status
#> [1] "Discontinued"
#> 
#> 
#> [[2]]
#> NULL
#> 
#> [[3]]
#> NULL

Extracting openFDA fields programmatically

The JSON data from openFDA() is useful, and can be easily parsed with tools from purrr and vctrs. The following function dips into the JSON data from openFDA() and pulls out data from specific fields. It extracts all occurrences of data within the requested field, then condenses these down into a single character vector.

#' Extract openFDA field data from openFDA JSON objects
#'
#' Looks inside every element of `json$results`, pulls out the requested field
#' from that element's `"openfda"` entry, and condenses everything found into a
#' single vector of unique values.
#'
#' @param json A list of parsed JSON from an openFDA response, e.g. the output
#'   of `httr2::resp_body_json()`. Records are read from its `"results"` entry.
#' @param openfda_field A single string naming the field to extract from each
#'   result's `"openfda"` entry. Supply the name without the `"openfda."`
#'   prefix, e.g. `"generic_name"` or `"pharm_class_moa"`.
#' @returns A character vector with all unique values of `openfda_field` for
#'   your given search, or `character(0)` if no result contains the field.
extract_openFDA_field <- function(json, openfda_field) {
  values <- json |>
    purrr::pluck("results") |>
    purrr::map(.f = \(x) purrr::pluck(x, "openfda", openfda_field)) |>
    # Results without the field come back as NULL; drop them before flattening
    vctrs::list_drop_empty() |>
    unlist() |>
    unique()

  # `unlist()` on an empty list gives NULL; return an empty character vector
  # instead so the result always matches the documented type.
  if (is.null(values)) {
    return(character(0))
  }
  values
}

Brand names

Now that we have our function for data extraction, we can pull out the brand names associated with the generic drugs we queried against the API above.

# Collect the unique brand names from the BNF search results, then print a
# small slice of them.
openFDA_brand_names <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "brand_name")

print(openFDA_brand_names[25:30])
#> [1] "FELOPDIPINE"              "ROSUVASTATIN CALCIUM"    
#> [3] "DILTIAZEM HYDROCHLORIDE"  "CARDIZEM"                
#> [5] "AMLODIPINE AND VALSARTAN" "QUINAPRIL"

Established pharmacological classes

We’ll also extract the established pharmacological classes (EPCs) of our drugs. We will use these EPCs as their own query term below, to find generic drugs which were not within our BNF drug list.

# Collect the unique EPC strings from the same search results.
openfda_EPCs <- bnf_search_json |>
  extract_openFDA_field(openfda_field = "pharm_class_epc")

# Report how many unique brand names and EPCs were found.
list(brand_names = openFDA_brand_names, EPCs = openfda_EPCs) |>
  lengths()
#> brand_names        EPCs 
#>         169          17

Query openFDA with EPCs

We can use EPCs to run queries against the openFDA API. We do this by removing the "[EPC]" tags from our retrieved data, then supplying them in the search strategy.

# Remove the first " [...]" tag (a space followed by bracketed text) from each
# EPC string, then print the cleaned values.
openfda_EPCs <- openfda_EPCs |>
  stringr::str_remove(pattern = " \\[.*\\]")
openfda_EPCs
#>  [1] "HMG-CoA Reductase Inhibitor"                   
#>  [2] "PCSK9 Inhibitor"                               
#>  [3] "Thiazide Diuretic"                             
#>  [4] "Angiotensin 2 Receptor Blocker"                
#>  [5] "Dipeptidyl Peptidase 4 Inhibitor"              
#>  [6] "Sulfonylurea"                                  
#>  [7] "Dihydropyridine Calcium Channel Blocker"       
#>  [8] "Dietary Cholesterol Absorption Inhibitor"      
#>  [9] "Thiazide-like Diuretic"                        
#> [10] "Aldosterone Antagonist"                        
#> [11] "GLP-1 Receptor Agonist"                        
#> [12] "Loop Diuretic"                                 
#> [13] "Angiotensin Converting Enzyme Inhibitor"       
#> [14] "Sodium-Glucose Cotransporter 2 Inhibitor"      
#> [15] "Potassium-sparing Diuretic"                    
#> [16] "Calcium Channel Blocker"                       
#> [17] "Adenosine Triphosphate-Citrate Lyase Inhibitor"

Query openFDA with our EPCs

Before these strings are sent to openFDA as search terms, they need to be surrounded with double-quotation marks ("). openFDA() adds these quotes for us by default, so we can pass each EPC as-is. The quotes ensure openFDA considers each term as a single phrase, instead of a set of separate words. Without the quotes, a term such as "Thiazide Diuretic" would be treated as a search for any drug with either “Thiazide” or “Diuretic” in the EPC, instead of a search specifically for thiazide diuretics.

# Run one Drugs@FDA query per EPC and parse each response body as JSON.
epc_search <- purrr::map(
  .x = openfda_EPCs,
  .f = \(epc) {
    openFDA(
      search = c("openfda.pharm_class_epc" = epc),
      endpoint = "drug-drugsfda"
    ) |>
      httr2::resp_body_json()
  }
)

# Gather the generic names from every EPC query into one de-duplicated
# character vector. `purrr::list_c()` is used in place of `flatten_chr()`,
# which has been superseded since purrr 1.0.0.
epc_generics <- epc_search |>
  purrr::map(
    .f = \(json) extract_openFDA_field(json, openfda_field = "generic_name")
  ) |>
  purrr::list_c(ptype = character()) |>
  unique()

Compare new generic names to BNF terms

Using some pattern matching, we can see which new generic drug terms we have retrieved by searching with EPCs.


# Build a single alternation pattern from the BNF names, then list the
# lower-cased EPC generics that match none of them.
bnf_regex <- paste0("(", paste(bnf, collapse = "|"), ")")
grep(bnf_regex, tolower(epc_generics), value = TRUE, invert = TRUE)
#>  [1] "pitavastatin magnesium"             "lovastatin"                        
#>  [3] "pitavastatin"                       "chlorothiazide"                    
#>  [5] "sparsentan"                         "glyburide"                         
#>  [7] "nisoldipine"                        "isradipine"                        
#>  [9] "clevidipine"                        "clevipidine"                       
#> [11] "chlorthalidone"                     "atenolol and chlorthalidone"       
#> [13] "metolazone"                         "atenolol and chlorthalidone tablet"
#> [15] "spironolactone"                     "carospir"                          
#> [17] "eplerenone"                         "tirzepatide"                       
#> [19] "furosemide"                         "ethacrynic acid"                   
#> [21] "torsemide"                          "furosemide injection 80 mg/ 10 ml" 
#> [23] "sotagliflozin"                      "bexagliflozin"                     
#> [25] "triamterene capsules"               "triamterene"                       
#> [27] "bempedoic acid"

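Of these drugs, the following are of interest. We also add a few related generic names by hand which did not appear in the results above: glibenclamide (the international name for glyburide) and the fibrates fenofibrate, fenofibric acid and gemfibrozil.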

# Generic names used for the follow-up brand-name queries.
fda_generics <- c(
  "pitavastatin", "lovastatin",
  "chlorothiazide",
  "sparsentan",
  "nisoldipine", "isradipine", "clevidipine",
  "glyburide", "glibenclamide",
  "tirzepatide",
  "bexagliflozin", "sotagliflozin",
  "chlorthalidone", "metolazone",
  "spironolactone", "eplerenone",
  "bempedoic acid", "fenofibrate", "fenofibric acid",
  "gemfibrozil"
)

We can use these generic names of interest to get even more brand names:

# Query each generic name of interest and keep its lower-cased brand names,
# as a list named by generic.
# NOTE(review): the bare name is passed because `openFDA()` appears to add the
# surrounding double quotes itself; quoting by hand produced `""name""` in the
# request. Confirm against `format_search_term()`'s `exact` default.
more_brand_names <- purrr::map(
  .x = fda_generics,
  .f = \(generic) {
    openFDA(search = c("openfda.generic_name" = generic)) |>
      httr2::resp_body_json() |>
      extract_openFDA_field(openfda_field = "brand_name") |>
      tolower()
  }
) |>
  setNames(fda_generics)
#> Warning in openFDA(search = c(openfda.generic_name = generic)): The openFDA API returned a 404 error.
#> ! This indicates that openFDA had no results to return.
#> This can be due to:
#>  A restrictive search term. Yours was
#>   "openfda.generic_name:\"glibenclamide\"".

more_brand_names[1:3]
#> $pitavastatin
#> [1] "zypitamag"            "pitavastatin"         "pitavastatin calcium"
#> [4] "livalo"               "nikita"              
#> 
#> $lovastatin
#> [1] "lovastatin" "altoprev"  
#> 
#> $chlorothiazide
#> [1] "chlorothiazide sodium" "chlorothiazide"        "diuril"

Queries about ANGPTL3 inhibitors

Finally, let’s collect some data on angiopoietin-like 3 (ANGPTL3) inhibitors, which can be used to treat homozygous familial hypercholesterolaemia in the UK and US.

# Look up evinacumab and read off its EPC, removing the " [...]" tag.
evinacumab_search <- openFDA(search = c("openfda.generic_name" = "evinacumab"),
                             endpoint = "drug-drugsfda")
evinacumab_epc <- evinacumab_search |>
  httr2::resp_body_json() |>
  extract_openFDA_field("pharm_class_epc") |>
  stringr::str_remove(pattern = " \\[.*\\]")

# Search Drugs@FDA for every drug sharing that EPC.
agplt3_inhibitors_search <- openFDA(
  search = c("openfda.pharm_class_epc" = evinacumab_epc),
  endpoint = "drug-drugsfda"
)

# Parse the response body once and reuse it for both extractions below.
agplt3_inhibitors_json <- httr2::resp_body_json(agplt3_inhibitors_search)

agplt3_inhibitors_brands <- agplt3_inhibitors_json |>
  extract_openFDA_field("brand_name")

agplt3_inhibitors_generics <- agplt3_inhibitors_json |>
  extract_openFDA_field("generic_name")

c("AGPLT3-I brands" = agplt3_inhibitors_brands, 
  "AGPLT3-I generics" = agplt3_inhibitors_generics)
#>   AGPLT3-I brands AGPLT3-I generics 
#>         "EVKEEZA"      "EVINACUMAB"

Session info

# Print the R version, platform and package versions for this session.
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] openFDA_0.1.0.9000
#> 
#> loaded via a namespace (and not attached):
#>  [1] vctrs_0.6.5       cli_3.6.3         knitr_1.48        rlang_1.1.4      
#>  [5] xfun_0.48         stringi_1.8.4     purrr_1.0.2       textshaping_0.4.0
#>  [9] jsonlite_1.8.9    glue_1.8.0        backports_1.5.0   htmltools_0.5.8.1
#> [13] ragg_1.3.3        sass_0.4.9        rappdirs_0.3.3    rmarkdown_2.28   
#> [17] evaluate_1.0.1    jquerylib_0.1.4   fastmap_1.2.0     yaml_2.3.10      
#> [21] lifecycle_1.0.4   httr2_1.0.5       stringr_1.5.1     compiler_4.4.1   
#> [25] fs_1.6.4          systemfonts_1.1.0 digest_0.6.37     R6_2.5.1         
#> [29] curl_5.2.3        magrittr_2.0.3    bslib_0.8.0       checkmate_2.3.2  
#> [33] tools_4.4.1       pkgdown_2.1.1     cachem_1.1.0      desc_1.4.3