The tabela package provides a streamlined interface for scraping and parsing HTML tables and downloadable files directly from web pages. Educators and students can use tabela for:
Data-driven Teaching Materials: Rapidly prototype datasets for classroom exercises by extracting tables from web sources.
Real-time Reporting: Integrate up-to-date data into interactive reports and dashboards for demonstrations and assignments.
Automated Data Ingestion: Streamline workflows by programmatically collecting tabular data from most educational websites.
Setup
# install.packages(c("tabela","dplyr","ggplot2","ggpubr","ggthemes","gt"))library(tabela)library(dplyr)library(ggplot2)library(ggpubr)library(ggthemes)library(readr)library(stringr)library(gt)library(ggmap)library(ggplot2)library(maps)library(plotly)library(viridis) # for a nice color scalelibrary(xml2)library(purrr) library(rvest)
1. Scrape the Data
# Page to scrape: the USAFacts COVID-19 spread map.
url <- "https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/"

# Open a polite scraping session on that page, then echo the URL stored on
# the session object as a quick sanity check.
session <- init_session(url)
print(session$url)
# Ten rows of covid_clean with the largest 7-day average case counts,
# ordered from highest to lowest.
top10_by_avg_cases <- covid_clean |>
  arrange(desc(`7-day avg. cases`)) |>
  slice_head(n = 10)

# Render those rows as a gt table: whole-number formatting for the count
# columns, one decimal for the per-100k rate, and shortened column headers.
top10_by_avg_cases |>
  gt() |>
  tab_header(
    title = md("**Top 10 States by 7-Day Average COVID-19 Cases**")
  ) |>
  fmt_number(
    columns = c(`7-day avg. cases`, `7-day avg. deaths`, Cases, Deaths),
    decimals = 0
  ) |>
  fmt_number(
    columns = c("7-day avg. hospitalizations per 100k"),
    decimals = 1
  ) |>
  cols_label(
    `7-day avg. cases` = "Avg Cases",
    `7-day avg. deaths` = "Avg Deaths",
    `7-day avg. hospitalizations per 100k` = "Hosp/100k"
  ) |>
  tab_options(
    table.font.size = px(14)
  )
Top 10 States by 7-Day Average COVID-19 Cases
State
Avg Cases
Avg Deaths
Cases
Deaths
7-day avg. hospitalizations
Hosp/100k
NY
429
−37
6,706,390
77,423
177
0.9
MA
268
0
2,048,722
21,035
52
0.8
VA
210
0
2,323,255
23,769
204
2.0
WA
184
2
1,969,833
15,972
34
0.4
MI
157
3
3,119,532
43,191
67
0.7
CA
128
0
11,300,486
102,356
377
1.0
HI
96
0
393,757
1,955
27
2.0
NV
74
0
892,252
12,084
26
0.8
WI
50
0
2,036,872
16,723
79
1.0
ME
18
1
324,378
3,085
25
1.0
5. Publication-Ready Plots
5.1 Top 10 States by Avg Cases
# Horizontal bar chart of the ten states with the highest 7-day average
# case counts.
#
# slice_max() replaces the superseded top_n(). with_ties = FALSE caps the
# selection at exactly ten rows, so the chart always matches its "Top 10"
# title even when several states share the cut-off value (top_n() would
# have kept every tied row).
p1 <- covid_clean |>
  slice_max(`7-day avg. cases`, n = 10, with_ties = FALSE) |>
  ggplot(aes(reorder(State, `7-day avg. cases`), `7-day avg. cases`)) +
  geom_col(fill = "steelblue") +
  # Flip the axes so the states run down the vertical axis.
  coord_flip() +
  labs(
    title = "Top 10 States by 7-Day Average COVID-19 Cases",
    x = "State",
    y = "7-Day Average Cases"
  ) +
  theme_pubr() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(size = 12)
  )

print(p1)
5.2 Cases vs Hospitalizations per 100 k
# Keep only the rows that report a hospitalization rate; the scatter plot
# below is drawn from these.
hosp_rate_known <- filter(
  covid_clean,
  !is.na(`7-day avg. hospitalizations per 100k`)
)

# Scatter of average cases against hospitalizations per 100k, overlaid with
# a dashed straight-line (lm) fit drawn without a confidence band.
p2 <- ggplot(
  hosp_rate_known,
  aes(
    x = `7-day avg. cases`,
    y = `7-day avg. hospitalizations per 100k`
  )
) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "7-Day Avg Cases vs Hospitalizations per 100 k",
    x = "7-Day Average Cases",
    y = "Hospitalizations per 100 k"
  ) +
  theme_few() +
  theme(plot.title = element_text(face = "bold", size = 16))

print(p2)
Another Real-world Example
Below is an example relevant to students: scraping the QS World University Rankings 2025 from Wikipedia using tabela.
# Second source: the Wikipedia article on the QS World University Rankings
# (used here for the 2025 edition).
url2 <- "https://en.wikipedia.org/wiki/QS_World_University_Rankings"

# Open a polite scraping session for this page and echo the URL stored on
# the session object.
session2 <- init_session(url2)
print(session2$url)
# Convert a degrees/minutes/seconds coordinate string (for example
# "51°29′54″N") into signed decimal degrees.
#
# The numeric components are read in order as degrees, minutes, seconds.
# Components that are absent (a coordinate given only to the minute, or only
# to the degree) are treated as zero instead of turning the result into NA.
# A hemisphere letter of S or W makes the result negative.
#
# Returns NA_real_ when `dms` is missing or contains no digits.
dms2dec <- function(dms) {
  if (length(dms) != 1 || is.na(dms)) {
    return(NA_real_)
  }

  parts <- as.numeric(str_extract_all(dms, "[0-9]+\\.?[0-9]*")[[1]])
  if (length(parts) == 0) {
    return(NA_real_)
  }

  # Pad to exactly (degrees, minutes, seconds); extra numbers are ignored.
  parts <- c(parts, 0, 0)[1:3]
  dec <- parts[1] + parts[2] / 60 + parts[3] / 3600

  dir <- str_extract(dms, "[NSEW]")
  if (dir %in% c("S", "W")) {
    dec <- -dec
  }
  dec
}

# Look up the coordinates published on one web page.
#
# name: label carried through to the output row.
# url:  page to read; the first elements with CSS classes ".latitude" and
#       ".longitude" are taken as the coordinate strings.
#
# Returns a one-row tibble with columns `name`, `lat`, `lon` (decimal
# degrees). If the page cannot be read, a warning is raised and the row
# carries NA coordinates, so a single bad URL does not abort the whole run.
get_coords <- function(name, url) {
  page <- tryCatch(
    read_html(url),
    error = function(e) {
      warning(
        "Could not read ", url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(page)) {
    return(tibble(name = name, lat = NA_real_, lon = NA_real_))
  }

  lat_dms <- page %>%
    html_element(".latitude") %>%
    html_text2()
  lon_dms <- page %>%
    html_element(".longitude") %>%
    html_text2()

  tibble(
    name = name,
    lat = dms2dec(lat_dms),
    lon = dms2dec(lon_dms)
  )
}

# One lookup per row of links_df, stacked into a single data frame.
# map2() + bind_rows() replaces the superseded pmap_dfr().
coords_df <- map2(links_df$name, links_df$url, get_coords) %>%
  bind_rows()

# Attach the coordinates to the link table, then apply manual overrides:
# these three institutions always get the fixed coordinates below,
# regardless of what was scraped.
final_df <- links_df %>%
  left_join(coords_df, by = "name") %>%
  mutate(
    lat = case_when(
      name == "Imperial College London" ~ 51.498356,
      name == "Nanyang Technological University" ~ 1.3483099,
      name == "University of New South Wales" ~ -33.917300,
      TRUE ~ lat
    ),
    lon = case_when(
      name == "Imperial College London" ~ -0.176894,
      name == "Nanyang Technological University" ~ 103.6831347,
      name == "University of New South Wales" ~ 151.225300,
      TRUE ~ lon
    )
  )

final_df %>%
  knitr::kable()