Want to see the code? Click on the black boxes on the right to show/hide the code.
Le Tour de France - the most famous cycling race in the world. A 3 week-long endurance event across France and sometimes beyond.
Although the route is ever-changing there are some locations such as L’Alpe d’Huez, Mont Ventoux, and the Champs-Élysées that are inscribed in the history of Le Tour.
Similarly there are some stages that are used more often than others. Today’s final map is a visualisation of the most used stages in the 121-year history of the Tour de France.
library(rayshader)
library(sp)
library(sf)
library(ggplot2)
library(stringr)
library(tidygeocoder)
library(dplyr)
library(showtext)
# Function to create a quadratic Bezier curve between two points
# This makes the line betwee the two towns more attractive
create_curve <- function(p1, p2, control_factor = 0.5) {
# Control point (to create a curve)
control_x <- (p1[1] + p2[1]) / 2
control_y <- max(p1[2], p2[2]) + control_factor * abs(p1[2] - p2[2])
# Interpolation: Quadratic Bézier formula
t <- seq(0, 1, length.out = 50) # 50 points along the curve
curve_x <- (1 - t)^2 * p1[1] + 2 * (1 - t) * t * control_x + t^2 * p2[1]
curve_y <- (1 - t)^2 * p1[2] + 2 * (1 - t) * t * control_y + t^2 * p2[2]
# Create a matrix of curve points
matrix(c(curve_x, curve_y), ncol = 2)
}
# Load the arrival and departure points
stages <- read.csv("https://raw.githubusercontent.com/thomascamminady/LeTourDataSet/refs/heads/master/data/TDF_Stages_History.csv")
# Extract the depart and arrive towns
stages$depart <- sub(".*: (.*?) >.*", "\\1", stages$Stage)
stages$arrive <- sub(".*> (.*)", "\\1", stages$Stage)
#if geocoded places cache is missing geocode the places
if (!file.exists("./data/30/geocoded-places.csv")) {
places <- data.frame(place = unique(c(stages$depart, stages$arrive)), a=0) %>%
geocode(city=place, lat="lat", long="lng", limit=1 )
write.csv(places, "./data/30/geocoded-places.csv")
}
# Load the geocoded places
places <- read.csv("./data/30/geocoded-places.csv")
# Group the combinations of arrivals and departures
combination_counts <- stages %>%
group_by(depart, arrive) %>%
summarise(count = n())
# Add a label for the combination for the map
combination_counts$label = paste0(combination_counts$depart, " - ", combination_counts$arrive)
# Add depart coordinates
combination_counts <- combination_counts %>%
left_join(places, by = c("depart" = "place")) %>%
rename(depart_lat = lat, depart_lon = lng)
# Add arrive coordinates
combination_counts <- combination_counts %>%
left_join(places, by = c("arrive" = "place")) %>%
rename(arrive_lat = lat, arrive_lon = lng)
# Remove rows with missing coordinates
combination_counts <- combination_counts %>%
mutate(
arrive_lat = if_else(is.na(arrive_lat), 0, arrive_lat), # Default to 0 if missing
arrive_lon = if_else(is.na(arrive_lon), 0, arrive_lon), # Default to 0 if missing
depart_lat = if_else(is.na(depart_lat), 0, depart_lat), # Default to 0 if missing
depart_lon = if_else(is.na(depart_lon), 0, depart_lon), # Default to 0 if missing
)
# Create an sf object with the top 10 routes with a curved line geometry
top_routes_sf <- combination_counts[order(-combination_counts$count), ] %>%
rowwise() %>%
mutate(
geometry = st_sfc(
st_linestring(create_curve(
c(depart_lon, depart_lat),
c(arrive_lon, arrive_lat)
)),
crs = 4326
)
) %>%
ungroup() %>%
slice_head(n=10) %>%
st_as_sf(crs = 4326) %>%
st_transform(crs = 3857)
# Get all the departure points and arrival points and smoosh them together
places1 <- top_routes_sf[,c("depart","depart_lat","depart_lon","geometry")]
names(places1) <- (c("name","lat", "lng", "geometry"))
places2 <- top_routes_sf[,c("arrive","arrive_lat","arrive_lon","geometry")]
names(places2) <- (c("name","lat", "lng", "geometry"))
places <- rbind(places1,places2)
# Transform the sf object to the Mercator projection and set the geometry column to a point
places <- places %>%
rowwise() %>%
mutate(geometry = st_sfc(st_point(c(lng, lat)), crs = 4326)) %>%
st_transform(crs=3857)
# Load in France
france <- st_read('./data/11/TM_WORLD_BORDERS-0.2.shp', quiet = TRUE) %>% filter(NAME == "France") %>% st_transform(crs=3857)
# Get the bounding box of the polygon
bbox <- st_bbox(france)
# Define vertical strips for red, white, and blue
strip_width <- (bbox["xmax"] - bbox["xmin"]) / 3
red_strip <- st_polygon(list(matrix(c(
bbox["xmin"], bbox["ymin"],
bbox["xmin"] + strip_width, bbox["ymin"],
bbox["xmin"] + strip_width, bbox["ymax"],
bbox["xmin"], bbox["ymax"],
bbox["xmin"], bbox["ymin"]
), ncol = 2, byrow = TRUE)))
white_strip <- st_polygon(list(matrix(c(
bbox["xmin"] + strip_width, bbox["ymin"],
bbox["xmin"] + 2 * strip_width, bbox["ymin"],
bbox["xmin"] + 2 * strip_width, bbox["ymax"],
bbox["xmin"] + strip_width, bbox["ymax"],
bbox["xmin"] + strip_width, bbox["ymin"]
), ncol = 2, byrow = TRUE)))
blue_strip <- st_polygon(list(matrix(c(
bbox["xmin"] + 2 * strip_width, bbox["ymin"],
bbox["xmax"], bbox["ymin"],
bbox["xmax"], bbox["ymax"],
bbox["xmin"] + 2 * strip_width, bbox["ymax"],
bbox["xmin"] + 2 * strip_width, bbox["ymin"]
), ncol = 2, byrow = TRUE)))
# Combine strips into an sf object
strips_sf <- st_sf(
color = c("red", "white", "blue"),
geometry = st_sfc(red_strip, white_strip, blue_strip),
crs = 3857
)
# Intersect strips with the France polygon
flag_parts <- st_intersection(france, strips_sf)
# Add a custom font
font_add_google("Jim Nightshade", "fancy_font")
showtext_auto()
# Plot the outline of France, coloured fill, routes and places
ggplot() +
geom_sf(data = flag_parts, color = "black", lwd = 2) +
geom_sf(data = flag_parts, aes(fill = color, color=color), lwd = 0.2) +
scale_fill_manual(values = c("red" = "red", "white" = "white", "blue" = "blue")) +
geom_sf(data=top_routes_sf, color = "yellow", line="yellow", lwd=8) +
geom_sf(data=places, color = "black", lwd=1, size=8) +
geom_sf_text(data=places, aes(label = name), nudge_y = 52000, size=20, family="fancy_font") + # Add labels with slight offset
theme_void() +
ggtitle("Most used stages in Le Tour de France") +
theme(plot.title = element_text(hjust = 0.5, size = 80, family="fancy_font"), legend.position = "none")
# Finally, render a table showing the top 10 routes
r <- top_routes_sf[,c("label", "count")]
r <- st_drop_geometry(r)
names(r) <- c("Stage", "Occurrences")
knitr::kable(r, format="markdown")
Stage | Occurrences |
---|---|
Pau - Bordeaux | 18 |
Luchon - Perpignan | 17 |
Caen - Paris | 14 |
Pau - Luchon | 14 |
Strasbourg - Metz | 13 |
Bayonne - Luchon | 12 |
Bordeaux - Bayonne | 12 |
Bayonne - Pau | 10 |
Cherbourg-en-Cotentin - Brest | 10 |
Dunkerque - Paris | 10 |
Tour de France data: https://github.com/thomascamminady/LeTourDataSet MIT Licensed (https://camminady.dev/LeTourDataSet/)
Map data from the World Wind Java project: https://github.com/nasa/World-Wind-Java/blob/master/WorldWind/testData/shapefiles/TM_WORLD_BORDERS-0.2Readme.txt