Solving Real World Issues With RCzechia

Jindra Lacko

2019-08-20

Visualizing Czech Population

Population of the Czech Republic as per the latest census in 2011, per district (okres).

library(RCzechia)
library(dplyr)
library(readxl)
library(httr)
library(tmap)
library(sf)

# temporary file that will receive the downloaded spreadsheet
tf <- tempfile(fileext = ".xls")

# fetch the census workbook and write the response body to the temporary file
GET(
  url = "https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/zvcr034.xls",
  write_disk(path = tf)
)
## Response [https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/zvcr034.xls]
##   Date: 2019-08-20 16:57
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 44.5 kB
## <ON DISK>  /tmp/RtmpO66kNq/file4ad54169fcdf.xls

# read in the selected cell range, keeping the original column names for now
src <- read_excel(tf, range = "Data!B5:C97")

# meaningful names instead of the original ones
colnames(src) <- c("NAZ_LAU1", "obyvatel")

src <- src %>%
  mutate(
    # convert from text to number
    obyvatel = as.double(obyvatel),
    # rename Prague (from The Capital to a regular city)
    NAZ_LAU1 = ifelse(NAZ_LAU1 == "Hlavní město Praha", "Praha", NAZ_LAU1)
  )

# district polygons joined to the population figures by district name;
# note the use of inner (i.e. filtering) join
okresni_data <- RCzechia::okresy("low") %>%
  inner_join(src, by = "NAZ_LAU1")

# fill and borders are both layers of the same shape, so one tm_shape() suffices
vystup <- tm_shape(okresni_data) +
  tm_fill(
    col = "obyvatel", title = "Population",
    palette = "Blues", style = "quantile", n = 5
  ) +
  tm_borders("grey40", lwd = 0.5) + # thin edges of districts
  tm_shape(republika("low")) +
  tm_borders("grey30", lwd = 1.5) + # thick national borders
  tm_layout(frame = FALSE) # clean does it

print(vystup)

Geocoding Locations & Drawing them on a Map

Drawing a map: three semi-random landmarks on a map, with rivers shown for better orientation.

The geocoded data frame is obtained with the function RCzechia::geocode().

library(RCzechia)
library(tmap)
library(sf)

# national borders in low resolution
borders <- RCzechia::republika("low")

# major rivers only, drawn for better orientation
rivers <- subset(RCzechia::reky(), Major == TRUE)

# landmarks and their postal addresses; strings are kept as character
# (not factor) regardless of the R version, since they are passed to geocode()
mista <- data.frame(
  misto = c(
    "Kramářova vila",
    "Arcibiskupské zahrady v Kromeříži",
    "Hrad Bečov nad Teplou"
  ),
  adresa = c(
    "Gogolova 1, Praha 1",
    "Sněmovní náměstí 1, Kroměříž",
    "nám. 5. května 1, Bečov nad Teplou"
  ),
  stringsAsFactors = FALSE
)

# from a string vector to sf spatial points object
POI <- RCzechia::geocode(mista$adresa)

tm_plot <- tm_shape(borders) +
  tm_borders("grey30", lwd = 1) +
  tm_shape(POI) +
  tm_symbols(col = "firebrick3", shape = 20, size = 0.5) +
  tm_shape(rivers) +
  tm_lines(col = "steelblue", lwd = 1.5, alpha = 0.5) +
  tm_legend(title = "Very Special Places") + # ... or whatever :)
  tm_layout(frame = FALSE)

print(tm_plot)

Unemployment in the Czech Republic - a Choropleth

A visualization problem: unemployment in the Czech Republic is low in general, but not uniformly so.
What are the hotspots?

library(dplyr)
library(RCzechia)
library(tmap)
library(sf)

# open data on unemployment from Czech Statistical Office -
# https://www.czso.cz/csu/czso/otevrena_data
# lightly edited for size (rows filtered)
src <- read.csv(
  url("https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/unempl.csv"),
  stringsAsFactors = FALSE
)

# keys in RCzechia are of type character
src <- src %>%
  mutate(KOD_OBEC = as.character(uzemi_kod))

# obce_polygony = municipalities in RCzechia package, linked to the data by key
podklad <- RCzechia::obce_polygony() %>%
  inner_join(src, by = "KOD_OBEC")

# legend labels: whole numbers followed by a percent sign
vystup <- tm_shape(republika()) +
  tm_borders(col = "grey40") +
  tm_shape(podklad) +
  tm_fill(col = "hodnota", title = "Unemployment", palette = "YlOrRd") +
  tm_legend(
    legend.format = list(
      fun = function(x) paste0(formatC(x, digits = 0, format = "f"), " %")
    )
  ) +
  tm_layout(frame = FALSE)

print(vystup)

Distance Between Prague and Brno

Calculate distance between two spatial objects; the sf package supports (via gdal) point to point, point to polygon and polygon to polygon distances.

Calculating distance from Prague (#1 Czech city) to Brno (#2 Czech city).

library(dplyr)
library(RCzechia)
library(sf)
library(units)

# all municipalities as polygons
obce <- RCzechia::obce_polygony()

# the two cities of interest, picked by municipality name
praha <- subset(obce, NAZ_OBEC == "Praha")
brno <- subset(obce, NAZ_OBEC == "Brno")

# distance between the two polygons, expressed in kilometers -
# easier to interpret than meters, miles or decimal degrees..
vzdalenost <- units::set_units(sf::st_distance(praha, brno), "kilometers")

print(vzdalenost)
## Units: [kilometers]
##          [,1]
## [1,] 152.8073

Geographical Center of the City of Brno

The metaphysical center of the City of Brno is well known. But where is the geographical center?

The center is calculated using sf::st_centroid() and reversely geocoded via RCzechia::revgeo().

library(dplyr)
library(RCzechia)
library(tmap)
library(sf)

# polygon of the City of Brno
brno <- subset(RCzechia::obce_polygony(), NAZ_OBEC == "Brno")

# the polygon arrives through the pipe, so st_centroid() needs no further
# argument (the original passed `brno` a second time as a stray argument)
pupek_brna <- brno %>%
  st_transform(5514) %>% # planar CRS (eastings & northings)
  sf::st_centroid() # calculate central point of a polygon
## Warning in st_centroid.sf(.): st_centroid assumes attributes are
## constant over geometries of x

# the revgeo() function takes a sf points data frame and returns it back
# with address data in "revgeocoded" column
adresa_pupku <- RCzechia::revgeo(pupek_brna)$revgeocoded

tm_plot <- tm_shape(brno) +
  tm_borders(col = "grey40") +
  tm_shape(pupek_brna) +
  tm_dots(size = 1 / 3, col = "red", shape = 4) +
  tm_legend(title = "Center of Brno") +
  tm_layout(frame = FALSE)

print(adresa_pupku)
## [1] "Žižkova 513/22, Veveří, 61600 Brno"

print(tm_plot)

Interactive Map

Interactive maps are powerful tools for data visualization. They are easy to produce with the tmap package.

I found the Stamen Toner basemap a good companion for interactive choropleths - it gives enough context without distracting from the story of your data.

A map of the whole Czech Republic in original resolution (the accuracy is about 1 meter) would be rather sizable, and I found it better policy to either:

Note: it is technically impossible to make HTML in a vignette interactive. As a consequence the result of the code shown has been replaced by a static screenshot; the code itself is legit.

library(dplyr)
library(RCzechia)
library(tmap)
library(sf)

# open data on unemployment from Czech Statistical Office -
# https://www.czso.cz/csu/czso/otevrena_data
# lightly edited for size (rows filtered)
src <- read.csv(
  url("https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/unempl.csv"),
  stringsAsFactors = FALSE
)

# keys in RCzechia are of type character
src <- src %>%
  mutate(KOD_OBEC = as.character(uzemi_kod))

# obce_polygony = municipalities in RCzechia package
podklad <- RCzechia::obce_polygony() %>%
  inner_join(src, by = "KOD_OBEC") %>% # linking by key
  filter(KOD_CZNUTS3 == "CZ071") # Olomoucký kraj

# switch tmap from static plotting to interactive viewing
tmap_mode("view")

# municipality name is used as the feature id;
# legend labels are whole numbers followed by a percent sign
vystup <- tm_shape(podklad) +
  tm_fill(
    col = "hodnota", title = "Unemployment",
    palette = "YlOrRd", id = "NAZ_OBEC"
  ) +
  tm_legend(
    legend.format = list(
      fun = function(x) paste0(formatC(x, digits = 0, format = "f"), " %")
    )
  ) +
  tm_view(basemaps = "Stamen.Toner")

print(vystup)

This is just a screenshot of the visualization, so it's not interactive. You can play with the interactive version by running the code shown.

Dissolving sf Polygons

Creating custom polygons by aggregating administrative units is a common use case in sales reporting and analysis. Function RCzechia::union_sf() makes this task easier by dissolving polygons according to a value of a data column.

In this demonstration the Czech LAU1 units are grouped into two categories: those whose names have an odd number of characters, and those whose names have an even number. They are then dissolved into two multipolygons.

library(RCzechia)
library(dplyr)
library(sf)


# Czech LAU1 regions, flagged by the parity of the character count of their
# name and then dissolved into one feature per flag value
poly <- RCzechia::okresy("low") %>%
  mutate(
    oddeven = ifelse(
      nchar(NAZ_LAU1) %% 2 == 1,
      "odd",
      "even"
    )
  ) %>%
  RCzechia::union_sf("oddeven") # ... et facta est lux

# draw the dissolved polygons, with key.pos = 1 as the legend position
plot(poly, key.pos = 1)

KFME Grid Cells

The Kartierung der Flora Mitteleuropas (KFME) grid is a commonly used technique in the biogeography of Central Europe. It uses a grid of 10×6 arc-minutes (in Central European latitudes this translates to near squares), with cells numbered from north to south and west to east.

A selection of the grid cells relevant for faunistical mapping of the Czech Republic is available in the RCzechia package.

This example covers a frequent use case:

library(RCzechia)
library(ggplot2)
library(dplyr)
library(sf)

obec <- "Humpolec" # a Czech location

# geolocate centroid of a place
place <- RCzechia::geocode(obec) %>%
  filter(typ == "Obec")

# fetch the KFME grid once and reuse it for the lookup, the highlight
# and the plot instead of calling KFME_grid() three times
kfme <- RCzechia::KFME_grid()

# ID of the KFME square containing the geocoded place
ctverec_id <- sf::st_intersection(kfme, place)$ctverec

print(paste0("Location found in grid cell number ", ctverec_id, "."))
## [1] "Location found in grid cell number 6458."

# a single KFME square to be highlighted
highlighted_cell <- kfme %>%
  filter(ctverec == ctverec_id)

# a summary plot
ggplot() +
  geom_sf(data = RCzechia::republika(), size = .85) + # Czech borders
  geom_sf(
    data = highlighted_cell, # a specific KFME cell ...
    fill = "limegreen", alpha = .5 # ... highlighted in lime green
  ) +
  geom_sf(
    data = kfme, size = .33, # all KFME grid cells, thin
    color = "gray80", fill = NA # in gray and without fill
  ) +
  geom_sf(data = place, color = "red", pch = 4) + # X marks the spot!
  ggtitle(paste("Location", obec, "in grid cell number", ctverec_id))