1 Preparation

dataFolder <- paste0(ws, "data/")

Load data from geopackage

charging_stations <- st_read(dsn= paste0(dataFolder, "HH.gpkg"), layer="chargingStations")

## Reading layer `chargingStations' from data source `/home/slautenb/Documents/lehre/HD/ws_2020_21/heikaLab/R4UrbanDataLab_2020/data/HH.gpkg' using driver `GPKG'
## Simple feature collection with 1061 features and 12 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 552777.7 ymin: 5921782 xmax: 581876.5 ymax: 5954451
## projected CRS:  ETRS89 / UTM zone 32N

2 Explorative analysis

2.1 Kernel density map

We need to creat/define an empty raster that will store the results of the kernel density estimation

Get bounding box of the data

(bboxHH <- st_bbox(charging_stations))

##      xmin      ymin      xmax      ymax 
##  552777.7 5921782.1  581876.5 5954451.3

gridsize=c(500,500)
range_x=list(c(bboxHH[1], bboxHH[3]),c(bboxHH[2], bboxHH[4]))
#552378.61, 5921782.142, 582209.533, 5956674.616
kdeMat <-  bkde2D(st_coordinates(charging_stations), bandwidth=c(500,500), gridsize=gridsize, range.x=range_x)

Since bkde2D returns a matrix we need to convert it to a raster:

kdeRas <- raster(list(x=kdeMat$x1,y=kdeMat$x2,z=kdeMat$fhat), crs=CRS('+init=EPSG:25832') )

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum European_Terrestrial_Reference_System_1989 in CRS
## definition

Scale the raster so that units are more meaningful and create countour lines that we will use in plotting

kdeRas_scaled <- kdeRas*xres(kdeRas)*yres(kdeRas)* nrow(charging_stations)
cellStats(kdeRas_scaled$layer, stat=sum )

## [1] 1051.721

kdeRas_scaledContour <- rasterToContour(kdeRas_scaled)  %>%   st_as_sf()

tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(kdeRas_scaled) +
  tm_raster(col="layer", alpha = .5, palette ="-plasma", title = "Kernel density estimate", style = "kmeans" ) + 
  tm_shape(charging_stations) + 
  tm_dots(alpha=.5) + 
  tm_shape(kdeRas_scaledContour) + 
  tm_lines() + 
  tm_text(text="level", col="white", shadow=TRUE, along.lines=TRUE)

2.2 How far are the stations away from each other?

We use a for loop to calculate the distance to the clostet charging station in the data set. Therefore, we loop over all points and calculate the distance between the point i and all points including itself. This rather brute force approache returns the whole distance matrix which when can be used to calculate differences.

n <- nrow(charging_stations)
distMat <- matrix(data=0, nrow=n, ncol=n)
for(i in seq_len(n))
{
  allDists <- st_distance(charging_stations, charging_stations[i,])
  distMat[i,] <- allDists
  
}

In many cases several charging stations are at the same point, rendering a distance of zero.

getSecondSmallest <- function(x, pos=2)
{
   sort(x)[pos]
}
dist2Shortest <- apply(distMat, MARGI=1, FUN= getSecondSmallest, pos=2)
summary(dist2Shortest)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.0000   0.0000   0.0000   0.5075   0.0000 328.8055

We can force the analysis to deliver us the shortest distance to a neighbor not at the same location. Therefore, we: * sort values * get the indices of all values larger than zero * get the first element of the indices that is larger then zero

getSecondSmallestNotAtSame <- function(x, pos=1)
{
   sortedValues <- sort(x)
   idx <- which(sortedValues > 0)
   return(sortedValues[idx][pos])
}
dist2ShortestNotAtSame <- apply(distMat, MARGI=1, FUN= getSecondSmallestNotAtSame)
summary(dist2ShortestNotAtSame)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.93  201.27  287.89  407.80  456.17 5425.73

Or we could get the average distance to the closest ten charging stations

getMeanDistToClostest <- function(x, n=10)
{
   sortedValues <- sort(x)
   return(mean(sortedValues[1:n+1]))
}
distMean10 <- apply(distMat, MARGI=1, FUN= getMeanDistToClostest)
summary(distMean10)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   180.7   300.6   386.0   602.1   764.3  5300.0

If we want to show distances in the map we can simply add the vectors to the simple features object and use tmap afterwards.

charging_stations['meanDist10'] <- distMean10

osm_hh <- read_osm(charging_stations, ext=1.1)

tmap_mode("plot")

## tmap mode set to plotting

tm_shape(osm_hh) + tm_rgb() + 
  tm_shape(charging_stations) + 
  tm_dots(size="meanDist10", legend.size.show = TRUE, title.size = "Mean distance to closest 10 stations" ) + 
  tm_basemap(server= "OpenStreetMap.DE") + 
  tm_layout(legend.outside=TRUE) + 
  tm_scale_bar()

We see that distances are (not unexpectedly increaing from the city center).

A convenient and fast imlementations is kNNdist from the dbscan package:

nn4Mat <- kNNdist(st_coordinates(charging_stations), k=4, all=TRUE)
head(nn4Mat)

##   1        2        3         4
## 1 0 425.1543 425.1543  425.1543
## 2 0 571.8326 571.8326 1319.5587
## 3 0 293.6338 293.6338  312.8868
## 4 0 139.8384 139.8384  196.5883
## 5 0   0.0000 382.7148  382.7148
## 6 0   0.0000 393.3024  393.3024

It returns the distance matrix for the k nearest neighbors.

2.3 Identify spatial clusters using dbscan

DBSCAN estimates the density around each data point by counting the number of points in a user-specified eps-neighborhood and applies a used-specified minPts thresholds to identify core, border and noise points. In a second step, core points are joined into a cluster if they are density-reachable (i.e., there is a chain of core points where one falls inside the eps-neighborhood of the next). Finally, border points are assigned to clusters. The algorithm only needs parameters eps and minPts.

db_hh <- dbscan(st_coordinates(charging_stations), eps = 650, minPts = 7)
print(db_hh)

## DBSCAN clustering for 1061 objects.
## Parameters: eps = 650, minPts = 7
## The clustering contains 8 cluster(s) and 308 noise points.
## 
##   0   1   2   3   4   5   6   7   8 
## 308 639  42  27  10   9   9  10   7 
## 
## Available fields: cluster, eps, minPts

charging_stations['dbCluster'] <- db_hh$cluster
nClust <- length(unique(db_hh$cluster))

tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(charging_stations) + 
  tm_dots(col = "dbCluster", size=0.25, alpha=.5, palette="Accent", n=nClust) +
  tm_basemap(server= "OpenStreetMap.DE") + 
  tm_scale_bar()

3 Exercise

Perform the same anlysis with the dataset chargingStations_berlin.shp. You can load the shapefile by simply passing the folder name and the name of the shapefile (including the .shp extension) to st_read. The data are in ETRS89 / UTM zone 33N (use a web search to get the EPSG number)

Urban data lab - day 2 - explorative analysis - charging stations HH

Sven Lautenbach

10/18/2020