The task in this notebook is: Intersect 50 Million LBSM Coordinates with 5 Million ATKIS-Shapes (Extent: Germany)
This sort of intersection is simple, but difficult to execute in common GIS Software due to the size of inputs. We can do this in Python, but simple raycasting will require estimated 3 weeks on a portable computer. Therefore, this is mainly a task for code improvements and better data handling. The final intersection demonstrated in this notebook, optimized using spatial RTrees and Hashsets, will finish in under 1 hour. Inbetween, we'll use some interactive visualisations with Geoviews to stream data, as a demonstration for fast preview of really large files.
First, lets have a look at the data. It is pretty big so we only stream the first couple of features:
# read the shapefiles
import fiona
import geoviews as gv
import geoviews.feature as gf
from pathlib import Path
from shapely.geometry import shape
import geopandas as gp
from shapely.geometry import Point
import holoviews as hv
from geopandas.tools import sjoin
# enable shapely.speedups which makes some of the spatial queries running faster.
import shapely.speedups
from cartopy import crs as ccrs
hv.notebook_extension('bokeh')
shapely.speedups.enable()
# filepath
atkis_data = Path.cwd() / "01_ATKIS_Separate" / "gruenland.shp"
atkis_shapes = fiona.open(atkis_data)
x = 0
poly_shapes = []
for feature in atkis_shapes:
x += 1
#print(feature)
poly_shapes.append(shape(feature['geometry']))
first_feature = feature
if x > 50:
break
# prepare geoviews layer
gv_shapes = gv.Polygons(
poly_shapes, label=f'ATKIS-Gruen')
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_shapes.opts(line_color='white', line_width=0.5, fill_alpha=0.5, tools=['hover'])
).opts(
width=800,
height=480
)
With Geopandas and loading the full shapefile for Catgeory Gruenflaeche:
%%time
atkis_gruen = gp.read_file(atkis_data)
atkis_gruen.head()
atkis_gruen.shape
Get preview map
gv_shapes = gv.Polygons(
atkis_gruen.head(50), vdims=['INDIKATOR', 'CellCode50', 'INDIKATOR_', 'Shape_Area']).opts(tools=['hover'])
# add testpoint
testpoint = Point(10.095,47.4)
gv_testpoint = gv.Points([testpoint]).opts(tools=['hover'], size=5, color='red')
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_shapes.opts(line_color='white', line_width=0.5, fill_alpha=0.5, tools=['hover']) * \
gv_testpoint).opts(
width=800,
height=480
)
atkis_gruen.head(50).total_bounds
First, lets have a look at the data. It is pretty big so we only stream the first couple of features:
def get_point(line_string):
"""Gets shapely Point from CSV linestring"""
line_arr = line_string.split(',', 4)
if len(line_arr) == 0:
raise ValueError(f"Problem with line: {line_string}")
lng = float(line_arr[3])
lat = float(line_arr[2])
s_point = Point(lng, lat)
return s_point
lbsm_data = Path.cwd() / "02_LBSM_DE" / "2019-02-13_Germany.csv"
lbsn_points = []
x = 0
with open(lbsm_data, "r", encoding="utf-8") as f_handle:
headerline = next(f_handle)
for line in f_handle:
x += 1
s_point = get_point(line)
lbsn_points.append(s_point)
if x > 10000:
break
# make a geoseries form shapely points
g_points = gp.GeoSeries(lbsn_points)
Preview:
gv_points = gv.Points(
lbsn_points, label=f'LBSM loc')
import holoviews as hv
hv.notebook_extension('bokeh')
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_points.opts(tools=['hover'], size=16)
).opts(
width=800,
height=480
)
Now we have our two datasets that we want to intersect:
print(atkis_gruen.shape)
print(g_points.shape)
print(len(lbsn_points))
Construct RTree for the geometry from atkis shapes for faster intersection
%%time
from rtree import index
# Populate R-tree index with bounds of polygons
idx = index.Index()
for pos, poly in enumerate(atkis_gruen.geometry):
idx.insert(pos, poly.bounds)
We can also do this in shapely
# Query testpoint to see which polygon it is in
# using first Rtree index, then Shapely geometry's within
poly_idx = [i for i in idx.intersection((testpoint.coords[0]))
if testpoint.within(atkis_gruen.geometry[i])]
for num, idx_num in enumerate(poly_idx, 1):
testpoly = atkis_gruen.iloc[[idx_num]]
testpoly_l = gv.Polygons(testpoly, vdims=['INDIKATOR', 'CellCode50', 'INDIKATOR_', 'Shape_Area']).opts(tools=['hover'])
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_shapes.opts(line_color='white', line_width=0.5, fill_alpha=0.5, tools=['hover']) * \
testpoly_l * \
gv_testpoint
).opts(
width=800,
height=480
)