***Travel time from a police post to homes by car and on foot, in minutes: example of the Thindigua Neighborhood, Kiambu County, Kenya***
# Display travel time from the police post to homes by car, in minutes. NOTE: this cell was moved to the top for quick reference to the final output
display(m)
!pip install folium
!pip install branca
!pip install r5py
!pip install mapclassify
!pip install networkx
!pip install osmnx
#### Import the necessary libraries ####
# R5Py for travel time matrix computation
from r5py import TransportNetwork, TravelTimeMatrixComputer
# GeoPandas and pandas for (spatial) data manipulation
import geopandas as gpd
import pandas as pd
# General libraries
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
import networkx as nx
import osmnx as ox
import numpy as np
from shapely.geometry import Polygon
# from osgeo import gdal
***2. Bing Buildings***
## Install package
!pip install mercantile
import pandas as pd
import geopandas as gpd
from shapely import geometry
import mercantile
from tqdm import tqdm
import os
import tempfile
# Geometry copied from https://geojson.io
aoi_geom = {
    "coordinates": [
        [
            [36.8143382024096, -1.1795869911608037],
            [36.813743367079894, -1.2265576699855245],
            [36.86895837390145, -1.2254156122160964],
            [36.86896184174461, -1.1794381833280454],
            [36.8143382024096, -1.1795869911608037],
        ]
    ],
    "type": "Polygon",
}
aoi_shape = geometry.shape(aoi_geom)
minx, miny, maxx, maxy = aoi_shape.bounds
output_fn = "Buildings.geojson"
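Before fetching any tiles, it can help to confirm the AOI covers roughly the area you expect. A minimal sanity check, assuming UTM zone 37S (EPSG:32737) as the metric CRS for this part of Kenya:
# Report the AOI extent in km² by projecting to a metric CRS first
aoi_gdf = gpd.GeoDataFrame(geometry=[aoi_shape], crs='EPSG:4326')
print(f"AOI area: {aoi_gdf.to_crs('EPSG:32737').area.iloc[0] / 1e6:.1f} km²")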
# Determine which zoom-9 tiles intersect the AOI
quad_keys = set()
for tile in list(mercantile.tiles(minx, miny, maxx, maxy, zooms=9)):
    quad_keys.add(mercantile.quadkey(tile))
quad_keys = list(quad_keys)
print(f"The input area spans {len(quad_keys)} tiles: {quad_keys}")
The input area spans 1 tiles: ['300110102']
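As a cross-check, the zoom-9 tile containing the AOI centre should map to the same quadkey found above; a small sketch:
# The quadkey of the tile at the AOI centre should appear in quad_keys
centre_tile = mercantile.tile((minx + maxx) / 2, (miny + maxy) / 2, 9)
print(mercantile.quadkey(centre_tile))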
Step 3 - Download the building footprints for each tile that intersects our AOI and crop the results. This is where most of the magic happens: we download all the building footprints for each tile that intersects our AOI, then keep only the footprints contained by the AOI.
Note: this step might take a while, depending on how many tiles your AOI covers and how many building footprints are in those tiles.
df = pd.read_csv(
    "https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv",
    dtype=str,
)
df.head()
|   | Location | QuadKey | Url | Size |
|---|---|---|---|---|
| 0 | Abyei | 122321003 | https://minedbuildings.blob.core.windows.net/g... | 4.8KB |
| 1 | Abyei | 122321021 | https://minedbuildings.blob.core.windows.net/g... | 7.9KB |
| 2 | Afghanistan | 123011320 | https://minedbuildings.blob.core.windows.net/g... | 70.2KB |
| 3 | Afghanistan | 123011321 | https://minedbuildings.blob.core.windows.net/g... | 1.3MB |
| 4 | Afghanistan | 123011322 | https://minedbuildings.blob.core.windows.net/g... | 4.1MB |
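Since quad_keys is already known, the catalogue can be narrowed down before entering the download loop; a quick sketch:
# Preview only the catalogue rows that match our AOI's quadkeys
print(df[df["QuadKey"].isin(quad_keys)][["Location", "QuadKey", "Size"]])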
# NOTE: this step took about 40 minutes for this AOI
idx = 0
combined_gdf = gpd.GeoDataFrame()
with tempfile.TemporaryDirectory() as tmpdir:
    # Download the GeoJSON files for each tile that intersects the input geometry
    tmp_fns = []
    for quad_key in tqdm(quad_keys):
        rows = df[df["QuadKey"] == quad_key]
        if rows.shape[0] == 1:
            url = rows.iloc[0]["Url"]
            df2 = pd.read_json(url, lines=True)
            df2["geometry"] = df2["geometry"].apply(geometry.shape)
            gdf = gpd.GeoDataFrame(df2, crs=4326)
            fn = os.path.join(tmpdir, f"{quad_key}.geojson")
            tmp_fns.append(fn)
            if not os.path.exists(fn):
                gdf.to_file(fn, driver="GeoJSON")
        elif rows.shape[0] > 1:
            raise ValueError(f"Multiple rows found for QuadKey: {quad_key}")
        else:
            raise ValueError(f"QuadKey not found in dataset: {quad_key}")
    # Merge the GeoJSON files into a single GeoDataFrame
    for fn in tmp_fns:
        gdf = gpd.read_file(fn)  # Read each file into a GeoDataFrame
        gdf = gdf[gdf.geometry.within(aoi_shape)]  # Keep only geometries within the AOI
        gdf['id'] = range(idx, idx + len(gdf))  # Update 'id' based on idx
        idx += len(gdf)
        combined_gdf = pd.concat([combined_gdf, gdf], ignore_index=True)
100%|██████████| 1/1 [11:20<00:00, 680.34s/it]
/usr/local/lib/python3.10/dist-packages/geopandas/geodataframe.py:1528: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
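The SettingWithCopyWarning above is triggered by assigning the 'id' column to a filtered slice; the usual remedy is an explicit copy, sketched here:
# Taking a copy of the filtered slice before assigning 'id' avoids the warning
gdf = gdf[gdf.geometry.within(aoi_shape)].copy()
gdf['id'] = range(idx, idx + len(gdf))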
# Make sure the CRS is set to EPSG:4326 (WGS84)
combined_gdf = combined_gdf.to_crs('EPSG:4326')
# Save the GeoDataFrame to a GeoJSON file
output_fn = 'output.geojson'
combined_gdf.to_file(output_fn, driver='GeoJSON')
# Convert polygons to points (centroids) while retaining attributes.
# Centroids should be computed in a projected CRS (here EPSG:32737, UTM zone 37S),
# then re-projected back to EPSG:4326 (WGS84); taking centroids directly in a
# geographic CRS raises a UserWarning and can give slightly incorrect results.
combined_gdf['geometry'] = combined_gdf['geometry'].to_crs('EPSG:32737').centroid.to_crs('EPSG:4326')
combined_gdf.head()
|   | type | properties | geometry | id |
|---|---|---|---|---|
| 0 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81653 -1.21011) | 0 |
| 1 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81799 -1.21471) | 1 |
| 2 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82149 -1.22048) | 2 |
| 3 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82247 -1.21952) | 3 |
| 4 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82310 -1.20023) | 4 |
combined_gdf.explore()
PolicePost = gpd.read_file('PolicePost.geojson')
PolicePost.head()
|   | id | Place | geometry |
|---|---|---|---|
| 0 | 1 | PolicePost | POINT (258888.306 9866870.420) |
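Note that the police-post coordinates above are in metres, i.e. a projected CRS. r5py expects origins and destinations in EPSG:4326, so a re-projection step may be needed first; a minimal sketch:
# r5py expects lat/lon (EPSG:4326) geometries; re-project if necessary
if PolicePost.crs is not None and PolicePost.crs.to_epsg() != 4326:
    PolicePost = PolicePost.to_crs('EPSG:4326')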
***3. Travel time computer***
###### Create a TransportNetwork object in R5Py using the OSM data ######
# This provides a basis for analysis in the entire third section
# Get the osm_pbf extract from 'extract.bbbike.org'
# The GTFS feed can be obtained from Digital Matatus
transport_network = TransportNetwork(
    osm_pbf='Thindigua.osm.pbf',  # Path to the OSM PBF file
    gtfs=['GTFS_FEED_2019.zip']   # Path to the GTFS feed
)
# Create a TravelTimeMatrixComputer object using R5Py to calculate travel times
# This is a key step: it determines the travel times between all pairs of origins and destinations in the network
travel_time_computer_point = TravelTimeMatrixComputer(
    transport_network=transport_network,
    origins=PolicePost,
    destinations=combined_gdf,
    snap_to_network=True,
    # Below are some of the travel options that can be tweaked as needed
    transport_modes=['CAR'],  # Modes of transport to consider, e.g., 'WALK', 'BICYCLE', 'CAR', 'TRANSIT'
    # speed_walking=3.6,
)
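The GTFS feed loaded above is only exercised when a transit mode is requested, and transit routing also needs a departure time inside the feed's service window. A minimal sketch, assuming a weekday morning in 2019 to match the feed's name:
# Sketch: transit travel times require a departure time within the GTFS
# feed's validity window (the 2019 date below is an assumption)
travel_time_computer_transit = TravelTimeMatrixComputer(
    transport_network=transport_network,
    origins=PolicePost,
    destinations=combined_gdf,
    departure=datetime(2019, 5, 13, 8, 0),
    transport_modes=['TRANSIT', 'WALK'],
)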
# For more information on the available options, please refer to the R5Py documentation: https://r5py.readthedocs.io/en/latest/
# Compute travel times between all origins and destinations;
od_matrix_point = travel_time_computer_point.compute_travel_times()
od_matrix_point.head()
# The calculated travel time matrix is very simple: each row gives a starting id and its travel time to another location (id).
|   | from_id | to_id | travel_time |
|---|---|---|---|
| 0 | 1 | 0 | 5 |
| 1 | 1 | 1 | 5 |
| 2 | 1 | 2 | 7 |
| 3 | 1 | 3 | 7 |
| 4 | 1 | 4 | 4 |
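A quick distribution check on the matrix can flag snapping or network-connectivity problems before mapping; for example:
# Summarise the drive-time distribution across all building centroids
print(od_matrix_point['travel_time'].describe())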
join_point = combined_gdf.merge(od_matrix_point, left_on='id', right_on='to_id')
join_point.head()
# You can see what the joined data look like. A geometry column is attached so the accessibility values can be plotted spatially.
|   | type | properties | geometry | id | from_id | to_id | travel_time |
|---|---|---|---|---|---|---|---|
| 0 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81653 -1.21011) | 0 | 1 | 0 | 5 |
| 1 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81799 -1.21471) | 1 | 1 | 1 | 5 |
| 2 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82149 -1.22048) | 2 | 1 | 2 | 7 |
| 3 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82247 -1.21952) | 3 | 1 | 3 | 7 |
| 4 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82310 -1.20023) | 4 | 1 | 4 | 4 |
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
# Create the base map with join_point data
m = join_point.explore(
    column="travel_time",
    scheme='natural_breaks',
    cmap="cividis_r",
    name="Travel Time",
    legend=True,
    legend_kwds={"caption": "Travel time (minutes)"}
)
# Add the police post to the map
PolicePost.explore(
    m=m,
    color='red',
    marker_kwds={'radius': 10, 'fill': True},
    tooltip='Place',  # Changed from 'name' to 'Place'
    name="Police Post"
)
# Add layer control
folium.LayerControl().add_to(m)
# Save the map
output_html = 'travel_time_map_with_police_post_Drive1.html'
m.save(output_html)
WALK
# Create a TravelTimeMatrixComputer object using R5Py to calculate travel times
# This is a key step as it will determine the travel times between all pairs of origins and destinations in the network
travel_time_computer_point = TravelTimeMatrixComputer(
    transport_network=transport_network,
    origins=PolicePost,
    destinations=combined_gdf,
    snap_to_network=True,
    # Below are some of the travel options that can be tweaked as needed
    transport_modes=['WALK'],  # Modes of transport to consider, e.g., 'WALK', 'BICYCLE', 'CAR', 'TRANSIT'
    # speed_walking=3.6,
)
# For more information on the available options, please refer to the R5Py documentation: https://r5py.readthedocs.io/en/latest/
# Compute travel times between all origins and destinations;
od_matrix_point = travel_time_computer_point.compute_travel_times()
od_matrix_point.head()
# The calculated travel time matrix is very simple: each row gives a starting id and its travel time to another location (id).
|   | from_id | to_id | travel_time |
|---|---|---|---|
| 0 | 1 | 0 | 48.0 |
| 1 | 1 | 1 | 46.0 |
| 2 | 1 | 2 | 62.0 |
| 3 | 1 | 3 | 61.0 |
| 4 | 1 | 4 | 33.0 |
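The walking matrix lends itself to simple accessibility summaries; a sketch with an illustrative 10-minute threshold:
# Share of buildings reachable on foot within 10 minutes of the police post
share = (od_matrix_point['travel_time'] <= 10).mean() * 100
print(f"{share:.1f}% of buildings are within a 10-minute walk")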
join_point = combined_gdf.merge(od_matrix_point, left_on='id', right_on='to_id')
join_point.head()
# You can see what the joined data look like. A geometry column is attached so the accessibility values can be plotted spatially.
|   | type | properties | geometry | id | from_id | to_id | travel_time |
|---|---|---|---|---|---|---|---|
| 0 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81653 -1.21011) | 0 | 1 | 0 | 48.0 |
| 1 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.81799 -1.21471) | 1 | 1 | 1 | 46.0 |
| 2 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82149 -1.22048) | 2 | 1 | 2 | 62.0 |
| 3 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82247 -1.21952) | 3 | 1 | 3 | 61.0 |
| 4 | Feature | {'height': -1.0, 'confidence': -1.0} | POINT (36.82310 -1.20023) | 4 | 1 | 4 | 33.0 |
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
# Create the base map with join_point data
m = join_point.explore(
    column="travel_time",
    scheme='natural_breaks',
    cmap="cividis_r",
    name="Travel Time",
    legend=True,
    legend_kwds={"caption": "Travel time (minutes)"}
)
# Add the police post to the map
PolicePost.explore(
    m=m,
    color='red',
    marker_kwds={'radius': 10, 'fill': True},
    tooltip='Place',  # Changed from 'name' to 'Place'
    name="Police Post"
)
# Add layer control
folium.LayerControl().add_to(m)
# Save the map
output_html = 'travel_time_map_with_police_post_Walk1.html'
m.save(output_html)
# Display travel time from the police post to homes by walking, in minutes
display(m)
print(join_point.columns)
Index(['type', 'properties', 'geometry', 'id', 'from_id', 'to_id', 'travel_time'], dtype='object')
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
def create_popup_content(row):
    html = f"""
    <form id="form_{row.name}" onsubmit="submitForm(event, {row.name})">
        <h3>Point Information</h3>
    """
    for col in row.index:
        if col != 'geometry':
            html += f"""
        <label for="{col}_{row.name}">{col}:</label>
        <input type="text" id="{col}_{row.name}" name="{col}" value="{row[col]}" required><br><br>
        """
    # f-string here so {row.name} is interpolated into the element ids
    html += f"""
        <label for="comments_{row.name}">Comments:</label>
        <textarea id="comments_{row.name}" name="comments"></textarea><br><br>
        <input type="submit" value="Submit">
    </form>
    """
    return html
# Create the base map
m = folium.Map(location=[join_point.geometry.y.mean(), join_point.geometry.x.mean()], zoom_start=10)
# Add points to the map with popups
for idx, row in join_point.iterrows():
    popup_content = create_popup_content(row)
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=folium.Popup(popup_content, max_width=300),
        tooltip=f"Point {idx}"
    ).add_to(m)
# Add the police post to the map
PolicePost.explore(
    m=m,
    color='red',
    marker_kwds={'radius': 10, 'fill': True},
    tooltip=PolicePost.columns[0],  # Use the first column as tooltip
    name="Police Post"
)
# Add layer control
folium.LayerControl().add_to(m)
# Get the HTML representation of the map
map_html = m._repr_html_()
# Create the full HTML document
html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>Interactive Map with Forms</title>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script>
function submitForm(event, id) {{
event.preventDefault();
var form = document.getElementById('form_' + id);
var formData = new FormData(form);
var object = {{}};
formData.forEach((value, key) => {{object[key] = value}});
var json = JSON.stringify(object);
console.log(json);
alert('Form submitted for point ' + id + '\\n' + json);
// Here you would typically send this data to a server
}}
</script>
</head>
<body>
<h1>Interactive Map with Forms</h1>
{map_html}
</body>
</html>
"""
# Save the HTML content to a file
output_html = 'interactive_map_with_forms.html'
with open(output_html, 'w', encoding='utf-8') as f:
    f.write(html_content)
print(f"Map saved to {output_html}")
Map saved to interactive_map_with_forms.html
Copyright 2021 Google LLC. Licensed under the Apache License, Version 2.0 (the "License");
***1. Google Open Buildings example***
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Step 1. Prepare a compressed CSV file using Open Buildings data [takes 1-15 minutes depending on the region]
#@markdown First, select a region from either the [Natural Earth low res](https://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-0-countries/) (fastest), [Natural Earth high res](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/) or [World Bank high res](https://datacatalog.worldbank.org/dataset/world-bank-official-boundaries) shapefiles:
region_border_source = 'Natural Earth (High Res 10m)' #@param ["Natural Earth (Low Res 110m)", "Natural Earth (High Res 10m)", "World Bank (High Res 10m)"]
region = '' #@param ["", "AGO (Angola)", "BDI (Burundi)", "BEN (Benin)", "BFA (Burkina Faso)", "BGD (Bangladesh)", "BRN (Brunei)", "BTN (Bhutan)", "BWA (Botswana)", "CAF (Central African Republic)", "CIV (C\u00f4te d'Ivoire)", "CMR (Cameroon)", "COD (Democratic Republic of the Congo)", "COG (Republic of the Congo)", "COM (Comoros)", "CPV (Cape Verde)", "DJI (Djibouti)", "DZA (Algeria)", "EGY (Egypt)", "ERI (Eritrea)", "ETH (Ethiopia)", "GAB (Gabon)", "GHA (Ghana)", "GIN (Guinea)", "GMB (The Gambia)", "GNB (Guinea-Bissau)", "GNQ (Equatorial Guinea)", "IDN (Indonesia)", "IOT (British Indian Ocean Territory)", "KEN (Kenya)", "KHM (Cambodia)", "LAO (Laos)", "LBR (Liberia)", "LKA (Sri Lanka)", "LSO (Lesotho)", "MDG (Madagascar)", "MDV (Maldives)", "MOZ (Mozambique)", "MRT (Mauritania)", "MUS (Mauritius)", "MWI (Malawi)", "MYS (Malaysia)", "MYT (Mayotte)", "NAM (Namibia)", "NER (Niger)", "NGA (Nigeria)", "NPL (Nepal)", "PHL (Philippines)", "REU (R\u00e9union)", "RWA (Rwanda)", "SDN (Sudan)", "SEN (Senegal)", "SGP (Singapore)", "SHN (Saint Helena, Ascension and Tristan da Cunha)", "SLE (Sierra Leone)", "SOM (Somalia)", "STP (S\u00e3o Tom\u00e9 and Pr\u00edncipe)", "SWZ (Eswatini)", "SYC (Seychelles)", "TGO (Togo)", "THA (Thailand)", "TLS (Timor-Leste)", "TUN (Tunisia)", "TZA (Tanzania)", "UGA (Uganda)", "VNM (Vietnam)", "ZAF (South Africa)", "ZMB (Zambia)", "ZWE (Zimbabwe)"]
#@markdown **or** specify an area of interest in [WKT format](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) (assumes crs='EPSG:4326'); this [tool](https://arthur-e.github.io/Wicket/sandbox-gmaps3.html) might be useful.
your_own_wkt_polygon = 'POLYGON((36.81133624078676 -1.181334543884988,36.86489459039613 -1.181334543884988,36.86489459039613 -1.233508013634914,36.81133624078676 -1.233508013634914,36.81133624078676 -1.181334543884988))' #@param {type:"string"}
#@markdown Select type of data to download here:
data_type = 'polygons' #@param ["polygons", "points"]
!sudo apt-get install swig
!pip install s2geometry pygeos geopandas
import functools
import glob
import gzip
import multiprocessing
import os
import shutil
import tempfile
from typing import List, Optional, Tuple
import geopandas as gpd
from IPython import display
import pandas as pd
import s2geometry as s2
import shapely
import tensorflow as tf
import tqdm.notebook
BUILDING_DOWNLOAD_PATH = ('gs://open-buildings-data/v3/'
                          f'{data_type}_s2_level_6_gzip_no_header')

def get_filename_and_region_dataframe(
    region_border_source: str, region: str,
    your_own_wkt_polygon: str) -> Tuple[str, gpd.geodataframe.GeoDataFrame]:
  """Returns output filename and a geopandas dataframe with one region row."""
  if your_own_wkt_polygon:
    filename = f'open_buildings_v3_{data_type}_your_own_wkt_polygon.csv.gz'
    region_df = gpd.GeoDataFrame(
        geometry=gpd.GeoSeries.from_wkt([your_own_wkt_polygon]),
        crs='EPSG:4326')
    if not isinstance(region_df.iloc[0].geometry,
                      shapely.geometry.polygon.Polygon) and not isinstance(
                          region_df.iloc[0].geometry,
                          shapely.geometry.multipolygon.MultiPolygon):
      raise ValueError("`your_own_wkt_polygon` must be a POLYGON or "
                       "MULTIPOLYGON.")
    print(f'Preparing your_own_wkt_polygon.')
    return filename, region_df
  if not region:
    raise ValueError('Please select a region or set your_own_wkt_polygon.')
  if region_border_source == 'Natural Earth (Low Res 110m)':
    url = ('https://naciscdn.org/naturalearth/'
           '110m/cultural/ne_110m_admin_0_countries.zip')
    !wget -N {url}
    display.clear_output()
    region_shapefile_path = os.path.basename(url)
    source_name = 'ne_110m'
  elif region_border_source == 'Natural Earth (High Res 10m)':
    url = ('https://naciscdn.org/naturalearth/'
           '10m/cultural/ne_10m_admin_0_countries.zip')
    !wget -N {url}
    display.clear_output()
    region_shapefile_path = os.path.basename(url)
    source_name = 'ne_10m'
  elif region_border_source == 'World Bank (High Res 10m)':
    url = ('https://datacatalogfiles.worldbank.org/ddh-published/'
           '0038272/DR0046659/wb_countries_admin0_10m.zip')
    !wget -N {url}
    !unzip -o {os.path.basename(url)}
    display.clear_output()
    region_shapefile_path = 'WB_countries_Admin0_10m'
    source_name = 'wb_10m'
  region_iso_a3 = region.split(' ')[0]
  filename = (f'open_buildings_v3_{data_type}_'
              f'{source_name}_{region_iso_a3}.csv.gz')
  region_df = gpd.read_file(region_shapefile_path).query(
      f'ISO_A3 == "{region_iso_a3}"').dissolve(by='ISO_A3')[['geometry']]
  print(f'Preparing {region} from {region_border_source}.')
  return filename, region_df
def get_bounding_box_s2_covering_tokens(
    region_geometry: shapely.geometry.base.BaseGeometry) -> List[str]:
  region_bounds = region_geometry.bounds
  s2_lat_lng_rect = s2.S2LatLngRect_FromPointPair(
      s2.S2LatLng_FromDegrees(region_bounds[1], region_bounds[0]),
      s2.S2LatLng_FromDegrees(region_bounds[3], region_bounds[2]))
  coverer = s2.S2RegionCoverer()
  # NOTE: Should be kept in-sync with s2 level in BUILDING_DOWNLOAD_PATH.
  coverer.set_fixed_level(6)
  coverer.set_max_cells(1000000)
  return [cell.ToToken() for cell in coverer.GetCovering(s2_lat_lng_rect)]
def s2_token_to_shapely_polygon(
    s2_token: str) -> shapely.geometry.polygon.Polygon:
  s2_cell = s2.S2Cell(s2.S2CellId_FromToken(s2_token, len(s2_token)))
  coords = []
  for i in range(4):
    s2_lat_lng = s2.S2LatLng(s2_cell.GetVertex(i))
    coords.append((s2_lat_lng.lng().degrees(), s2_lat_lng.lat().degrees()))
  return shapely.geometry.Polygon(coords)
def download_s2_token(
    s2_token: str, region_df: gpd.geodataframe.GeoDataFrame) -> Optional[str]:
  """Downloads the matching CSV file with polygons for the `s2_token`.

  NOTE: Only polygons inside the region are kept.
  NOTE: Passing output via a temporary file to reduce memory usage.

  Args:
    s2_token: S2 token for which to download the CSV file with building
      polygons. The S2 token should be at the same level as the files in
      BUILDING_DOWNLOAD_PATH.
    region_df: A geopandas dataframe with only one row that contains the
      region for which to keep polygons.

  Returns:
    Either a filepath which contains a gzipped CSV without header for the
    `s2_token`, subfiltered to only contain building polygons inside the
    region, or None, which means that there were no polygons inside the
    region for this `s2_token`.
  """
  s2_cell_geometry = s2_token_to_shapely_polygon(s2_token)
  region_geometry = region_df.iloc[0].geometry
  prepared_region_geometry = shapely.prepared.prep(region_geometry)
  # If the s2 cell doesn't intersect the country geometry at all then we know
  # that all rows would be dropped, so we can just return early.
  if not prepared_region_geometry.intersects(s2_cell_geometry):
    return None
  try:
    # Using tf.io.gfile.GFile gives better performance than passing the GCS
    # path directly to pd.read_csv.
    with tf.io.gfile.GFile(
        os.path.join(BUILDING_DOWNLOAD_PATH, f'{s2_token}_buildings.csv.gz'),
        'rb') as gf:
      # If the s2 cell is fully covered by the country geometry then we can
      # skip filtering as we need all rows.
      if prepared_region_geometry.covers(s2_cell_geometry):
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as tmp_f:
          shutil.copyfileobj(gf, tmp_f)
        return tmp_f.name
      # Else take the slow path.
      # NOTE: We read in chunks to save memory.
      csv_chunks = pd.read_csv(
          gf, chunksize=2000000, dtype=object, compression='gzip', header=None)
      tmp_f = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
      tmp_f.close()
      for csv_chunk in csv_chunks:
        points = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(csv_chunk[1], csv_chunk[0]),
            crs='EPSG:4326')
        # sjoin 'within' was faster than using shapely's 'within' directly.
        points = gpd.sjoin(points, region_df, predicate='within')
        csv_chunk = csv_chunk.iloc[points.index]
        csv_chunk.to_csv(
            tmp_f.name,
            mode='ab',
            index=False,
            header=False,
            compression={'method': 'gzip', 'compresslevel': 1})
      return tmp_f.name
  except tf.errors.NotFoundError:
    return None
# Clear output after pip install.
display.clear_output()
filename, region_df = get_filename_and_region_dataframe(region_border_source,
                                                        region,
                                                        your_own_wkt_polygon)
# Remove any old outputs to not run out of disk.
for f in glob.glob('/tmp/open_buildings_*'):
  os.remove(f)
# Write header to the compressed CSV file.
with gzip.open(f'/tmp/{filename}', 'wt') as merged:
  if data_type == "polygons":
    merged.write(','.join([
        'latitude', 'longitude', 'area_in_meters', 'confidence', 'geometry',
        'full_plus_code'
    ]) + '\n')
  else:
    merged.write(','.join([
        'latitude', 'longitude', 'area_in_meters', 'confidence',
        'full_plus_code'
    ]) + '\n')
download_s2_token_fn = functools.partial(download_s2_token, region_df=region_df)
s2_tokens = get_bounding_box_s2_covering_tokens(region_df.iloc[0].geometry)
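A quick look at how many cells the coverer returned sets expectations for the download loop below:
# For a neighbourhood-sized AOI this is typically a single level-6 cell
print(f"{len(s2_tokens)} S2 cell(s) to scan")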
# Downloads CSV files for relevant S2 tokens and after filtering appends them
# to the compressed output CSV file. Relies on the fact that concatenating
# gzipped files produces a valid gzip file.
# NOTE: Uses a pool to speed up output preparation.
with open(f'/tmp/{filename}', 'ab') as merged:
  with multiprocessing.Pool(4) as e:
    for fname in tqdm.notebook.tqdm(
        e.imap_unordered(download_s2_token_fn, s2_tokens),
        total=len(s2_tokens)):
      if fname:
        with open(fname, 'rb') as tmp_f:
          shutil.copyfileobj(tmp_f, merged)
        os.unlink(fname)
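Because concatenating gzip members yields a valid gzip file, the merged output can be read back directly as a sanity check; a sketch:
# The merged file should parse as one CSV with the header written above
preview = pd.read_csv(f'/tmp/{filename}', compression='gzip', nrows=5)
print(preview)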
Step 2. Access the generated CSV
We currently support two options:
- Upload to your Google Drive (requires authentication) and then download manually from Google Drive [recommended, very fast]
- Download directly from the Colab [not recommended, very slow]
When asked to authenticate, follow the URL, paste the generated token into the text field, and press Enter.
#@title Upload result to Google Drive [fast, requires authentication]
from google.colab import auth
from googleapiclient.http import MediaFileUpload
from googleapiclient.discovery import build
auth.authenticate_user()
drive_service = build('drive', 'v3')
file_metadata = {
    'name': filename,
    'mimeType': 'application/gzip'
}
media = MediaFileUpload(f'/tmp/{filename}',
                        mimetype='application/gzip',
                        resumable=True)
created = drive_service.files().create(body=file_metadata,
                                       media_body=media,
                                       fields='id').execute()
print('Upload to Google Drive done.')
print(f'Please download {file_metadata["name"]} manually from Google Drive.')
print(f'Search link: https://drive.google.com/drive/search?q={filename}')
#@title Download result directly [slow]
from google.colab import files
files.download(f'/tmp/{filename}')
# Read the CSV file
df = pd.read_csv('ThindiguaBuildings.csv')
print(df.columns)
# The 'geometry' column holds WKT polygon strings; parse them into shapely
# geometries rather than rebuilding polygons from the centroid coordinates,
# which would produce degenerate zero-area shapes.
from shapely import wkt
polygons = df['geometry'].apply(wkt.loads)
# Create a GeoDataFrame from the parsed polygons
gdf = gpd.GeoDataFrame(df.drop(columns='geometry'), geometry=polygons, crs='EPSG:4326')
# Print the GeoDataFrame
print(gdf)
import geopandas as gpd
from shapely import wkt
from IPython.display import display
# Read the CSV file and create the GeoDataFrame from the WKT geometries
df = pd.read_csv('ThindiguaBuildings.csv')
gdf = gpd.GeoDataFrame(
    df.drop(columns='geometry'),
    geometry=df['geometry'].apply(wkt.loads),
    crs='EPSG:4326')
# Plot the GeoDataFrame on an interactive Leaflet map
gdf.explore(cmap='viridis')
gdf.to_file("ThindiguaBuildings.geojson", driver="GeoJSON")