-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
122 lines (98 loc) · 4.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# !/usr/bin/env python
"""
SEED Platform (TM), Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
See also https://github.com/SEED-platform/seed/blob/main/LICENSE.md
"""
import gzip
import json
import os
import sys
import warnings
from pathlib import Path
from typing import Any
import geopandas as gpd
import mercantile
import pandas as pd
from dotenv import load_dotenv
from shapely.geometry import Point
from utils.common import Location
from utils.geocode_addresses import geocode_addresses
from utils.normalize_address import normalize_address
from utils.ubid import bounding_box, centroid, encode_ubid
from utils.update_dataset_links import update_dataset_links
from utils.update_quadkeys import update_quadkeys
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
load_dotenv()
def main():
MAPQUEST_API_KEY = os.getenv("MAPQUEST_API_KEY")
if not MAPQUEST_API_KEY:
sys.exit("Missing MapQuest API key")
if not os.path.exists("locations.json"):
sys.exit("Missing locations.json file")
quadkey_path = Path("data/quadkeys")
if not quadkey_path.exists():
quadkey_path.mkdir(parents=True, exist_ok=True)
with open("locations.json") as f:
locations: list[Location] = json.load(f)
for loc in locations:
loc["street"] = normalize_address(loc["street"])
data = geocode_addresses(locations, MAPQUEST_API_KEY)
# TODO confirm high quality geocoding results, and that all results have latitude/longitude properties
# Find all quadkeys that the coordinates fall within
quadkeys = set()
for datum in data:
tile = mercantile.tile(datum["longitude"], datum["latitude"], 9)
quadkey = int(mercantile.quadkey(tile))
quadkeys.add(quadkey)
datum["quadkey"] = quadkey
# Download quadkey dataset links
update_dataset_links()
# Download quadkeys
update_quadkeys(list(quadkeys))
# Loop properties and load quadkeys as necessary
loaded_quadkeys: dict[int, Any] = {}
for datum in data:
quadkey = datum["quadkey"]
if quadkey not in loaded_quadkeys:
print(f"Loading {quadkey}")
with gzip.open(f"data/quadkeys/{quadkey}.geojsonl.gz", "rb") as f:
loaded_quadkeys[quadkey] = gpd.read_file(f)
print(f" {len(loaded_quadkeys[quadkey])} footprints in quadkey")
geojson = loaded_quadkeys[quadkey]
point = Point(datum["longitude"], datum["latitude"])
point_gdf = gpd.GeoDataFrame(crs="epsg:4326", geometry=[point])
# intersections have `geometry`, `index_right`, and `height`
intersections = gpd.sjoin(point_gdf, geojson)
if len(intersections) >= 1:
footprint = geojson.iloc[intersections.iloc[0].index_right]
datum["footprint_match"] = "intersection"
else:
footprint = geojson.iloc[geojson.distance(point).sort_values().index[0]]
datum["footprint_match"] = "closest"
datum["geometry"] = footprint.geometry
datum["height"] = footprint.height if footprint.height != -1 else None
# Determine UBIDs from footprints
datum["ubid"] = encode_ubid(datum["geometry"])
# Save covered building list as csv and GeoJSON
columns = [
"address", "city", "state", "postal_code", "side_of_street", "neighborhood", "county",
"country", "latitude", "longitude", "quality", "footprint_match", "height", "ubid",
"geometry"] # fmt: off
gdf = gpd.GeoDataFrame(data=data, columns=columns)
gdf.to_csv("data/covered-buildings.csv", index=False)
gdf.to_file("data/covered-buildings.geojson", driver="GeoJSON")
# Save a custom GeoJSON with 3 layers: UBID bounding boxes, footprints, then UBID centroids
bounding_boxes = gpd.GeoDataFrame(
data=[{"UBID Bounding Box": datum["address"], "geometry": bounding_box(datum["ubid"])} for datum in data],
columns=["UBID Bounding Box", "geometry"],
)
centroids = gpd.GeoDataFrame(
data=[{"UBID Centroid": datum["address"], "geometry": centroid(datum["ubid"])} for datum in data],
columns=["UBID Centroid", "geometry"],
)
gdf_ubid = pd.concat([bounding_boxes, gdf, centroids])
with open("data/covered-buildings-ubid.geojson", "w") as f:
f.write(gdf_ubid.to_json(drop_id=True, na="drop"))
if __name__ == "__main__":
main()