A basic introduction to raster processing
Returns a subset of a rasterio dataset. This function assumes that the bounding coordinates for the desired cropped rasters are present in the input geodataframe's geometry column.
query_window_by_polygon(raster, output_folder, gdf, mask)
For cropping a raster based on boundaries of a single polygon. A technical step-by-step explanation of how query_window_by_polygon
works is detailed in the cell blocks below.
type | default | optional/ required | details | |
---|---|---|---|---|
raster | String | none | required | local filename of raster or open raster dataset |
output_folder | String | none | required | file path where cropped raster outputs will be saved |
gdf | GeoDataFrame | none | required | polygon that will become basis of boundaries for cropping |
mask | Boolean | none | required | True- Assign NULL to areas outside borders of a non-rectangular polygon \ False- Retain values outside borders of a non-rectangular polygon |
- Define the function and its arguments.
def query_window_by_polygon(
input_raster: Union[str, DatasetReader, PosixPath],
output_path: str,
geometry: Polygon,
mask=False,
) -> None:
- Check if raster is specified as PosixPath and convert to String.
if isinstance(input_raster, PosixPath):
input_raster = str(input_raster)
- Check if raster is path to a file or an open raster dataset.
if isinstance(input_raster,str):
input_dset = rio.open(input_raster)
else:
input_dset = input_raster
- Get the window bounds (left, right, top, bottom coordinates) from polygon geometry and check if it has the correct number of elements.
window_bounds = geometry.bounds
assert (
len(window_bounds) == 4
),
- Unroll window bounds.
left, bottom, right, top = window_bounds
- Open raster as input.
with rio.open(input_raster) as input_dst:
- Get profile of input_dst:
input_profile = input_dst.profile
- Specify window and query subset.
window = rio.windows.from_bounds(left, bottom, right, top, input_dst.transform)
subset = input_dst.read(window=window)
- Get the shape of the output subset.
number_of_bands, height, width = subset.shape
- Get the transformation of the subset based on the window.
win_transform = input_dst.window_transform(window)
- Update metadata for the output.
output_profile = input_profile.copy()
update_params = {
"count": number_of_bands,
"height": height,
"width": width,
"transform": win_transform,
}
output_profile.update(update_params)
- Write image to output file.
with rio.open(output_path, "w", **output_profile) as output_dst:
output_dst.write(subset)
output_dst.colorinterp = input_dst.colorinterp
- Apply mask according to shape of the polygon.
if mask:
with rio.open(output_path) as dst:
masked_image, masked_transform = rio.mask.mask(dst, [geometry], crop=True)
with rio.open(output_path, "w", **output_profile) as output_dst:
update_params = {
"height": masked_image.shape[1],
"width": masked_image.shape[2],
"transform": masked_transform,
}
output_profile.update(update_params)
output_dst.write(masked_image)
- Return output.
return
query_window_by_gdf(raster, output_folder, gdf, name_col, mask)
For cropping a raster based on boundaries of multiple polygons. A step-by-step explanation of how query_window_by_gdf
works is detailed in the cell blocks below.
type | default | optional/ required | details | |
---|---|---|---|---|
raster | String | none | required | local filename of raster or open raster dataset |
output_folder | String | none | required | file path where cropped raster outputs will be saved |
gdf | GeoDataFrame | none | required | polygon that will become basis of boundaries for cropping |
name_col | String | none | optional | column name to base output filepath on. if left blank, outputs will be name sequentially as 'output_0.tif' |
mask | Boolean | none | required | True- Assign NULL to areas outside borders of a non-rectangular polygon \ False- Retain values outside borders of a non-rectangular polygon |
- Define the function and create a copy of the geodataframe.
def query_window_by_gdf(
input_raster: Union[str, DatasetReader, PosixPath],
output_folder: str,
gdf,
name_col=None,
mask=False,
) -> None:
gdf = gdf.copy()
- Check if the coordinate reference systems of the raster and geodataframe match.
with rio.open(input_raster) as dst:
assert dst.meta["crs"] == gdf.crs, "input_raster and gdf CRS must match!"
- Name outputs based on values from name_col.
if name_col is None:
name_col = "name"
gdf[name_col] = "output_" + gdf.reset_index().index.astype(str) + ".tif"
else:
gdf[name_col] = gdf[name_col] + ".tif"
for i, row in gdf.iterrows():
polygon = row.geometry
output_name = row[name_col]
output_path = output_folder / output_name
print(output_path)
query_window_by_polygon(input_raster, output_path, polygon, mask)
Input:
input_image
- raster of 2020 Philippine populationoutput_folder
- filepath to where output raster will be savedcircle_gdf
- GeoDataFrame containining single circular polygonmask
- True- to assign NULL values to cells outside the circular polygon
Output:
- cropped raster (GeoTiff) containing only values within circular polygon
import rasterio as rio
import geowrangler.raster_process as raster_process
input_image
raster = rio.open(input_image)
show(raster.read(1), cmap="plasma", transform=raster.transform);
circle_gdf
fig, ax = plt.subplots(1, 1, figsize=(4, 8))
show(raster.read(1), cmap="viridis", ax=ax, transform=raster.transform)
circle_gdf.plot(ax=ax, facecolor="none", edgecolor="yellow")
ax;
output_folder = Path("../data")
raster_process.query_window_by_gdf(input_image, output_folder, circle_gdf, mask=True)
with rio.open(output_folder / "output_0.tif") as dst:
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
show(dst.read(1), cmap="viridis", ax=ax, transform=dst.transform)
circle_gdf.plot(facecolor="none", edgecolor="yellow", ax=ax)
print(dst.read(1))
ax;
Input:
input_image
- raster of 2020 Philippine populationoutput_folder
- filepath to where output rasters will be savedgrid_gdf
- GeoDataFrame containining multiple polygons (grid cells)mask
- False- no need to assign NULL values because grid cells occupy full space from image center to boundaries
Output:
- multiple cropped rasters (GeoTiff) split according to grid cell polygons
import rasterio as rio
import geowrangler.raster_process
input_image
raster = rio.open(input_image)
show(raster.read(1), cmap="plasma", transform=raster.transform)
grid_gdf
fig, ax = plt.subplots(1, 1, figsize=(4, 8))
show(raster.read(1), cmap="viridis", ax=ax, transform=raster.transform)
grid_gdf.plot(ax=ax, facecolor="none", edgecolor="yellow")
ax
output_folder = Path("../data")
raster_process.query_window_by_gdf(
input_image, output_folder, grid_gdf, name_col="name", mask=False
)
for name in grid_gdf["name"]:
image_path = output_folder / (name + ".tif")
with rio.open(image_path) as dst:
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax.set_title(image_path)
show(dst.read(1), cmap="viridis", ax=ax, transform=dst.transform)