Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions python/sedona/spark/geopandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2244,6 +2244,105 @@ def symmetric_difference(self, other, align=None):
"""
return _delegate_to_geometry_column("symmetric_difference", self, other, align)

def union(self, other, align=None):
    """Return a ``GeoSeries`` of the union of points in each aligned geometry
    with `other`.

    The operation works in a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to find the
        union with.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        If False, the order of elements is preserved. None defaults to True.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(1, 0), (1, 3)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(1, 1),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 6),
    ... )

    >>> s
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2             LINESTRING (0 0, 2 2)
    3             LINESTRING (2 0, 0 2)
    4                       POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((0 0, 1 1, 0 1, 0 0))
    2             LINESTRING (1 0, 1 3)
    3             LINESTRING (2 0, 0 2)
    4                       POINT (1 1)
    5                       POINT (0 1)
    dtype: geometry

    We can do union of each geometry and a single shapely geometry:

    >>> s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
    0             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
    1             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
    2    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
    3    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
    4                       POLYGON ((0 1, 1 1, 0 0, 0 1))
    dtype: geometry

    We can also check two GeoSeries against each other, row by row.
    The GeoSeries above have different indices. We can either align both GeoSeries
    based on index values and compare elements with the same index using
    ``align=True`` or ignore index and compare elements based on their matching
    order using ``align=False``. With ``align=True`` the index values that are
    present in only one of the two series (0 and 5 here) yield ``None``:

    >>> s.union(s2, align=True)
    0                                                 None
    1             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
    2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (1 0,...
    3                                LINESTRING (2 0, 0 2)
    4                            MULTIPOINT ((0 1), (1 1))
    5                                                 None
    dtype: geometry

    >>> s.union(s2, align=False)
    0             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
    1    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 2, 1 2, 2...
    2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (2 0,...
    3                                LINESTRING (2 0, 0 2)
    4                                          POINT (0 1)
    dtype: geometry

    See Also
    --------
    GeoSeries.symmetric_difference
    GeoSeries.difference
    GeoSeries.intersection
    """
    return _delegate_to_geometry_column("union", self, other, align)

def intersection_all(self):
    """Placeholder for ``intersection_all``; currently always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally, because the operation has no implementation yet.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)

Expand Down
12 changes: 12 additions & 0 deletions python/sedona/spark/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,18 @@ def symmetric_difference(self, other, align=None) -> "GeoSeries":
returns_geom=True,
)

def union(self, other, align=None) -> "GeoSeries":
    """Compute the row-wise ``ST_Union`` of this series with `other`.

    Parameters
    ----------
    other : GeoSeries or geometric object
        Right-hand operand; it is first passed through
        ``_make_series_of_val`` to obtain a series.
    align : bool | None (default None)
        Forwarded to ``_row_wise_operation``. It is forced to ``False``
        when ``_make_series_of_val`` reports that it extended `other`
        (presumably a single value expanded to a series -- confirm against
        that helper).

    Returns
    -------
    GeoSeries
    """
    rhs_series, was_extended = self._make_series_of_val(other)
    if was_extended:
        # An extended operand is paired without index alignment.
        align = False

    # "L" and "R" name the left/right operand columns of the row-wise helper.
    union_expr = stf.ST_Union(F.col("L"), F.col("R"))
    return self._row_wise_operation(
        union_expr, rhs_series, align=align, returns_geom=True
    )

@property
def is_simple(self) -> pspd.Series:
spark_expr = stf.ST_IsSimple(self.spark.column)
Expand Down
95 changes: 95 additions & 0 deletions python/tests/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,101 @@ def test_symmetric_difference(self):
df_result = s.to_geoframe().symmetric_difference(s2, align=False)
self.check_sgpd_equals_gpd(df_result, expected)

def test_union(self):
    """Check ``union`` against a scalar geometry and against another series.

    Covers a single shapely geometry, ``align=True``, ``align=False`` and
    the same call made through a GeoDataFrame.
    """
    # Input geometries that are reused across both series.
    big_triangle = Polygon([(0, 0), (2, 2), (0, 2)])
    small_triangle = Polygon([(0, 0), (1, 1), (0, 1)])
    rising_line = LineString([(0, 0), (2, 2)])
    falling_line = LineString([(2, 0), (0, 2)])

    # Expected pieces that appear in more than one result.
    merged_triangles = Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)])
    small_triangle_result = Polygon([(0, 0), (0, 1), (1, 1), (0, 0)])
    rising_halves = [
        LineString([(0, 0), (1, 1)]),
        LineString([(1, 1), (2, 2)]),
    ]
    vertical_halves = [
        LineString([(1, 0), (1, 1)]),
        LineString([(1, 1), (1, 3)]),
    ]

    s = GeoSeries(
        [big_triangle, big_triangle, rising_line, falling_line, Point(0, 1)],
    )
    s2 = GeoSeries(
        [
            small_triangle,
            LineString([(1, 0), (1, 3)]),
            falling_line,
            Point(1, 1),
            Point(0, 1),
        ],
        index=range(1, 6),
    )

    # Test with single geometry
    scalar_result = s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
    scalar_expected = gpd.GeoSeries(
        [
            merged_triangles,
            merged_triangles,
            GeometryCollection(
                [small_triangle_result, LineString([(0, 0), (2, 2)])]
            ),
            GeometryCollection(
                [small_triangle_result, LineString([(2, 0), (0, 2)])]
            ),
            Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
        ]
    )
    self.check_sgpd_equals_gpd(scalar_result, scalar_expected)

    # Test with align=True
    aligned_result = s.union(s2, align=True)
    aligned_expected = gpd.GeoSeries(
        [
            None,
            merged_triangles,
            MultiLineString(rising_halves + vertical_halves),
            LineString([(2, 0), (0, 2)]),
            MultiPoint([Point(0, 1), Point(1, 1)]),
            None,
        ]
    )
    self.check_sgpd_equals_gpd(aligned_result, aligned_expected)

    # Test with align=False
    positional_result = s.union(s2, align=False)
    positional_expected = gpd.GeoSeries(
        [
            merged_triangles,
            GeometryCollection(
                [Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 0)])]
                + vertical_halves
            ),
            MultiLineString(
                rising_halves
                + [
                    LineString([(2, 0), (1, 1)]),
                    LineString([(1, 1), (0, 2)]),
                ]
            ),
            LineString([(2, 0), (0, 2)]),
            Point(0, 1),
        ]
    )
    self.check_sgpd_equals_gpd(positional_result, positional_expected)

    # Check that GeoDataFrame works too
    frame_result = s.to_geoframe().union(s2, align=False)
    self.check_sgpd_equals_gpd(frame_result, positional_expected)

def test_is_simple(self):
s = sgpd.GeoSeries(
[
Expand Down
20 changes: 20 additions & 0 deletions python/tests/geopandas/test_match_geopandas_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,26 @@ def test_symmetric_difference(self):
)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

def test_union(self):
    """Compare ``union`` results with geopandas for every pair in ``self.pairs``.

    Pairs containing any invalid geometry are skipped. The positional
    (``align=False``) comparison runs only for operands of equal length.
    """
    for left, right in self.pairs:
        # Operation doesn't work on invalid geometries
        both_valid = (
            gpd.GeoSeries(left).is_valid.all()
            and gpd.GeoSeries(right).is_valid.all()
        )
        if not both_valid:
            continue

        actual = GeoSeries(left).union(GeoSeries(right))
        reference = gpd.GeoSeries(left).union(gpd.GeoSeries(right))
        self.check_sgpd_equals_gpd(actual, reference)

        if len(left) != len(right):
            continue

        actual = GeoSeries(left).union(GeoSeries(right), align=False)
        reference = gpd.GeoSeries(left).union(gpd.GeoSeries(right), align=False)
        self.check_sgpd_equals_gpd(actual, reference)

def test_is_simple(self):
# 'is_simple' is meaningful only for `LineStrings` and `LinearRings`
data = [
Expand Down
Loading