Skip to content

Commit 34b9d39

Browse files
committedJun 28, 2018
[FEATURE][processing] K Means clustering algorithm
Adds a native k-means clustering algorithm. Based on a port of PostGIS' ST_ClusterKMeans function, this new algorithm adds a new cluster ID field to a set of input features identify the feature's cluster based on the k-means clustering approach. If non-point geometries are used as input, the clustering is based off the centroid of the input geometries.
1 parent 16f3781 commit 34b9d39

16 files changed

+1032
-1
lines changed
 
Binary file not shown.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
10+
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_lines fid="lines.0">
16+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
17+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
18+
</ogr:kmeans_lines>
19+
</gml:featureMember>
20+
<gml:featureMember>
21+
<ogr:kmeans_lines fid="lines.1">
22+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
23+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
24+
</ogr:kmeans_lines>
25+
</gml:featureMember>
26+
<gml:featureMember>
27+
<ogr:kmeans_lines fid="lines.2">
28+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
29+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
30+
</ogr:kmeans_lines>
31+
</gml:featureMember>
32+
<gml:featureMember>
33+
<ogr:kmeans_lines fid="lines.3">
34+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
35+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
36+
</ogr:kmeans_lines>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_lines fid="lines.4">
40+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
41+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
42+
</ogr:kmeans_lines>
43+
</gml:featureMember>
44+
<gml:featureMember>
45+
<ogr:kmeans_lines fid="lines.5">
46+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
47+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
48+
</ogr:kmeans_lines>
49+
</gml:featureMember>
50+
<gml:featureMember>
51+
<ogr:kmeans_lines fid="lines.6">
52+
<ogr:CLUSTER_ID xsi:nil="true"/>
53+
</ogr:kmeans_lines>
54+
</gml:featureMember>
55+
</ogr:FeatureCollection>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_lines_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
</xs:sequence>
27+
</xs:extension>
28+
</xs:complexContent>
29+
</xs:complexType>
30+
</xs:schema>
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
10+
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_points_3 fid="points.0">
16+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
17+
<ogr:id>1</ogr:id>
18+
<ogr:id2>2</ogr:id2>
19+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
20+
</ogr:kmeans_points_3>
21+
</gml:featureMember>
22+
<gml:featureMember>
23+
<ogr:kmeans_points_3 fid="points.1">
24+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
25+
<ogr:id>2</ogr:id>
26+
<ogr:id2>1</ogr:id2>
27+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
28+
</ogr:kmeans_points_3>
29+
</gml:featureMember>
30+
<gml:featureMember>
31+
<ogr:kmeans_points_3 fid="points.2">
32+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
33+
<ogr:id>3</ogr:id>
34+
<ogr:id2>0</ogr:id2>
35+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
36+
</ogr:kmeans_points_3>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_points_3 fid="points.3">
40+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
41+
<ogr:id>4</ogr:id>
42+
<ogr:id2>2</ogr:id2>
43+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
44+
</ogr:kmeans_points_3>
45+
</gml:featureMember>
46+
<gml:featureMember>
47+
<ogr:kmeans_points_3 fid="points.4">
48+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
49+
<ogr:id>5</ogr:id>
50+
<ogr:id2>1</ogr:id2>
51+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
52+
</ogr:kmeans_points_3>
53+
</gml:featureMember>
54+
<gml:featureMember>
55+
<ogr:kmeans_points_3 fid="points.5">
56+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
57+
<ogr:id>6</ogr:id>
58+
<ogr:id2>0</ogr:id2>
59+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
60+
</ogr:kmeans_points_3>
61+
</gml:featureMember>
62+
<gml:featureMember>
63+
<ogr:kmeans_points_3 fid="points.6">
64+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
65+
<ogr:id>7</ogr:id>
66+
<ogr:id2>0</ogr:id2>
67+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
68+
</ogr:kmeans_points_3>
69+
</gml:featureMember>
70+
<gml:featureMember>
71+
<ogr:kmeans_points_3 fid="points.7">
72+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
73+
<ogr:id>8</ogr:id>
74+
<ogr:id2>0</ogr:id2>
75+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
76+
</ogr:kmeans_points_3>
77+
</gml:featureMember>
78+
<gml:featureMember>
79+
<ogr:kmeans_points_3 fid="points.8">
80+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
81+
<ogr:id>9</ogr:id>
82+
<ogr:id2>0</ogr:id2>
83+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
84+
</ogr:kmeans_points_3>
85+
</gml:featureMember>
86+
</ogr:FeatureCollection>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_points_3_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
27+
<xs:simpleType>
28+
<xs:restriction base="xs:integer">
29+
<xs:totalDigits value="10"/>
30+
</xs:restriction>
31+
</xs:simpleType>
32+
</xs:element>
33+
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
34+
<xs:simpleType>
35+
<xs:restriction base="xs:integer">
36+
<xs:totalDigits value="10"/>
37+
</xs:restriction>
38+
</xs:simpleType>
39+
</xs:element>
40+
</xs:sequence>
41+
</xs:extension>
42+
</xs:complexContent>
43+
</xs:complexType>
44+
</xs:schema>
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
10+
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_points_5 fid="points.0">
16+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
17+
<ogr:id>1</ogr:id>
18+
<ogr:id2>2</ogr:id2>
19+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
20+
</ogr:kmeans_points_5>
21+
</gml:featureMember>
22+
<gml:featureMember>
23+
<ogr:kmeans_points_5 fid="points.1">
24+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
25+
<ogr:id>2</ogr:id>
26+
<ogr:id2>1</ogr:id2>
27+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
28+
</ogr:kmeans_points_5>
29+
</gml:featureMember>
30+
<gml:featureMember>
31+
<ogr:kmeans_points_5 fid="points.2">
32+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
33+
<ogr:id>3</ogr:id>
34+
<ogr:id2>0</ogr:id2>
35+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
36+
</ogr:kmeans_points_5>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_points_5 fid="points.3">
40+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
41+
<ogr:id>4</ogr:id>
42+
<ogr:id2>2</ogr:id2>
43+
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
44+
</ogr:kmeans_points_5>
45+
</gml:featureMember>
46+
<gml:featureMember>
47+
<ogr:kmeans_points_5 fid="points.4">
48+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
49+
<ogr:id>5</ogr:id>
50+
<ogr:id2>1</ogr:id2>
51+
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
52+
</ogr:kmeans_points_5>
53+
</gml:featureMember>
54+
<gml:featureMember>
55+
<ogr:kmeans_points_5 fid="points.5">
56+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
57+
<ogr:id>6</ogr:id>
58+
<ogr:id2>0</ogr:id2>
59+
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
60+
</ogr:kmeans_points_5>
61+
</gml:featureMember>
62+
<gml:featureMember>
63+
<ogr:kmeans_points_5 fid="points.6">
64+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
65+
<ogr:id>7</ogr:id>
66+
<ogr:id2>0</ogr:id2>
67+
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
68+
</ogr:kmeans_points_5>
69+
</gml:featureMember>
70+
<gml:featureMember>
71+
<ogr:kmeans_points_5 fid="points.7">
72+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
73+
<ogr:id>8</ogr:id>
74+
<ogr:id2>0</ogr:id2>
75+
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
76+
</ogr:kmeans_points_5>
77+
</gml:featureMember>
78+
<gml:featureMember>
79+
<ogr:kmeans_points_5 fid="points.8">
80+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
81+
<ogr:id>9</ogr:id>
82+
<ogr:id2>0</ogr:id2>
83+
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
84+
</ogr:kmeans_points_5>
85+
</gml:featureMember>
86+
</ogr:FeatureCollection>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_points_5_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
27+
<xs:simpleType>
28+
<xs:restriction base="xs:integer">
29+
<xs:totalDigits value="10"/>
30+
</xs:restriction>
31+
</xs:simpleType>
32+
</xs:element>
33+
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
34+
<xs:simpleType>
35+
<xs:restriction base="xs:integer">
36+
<xs:totalDigits value="10"/>
37+
</xs:restriction>
38+
</xs:simpleType>
39+
</xs:element>
40+
</xs:sequence>
41+
</xs:extension>
42+
</xs:complexContent>
43+
</xs:complexType>
44+
</xs:schema>
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_polys.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
10+
<gml:coord><gml:X>10</gml:X><gml:Y>6</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_polys fid="polys.0">
16+
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>-1,-1 -1,3 3,3 3,2 2,2 2,-1 -1,-1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
17+
<ogr:name>aaaaa</ogr:name>
18+
<ogr:intval>33</ogr:intval>
19+
<ogr:floatval>44.123456</ogr:floatval>
20+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
21+
</ogr:kmeans_polys>
22+
</gml:featureMember>
23+
<gml:featureMember>
24+
<ogr:kmeans_polys fid="polys.1">
25+
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>5,5 6,4 4,4 5,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
26+
<ogr:name>Aaaaa</ogr:name>
27+
<ogr:intval>-33</ogr:intval>
28+
<ogr:floatval>0</ogr:floatval>
29+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
30+
</ogr:kmeans_polys>
31+
</gml:featureMember>
32+
<gml:featureMember>
33+
<ogr:kmeans_polys fid="polys.2">
34+
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>2,5 2,6 3,6 3,5 2,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
35+
<ogr:name>bbaaa</ogr:name>
36+
<ogr:intval xsi:nil="true"/>
37+
<ogr:floatval>0.123</ogr:floatval>
38+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
39+
</ogr:kmeans_polys>
40+
</gml:featureMember>
41+
<gml:featureMember>
42+
<ogr:kmeans_polys fid="polys.3">
43+
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>6,1 10,1 10,-3 6,-3 6,1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs><gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>7,0 7,-2 9,-2 9,0 7,0</gml:coordinates></gml:LinearRing></gml:innerBoundaryIs></gml:Polygon></ogr:geometryProperty>
44+
<ogr:name>ASDF</ogr:name>
45+
<ogr:intval>0</ogr:intval>
46+
<ogr:floatval xsi:nil="true"/>
47+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
48+
</ogr:kmeans_polys>
49+
</gml:featureMember>
50+
<gml:featureMember>
51+
<ogr:kmeans_polys fid="polys.4">
52+
<ogr:name xsi:nil="true"/>
53+
<ogr:intval>120</ogr:intval>
54+
<ogr:floatval>-100291.43213</ogr:floatval>
55+
<ogr:CLUSTER_ID xsi:nil="true"/>
56+
</ogr:kmeans_polys>
57+
</gml:featureMember>
58+
<gml:featureMember>
59+
<ogr:kmeans_polys fid="polys.5">
60+
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>3,2 6,1 6,-3 2,-1 2,2 3,2</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
61+
<ogr:name>elim</ogr:name>
62+
<ogr:intval>2</ogr:intval>
63+
<ogr:floatval>3.33</ogr:floatval>
64+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
65+
</ogr:kmeans_polys>
66+
</gml:featureMember>
67+
</ogr:FeatureCollection>
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_polys" type="ogr:kmeans_polys_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_polys_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:PolygonPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="name" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:string">
22+
<xs:maxLength value="5"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
<xs:element name="intval" nillable="true" minOccurs="0" maxOccurs="1">
27+
<xs:simpleType>
28+
<xs:restriction base="xs:integer">
29+
<xs:totalDigits value="10"/>
30+
</xs:restriction>
31+
</xs:simpleType>
32+
</xs:element>
33+
<xs:element name="floatval" nillable="true" minOccurs="0" maxOccurs="1">
34+
<xs:simpleType>
35+
<xs:restriction base="xs:decimal">
36+
</xs:restriction>
37+
</xs:simpleType>
38+
</xs:element>
39+
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
40+
<xs:simpleType>
41+
<xs:restriction base="xs:integer">
42+
<xs:totalDigits value="10"/>
43+
</xs:restriction>
44+
</xs:simpleType>
45+
</xs:element>
46+
</xs:sequence>
47+
</xs:extension>
48+
</xs:complexContent>
49+
</xs:complexType>
50+
</xs:schema>

‎python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5660,4 +5660,56 @@ tests:
56605660
name: expected/vectorize.gml
56615661
type: vector
56625662

5663+
- algorithm: native:kmeansclustering
5664+
name: K means, points, 3 clusters
5665+
params:
5666+
CLUSTERS: 3
5667+
FIELD_NAME: CLUSTER_ID
5668+
INPUT:
5669+
name: points.gml
5670+
type: vector
5671+
results:
5672+
OUTPUT:
5673+
name: expected/kmeans_points_3.gml
5674+
type: vector
5675+
5676+
- algorithm: native:kmeansclustering
5677+
name: K means, points, 5 clusters
5678+
params:
5679+
CLUSTERS: 5
5680+
FIELD_NAME: CLUSTER_ID5
5681+
INPUT:
5682+
name: points.gml
5683+
type: vector
5684+
results:
5685+
OUTPUT:
5686+
name: expected/kmeans_points_5.gml
5687+
type: vector
5688+
5689+
- algorithm: native:kmeansclustering
5690+
name: K means, lines
5691+
params:
5692+
CLUSTERS: 2
5693+
FIELD_NAME: CLUSTER_ID
5694+
INPUT:
5695+
name: lines.gml
5696+
type: vector
5697+
results:
5698+
OUTPUT:
5699+
name: expected/kmeans_lines.gml
5700+
type: vector
5701+
5702+
- algorithm: native:kmeansclustering
5703+
name: K means, polys
5704+
params:
5705+
CLUSTERS: 2
5706+
FIELD_NAME: CLUSTER_ID
5707+
INPUT:
5708+
name: polys.gml
5709+
type: vector
5710+
results:
5711+
OUTPUT:
5712+
name: expected/kmeans_polys.gml
5713+
type: vector
5714+
56635715
# See ../README.md for a description of the file format

‎src/analysis/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ SET(QGIS_ANALYSIS_SRCS
4141
processing/qgsalgorithmfiledownloader.cpp
4242
processing/qgsalgorithmfilter.cpp
4343
processing/qgsalgorithmfixgeometries.cpp
44+
processing/qgsalgorithmimportphotos.cpp
4445
processing/qgsalgorithmintersection.cpp
4546
processing/qgsalgorithmjoinbyattribute.cpp
4647
processing/qgsalgorithmjoinwithlines.cpp
47-
processing/qgsalgorithmimportphotos.cpp
48+
processing/qgsalgorithmkmeansclustering.cpp
4849
processing/qgsalgorithmlineintersection.cpp
4950
processing/qgsalgorithmloadlayer.cpp
5051
processing/qgsalgorithmmeancoordinates.cpp
Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
/***************************************************************************
2+
qgsalgorithmkmeansclustering.cpp
3+
---------------------
4+
begin : June 2018
5+
copyright : (C) 2018 by Nyall Dawson
6+
email : nyall dot dawson at gmail dot com
7+
***************************************************************************/
8+
9+
/***************************************************************************
10+
* *
11+
* This program is free software; you can redistribute it and/or modify *
12+
* it under the terms of the GNU General Public License as published by *
13+
* the Free Software Foundation; either version 2 of the License, or *
14+
* (at your option) any later version. *
15+
* *
16+
***************************************************************************/
17+
18+
#include "qgsalgorithmkmeansclustering.h"
19+
20+
///@cond PRIVATE
21+
22+
const int KMEANS_MAX_ITERATIONS = 1000;
23+
24+
QString QgsKMeansClusteringAlgorithm::name() const
25+
{
26+
return QStringLiteral( "kmeansclustering" );
27+
}
28+
29+
QString QgsKMeansClusteringAlgorithm::displayName() const
30+
{
31+
return QObject::tr( "K-means clustering" );
32+
}
33+
34+
QStringList QgsKMeansClusteringAlgorithm::tags() const
35+
{
36+
return QObject::tr( "clustering,clusters,kmeans,points" ).split( ',' );
37+
}
38+
39+
QString QgsKMeansClusteringAlgorithm::group() const
40+
{
41+
return QObject::tr( "Vector analysis" );
42+
}
43+
44+
QString QgsKMeansClusteringAlgorithm::groupId() const
45+
{
46+
return QStringLiteral( "vectoranalysis" );
47+
}
48+
49+
void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & )
50+
{
51+
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
52+
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorAnyGeometry ) );
53+
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "CLUSTERS" ), QObject::tr( "Number of clusters" ),
54+
QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
55+
addParameter( new QgsProcessingParameterString( QStringLiteral( "FIELD_NAME" ),
56+
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ) );
57+
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorAnyGeometry ) );
58+
}
59+
60+
QString QgsKMeansClusteringAlgorithm::shortHelpString() const
61+
{
62+
return QObject::tr( "Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
63+
"If input geometries are line or polygons, the clustering is based on the centroid of the feature." );
64+
}
65+
66+
QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance() const
67+
{
68+
return new QgsKMeansClusteringAlgorithm();
69+
}
70+
71+
QVariantMap QgsKMeansClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
72+
{
73+
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
74+
if ( !source )
75+
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
76+
77+
int k = parameterAsInt( parameters, QStringLiteral( "CLUSTERS" ), context );
78+
79+
QgsFields outputFields = source->fields();
80+
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
81+
QgsFields newFields;
82+
newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
83+
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
84+
85+
QString dest;
86+
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
87+
if ( !sink )
88+
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
89+
90+
// build list of point inputs - if it's already a point, use that. If not, take the centroid.
91+
feedback->pushInfo( QObject::tr( "Collecting input points" ) );
92+
double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
93+
int i = 0;
94+
int n = 0;
95+
QgsFeature feat;
96+
97+
std::vector< Feature > clusterFeatures;
98+
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
99+
QHash< QgsFeatureId, int > idToObj;
100+
while ( features.nextFeature( feat ) )
101+
{
102+
i++;
103+
if ( feedback->isCanceled() )
104+
{
105+
break;
106+
}
107+
108+
feedback->setProgress( i * step );
109+
if ( !feat.hasGeometry() )
110+
continue;
111+
112+
n++;
113+
114+
QgsPointXY point;
115+
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point )
116+
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
117+
else
118+
{
119+
QgsGeometry centroid = feat.geometry().centroid();
120+
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
121+
}
122+
123+
idToObj.insert( feat.id(), clusterFeatures.size() );
124+
clusterFeatures.emplace_back( Feature( point ) );
125+
}
126+
127+
if ( n < k )
128+
{
129+
feedback->reportError( QObject::tr( "Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
130+
k = n;
131+
}
132+
133+
if ( k > 1 )
134+
{
135+
feedback->pushInfo( QObject::tr( "Calculating clusters" ) );
136+
137+
// cluster centers
138+
std::vector< QgsPointXY > centers( k );
139+
140+
initClusters( clusterFeatures, centers, k, feedback );
141+
calculateKMeans( clusterFeatures, centers, k, feedback );
142+
}
143+
144+
features = source->getFeatures();
145+
i = 0;
146+
while ( features.nextFeature( feat ) )
147+
{
148+
i++;
149+
if ( feedback->isCanceled() )
150+
{
151+
break;
152+
}
153+
154+
feedback->setProgress( 50 + i * step );
155+
QgsAttributes attr = feat.attributes();
156+
if ( !feat.hasGeometry() )
157+
{
158+
attr << QVariant();
159+
}
160+
else if ( k <= 1 )
161+
{
162+
attr << 0;
163+
}
164+
else if ( !idToObj.contains( feat.id() ) )
165+
{
166+
attr << QVariant();
167+
}
168+
else
169+
{
170+
attr << clusterFeatures[ idToObj.value( feat.id() ) ].cluster;
171+
}
172+
feat.setAttributes( attr );
173+
sink->addFeature( feat, QgsFeatureSink::FastInsert );
174+
}
175+
176+
QVariantMap outputs;
177+
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
178+
return outputs;
179+
}
180+
181+
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
182+
183+
void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, const int k, QgsProcessingFeedback *feedback )
184+
{
185+
ulong n = points.size();
186+
if ( n == 0 )
187+
return;
188+
189+
if ( n == 1 )
190+
{
191+
for ( int i = 0; i < k; i++ )
192+
centers[ i ] = points[ 0 ].point;
193+
return;
194+
}
195+
196+
long duplicateCount = 1;
197+
// initially scan for two most distance points from each other, p1 and p2
198+
ulong p1 = 0;
199+
ulong p2 = 0;
200+
double distanceP1 = 0;
201+
double distanceP2 = 0;
202+
double maxDistance = -1;
203+
for ( ulong i = 1; i < n; i++ )
204+
{
205+
distanceP1 = points[i].point.sqrDist( points[p1].point );
206+
distanceP2 = points[i].point.sqrDist( points[p2].point );
207+
208+
// if this point is further then existing candidates, replace our choice
209+
if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
210+
{
211+
maxDistance = std::max( distanceP1, distanceP2 );
212+
if ( distanceP1 > distanceP2 )
213+
p2 = i;
214+
else
215+
p1 = i;
216+
}
217+
218+
// also record count of duplicate points
219+
if ( qgsDoubleNear( distanceP1, 0 ) || qgsDoubleNear( distanceP2, 0 ) )
220+
duplicateCount++;
221+
}
222+
223+
if ( feedback && duplicateCount > 1 )
224+
{
225+
feedback->pushInfo( QObject::tr( "There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
226+
}
227+
228+
// By now two points should be found and be not the same
229+
Q_ASSERT( p1 != p2 && maxDistance >= 0 );
230+
231+
// Accept these two points as our initial centers
232+
centers[0] = points[p1].point;
233+
centers[1] = points[p2].point;
234+
235+
if ( k > 2 )
236+
{
237+
// array of minimum distance to a point from accepted cluster centers
238+
std::vector< double > distances( n );
239+
240+
// initialize array with distance to first object
241+
for ( ulong j = 0; j < n; j++ )
242+
{
243+
distances[j] = points[j].point.sqrDist( centers[0] );
244+
}
245+
distances[p1] = -1;
246+
distances[p2] = -1;
247+
248+
// loop i on clusters, skip 0 and 1 as found already
249+
for ( int i = 2; i < k; i++ )
250+
{
251+
ulong candidateCenter = 0;
252+
double maxDistance = std::numeric_limits<double>::lowest();
253+
254+
// loop j on points
255+
for ( ulong j = 0; j < n; j++ )
256+
{
257+
// accepted clusters are already marked with distance = -1
258+
if ( distances[j] < 0 )
259+
continue;
260+
261+
// update minimal distance with previously accepted cluster
262+
distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
263+
264+
// greedily take a point that's farthest from any of accepted clusters
265+
if ( distances[j] > maxDistance )
266+
{
267+
candidateCenter = j;
268+
maxDistance = distances[j];
269+
}
270+
}
271+
272+
// checked earlier by counting entries on input, just in case
273+
Q_ASSERT( maxDistance >= 0 );
274+
275+
// accept candidate to centers
276+
distances[candidateCenter] = -1;
277+
// copy the point coordinates into the initial centers array
278+
centers[i] = points[candidateCenter].point;
279+
}
280+
}
281+
}
282+
283+
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
284+
285+
void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback )
286+
{
287+
int converged = false;
288+
bool changed = false;
289+
290+
// avoid reallocating weights array for every iteration
291+
std::vector< uint > weights( k );
292+
293+
uint i = 0;
294+
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
295+
{
296+
if ( feedback && feedback->isCanceled() )
297+
break;
298+
299+
findNearest( objs, centers, k, changed );
300+
updateMeans( objs, centers, weights, k );
301+
converged = !changed;
302+
}
303+
304+
if ( !converged && feedback )
305+
feedback->reportError( QObject::tr( "Clustering did not converge after %1 iterations" ).arg( i ) );
306+
else if ( feedback )
307+
feedback->pushInfo( QObject::tr( "Clustering converged after %1 iterations" ).arg( i ) );
308+
}
309+
310+
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
311+
312+
void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> &centers, const int k, bool &changed )
313+
{
314+
changed = false;
315+
ulong n = points.size();
316+
for ( ulong i = 0; i < n; i++ )
317+
{
318+
Feature &point = points[i];
319+
320+
// Initialize with distance to first cluster
321+
double currentDistance = point.point.sqrDist( centers[0] );
322+
int currentCluster = 0;
323+
324+
// Check all other cluster centers and find the nearest
325+
for ( int cluster = 1; cluster < k; cluster++ )
326+
{
327+
const double distance = point.point.sqrDist( centers[cluster] );
328+
if ( distance < currentDistance )
329+
{
330+
currentDistance = distance;
331+
currentCluster = cluster;
332+
}
333+
}
334+
335+
// Store the nearest cluster this object is in
336+
if ( point.cluster != currentCluster )
337+
{
338+
changed = true;
339+
point.cluster = currentCluster;
340+
}
341+
}
342+
}
343+
344+
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
345+
346+
void QgsKMeansClusteringAlgorithm::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, const int k )
347+
{
348+
uint n = points.size();
349+
std::fill( weights.begin(), weights.end(), 0 );
350+
for ( int i = 0; i < k; i++ )
351+
{
352+
centers[i].setX( 0.0 );
353+
centers[i].setY( 0.0 );
354+
}
355+
for ( uint i = 0; i < n; i++ )
356+
{
357+
int cluster = points[i].cluster;
358+
centers[cluster] += QgsVector( points[i].point.x(),
359+
points[i].point.y() );
360+
weights[cluster] += 1;
361+
}
362+
for ( int i = 0; i < k; i++ )
363+
{
364+
centers[i] /= weights[i];
365+
}
366+
}
367+
368+
369+
///@endcond
370+
371+
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/***************************************************************************
2+
qgsalgorithmkmeansclustering.h
3+
---------------------
4+
begin : June 2018
5+
copyright : (C) 2018 by Nyall Dawson
6+
email : nyall dot dawson at gmail dot com
7+
***************************************************************************/
8+
9+
/***************************************************************************
10+
* *
11+
* This program is free software; you can redistribute it and/or modify *
12+
* it under the terms of the GNU General Public License as published by *
13+
* the Free Software Foundation; either version 2 of the License, or *
14+
* (at your option) any later version. *
15+
* *
16+
***************************************************************************/
17+
18+
#ifndef QGSALGORITHMKMEANSCLUSTERING_H
19+
#define QGSALGORITHMKMEANSCLUSTERING_H
20+
21+
#define SIP_NO_FILE
22+
23+
#include "qgis.h"
24+
#include "qgis_analysis.h"
25+
#include "qgsprocessingalgorithm.h"
26+
27+
///@cond PRIVATE
28+
29+
30+
/**
31+
* Native k-means clustering algorithm.
32+
*/
33+
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorithm
34+
{
35+
36+
public:
37+
38+
QgsKMeansClusteringAlgorithm() = default;
39+
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
40+
QString name() const override;
41+
QString displayName() const override;
42+
QStringList tags() const override;
43+
QString group() const override;
44+
QString groupId() const override;
45+
QString shortHelpString() const override;
46+
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
47+
48+
protected:
49+
50+
QVariantMap processAlgorithm( const QVariantMap &parameters,
51+
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
52+
53+
private:
54+
55+
struct Feature
56+
{
57+
Feature( QgsPointXY point )
58+
: point( point )
59+
{}
60+
61+
QgsPointXY point;
62+
int cluster = -1;
63+
};
64+
65+
static void initClusters( std::vector< Feature > &points, std::vector< QgsPointXY > &centers, int k, QgsProcessingFeedback *feedback );
66+
static void calculateKMeans( std::vector< Feature > &points, std::vector< QgsPointXY > &centers, int k, QgsProcessingFeedback *feedback );
67+
static void findNearest( std::vector< Feature > &points, const std::vector< QgsPointXY > &centers, int k, bool &changed );
68+
static void updateMeans( const std::vector< Feature > &points, std::vector< QgsPointXY > &centers, std::vector< uint > &weights, int k );
69+
70+
friend class TestQgsProcessingAlgs;
71+
};
72+
73+
///@endcond PRIVATE
74+
75+
#endif // QGSALGORITHMKMEANSCLUSTERING_H
76+
77+

‎src/analysis/processing/qgsnativealgorithms.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "qgsalgorithmjoinwithlines.h"
4343
#include "qgsalgorithmimportphotos.h"
4444
#include "qgsalgorithmintersection.h"
45+
#include "qgsalgorithmkmeansclustering.h"
4546
#include "qgsalgorithmlineintersection.h"
4647
#include "qgsalgorithmloadlayer.h"
4748
#include "qgsalgorithmmeancoordinates.h"
@@ -150,6 +151,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
150151
addAlgorithm( new QgsIntersectionAlgorithm() );
151152
addAlgorithm( new QgsJoinByAttributeAlgorithm() );
152153
addAlgorithm( new QgsJoinWithLinesAlgorithm() );
154+
addAlgorithm( new QgsKMeansClusteringAlgorithm() );
153155
addAlgorithm( new QgsLineIntersectionAlgorithm() );
154156
addAlgorithm( new QgsLoadLayerAlgorithm() );
155157
addAlgorithm( new QgsMeanCoordinatesAlgorithm() );

‎src/core/geometry/qgsgeometry.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,6 +1870,12 @@ QgsGeometry QgsGeometry::centroid() const
18701870
return QgsGeometry();
18711871
}
18721872

1873+
// avoid calling geos for trivial point centroids
1874+
if ( QgsWkbTypes::flatType( d->geometry->wkbType() ) == QgsWkbTypes::Point )
1875+
{
1876+
return *this;
1877+
}
1878+
18731879
QgsGeos geos( d->geometry.get() );
18741880

18751881
mLastError.clear();

‎tests/src/analysis/testqgsprocessingalgs.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "qgsnativealgorithms.h"
2626
#include "qgsalgorithmimportphotos.h"
2727
#include "qgsalgorithmtransform.h"
28+
#include "qgsalgorithmkmeansclustering.h"
2829

2930
class TestQgsProcessingAlgs: public QObject
3031
{
@@ -41,6 +42,7 @@ class TestQgsProcessingAlgs: public QObject
4142
void parseGeoTags();
4243
void featureFilterAlg();
4344
void transformAlg();
45+
void kmeansCluster();
4446

4547
private:
4648

@@ -435,6 +437,64 @@ void TestQgsProcessingAlgs::transformAlg()
435437
QVERIFY( ok );
436438
}
437439

440+
void TestQgsProcessingAlgs::kmeansCluster()
441+
{
442+
// make some features
443+
std::vector< QgsKMeansClusteringAlgorithm::Feature > features;
444+
std::vector< QgsPointXY > centers( 2 );
445+
446+
// no features, no crash
447+
int k = 2;
448+
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
449+
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
450+
451+
// features < clusters
452+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
453+
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
454+
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
455+
QCOMPARE( features[ 0 ].cluster, 0 );
456+
457+
// features == clusters
458+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 11, 5 ) ) );
459+
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
460+
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
461+
QCOMPARE( features[ 0 ].cluster, 1 );
462+
QCOMPARE( features[ 1 ].cluster, 0 );
463+
464+
// features > clusters
465+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 3 ) ) );
466+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 13 ) ) );
467+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 23, 6 ) ) );
468+
k = 2;
469+
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
470+
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
471+
QCOMPARE( features[ 0 ].cluster, 1 );
472+
QCOMPARE( features[ 1 ].cluster, 1 );
473+
QCOMPARE( features[ 2 ].cluster, 0 );
474+
QCOMPARE( features[ 3 ].cluster, 0 );
475+
QCOMPARE( features[ 4 ].cluster, 0 );
476+
477+
// repeat above, with 3 clusters
478+
k = 3;
479+
centers.resize( 3 );
480+
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
481+
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
482+
QCOMPARE( features[ 0 ].cluster, 1 );
483+
QCOMPARE( features[ 1 ].cluster, 2 );
484+
QCOMPARE( features[ 2 ].cluster, 2 );
485+
QCOMPARE( features[ 3 ].cluster, 2 );
486+
QCOMPARE( features[ 4 ].cluster, 0 );
487+
488+
// with identical points
489+
features.clear();
490+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
491+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
492+
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
493+
QCOMPARE( features[ 0 ].cluster, -1 );
494+
QCOMPARE( features[ 1 ].cluster, -1 );
495+
QCOMPARE( features[ 2 ].cluster, -1 );
496+
}
497+
438498

439499
QGSTEST_MAIN( TestQgsProcessingAlgs )
440500
#include "testqgsprocessingalgs.moc"

0 commit comments

Comments
 (0)
Please sign in to comment.