Skip to content

Commit

Permalink
[FEATURE][processing] K Means clustering algorithm
Browse files Browse the repository at this point in the history
Adds a native k-means clustering algorithm.

Based on a port of PostGIS' ST_ClusterKMeans function, this
new algorithm adds a new cluster ID field to a set of input
features identify the feature's cluster based on the k-means
clustering approach. If non-point geometries are used as input,
the clustering is based off the centroid of the input geometries.
  • Loading branch information
nyalldawson committed Jun 28, 2018
1 parent 16f3781 commit 34b9d39
Show file tree
Hide file tree
Showing 16 changed files with 1,032 additions and 1 deletion.
Binary file not shown.
55 changes: 55 additions & 0 deletions python/plugins/processing/tests/testdata/expected/kmeans_lines.gml
@@ -0,0 +1,55 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>

<gml:featureMember>
<ogr:kmeans_lines fid="lines.0">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.1">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.2">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.3">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.4">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.5">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.6">
<ogr:CLUSTER_ID xsi:nil="true"/>
</ogr:kmeans_lines>
</gml:featureMember>
</ogr:FeatureCollection>
30 changes: 30 additions & 0 deletions python/plugins/processing/tests/testdata/expected/kmeans_lines.xsd
@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_lines_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>
@@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>

<gml:featureMember>
<ogr:kmeans_points_3 fid="points.0">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>1</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.1">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>2</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.2">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>3</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.3">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>4</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.4">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>5</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.5">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>6</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.6">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>7</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.7">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>8</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.8">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>9</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
</ogr:FeatureCollection>
@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_points_3_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>
@@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>

<gml:featureMember>
<ogr:kmeans_points_5 fid="points.0">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>1</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.1">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>2</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.2">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>3</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.3">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>4</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.4">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>5</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.5">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>6</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.6">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>7</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.7">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>8</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.8">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>9</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
</ogr:FeatureCollection>
@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_points_5_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>

0 comments on commit 34b9d39

Please sign in to comment.