Skip to content

Commit c21fe59

Browse files
committedJul 16, 2018
[FEATURE][processing] New dbscan spatial clustering algorithm
Implements an optimised DBSCAN density based scanning cluster approach for clustering 2d point features.
1 parent e9f85f5 commit c21fe59

File tree

4 files changed

+375
-0
lines changed

4 files changed

+375
-0
lines changed
 

‎src/analysis/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ SET(QGIS_ANALYSIS_SRCS
2727
processing/qgsalgorithmcentroid.cpp
2828
processing/qgsalgorithmclip.cpp
2929
processing/qgsalgorithmconvexhull.cpp
30+
processing/qgsalgorithmdbscanclustering.cpp
3031
processing/qgsalgorithmdifference.cpp
3132
processing/qgsalgorithmdissolve.cpp
3233
processing/qgsalgorithmdropgeometry.cpp
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
/***************************************************************************
2+
qgsalgorithmdbscanclustering.cpp
3+
---------------------
4+
begin : July 2018
5+
copyright : (C) 2018 by Nyall Dawson
6+
email : nyall dot dawson at gmail dot com
7+
***************************************************************************/
8+
9+
/***************************************************************************
10+
* *
11+
* This program is free software; you can redistribute it and/or modify *
12+
* it under the terms of the GNU General Public License as published by *
13+
* the Free Software Foundation; either version 2 of the License, or *
14+
* (at your option) any later version. *
15+
* *
16+
***************************************************************************/
17+
18+
#include "qgsalgorithmdbscanclustering.h"
19+
#include "qgsspatialindexkdbush.h"
20+
#include <unordered_set>
21+
22+
///@cond PRIVATE
23+
24+
QString QgsDbscanClusteringAlgorithm::name() const
25+
{
26+
return QStringLiteral( "dbscanclustering" );
27+
}
28+
29+
QString QgsDbscanClusteringAlgorithm::displayName() const
30+
{
31+
return QObject::tr( "DBSCAN clustering" );
32+
}
33+
34+
QString QgsDbscanClusteringAlgorithm::shortDescription() const
35+
{
36+
return QObject::tr( "Clusters point features using a density based scan algorithm." );
37+
}
38+
39+
QStringList QgsDbscanClusteringAlgorithm::tags() const
40+
{
41+
return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
42+
}
43+
44+
QString QgsDbscanClusteringAlgorithm::group() const
45+
{
46+
return QObject::tr( "Vector analysis" );
47+
}
48+
49+
QString QgsDbscanClusteringAlgorithm::groupId() const
50+
{
51+
return QStringLiteral( "vectoranalysis" );
52+
}
53+
54+
void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55+
{
56+
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57+
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58+
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
59+
QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
60+
addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61+
QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62+
63+
auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64+
QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65+
dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66+
addParameter( dbscanStarParam.release() );
67+
68+
auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69+
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70+
fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71+
addParameter( fieldNameParam.release() );
72+
73+
74+
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
75+
}
76+
77+
QString QgsDbscanClusteringAlgorithm::shortHelpString() const
78+
{
79+
return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
80+
"The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
81+
}
82+
83+
QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
84+
{
85+
return new QgsDbscanClusteringAlgorithm();
86+
}
87+
88+
struct KDBushDataEqualById
89+
{
90+
bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
91+
{
92+
return a.id == b.id;
93+
}
94+
};
95+
96+
struct KDBushDataHashById
97+
{
98+
std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
99+
{
100+
return std::hash< QgsFeatureId > {}( a.id );
101+
}
102+
};
103+
104+
bool operator <( const QgsSpatialIndexKDBushData &data, const QgsFeatureId id )
105+
{
106+
return data.id < id;
107+
};
108+
109+
bool operator <( const QgsFeatureId id, const QgsSpatialIndexKDBushData &data )
110+
{
111+
return id < data.id;
112+
};
113+
114+
QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
115+
{
116+
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
117+
if ( !source )
118+
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
119+
120+
const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
121+
const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
122+
const bool borderPointsAreNoise = parameterAsBool( parameters, QStringLiteral( "DBSCAN*" ), context );
123+
124+
QgsFields outputFields = source->fields();
125+
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
126+
QgsFields newFields;
127+
newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
128+
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
129+
130+
QString dest;
131+
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
132+
if ( !sink )
133+
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
134+
135+
// build spatial index
136+
feedback->pushInfo( QObject::tr( "Building spatial index" ) );
137+
QgsSpatialIndexKDBush index( *source, feedback );
138+
if ( feedback->isCanceled() )
139+
return QVariantMap();
140+
141+
// dbscan!
142+
feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
143+
std::unordered_map< QgsFeatureId, int> idToCluster;
144+
idToCluster.reserve( index.size() );
145+
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
146+
const long featureCount = source->featureCount();
147+
148+
dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback );
149+
150+
// write clusters
151+
const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
152+
features = source->getFeatures();
153+
int i = 0;
154+
QgsFeature feat;
155+
while ( features.nextFeature( feat ) )
156+
{
157+
i++;
158+
if ( feedback->isCanceled() )
159+
{
160+
break;
161+
}
162+
163+
feedback->setProgress( 90 + i * writeStep );
164+
QgsAttributes attr = feat.attributes();
165+
auto cluster = idToCluster.find( feat.id() );
166+
if ( cluster != idToCluster.end() )
167+
{
168+
attr << cluster->second;
169+
}
170+
else
171+
{
172+
attr << QVariant();
173+
}
174+
feat.setAttributes( attr );
175+
sink->addFeature( feat, QgsFeatureSink::FastInsert );
176+
}
177+
178+
QVariantMap outputs;
179+
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
180+
return outputs;
181+
}
182+
183+
184+
void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
185+
const double eps,
186+
const bool borderPointsAreNoise,
187+
const long featureCount,
188+
QgsFeatureIterator features,
189+
QgsSpatialIndexKDBush &index,
190+
std::unordered_map< QgsFeatureId, int> &idToCluster,
191+
QgsProcessingFeedback *feedback )
192+
{
193+
const double step = featureCount > 0 ? 90.0 / featureCount : 1;
194+
195+
std::unordered_set< QgsFeatureId > visited;
196+
visited.reserve( index.size() );
197+
198+
QgsFeature feat;
199+
int clusterIdx = 0;
200+
int i = 0;
201+
202+
while ( features.nextFeature( feat ) )
203+
{
204+
if ( feedback->isCanceled() )
205+
{
206+
break;
207+
}
208+
209+
if ( !feat.hasGeometry() )
210+
{
211+
feedback->setProgress( ++i * step );
212+
continue;
213+
}
214+
215+
if ( visited.find( feat.id() ) != visited.end() )
216+
{
217+
// already visited!
218+
continue;
219+
}
220+
221+
QgsPointXY point;
222+
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point )
223+
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
224+
else
225+
{
226+
// not a point geometry
227+
feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
228+
feedback->setProgress( ++i * step );
229+
continue;
230+
}
231+
232+
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
233+
234+
if ( minSize > 1 )
235+
{
236+
index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
237+
{
238+
within.insert( data );
239+
} );
240+
if ( within.size() < minSize )
241+
continue;
242+
243+
visited.insert( feat.id() );
244+
}
245+
else
246+
{
247+
// optimised case for minSize == 1, we can skip the initial check
248+
within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
249+
}
250+
251+
// start new cluster
252+
clusterIdx++;
253+
idToCluster[ feat.id() ] = clusterIdx;
254+
feedback->setProgress( ++i * step );
255+
256+
while ( !within.empty() )
257+
{
258+
if ( feedback->isCanceled() )
259+
{
260+
break;
261+
}
262+
263+
QgsSpatialIndexKDBushData j = *within.begin();
264+
within.erase( within.begin() );
265+
266+
if ( visited.find( j.id ) != visited.end() )
267+
{
268+
// already visited!
269+
continue;
270+
}
271+
272+
visited.insert( j.id );
273+
feedback->setProgress( ++i * step );
274+
275+
// check from this point
276+
QgsPointXY point2 = j.point();
277+
278+
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
279+
index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
280+
{
281+
within2.insert( data );
282+
} );
283+
if ( within2.size() >= minSize )
284+
{
285+
// expand neighbourhood
286+
std::set_difference( within2.begin(),
287+
within2.end(),
288+
visited.begin(), visited.end(),
289+
std::inserter( within, within.end() ) );
290+
}
291+
if ( !borderPointsAreNoise || within2.size() >= minSize )
292+
{
293+
idToCluster[ j.id ] = clusterIdx;
294+
}
295+
}
296+
}
297+
}
298+
299+
///@endcond
300+
301+
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/***************************************************************************
2+
qgsalgorithmdbscanclustering.h
3+
---------------------
4+
begin : July 2018
5+
copyright : (C) 2018 by Nyall Dawson
6+
email : nyall dot dawson at gmail dot com
7+
***************************************************************************/
8+
9+
/***************************************************************************
10+
* *
11+
* This program is free software; you can redistribute it and/or modify *
12+
* it under the terms of the GNU General Public License as published by *
13+
* the Free Software Foundation; either version 2 of the License, or *
14+
* (at your option) any later version. *
15+
* *
16+
***************************************************************************/
17+
18+
#ifndef QGSALGORITHMDBSCANCLUSTERING_H
19+
#define QGSALGORITHMDBSCANCLUSTERING_H
20+
21+
#define SIP_NO_FILE
22+
23+
#include "qgis.h"
24+
#include "qgis_analysis.h"
25+
#include "qgsprocessingalgorithm.h"
26+
#include <unordered_map>
27+
28+
class QgsSpatialIndexKDBush;
29+
30+
///@cond PRIVATE
31+
32+
33+
/**
34+
* Native k-means clustering algorithm.
35+
*/
36+
class ANALYSIS_EXPORT QgsDbscanClusteringAlgorithm : public QgsProcessingAlgorithm
37+
{
38+
39+
public:
40+
41+
QgsDbscanClusteringAlgorithm() = default;
42+
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
43+
QString name() const override;
44+
QString displayName() const override;
45+
QString shortDescription() const override;
46+
QStringList tags() const override;
47+
QString group() const override;
48+
QString groupId() const override;
49+
QString shortHelpString() const override;
50+
QgsDbscanClusteringAlgorithm *createInstance() const override SIP_FACTORY;
51+
52+
protected:
53+
54+
QVariantMap processAlgorithm( const QVariantMap &parameters,
55+
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
56+
private:
57+
static void dbscan( std::size_t minSize,
58+
double eps,
59+
bool borderPointsAreNoise,
60+
long featureCount,
61+
QgsFeatureIterator features,
62+
QgsSpatialIndexKDBush &index,
63+
std::unordered_map< QgsFeatureId, int> &idToCluster,
64+
QgsProcessingFeedback *feedback );
65+
};
66+
67+
///@endcond PRIVATE
68+
69+
#endif // QGSALGORITHMDBSCANCLUSTERING_H
70+
71+

‎src/analysis/processing/qgsnativealgorithms.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "qgsalgorithmcentroid.h"
2525
#include "qgsalgorithmclip.h"
2626
#include "qgsalgorithmconvexhull.h"
27+
#include "qgsalgorithmdbscanclustering.h"
2728
#include "qgsalgorithmdifference.h"
2829
#include "qgsalgorithmdissolve.h"
2930
#include "qgsalgorithmdropgeometry.h"
@@ -133,6 +134,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
133134
addAlgorithm( new QgsClipAlgorithm() );
134135
addAlgorithm( new QgsCollectAlgorithm() );
135136
addAlgorithm( new QgsConvexHullAlgorithm() );
137+
addAlgorithm( new QgsDbscanClusteringAlgorithm() );
136138
addAlgorithm( new QgsDifferenceAlgorithm() );
137139
addAlgorithm( new QgsDissolveAlgorithm() );
138140
addAlgorithm( new QgsDropGeometryAlgorithm() );

0 commit comments

Comments
 (0)
Please sign in to comment.