|
| 1 | +/*************************************************************************** |
| 2 | + qgsalgorithmdbscanclustering.cpp |
| 3 | + --------------------- |
| 4 | + begin : July 2018 |
| 5 | + copyright : (C) 2018 by Nyall Dawson |
| 6 | + email : nyall dot dawson at gmail dot com |
| 7 | + ***************************************************************************/ |
| 8 | + |
| 9 | +/*************************************************************************** |
| 10 | + * * |
| 11 | + * This program is free software; you can redistribute it and/or modify * |
| 12 | + * it under the terms of the GNU General Public License as published by * |
| 13 | + * the Free Software Foundation; either version 2 of the License, or * |
| 14 | + * (at your option) any later version. * |
| 15 | + * * |
| 16 | + ***************************************************************************/ |
| 17 | + |
| 18 | +#include "qgsalgorithmdbscanclustering.h" |
| 19 | +#include "qgsspatialindexkdbush.h" |
| 20 | +#include <unordered_set> |
| 21 | + |
| 22 | +///@cond PRIVATE |
| 23 | + |
| 24 | +QString QgsDbscanClusteringAlgorithm::name() const |
| 25 | +{ |
| 26 | + return QStringLiteral( "dbscanclustering" ); |
| 27 | +} |
| 28 | + |
| 29 | +QString QgsDbscanClusteringAlgorithm::displayName() const |
| 30 | +{ |
| 31 | + return QObject::tr( "DBSCAN clustering" ); |
| 32 | +} |
| 33 | + |
| 34 | +QString QgsDbscanClusteringAlgorithm::shortDescription() const |
| 35 | +{ |
| 36 | + return QObject::tr( "Clusters point features using a density based scan algorithm." ); |
| 37 | +} |
| 38 | + |
| 39 | +QStringList QgsDbscanClusteringAlgorithm::tags() const |
| 40 | +{ |
| 41 | + return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' ); |
| 42 | +} |
| 43 | + |
| 44 | +QString QgsDbscanClusteringAlgorithm::group() const |
| 45 | +{ |
| 46 | + return QObject::tr( "Vector analysis" ); |
| 47 | +} |
| 48 | + |
| 49 | +QString QgsDbscanClusteringAlgorithm::groupId() const |
| 50 | +{ |
| 51 | + return QStringLiteral( "vectoranalysis" ); |
| 52 | +} |
| 53 | + |
| 54 | +void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & ) |
| 55 | +{ |
| 56 | + addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), |
| 57 | + QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) ); |
| 58 | + addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ), |
| 59 | + QgsProcessingParameterNumber::Integer, 5, false, 1 ) ); |
| 60 | + addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ), |
| 61 | + QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) ); |
| 62 | + |
| 63 | + auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ), |
| 64 | + QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true ); |
| 65 | + dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); |
| 66 | + addParameter( dbscanStarParam.release() ); |
| 67 | + |
| 68 | + auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ), |
| 69 | + QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ); |
| 70 | + fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); |
| 71 | + addParameter( fieldNameParam.release() ); |
| 72 | + |
| 73 | + |
| 74 | + addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) ); |
| 75 | +} |
| 76 | + |
| 77 | +QString QgsDbscanClusteringAlgorithm::shortHelpString() const |
| 78 | +{ |
| 79 | + return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n" |
| 80 | + "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." ); |
| 81 | +} |
| 82 | + |
| 83 | +QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const |
| 84 | +{ |
| 85 | + return new QgsDbscanClusteringAlgorithm(); |
| 86 | +} |
| 87 | + |
| 88 | +struct KDBushDataEqualById |
| 89 | +{ |
| 90 | + bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const |
| 91 | + { |
| 92 | + return a.id == b.id; |
| 93 | + } |
| 94 | +}; |
| 95 | + |
| 96 | +struct KDBushDataHashById |
| 97 | +{ |
| 98 | + std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const |
| 99 | + { |
| 100 | + return std::hash< QgsFeatureId > {}( a.id ); |
| 101 | + } |
| 102 | +}; |
| 103 | + |
| 104 | +bool operator <( const QgsSpatialIndexKDBushData &data, const QgsFeatureId id ) |
| 105 | +{ |
| 106 | + return data.id < id; |
| 107 | +}; |
| 108 | + |
| 109 | +bool operator <( const QgsFeatureId id, const QgsSpatialIndexKDBushData &data ) |
| 110 | +{ |
| 111 | + return id < data.id; |
| 112 | +}; |
| 113 | + |
| 114 | +QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap ¶meters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) |
| 115 | +{ |
| 116 | + std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) ); |
| 117 | + if ( !source ) |
| 118 | + throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) ); |
| 119 | + |
| 120 | + const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) ); |
| 121 | + const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context ); |
| 122 | + const bool borderPointsAreNoise = parameterAsBool( parameters, QStringLiteral( "DBSCAN*" ), context ); |
| 123 | + |
| 124 | + QgsFields outputFields = source->fields(); |
| 125 | + const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context ); |
| 126 | + QgsFields newFields; |
| 127 | + newFields.append( QgsField( clusterFieldName, QVariant::Int ) ); |
| 128 | + outputFields = QgsProcessingUtils::combineFields( outputFields, newFields ); |
| 129 | + |
| 130 | + QString dest; |
| 131 | + std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) ); |
| 132 | + if ( !sink ) |
| 133 | + throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) ); |
| 134 | + |
| 135 | + // build spatial index |
| 136 | + feedback->pushInfo( QObject::tr( "Building spatial index" ) ); |
| 137 | + QgsSpatialIndexKDBush index( *source, feedback ); |
| 138 | + if ( feedback->isCanceled() ) |
| 139 | + return QVariantMap(); |
| 140 | + |
| 141 | + // dbscan! |
| 142 | + feedback->pushInfo( QObject::tr( "Analysing clusters" ) ); |
| 143 | + std::unordered_map< QgsFeatureId, int> idToCluster; |
| 144 | + idToCluster.reserve( index.size() ); |
| 145 | + QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) ); |
| 146 | + const long featureCount = source->featureCount(); |
| 147 | + |
| 148 | + dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback ); |
| 149 | + |
| 150 | + // write clusters |
| 151 | + const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1; |
| 152 | + features = source->getFeatures(); |
| 153 | + int i = 0; |
| 154 | + QgsFeature feat; |
| 155 | + while ( features.nextFeature( feat ) ) |
| 156 | + { |
| 157 | + i++; |
| 158 | + if ( feedback->isCanceled() ) |
| 159 | + { |
| 160 | + break; |
| 161 | + } |
| 162 | + |
| 163 | + feedback->setProgress( 90 + i * writeStep ); |
| 164 | + QgsAttributes attr = feat.attributes(); |
| 165 | + auto cluster = idToCluster.find( feat.id() ); |
| 166 | + if ( cluster != idToCluster.end() ) |
| 167 | + { |
| 168 | + attr << cluster->second; |
| 169 | + } |
| 170 | + else |
| 171 | + { |
| 172 | + attr << QVariant(); |
| 173 | + } |
| 174 | + feat.setAttributes( attr ); |
| 175 | + sink->addFeature( feat, QgsFeatureSink::FastInsert ); |
| 176 | + } |
| 177 | + |
| 178 | + QVariantMap outputs; |
| 179 | + outputs.insert( QStringLiteral( "OUTPUT" ), dest ); |
| 180 | + return outputs; |
| 181 | +} |
| 182 | + |
| 183 | + |
| 184 | +void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize, |
| 185 | + const double eps, |
| 186 | + const bool borderPointsAreNoise, |
| 187 | + const long featureCount, |
| 188 | + QgsFeatureIterator features, |
| 189 | + QgsSpatialIndexKDBush &index, |
| 190 | + std::unordered_map< QgsFeatureId, int> &idToCluster, |
| 191 | + QgsProcessingFeedback *feedback ) |
| 192 | +{ |
| 193 | + const double step = featureCount > 0 ? 90.0 / featureCount : 1; |
| 194 | + |
| 195 | + std::unordered_set< QgsFeatureId > visited; |
| 196 | + visited.reserve( index.size() ); |
| 197 | + |
| 198 | + QgsFeature feat; |
| 199 | + int clusterIdx = 0; |
| 200 | + int i = 0; |
| 201 | + |
| 202 | + while ( features.nextFeature( feat ) ) |
| 203 | + { |
| 204 | + if ( feedback->isCanceled() ) |
| 205 | + { |
| 206 | + break; |
| 207 | + } |
| 208 | + |
| 209 | + if ( !feat.hasGeometry() ) |
| 210 | + { |
| 211 | + feedback->setProgress( ++i * step ); |
| 212 | + continue; |
| 213 | + } |
| 214 | + |
| 215 | + if ( visited.find( feat.id() ) != visited.end() ) |
| 216 | + { |
| 217 | + // already visited! |
| 218 | + continue; |
| 219 | + } |
| 220 | + |
| 221 | + QgsPointXY point; |
| 222 | + if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point ) |
| 223 | + point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) ); |
| 224 | + else |
| 225 | + { |
| 226 | + // not a point geometry |
| 227 | + feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) ); |
| 228 | + feedback->setProgress( ++i * step ); |
| 229 | + continue; |
| 230 | + } |
| 231 | + |
| 232 | + std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within; |
| 233 | + |
| 234 | + if ( minSize > 1 ) |
| 235 | + { |
| 236 | + index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data ) |
| 237 | + { |
| 238 | + within.insert( data ); |
| 239 | + } ); |
| 240 | + if ( within.size() < minSize ) |
| 241 | + continue; |
| 242 | + |
| 243 | + visited.insert( feat.id() ); |
| 244 | + } |
| 245 | + else |
| 246 | + { |
| 247 | + // optimised case for minSize == 1, we can skip the initial check |
| 248 | + within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) ); |
| 249 | + } |
| 250 | + |
| 251 | + // start new cluster |
| 252 | + clusterIdx++; |
| 253 | + idToCluster[ feat.id() ] = clusterIdx; |
| 254 | + feedback->setProgress( ++i * step ); |
| 255 | + |
| 256 | + while ( !within.empty() ) |
| 257 | + { |
| 258 | + if ( feedback->isCanceled() ) |
| 259 | + { |
| 260 | + break; |
| 261 | + } |
| 262 | + |
| 263 | + QgsSpatialIndexKDBushData j = *within.begin(); |
| 264 | + within.erase( within.begin() ); |
| 265 | + |
| 266 | + if ( visited.find( j.id ) != visited.end() ) |
| 267 | + { |
| 268 | + // already visited! |
| 269 | + continue; |
| 270 | + } |
| 271 | + |
| 272 | + visited.insert( j.id ); |
| 273 | + feedback->setProgress( ++i * step ); |
| 274 | + |
| 275 | + // check from this point |
| 276 | + QgsPointXY point2 = j.point(); |
| 277 | + |
| 278 | + std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2; |
| 279 | + index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data ) |
| 280 | + { |
| 281 | + within2.insert( data ); |
| 282 | + } ); |
| 283 | + if ( within2.size() >= minSize ) |
| 284 | + { |
| 285 | + // expand neighbourhood |
| 286 | + std::set_difference( within2.begin(), |
| 287 | + within2.end(), |
| 288 | + visited.begin(), visited.end(), |
| 289 | + std::inserter( within, within.end() ) ); |
| 290 | + } |
| 291 | + if ( !borderPointsAreNoise || within2.size() >= minSize ) |
| 292 | + { |
| 293 | + idToCluster[ j.id ] = clusterIdx; |
| 294 | + } |
| 295 | + } |
| 296 | + } |
| 297 | +} |
| 298 | + |
| 299 | +///@endcond |
| 300 | + |
| 301 | + |
0 commit comments