Skip to content

Commit

Permalink
[processing] Port Delete Duplicate Geometries to c++
Browse files Browse the repository at this point in the history
  • Loading branch information
nyalldawson committed Dec 31, 2019
1 parent 2433ee5 commit 12006de
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 154 deletions.
3 changes: 0 additions & 3 deletions python/plugins/processing/algs/help/qgis.yaml
Expand Up @@ -84,9 +84,6 @@ qgis:delaunaytriangulation: >
qgis:deletecolumn: >
This algorithm takes a vector layer and generates a new one that has the exact same content but without the selected columns.

qgis:deleteduplicategeometries: >
This algorithm finds duplicated geometries and removes them. Attributes are not checked, so in case two features have identical geometries but different attributes, only one of them will be added to the result layer.

qgis:deleteholes: >
This algorithm takes a polygon layer and removes holes in polygons. It creates a new vector layer in which polygons with holes have been replaced by polygons with only their external ring. Attributes are not modified.

Expand Down
148 changes: 0 additions & 148 deletions python/plugins/processing/algs/qgis/DeleteDuplicateGeometries.py

This file was deleted.

2 changes: 0 additions & 2 deletions python/plugins/processing/algs/qgis/QgisAlgorithmProvider.py
Expand Up @@ -39,7 +39,6 @@
from .DefineProjection import DefineProjection
from .Delaunay import Delaunay
from .DeleteColumn import DeleteColumn
from .DeleteDuplicateGeometries import DeleteDuplicateGeometries
from .EliminateSelection import EliminateSelection
from .ExecuteSQL import ExecuteSQL
from .ExportGeometryInfo import ExportGeometryInfo
Expand Down Expand Up @@ -123,7 +122,6 @@ def getAlgs(self):
DefineProjection(),
Delaunay(),
DeleteColumn(),
DeleteDuplicateGeometries(),
EliminateSelection(),
ExecuteSQL(),
ExportGeometryInfo(),
Expand Down
Expand Up @@ -1363,7 +1363,7 @@ tests:
name: expected/split_linez_by_length.shp
type: vector

- algorithm: qgis:deleteduplicategeometries
- algorithm: native:deleteduplicategeometries
name: Delete Duplicates with null geometries
params:
INPUT:
Expand Down
1 change: 1 addition & 0 deletions src/analysis/CMakeLists.txt
Expand Up @@ -40,6 +40,7 @@ SET(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmconstantraster.cpp
processing/qgsalgorithmconvexhull.cpp
processing/qgsalgorithmdbscanclustering.cpp
processing/qgsalgorithmdeleteduplicategeometries.cpp
processing/qgsalgorithmdensifygeometriesbycount.cpp
processing/qgsalgorithmdensifygeometriesbyinterval.cpp
processing/qgsalgorithmdifference.cpp
Expand Down
199 changes: 199 additions & 0 deletions src/analysis/processing/qgsalgorithmdeleteduplicategeometries.cpp
@@ -0,0 +1,199 @@
/***************************************************************************
qgsalgorithmdeleteduplicategeometries.cpp
-----------------------------------------
begin : December 2019
copyright : (C) 2019 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/

/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/

#include "qgsalgorithmdeleteduplicategeometries.h"
#include "qgsvectorlayer.h"
#include "qgsgeometryengine.h"

///@cond PRIVATE

QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
{
return QStringLiteral( "deleteduplicategeometries" );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
{
return QObject::tr( "Delete duplicate geometries" );
}

QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
{
return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
{
return QObject::tr( "Vector general" );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
{
return QStringLiteral( "vectorgeneral" );
}

void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
{
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
{
return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
"so in case two features have identical geometries but different attributes, only one of "
"them will be added to the result layer." );
}

QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
{
return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
}

QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
{
return new QgsDeleteDuplicateGeometriesAlgorithm();
}

bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
{
mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
if ( !mSource )
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );

return true;
}

QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
{
QString destId;
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
mSource->wkbType(), mSource->sourceCrs() ) );
if ( !sink )
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );

QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );

double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
QHash< QgsFeatureId, QgsGeometry > geometries;
QSet< QgsFeatureId > nullGeometryFeatures;
QgsSpatialIndex index;
QgsFeature f;
long current = 0;
while ( it.nextFeature( f ) )
{
if ( feedback->isCanceled() )
break;

if ( !f.hasGeometry() )
{
nullGeometryFeatures.insert( f.id() );
}
else
{
geometries.insert( f.id(), f.geometry() );
index.addFeature( f );
}

// overall this loop takes about 10% of time
current++;
feedback->setProgress( 0.10 * current * step );
}

// start by assuming everything is unique, and chop away at this list
QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
current = 0;
long removed = 0;

for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
{
const QgsFeatureId featureId = it.key();
const QgsGeometry geometry = it.value();

if ( feedback->isCanceled() )
break;

if ( !uniqueFeatures.contains( featureId ) )
{
// feature was already marked as a duplicate
}
else
{
const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );

for ( const QgsFeatureId candidateId : candidates )
{
if ( candidateId == featureId )
continue;

if ( !uniqueFeatures.contains( candidateId ) )
{
// candidate already marked as a duplicate (not sure if this is possible,
// since it would mean the current feature would also have to be a duplicate!
// but let's be safe!)
continue;
}
else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
{
// candidate is a duplicate of feature
uniqueFeatures.remove( candidateId );
removed++;
}
}
}

current++;
feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
}

// now, fetch all the feature attributes for the unique features only
// be super-smart and don't re-fetch geometries
QSet< QgsFeatureId > outputFeatureIds = uniqueFeatures.keys().toSet();
outputFeatureIds.unite( nullGeometryFeatures );
step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();

QgsFeatureRequest request = QgsFeatureRequest().setFilterFids( outputFeatureIds ).setFlags( QgsFeatureRequest::NoGeometry );
it = mSource->getFeatures( request );
current = 0;
while ( it.nextFeature( f ) )
{
if ( feedback->isCanceled() )
break;

// use already fetched geometry
if ( !nullGeometryFeatures.contains( f.id() ) )
{
f.setGeometry( uniqueFeatures.value( f.id() ) );
}
sink->addFeature( f, QgsFeatureSink::FastInsert );

current++;
feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
}

feedback->pushInfo( QObject::tr( "%1 duplicate features removed" ).arg( removed ) );

QVariantMap outputs;
outputs.insert( QStringLiteral( "OUTPUT" ), destId );
outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
return outputs;
}

///@endcond

0 comments on commit 12006de

Please sign in to comment.