Skip to content

Commit

Permalink
[feature][processing] A brand new spatiotemporal ST-DBSCAN clustering…
Browse files Browse the repository at this point in the history
… algorithm
  • Loading branch information
nirvn authored and nyalldawson committed Jul 26, 2021
1 parent 402cf11 commit f68ef8f
Show file tree
Hide file tree
Showing 9 changed files with 390 additions and 20 deletions.
@@ -0,0 +1,104 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ stdbscan_multiple_clusters.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>

<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.8">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>9</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:date>2017/08/13</ogr:date>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>2</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.1">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>2</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:date>2017/09/13</ogr:date>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>2</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.0">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>1</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:date xsi:nil="true"/>
<ogr:CLUSTER_ID xsi:nil="true"/>
<ogr:CLUSTER_SIZE xsi:nil="true"/>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.3">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>4</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:date>2001/01/01</ogr:date>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>1</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.2">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>3</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:date>2005/09/13</ogr:date>
<ogr:CLUSTER_ID>3</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>1</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.5">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>6</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:date xsi:nil="true"/>
<ogr:CLUSTER_ID xsi:nil="true"/>
<ogr:CLUSTER_SIZE xsi:nil="true"/>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.4">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>5</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:date>2014/03/13</ogr:date>
<ogr:CLUSTER_ID>4</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>3</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.7">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>8</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:date>2011/09/13</ogr:date>
<ogr:CLUSTER_ID>4</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>3</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
<gml:featureMember>
<ogr:stdbscan_multiple_clusters fid="points.6">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>7</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:date>2010/10/10</ogr:date>
<ogr:CLUSTER_ID>4</ogr:CLUSTER_ID>
<ogr:CLUSTER_SIZE>3</ogr:CLUSTER_SIZE>
</ogr:stdbscan_multiple_clusters>
</gml:featureMember>
</ogr:FeatureCollection>
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="stdbscan_multiple_clusters" type="ogr:stdbscan_multiple_clusters_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="stdbscan_multiple_clusters_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:long">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:long">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="date" nillable="true" minOccurs="0" maxOccurs="1" type="xs:date">
</xs:element>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_SIZE" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>
Expand Up @@ -878,6 +878,23 @@ tests:
name: expected/dbscan_multiple_clusters.gml
type: vector

- algorithm: native:stdbscanclustering
name: ST-DBScan multiple clusters
params:
DATETIME_FIELD: date
DBSCAN*: false
EPS: 5.0
EPS2: 1000.0
FIELD_NAME: CLUSTER_ID
INPUT:
name: custom/points_with_date.shp
type: vector
MIN_SIZE: 1
results:
OUTPUT:
name: expected/stdbscan_multiple_clusters.gml
type: vector

- algorithm: qgis:rastersampling
name: Single band raster
params:
Expand Down
1 change: 1 addition & 0 deletions src/analysis/CMakeLists.txt
Expand Up @@ -198,6 +198,7 @@ set(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmsplitlinesbylength.cpp
processing/qgsalgorithmsplitvectorlayer.cpp
processing/qgsalgorithmsplitwithlines.cpp
processing/qgsalgorithmstdbscanclustering.cpp
processing/qgsalgorithmstringconcatenation.cpp
processing/qgsalgorithmswapxy.cpp
processing/qgsalgorithmsubdivide.cpp
Expand Down
55 changes: 43 additions & 12 deletions src/analysis/processing/qgsalgorithmdbscanclustering.cpp
Expand Up @@ -38,7 +38,7 @@ QString QgsDbscanClusteringAlgorithm::shortDescription() const

QStringList QgsDbscanClusteringAlgorithm::tags() const
{
return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' );
}

QString QgsDbscanClusteringAlgorithm::group() const
Expand Down Expand Up @@ -113,7 +113,8 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );

const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
const double eps1 = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
const double eps2 = parameterAsDouble( parameters, QStringLiteral( "EPS2" ), context ) * 24 * 60 * 60;
const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );

QgsFields outputFields = source->fields();
Expand All @@ -135,13 +136,31 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
if ( feedback->isCanceled() )
return QVariantMap();

// dbscan!
std::unordered_map< QgsFeatureId, QDateTime> idToDateTime;
const QString dateTimeFieldName = parameterAsString( parameters, QStringLiteral( "DATETIME_FIELD" ), context );
if ( !dateTimeFieldName.isEmpty() )
{
const int dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName );
if ( dateTimefieldIndex == -1 )
throw QgsProcessingException( QObject::tr( "Datetime field missing" ) );

// fetch temporal values
feedback->pushInfo( QObject::tr( "Fetching temporal values" ) );
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex ), QgsProcessingFeatureSource::FlagSkipGeometryValidityChecks );
QgsFeature feature;
while ( features.nextFeature( feature ) )
{
idToDateTime[ feature.id() ] = feature.attributes().at( dateTimefieldIndex ).toDateTime();
}
}

// stdbscan!
feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
std::unordered_map< QgsFeatureId, int> idToCluster;
idToCluster.reserve( index.size() );
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
const long featureCount = source->featureCount();
dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback );
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback );

// cluster size
std::unordered_map< int, int> clusterSize;
Expand Down Expand Up @@ -182,14 +201,15 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
return outputs;
}


void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
const double eps,
void QgsDbscanClusteringAlgorithm::stdbscan( const std::size_t minSize,
const double eps1,
const double eps2,
const bool borderPointsAreNoise,
const long featureCount,
QgsFeatureIterator features,
QgsSpatialIndexKDBush &index,
std::unordered_map< QgsFeatureId, int> &idToCluster,
std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime,
QgsProcessingFeedback *feedback )
{
const double step = featureCount > 0 ? 90.0 / featureCount : 1;
Expand Down Expand Up @@ -231,13 +251,22 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
continue;
}

if ( !idToDateTime.empty() && !idToDateTime[ feat.id() ].isValid() )
{
// missing datetime value
feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
feedback->setProgress( ++i * step );
continue;
}

std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;

if ( minSize > 1 )
{
index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
{
within.insert( data );
if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ pointId ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
within.insert( data );
} );
if ( within.size() < minSize )
continue;
Expand Down Expand Up @@ -278,10 +307,12 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
QgsPointXY point2 = j.point();

std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
{
within2.insert( data );
if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ point2Id ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
within2.insert( data );
} );

if ( within2.size() >= minSize )
{
// expand neighbourhood
Expand Down
20 changes: 12 additions & 8 deletions src/analysis/processing/qgsalgorithmdbscanclustering.h
Expand Up @@ -54,14 +54,18 @@ class ANALYSIS_EXPORT QgsDbscanClusteringAlgorithm : public QgsProcessingAlgorit
QVariantMap processAlgorithm( const QVariantMap &parameters,
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
private:
static void dbscan( std::size_t minSize,
double eps,
bool borderPointsAreNoise,
long featureCount,
QgsFeatureIterator features,
QgsSpatialIndexKDBush &index,
std::unordered_map< QgsFeatureId, int> &idToCluster,
QgsProcessingFeedback *feedback );

static void stdbscan( std::size_t minSize,
const double eps1,
const double eps2,
bool borderPointsAreNoise,
long featureCount,
QgsFeatureIterator features,
QgsSpatialIndexKDBush &index,
std::unordered_map< QgsFeatureId, int> &idToCluster,
std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime,
QgsProcessingFeedback *feedback );

};

///@endcond PRIVATE
Expand Down

0 comments on commit f68ef8f

Please sign in to comment.