Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
API: Added support for bulk loading of spatial index
This is much faster way of initializing a spatial index. From python it is as simple as
>>> index = QgsSpatialIndex( layer.getFeatures() )

From a simple test with 50K points in a memory layer:
- bulk loading ~ 100 ms
- inserting features ~ 600 ms

The index tree should be in theory also better constructed and may result in faster lookups.
  • Loading branch information
wonder-sk committed Dec 3, 2014
1 parent 94a276e commit b278ea1
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 4 deletions.
7 changes: 7 additions & 0 deletions python/core/qgsspatialindex.sip
Expand Up @@ -12,6 +12,13 @@ class QgsSpatialIndex
/** constructor - creates R-tree */
QgsSpatialIndex();

/** constructor - creates R-tree and bulk loads it with features from the iterator.
* This is much faster approach than creating an empty index and then inserting features one by one.
*
* @note added in 2.8
*/
explicit QgsSpatialIndex( const QgsFeatureIterator& fi );

/** copy constructor */
QgsSpatialIndex( const QgsSpatialIndex& other );

Expand Down
78 changes: 75 additions & 3 deletions src/core/qgsspatialindex.cpp
Expand Up @@ -17,6 +17,7 @@

#include "qgsgeometry.h"
#include "qgsfeature.h"
#include "qgsfeatureiterator.h"
#include "qgsrectangle.h"
#include "qgslogger.h"

Expand Down Expand Up @@ -73,6 +74,61 @@ class QgsSpatialIndexCopyVisitor : public SpatialIndex::IVisitor
};


/** Utility class for bulk loading of R-trees. Not a part of public API. */
class QgsFeatureIteratorDataStream : public IDataStream
{
public:
//! constructor - needs to load all data to a vector for later access when bulk loading
QgsFeatureIteratorDataStream( const QgsFeatureIterator& fi ) : mFi( fi ), mNextData( 0 )
{
readNextEntry();
}

~QgsFeatureIteratorDataStream()
{
delete mNextData;
}

//! returns a pointer to the next entry in the stream or 0 at the end of the stream.
virtual IData* getNext()
{
RTree::Data* ret = mNextData;
mNextData = 0;
readNextEntry();
return ret;
}

//! returns true if there are more items in the stream.
virtual bool hasNext() { return mNextData != 0; }

//! returns the total number of entries available in the stream.
virtual uint32_t size() { Q_ASSERT( 0 && "not available" ); return 0; }

//! sets the stream pointer to the first entry, if possible.
virtual void rewind() { Q_ASSERT( 0 && "not available" ); }

protected:
void readNextEntry()
{
QgsFeature f;
SpatialIndex::Region r;
QgsFeatureId id;
while ( mFi.nextFeature( f ) )
{
if ( QgsSpatialIndex::featureInfo( f, r, id ) )
{
mNextData = new RTree::Data( 0, 0, r, id );
return;
}
}
}

private:
QgsFeatureIterator mFi;
RTree::Data* mNextData;
};


/** Data of spatial index that may be implicitly shared */
class QgsSpatialIndexData : public QSharedData
{
Expand All @@ -82,6 +138,12 @@ class QgsSpatialIndexData : public QSharedData
initTree();
}

explicit QgsSpatialIndexData( const QgsFeatureIterator& fi )
{
QgsFeatureIteratorDataStream fids( fi );
initTree( &fids );
}

QgsSpatialIndexData( const QgsSpatialIndexData& other )
: QSharedData( other )
{
Expand All @@ -101,7 +163,7 @@ class QgsSpatialIndexData : public QSharedData
delete mStorage;
}

void initTree()
void initTree( IDataStream* inputStream = 0 )
{
// for now only memory manager
mStorage = StorageManager::createNewMemoryStorageManager();
Expand All @@ -115,8 +177,13 @@ class QgsSpatialIndexData : public QSharedData

// create R-tree
SpatialIndex::id_type indexId;
mRTree = RTree::createNewRTree( *mStorage, fillFactor, indexCapacity,
leafCapacity, dimension, variant, indexId );

if ( inputStream )
mRTree = RTree::createAndBulkLoadNewRTree( RTree::BLM_STR, *inputStream, *mStorage, fillFactor, indexCapacity,
leafCapacity, dimension, variant, indexId );
else
mRTree = RTree::createNewRTree( *mStorage, fillFactor, indexCapacity,
leafCapacity, dimension, variant, indexId );
}

/** storage manager */
Expand All @@ -134,6 +201,11 @@ QgsSpatialIndex::QgsSpatialIndex()
d = new QgsSpatialIndexData;
}

QgsSpatialIndex::QgsSpatialIndex( const QgsFeatureIterator& fi )
{
d = new QgsSpatialIndexData( fi );
}

QgsSpatialIndex::QgsSpatialIndex( const QgsSpatialIndex& other )
: d( other.d )
{
Expand Down
12 changes: 11 additions & 1 deletion src/core/qgsspatialindex.h
Expand Up @@ -40,6 +40,7 @@ class QgsPoint;
#include "qgsfeature.h"

class QgsSpatialIndexData;
class QgsFeatureIterator;

class CORE_EXPORT QgsSpatialIndex
{
Expand All @@ -51,6 +52,13 @@ class CORE_EXPORT QgsSpatialIndex
/** constructor - creates R-tree */
QgsSpatialIndex();

/** constructor - creates R-tree and bulk loads it with features from the iterator.
* This is much faster approach than creating an empty index and then inserting features one by one.
*
* @note added in 2.8
*/
explicit QgsSpatialIndex( const QgsFeatureIterator& fi );

/** copy constructor */
QgsSpatialIndex( const QgsSpatialIndex& other );

Expand Down Expand Up @@ -86,7 +94,9 @@ class CORE_EXPORT QgsSpatialIndex
// @note not available in python bindings
static SpatialIndex::Region rectToRegion( QgsRectangle rect );
// @note not available in python bindings
bool featureInfo( const QgsFeature& f, SpatialIndex::Region& r, QgsFeatureId &id );
static bool featureInfo( const QgsFeature& f, SpatialIndex::Region& r, QgsFeatureId &id );

friend class QgsFeatureIteratorDataStream; // for access to featureInfo()

private:

Expand Down
74 changes: 74 additions & 0 deletions tests/src/core/testqgsspatialindex.cpp
Expand Up @@ -18,8 +18,11 @@
#include <QString>
#include <QObject>

#include <qgsapplication.h>
#include <qgsgeometry.h>
#include <qgsspatialindex.h>
#include <qgsvectordataprovider.h>
#include <qgsvectorlayer.h>


#if QT_VERSION < 0x40701
Expand Down Expand Up @@ -58,6 +61,16 @@ class TestQgsSpatialIndex : public QObject

private slots:

void initTestCase()
{
QgsApplication::init();
QgsApplication::initQgis();
}
void cleanupTestCase()
{
QgsApplication::exitQgis();
}

void testQuery()
{
QgsSpatialIndex index;
Expand Down Expand Up @@ -132,6 +145,67 @@ class TestQgsSpatialIndex : public QObject
}
}

void benchmarkBulkLoad()
{
QgsVectorLayer* vl = new QgsVectorLayer( "Point", "x", "memory" );
for ( int i = 0; i < 100; ++i )
{
QgsFeatureList flist;
for ( int k = 0; k < 500; ++k )
{
QgsFeature f( i*1000 + k );
f.setGeometry( QgsGeometry::fromPoint( QgsPoint( i / 10, i % 10 ) ) );
flist << f;
}
vl->dataProvider()->addFeatures( flist );
}

QTime t;
QgsSpatialIndex* indexBulk;
QgsSpatialIndex* indexInsert;

t.start();
{
QgsFeature f;
QgsFeatureIterator fi = vl->getFeatures();
while ( fi.nextFeature( f ) )
;
}
qDebug( "iter only: %d ms", t.elapsed() );

t.start();
{
QgsFeatureIterator fi = vl->getFeatures();
indexBulk = new QgsSpatialIndex( fi );
}
qDebug( "bulk load: %d ms", t.elapsed() );

t.start();
{
QgsFeatureIterator fi = vl->getFeatures();
QgsFeature f;
indexInsert = new QgsSpatialIndex;
while ( fi.nextFeature( f ) )
indexInsert->insertFeature( f );
}
qDebug( "insert: %d ms", t.elapsed() );

// test whether a query will give us the same results
QgsRectangle rect( 4.9, 4.9, 5.1, 5.1 );
QList<QgsFeatureId> resBulk = indexBulk->intersects( rect );
QList<QgsFeatureId> resInsert = indexInsert->intersects( rect );

QCOMPARE( resBulk.count(), 500 );
QCOMPARE( resInsert.count(), 500 );
// the trees are built differently so they will give also different order of fids
qSort( resBulk );
qSort( resInsert );
QCOMPARE( resBulk, resInsert );

delete indexBulk;
delete indexInsert;
}

};

QTEST_MAIN( TestQgsSpatialIndex )
Expand Down

1 comment on commit b278ea1

@NathanW2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

Please sign in to comment.