Skip to content

Commit 6160e06

Browse files
authoredJun 4, 2018
Merge pull request #7167 from rouault/gpkg_perf_improvements
[OGR provider] Performance improvements on huge GeoPackage database (fixes #18402)
2 parents 8307264 + 2502a33 commit 6160e06

File tree

4 files changed

+242
-7
lines changed

4 files changed

+242
-7
lines changed
 

‎src/providers/ogr/qgsogrdataitems.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ QList<QgsOgrDbLayerInfo *> QgsOgrLayerItem::subLayers( const QString &path, cons
172172
{
173173
// Collect mixed-geom layers
174174
QMultiMap<int, QStringList> subLayersMap;
175-
const QStringList subLayersList( layer.dataProvider()->subLayers( ) );
175+
QgsOgrProvider *ogrProvider = qobject_cast<QgsOgrProvider *>( layer.dataProvider() );
176+
const QStringList subLayersList( ogrProvider->subLayersWithoutFeatureCount( ) );
176177
QMap< QString, int > mapLayerNameToCount;
177178
bool uniqueNames = true;
178179
int prevIdx = -1;

‎src/providers/ogr/qgsogrprovider.cpp

Lines changed: 199 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -705,7 +705,7 @@ static OGRwkbGeometryType ogrWkbGeometryTypeFromName( const QString &typeName )
705705
return wkbUnknown;
706706
}
707707

708-
void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer ) const
708+
void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer, bool withFeatureCount ) const
709709
{
710710
QgsOgrFeatureDefn &fdef = layer->GetLayerDefn();
711711
// Get first column name,
@@ -731,7 +731,7 @@ void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer
731731

732732
if ( wkbFlatten( layerGeomType ) != wkbUnknown )
733733
{
734-
int layerFeatureCount = layer->GetFeatureCount();
734+
int layerFeatureCount = withFeatureCount ? layer->GetApproxFeatureCount() : -1;
735735

736736
QString geom = ogrWkbGeometryTypeName( layerGeomType );
737737

@@ -822,6 +822,16 @@ void QgsOgrProvider::addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer
822822
}
823823

824824
QStringList QgsOgrProvider::subLayers() const
825+
{
826+
return _subLayers( true );
827+
}
828+
829+
QStringList QgsOgrProvider::subLayersWithoutFeatureCount() const
830+
{
831+
return _subLayers( false );
832+
}
833+
834+
QStringList QgsOgrProvider::_subLayers( bool withFeatureCount ) const
825835
{
826836
if ( !mValid )
827837
{
@@ -833,7 +843,7 @@ QStringList QgsOgrProvider::subLayers() const
833843

834844
if ( mOgrLayer && ( mIsSubLayer || layerCount() == 1 ) )
835845
{
836-
addSubLayerDetailsToSubLayerList( mLayerIndex, mOgrLayer );
846+
addSubLayerDetailsToSubLayerList( mLayerIndex, mOgrLayer, withFeatureCount );
837847
}
838848
else
839849
{
@@ -856,7 +866,7 @@ QStringList QgsOgrProvider::subLayers() const
856866
if ( !layer )
857867
continue;
858868

859-
addSubLayerDetailsToSubLayerList( i, layer.get() );
869+
addSubLayerDetailsToSubLayerList( i, layer.get(), withFeatureCount );
860870
if ( firstLayer == nullptr )
861871
{
862872
firstLayer = std::move( layer );
@@ -3838,7 +3848,7 @@ void QgsOgrProvider::recalculateFeatureCount()
38383848
// so we remove it if there's any and then put it back
38393849
if ( mOgrGeometryTypeFilter == wkbUnknown )
38403850
{
3841-
mFeaturesCounted = mOgrLayer->GetFeatureCount( true );
3851+
mFeaturesCounted = mOgrLayer->GetApproxFeatureCount();
38423852
if ( mFeaturesCounted == -1 )
38433853
{
38443854
mFeaturesCounted = QgsVectorDataProvider::UnknownCount;
@@ -5131,9 +5141,193 @@ GIntBig QgsOgrLayer::GetFeatureCount( bool force )
51315141
return OGR_L_GetFeatureCount( hLayer, force );
51325142
}
51335143

5144+
GIntBig QgsOgrLayer::GetApproxFeatureCount()
5145+
{
5146+
QMutexLocker locker( &ds->mutex );
5147+
5148+
// OGR_L_GetFeatureCount() can be super slow on huge geopackage files
5149+
// so implement some approximation strategy that has reasonable runtime.
5150+
QString driverName = GDALGetDriverShortName( GDALGetDatasetDriver( ds->hDS ) );
5151+
if ( driverName == QLatin1String( "GPKG" ) )
5152+
{
5153+
CPLPushErrorHandler( CPLQuietErrorHandler );
5154+
OGRLayerH hSqlLayer = GDALDatasetExecuteSQL(
5155+
ds->hDS, "SELECT 1 FROM gpkg_ogr_contents LIMIT 0", nullptr, nullptr );
5156+
CPLPopErrorHandler();
5157+
if ( hSqlLayer )
5158+
{
5159+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5160+
return OGR_L_GetFeatureCount( hLayer, TRUE );
5161+
}
5162+
5163+
// Enumerate features up to a limit of 100000.
5164+
const GIntBig nLimit = CPLAtoGIntBig(
5165+
CPLGetConfigOption( "QGIS_GPKG_FC_THRESHOLD", "100000" ) );
5166+
QByteArray layerName = OGR_L_GetName( hLayer );
5167+
QByteArray sql( "SELECT COUNT(*) FROM (SELECT 1 FROM " );
5168+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5169+
sql += " LIMIT ";
5170+
sql += CPLSPrintf( CPL_FRMT_GIB, nLimit );
5171+
sql += ")";
5172+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5173+
GIntBig res = -1;
5174+
if ( hSqlLayer )
5175+
{
5176+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5177+
if ( fet )
5178+
{
5179+
res = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5180+
}
5181+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5182+
}
5183+
if ( res >= 0 && res < nLimit )
5184+
{
5185+
// Less than 100000 features ? This is the final count
5186+
return res;
5187+
}
5188+
if ( res == nLimit )
5189+
{
5190+
// If we reach the threshold, then use the min and max values of the rowid
5191+
// hoping there are not a lot of holes.
5192+
// Do it in 2 separate SQL queries otherwise SQLite apparently does a
5193+
// full table scan...
5194+
sql = "SELECT MAX(ROWID) FROM ";
5195+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5196+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5197+
GIntBig maxrowid = -1;
5198+
if ( hSqlLayer )
5199+
{
5200+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5201+
if ( fet )
5202+
{
5203+
maxrowid = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5204+
}
5205+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5206+
}
5207+
5208+
sql = "SELECT MIN(ROWID) FROM ";
5209+
sql += QgsOgrProviderUtils::quotedIdentifier( layerName, driverName );
5210+
hSqlLayer = GDALDatasetExecuteSQL( ds->hDS, sql, nullptr, nullptr );
5211+
GIntBig minrowid = 0;
5212+
if ( hSqlLayer )
5213+
{
5214+
gdal::ogr_feature_unique_ptr fet( OGR_L_GetNextFeature( hSqlLayer ) );
5215+
if ( fet )
5216+
{
5217+
minrowid = OGR_F_GetFieldAsInteger64( fet.get(), 0 );
5218+
}
5219+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5220+
}
5221+
5222+
if ( maxrowid >= minrowid )
5223+
{
5224+
return maxrowid - minrowid + 1;
5225+
}
5226+
}
5227+
}
5228+
5229+
return OGR_L_GetFeatureCount( hLayer, TRUE );
5230+
}
5231+
5232+
#if GDAL_VERSION_NUM < GDAL_COMPUTE_VERSION(2,4,0)
5233+
static bool findMinOrMax( GDALDatasetH hDS, const QByteArray &rtreeName,
5234+
const char *varName, bool isMin, double &val )
5235+
{
5236+
// We proceed by dichotomic search since unfortunately SELECT MIN(minx)
5237+
// in a RTree is a slow operation
5238+
double minval = -1e10;
5239+
double maxval = 1e10;
5240+
val = 0.0;
5241+
double oldval = 0.0;
5242+
for ( int i = 0; i < 100 && maxval - minval > 1e-15; i++ )
5243+
{
5244+
val = ( minval + maxval ) / 2;
5245+
if ( i > 0 && val == oldval )
5246+
{
5247+
break;
5248+
}
5249+
oldval = val;
5250+
QByteArray sql = "SELECT 1 FROM ";
5251+
sql += rtreeName;
5252+
sql += " WHERE ";
5253+
sql += varName;
5254+
sql += isMin ? " < " : " > ";
5255+
sql += CPLSPrintf( "%.18g", val );
5256+
sql += " LIMIT 1";
5257+
auto hSqlLayer = GDALDatasetExecuteSQL(
5258+
hDS, sql, nullptr, nullptr );
5259+
GIntBig count = -1;
5260+
if ( hSqlLayer )
5261+
{
5262+
count = OGR_L_GetFeatureCount( hSqlLayer, true );
5263+
GDALDatasetReleaseResultSet( hDS, hSqlLayer );
5264+
}
5265+
if ( count < 0 )
5266+
{
5267+
return false;
5268+
}
5269+
if ( ( isMin && count == 0 ) || ( !isMin && count == 1 ) )
5270+
{
5271+
minval = val;
5272+
}
5273+
else
5274+
{
5275+
maxval = val;
5276+
}
5277+
}
5278+
return true;
5279+
}
5280+
#endif
5281+
51345282
OGRErr QgsOgrLayer::GetExtent( OGREnvelope *psExtent, bool bForce )
51355283
{
51365284
QMutexLocker locker( &ds->mutex );
5285+
5286+
#if GDAL_VERSION_NUM < GDAL_COMPUTE_VERSION(2,4,0)
5287+
// OGR_L_GetExtent() can be super slow on huge geopackage files
5288+
// so implement some approximation strategy that has reasonable runtime.
5289+
// Actually this should return a rather accurante answer.
5290+
QString driverName = GDALGetDriverShortName( GDALGetDatasetDriver( ds->hDS ) );
5291+
if ( driverName == QLatin1String( "GPKG" ) )
5292+
{
5293+
QByteArray layerName = OGR_L_GetName( hLayer );
5294+
QByteArray rtreeName =
5295+
QgsOgrProviderUtils::quotedIdentifier( "rtree_" + layerName + "_" + OGR_L_GetGeometryColumn( hLayer ), driverName );
5296+
5297+
// Check if there is a non-empty RTree
5298+
QByteArray sql( "SELECT 1 FROM " );
5299+
sql += rtreeName;
5300+
sql += " LIMIT 1";
5301+
CPLPushErrorHandler( CPLQuietErrorHandler );
5302+
OGRLayerH hSqlLayer = GDALDatasetExecuteSQL(
5303+
ds->hDS, sql, nullptr, nullptr );
5304+
CPLPopErrorHandler();
5305+
if ( !hSqlLayer )
5306+
{
5307+
return OGR_L_GetExtent( hLayer, psExtent, bForce );
5308+
}
5309+
bool hasFeatures = OGR_L_GetFeatureCount( hSqlLayer, true ) > 0;
5310+
GDALDatasetReleaseResultSet( ds->hDS, hSqlLayer );
5311+
if ( !hasFeatures )
5312+
{
5313+
return OGRERR_FAILURE;
5314+
}
5315+
5316+
double minx, miny, maxx, maxy;
5317+
if ( findMinOrMax( ds->hDS, rtreeName, "MINX", true, minx ) &&
5318+
findMinOrMax( ds->hDS, rtreeName, "MINY", true, miny ) &&
5319+
findMinOrMax( ds->hDS, rtreeName, "MAXX", false, maxx ) &&
5320+
findMinOrMax( ds->hDS, rtreeName, "MAXY", false, maxy ) )
5321+
{
5322+
psExtent->MinX = minx;
5323+
psExtent->MinY = miny;
5324+
psExtent->MaxX = maxx;
5325+
psExtent->MaxY = maxy;
5326+
return OGRERR_NONE;
5327+
}
5328+
}
5329+
#endif
5330+
51375331
return OGR_L_GetExtent( hLayer, psExtent, bForce );
51385332
}
51395333

‎src/providers/ogr/qgsogrprovider.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ class QgsOgrProvider : public QgsVectorDataProvider
100100

101101
QgsCoordinateReferenceSystem crs() const override;
102102
QStringList subLayers() const override;
103+
QStringList subLayersWithoutFeatureCount() const;
103104
QString storageType() const override;
104105
QgsFeatureIterator getFeatures( const QgsFeatureRequest &request ) const override;
105106
QString subsetString() const override;
@@ -215,7 +216,9 @@ class QgsOgrProvider : public QgsVectorDataProvider
215216
//! Does the real job of settings the subset string and adds an argument to disable update capabilities
216217
bool _setSubsetString( const QString &theSQL, bool updateFeatureCount = true, bool updateCapabilities = true, bool hasExistingRef = true );
217218

218-
void addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer ) const;
219+
void addSubLayerDetailsToSubLayerList( int i, QgsOgrLayer *layer, bool withFeatureCount ) const;
220+
221+
QStringList _subLayers( bool withFeatureCount ) const;
219222

220223
QgsFields mAttributeFields;
221224

@@ -565,6 +568,9 @@ class QgsOgrLayer
565568
//! Wrapper of OGR_L_GetLayerCount
566569
GIntBig GetFeatureCount( bool force = false );
567570

571+
//! Return an approximate feature count
572+
GIntBig GetApproxFeatureCount();
573+
568574
//! Wrapper of OGR_L_GetLayerCount
569575
OGRErr GetExtent( OGREnvelope *psExtent, bool bForce );
570576

‎tests/src/python/test_provider_ogr_gpkg.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,40 @@ def testAddingTwoIntFieldsWithWidth(self):
949949
self.assertTrue(vl.addAttribute(QgsField("c", QVariant.Int, "integer", 10)))
950950
self.assertTrue(vl.commitChanges())
951951

952+
def testApproxFeatureCountAndExtent(self):
953+
""" Test perf improvement for for https://issues.qgis.org/issues/18402 """
954+
955+
tmpfile = os.path.join(self.basetestpath, 'testApproxFeatureCountAndExtent.gpkg')
956+
ds = ogr.GetDriverByName('GPKG').CreateDataSource(tmpfile)
957+
lyr = ds.CreateLayer('test', geom_type=ogr.wkbPoint)
958+
f = ogr.Feature(lyr.GetLayerDefn())
959+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(0 1)'))
960+
lyr.CreateFeature(f)
961+
f = ogr.Feature(lyr.GetLayerDefn())
962+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(2 3)'))
963+
lyr.CreateFeature(f)
964+
fid = f.GetFID()
965+
f = ogr.Feature(lyr.GetLayerDefn())
966+
f.SetGeometry(ogr.CreateGeometryFromWkt('POINT(4 5)'))
967+
lyr.CreateFeature(f)
968+
lyr.DeleteFeature(fid)
969+
ds = None
970+
ds = ogr.Open(tmpfile, update=1)
971+
ds.ExecuteSQL('DROP TABLE gpkg_ogr_contents')
972+
ds = None
973+
974+
os.environ['QGIS_GPKG_FC_THRESHOLD'] = '1'
975+
vl = QgsVectorLayer(u'{}'.format(tmpfile) + "|layername=" + "test", 'test', u'ogr')
976+
self.assertTrue(vl.isValid())
977+
fc = vl.featureCount()
978+
del os.environ['QGIS_GPKG_FC_THRESHOLD']
979+
self.assertEqual(fc, 3) # didn't notice the hole
980+
981+
reference = QgsGeometry.fromRect(QgsRectangle(0, 1, 4, 5))
982+
provider_extent = QgsGeometry.fromRect(vl.extent())
983+
self.assertTrue(QgsGeometry.compare(provider_extent.asPolygon()[0], reference.asPolygon()[0], 0.00001),
984+
provider_extent.asPolygon()[0])
985+
952986

953987
if __name__ == '__main__':
954988
unittest.main()

0 commit comments

Comments
 (0)
Please sign in to comment.