Skip to content

Commit d8dac0c

Browse files
committedMay 12, 2013
Merge pull request #582 from ccrook/delimited_text_indexing_implemented
Delimited text provider indexing efficiency improvements.
2 parents 724ed72 + 2c37896 commit d8dac0c

11 files changed

+3092
-1548
lines changed
 

‎resources/context_help/QgsDelimitedTextSourceSelect-en_US

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ or are duplicated.
182182
</p>
183183
<p>
184184
In addition to the attributes explicitly in the data file QGIS assigns a unique
185-
feature id to each record. This is the line number in the source file on which
185+
feature id to each record, which is the line number in the source file on which
186186
the record starts.
187187
</p>
188188
<p>
@@ -275,7 +275,10 @@ The following options can be added
275275
<li><tt>crs=...</tt> specifies the coordinate system to use for the vector layer, in a format accepted by QgsCoordinateReferenceSystem.createFromString (for example &quot;EPSG:4167&quot;). If this is not
276276
specified then a dialog box may request this information from the user
277277
when the layer is loaded (depending on QGIS CRS settings).</li>
278-
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no.</li>
278+
<li><tt>subsetIndex=(yes|no)</tt> specifies whether the provider should build an index to define the subset during the initial file scan. The index applies both to explicitly defined subsets and to the implicit subset of features for which the geometry definition is valid. By default the subset index is built if it is applicable. This option is not available from the GUI.</li>
279+
<li><tt>spatialIndex=(yes|no)</tt> specifies whether the provider should build a spatial index during the initial file scan. By default the spatial index is not built. </li>
280+
<li><tt>useWatcher=(yes|no)</tt> specifies whether the provider should use a file system watcher to monitor for changes to the file. This option is not available from the GUI.</li>
281+
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no. This option is not available from the GUI.</li>
279282
</ul>
280283

281284

‎src/core/qgsvectorlayer.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,20 @@ struct CORE_EXPORT QgsVectorJoinInfo
375375
* Defines the coordinate reference system used for the layer. This can be
376376
* any string accepted by QgsCoordinateReferenceSystem::createFromString()
377377
*
378+
* - subsetIndex=(yes|no)
379+
*
380+
* Determines whether the provider generates an index to improve the efficiency
381+
* of subsets. The default is yes.
382+
*
383+
* - spatialIndex=(yes|no)
384+
*
385+
* Determines whether the provider generates a spatial index. The default is no.
386+
*
387+
* - useWatcher=(yes|no)
388+
*
389+
* Defines whether the file will be monitored for changes. The default is
390+
* to monitor for changes.
391+
*
378392
* - quiet
379393
*
380394
* Errors encountered loading the file will not be reported in a user dialog if

‎src/providers/delimitedtext/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SET (DTEXT_SRCS
1010
)
1111

1212
SET (DTEXT_MOC_HDRS
13+
qgsdelimitedtextfile.h
1314
qgsdelimitedtextprovider.h
1415
qgsdelimitedtextsourceselect.h
1516
)

‎src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616
#include "qgsdelimitedtextprovider.h"
1717
#include "qgsdelimitedtextfile.h"
1818

19+
#include "qgsexpression.h"
1920
#include "qgsgeometry.h"
21+
#include "qgslogger.h"
2022
#include "qgsmessagelog.h"
23+
#include "qgsspatialindex.h"
2124

25+
#include <QtAlgorithms>
2226
#include <QTextStream>
2327

2428
QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request )
@@ -32,6 +36,106 @@ QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTe
3236
}
3337
P->mActiveIterator = this;
3438

39+
// Determine mode to use based on request...
40+
41+
QgsDebugMsg( "Setting up QgsDelimitedTextIterator" );
42+
43+
// Does the layer have geometry - will revise later to determine if we actually need to
44+
// load it.
45+
mLoadGeometry = P->mGeomRep != QgsDelimitedTextProvider::GeomNone;
46+
47+
// Does the layer have an explicit or implicit subset (implicit subset is if we have geometry which can
48+
// be invalid)
49+
50+
mTestSubset = P->mSubsetExpression;
51+
mTestGeometry = false;
52+
53+
mMode = FileScan;
54+
if ( request.filterType() == QgsFeatureRequest::FilterFid )
55+
{
56+
QgsDebugMsg( "Configuring for returning single id" );
57+
mFeatureIds.append( request.filterFid() );
58+
mMode = FeatureIds;
59+
mTestSubset = false;
60+
}
61+
// If have geometry and testing geometry then evaluate options...
62+
// If we don't have geometry then all records pass geometry filter.
63+
// CC: 2013-05-09
64+
// Not sure about intended relationship between filtering on geometry and
65+
// requesting no geometry? Have preserved current logic of ignoring spatial filter
66+
// if not requesting geometry.
67+
68+
else if ( request.filterType() == QgsFeatureRequest::FilterRect && mLoadGeometry
69+
&& !( mRequest.flags() & QgsFeatureRequest::NoGeometry ) )
70+
{
71+
QgsDebugMsg( "Configuring for rectangle select" );
72+
mTestGeometry = true;
73+
// Exact intersection test only applies for WKT geometries
74+
mTestGeometryExact = mRequest.flags() & QgsFeatureRequest::ExactIntersect
75+
&& P->mGeomRep == QgsDelimitedTextProvider::GeomAsWkt;
76+
77+
QgsRectangle rect = request.filterRect();
78+
79+
// If request doesn't overlap extents, then nothing to return
80+
if ( ! rect.intersects( P->extent() ) )
81+
{
82+
QgsDebugMsg( "Rectangle outside layer extents - no features to return" );
83+
mMode = FeatureIds;
84+
}
85+
// If the request extents include the entire layer, then revert to
86+
// a file scan
87+
88+
else if ( rect.contains( P->extent() ) )
89+
{
90+
QgsDebugMsg( "Rectangle contains layer extents - bypass spatial filter" );
91+
mTestGeometry = false;
92+
}
93+
// If we have a spatial index then use it. The spatial index already accounts
94+
// for the subset. Also means we don't have to test geometries unless doing exact
95+
// intersection
96+
97+
else if ( P->mUseSpatialIndex )
98+
{
99+
mFeatureIds = P->mSpatialIndex->intersects( rect );
100+
// Sort for efficient sequential retrieval
101+
qSort(mFeatureIds.begin(), mFeatureIds.end());
102+
QgsDebugMsg( QString("Layer has spatial index - selected %1 features from index").arg(mFeatureIds.size()) );
103+
mMode = FeatureIds;
104+
mTestSubset = false;
105+
mTestGeometry = mTestGeometryExact;
106+
}
107+
}
108+
109+
// If we have a subset index then use it..
110+
if ( mMode == FileScan && P->mUseSubsetIndex )
111+
{
112+
QgsDebugMsg( QString("Layer has subset index - use %1 items from subset index").arg(P->mSubsetIndex.size()) );
113+
mTestSubset = false;
114+
mMode = SubsetIndex;
115+
}
116+
117+
// Otherwise just have to scan the file
118+
if( mMode == FileScan )
119+
{
120+
QgsDebugMsg( "File will be scanned for desired features" );
121+
}
122+
123+
// If the request does not require geometry, can we avoid loading it?
124+
// We need it if we are testing geometry (ie spatial filter), or
125+
// if testing the subset expression, and it uses geometry.
126+
if ( mRequest.flags() & QgsFeatureRequest::NoGeometry &&
127+
! mTestGeometry &&
128+
! ( mTestSubset && P->mSubsetExpression->needsGeometry() ) )
129+
{
130+
QgsDebugMsg( "Feature geometries not required" );
131+
mLoadGeometry = false;
132+
}
133+
134+
QgsDebugMsg( QString("Iterator is scanning file: ") + (scanningFile() ? "Yes" : "No"));
135+
QgsDebugMsg( QString("Iterator is loading geometries: ") + (loadGeometry() ? "Yes" : "No"));
136+
QgsDebugMsg( QString("Iterator is testing geometries: ") + (testGeometry() ? "Yes" : "No"));
137+
QgsDebugMsg( QString("Iterator is testing subset: ") + (testSubset() ? "Yes" : "No"));
138+
35139
rewind();
36140
}
37141

@@ -49,8 +153,40 @@ bool QgsDelimitedTextFeatureIterator::nextFeature( QgsFeature& feature )
49153
if ( mClosed )
50154
return false;
51155

52-
bool gotFeature = P->nextFeature( feature, P->mFile, mRequest );
156+
bool gotFeature = false;
157+
if ( mMode == FileScan )
158+
{
159+
gotFeature = P->nextFeature( feature, P->mFile, this );
160+
}
161+
else
162+
{
163+
while( ! gotFeature )
164+
{
165+
qint64 fid = -1;
166+
if ( mMode == FeatureIds )
167+
{
168+
if( mNextId < mFeatureIds.size() )
169+
{
170+
fid = mFeatureIds[mNextId];
171+
}
172+
}
173+
else if( mNextId < P->mSubsetIndex.size() )
174+
{
175+
fid = P->mSubsetIndex[mNextId];
176+
}
177+
if( fid < 0 ) break;
178+
mNextId++;
179+
gotFeature = (P->setNextFeatureId( fid ) && P->nextFeature( feature, P->mFile, this ));
180+
}
181+
}
182+
183+
// CC: 2013-05-08: What is the intent of rewind/close. The following
184+
// line from previous implementation means that we cannot rewind the iterator
185+
// after reading last record? Is this correct? This line can be removed if
186+
// not.
187+
53188
if ( ! gotFeature ) close();
189+
54190
return gotFeature;
55191
}
56192

@@ -60,7 +196,14 @@ bool QgsDelimitedTextFeatureIterator::rewind()
60196
return false;
61197

62198
// Skip to first data record
63-
P->resetStream();
199+
if ( mMode == FileScan )
200+
{
201+
P->resetStream();
202+
}
203+
else
204+
{
205+
mNextId = 0;
206+
}
64207
return true;
65208
}
66209

@@ -71,6 +214,29 @@ bool QgsDelimitedTextFeatureIterator::close()
71214

72215
// tell provider that this iterator is not active anymore
73216
P->mActiveIterator = 0;
217+
mFeatureIds = QList<QgsFeatureId>();
74218
mClosed = true;
75219
return true;
76220
}
221+
222+
/**
223+
* Check to see if the point is within the selection rectangle
224+
*/
225+
bool QgsDelimitedTextFeatureIterator::wantGeometry( const QgsPoint &pt ) const
226+
{
227+
if ( ! mTestGeometry ) return true;
228+
return mRequest.filterRect().contains( pt );
229+
}
230+
231+
/**
232+
* Check to see if the geometry is within the selection rectangle
233+
*/
234+
bool QgsDelimitedTextFeatureIterator::wantGeometry( QgsGeometry *geom ) const
235+
{
236+
if ( ! mTestGeometry ) return true;
237+
238+
if ( mTestGeometryExact )
239+
return geom->intersects( mRequest.filterRect() );
240+
else
241+
return geom->boundingBox().intersects( mRequest.filterRect() );
242+
}

‎src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,20 @@
1515
#ifndef QGSDELIMITEDTEXTFEATUREITERATOR_H
1616
#define QGSDELIMITEDTEXTFEATUREITERATOR_H
1717

18+
#include <QList>
1819
#include "qgsfeatureiterator.h"
20+
#include "qgsfeature.h"
1921

2022
class QgsDelimitedTextProvider;
2123

2224
class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
2325
{
26+
enum IteratorMode
27+
{
28+
FileScan,
29+
SubsetIndex,
30+
FeatureIds
31+
};
2432
public:
2533
QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request );
2634

@@ -35,8 +43,29 @@ class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
3543
//! end of iterating: free the resources / lock
3644
virtual bool close();
3745

46+
// Flags used by nextFeature function of QgsDelimitedTextProvider
47+
bool testSubset() const { return mTestSubset; }
48+
bool testGeometry() const { return mTestGeometry; }
49+
bool loadGeometry() const { return mLoadGeometry; }
50+
bool loadSubsetOfAttributes() const { return ! mTestSubset && mRequest.flags() & QgsFeatureRequest::SubsetOfAttributes;}
51+
bool scanningFile() const { return mMode == FileScan; }
52+
53+
// Pass through attribute subset
54+
const QgsAttributeList &subsetOfAttributes() const { return mRequest.subsetOfAttributes(); }
55+
56+
// Tests whether the geometry is required, given that testGeometry is true.
57+
bool wantGeometry( const QgsPoint & point ) const;
58+
bool wantGeometry( QgsGeometry *geom ) const;
59+
3860
protected:
3961
QgsDelimitedTextProvider* P;
62+
QList<QgsFeatureId> mFeatureIds;
63+
IteratorMode mMode;
64+
long mNextId;
65+
bool mTestSubset;
66+
bool mTestGeometry;
67+
bool mTestGeometryExact;
68+
bool mLoadGeometry;
4069
};
4170

4271

0 commit comments

Comments
 (0)
Please sign in to comment.