Skip to content

Commit d8dac0c

Browse files
committedMay 12, 2013
Merge pull request #582 from ccrook/delimited_text_indexing_implemented
Delimited text provider indexing efficiency improvements.
2 parents 724ed72 + 2c37896 commit d8dac0c

11 files changed

+3092
-1548
lines changed
 

‎resources/context_help/QgsDelimitedTextSourceSelect-en_US

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ or are duplicated.
182182
</p>
183183
<p>
184184
In addition to the attributes explicitly in the data file QGIS assigns a unique
185-
feature id to each record. This is the line number in the source file on which
185+
feature id to each record, which is the line number in the source file on which
186186
the record starts.
187187
</p>
188188
<p>
@@ -275,7 +275,10 @@ The following options can be added
275275
<li><tt>crs=...</tt> specifies the coordinate system to use for the vector layer, in a format accepted by QgsCoordinateReferenceSystem.createFromString (for example &quot;EPSG:4167&quot;). If this is not
276276
specified then a dialog box may request this information from the user
277277
when the layer is loaded (depending on QGIS CRS settings).</li>
278-
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no.</li>
278+
<li><tt>subsetIndex=(yes|no)</tt> specifies whether the provider should build an index to define the subset during the initial file scan. The index applies both to explicitly defined subsets and to the implicit subset of features for which the geometry definition is valid. By default the subset index is built if it is applicable. This option is not available from the GUI.</li>
279+
<li><tt>spatialIndex=(yes|no)</tt> specifies whether the provider should build a spatial index during the initial file scan. By default the spatial index is not built. </li>
280+
<li><tt>useWatcher=(yes|no)</tt> specifies whether the provider should use a file system watcher to monitor for changes to the file. By default the file is monitored for changes. This option is not available from the GUI.</li>
281+
<li><tt>quiet=(yes|no)</tt> specifies whether errors encountered loading the layer are presented in a dialog box (they will be written to the QGIS log in any case). The default is no. This option is not available from the GUI.</li>
279282
</ul>
280283

281284

‎src/core/qgsvectorlayer.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,20 @@ struct CORE_EXPORT QgsVectorJoinInfo
375375
* Defines the coordinate reference system used for the layer. This can be
376376
* any string accepted by QgsCoordinateReferenceSystem::createFromString()
377377
*
378+
* - subsetIndex=(yes|no)
379+
*
380+
* Determines whether the provider generates an index to improve the efficiency
381+
* of subsets. The default is yes.
382+
*
383+
* - spatialIndex=(yes|no)
384+
*
385+
* Determines whether the provider generates a spatial index. The default is no.
386+
*
387+
* - useWatcher=(yes|no)
388+
*
389+
* Defines whether the file will be monitored for changes. The default is
390+
* to monitor for changes.
391+
*
378392
* - quiet
379393
*
380394
* Errors encountered loading the file will not be reported in a user dialog if

‎src/providers/delimitedtext/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SET (DTEXT_SRCS
1010
)
1111

1212
SET (DTEXT_MOC_HDRS
13+
qgsdelimitedtextfile.h
1314
qgsdelimitedtextprovider.h
1415
qgsdelimitedtextsourceselect.h
1516
)

‎src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616
#include "qgsdelimitedtextprovider.h"
1717
#include "qgsdelimitedtextfile.h"
1818

19+
#include "qgsexpression.h"
1920
#include "qgsgeometry.h"
21+
#include "qgslogger.h"
2022
#include "qgsmessagelog.h"
23+
#include "qgsspatialindex.h"
2124

25+
#include <QtAlgorithms>
2226
#include <QTextStream>
2327

2428
QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request )
@@ -32,6 +36,106 @@ QgsDelimitedTextFeatureIterator::QgsDelimitedTextFeatureIterator( QgsDelimitedTe
3236
}
3337
P->mActiveIterator = this;
3438

39+
// Determine mode to use based on request...
40+
41+
QgsDebugMsg( "Setting up QgsDelimitedTextIterator" );
42+
43+
// Does the layer have geometry - will revise later to determine if we actually need to
44+
// load it.
45+
mLoadGeometry = P->mGeomRep != QgsDelimitedTextProvider::GeomNone;
46+
47+
// Does the layer have an explicit or implicit subset (implicit subset is if we have geometry which can
48+
// be invalid)
49+
50+
mTestSubset = P->mSubsetExpression;
51+
mTestGeometry = false;
52+
53+
mMode = FileScan;
54+
if ( request.filterType() == QgsFeatureRequest::FilterFid )
55+
{
56+
QgsDebugMsg( "Configuring for returning single id" );
57+
mFeatureIds.append( request.filterFid() );
58+
mMode = FeatureIds;
59+
mTestSubset = false;
60+
}
61+
// If have geometry and testing geometry then evaluate options...
62+
// If we don't have geometry then all records pass geometry filter.
63+
// CC: 2013-05-09
64+
// Not sure about intended relationship between filtering on geometry and
65+
// requesting no geometry? Have preserved current logic of ignoring spatial filter
66+
// if not requesting geometry.
67+
68+
else if ( request.filterType() == QgsFeatureRequest::FilterRect && mLoadGeometry
69+
&& !( mRequest.flags() & QgsFeatureRequest::NoGeometry ) )
70+
{
71+
QgsDebugMsg( "Configuring for rectangle select" );
72+
mTestGeometry = true;
73+
// Exact intersection test only applies for WKT geometries
74+
mTestGeometryExact = mRequest.flags() & QgsFeatureRequest::ExactIntersect
75+
&& P->mGeomRep == QgsDelimitedTextProvider::GeomAsWkt;
76+
77+
QgsRectangle rect = request.filterRect();
78+
79+
// If request doesn't overlap extents, then nothing to return
80+
if ( ! rect.intersects( P->extent() ) )
81+
{
82+
QgsDebugMsg( "Rectangle outside layer extents - no features to return" );
83+
mMode = FeatureIds;
84+
}
85+
// If the request extents include the entire layer, then revert to
86+
// a file scan
87+
88+
else if ( rect.contains( P->extent() ) )
89+
{
90+
QgsDebugMsg( "Rectangle contains layer extents - bypass spatial filter" );
91+
mTestGeometry = false;
92+
}
93+
// If we have a spatial index then use it. The spatial index already accounts
94+
// for the subset. Also means we don't have to test geometries unless doing exact
95+
// intersection
96+
97+
else if ( P->mUseSpatialIndex )
98+
{
99+
mFeatureIds = P->mSpatialIndex->intersects( rect );
100+
// Sort for efficient sequential retrieval
101+
qSort(mFeatureIds.begin(), mFeatureIds.end());
102+
QgsDebugMsg( QString("Layer has spatial index - selected %1 features from index").arg(mFeatureIds.size()) );
103+
mMode = FeatureIds;
104+
mTestSubset = false;
105+
mTestGeometry = mTestGeometryExact;
106+
}
107+
}
108+
109+
// If we have a subset index then use it..
110+
if ( mMode == FileScan && P->mUseSubsetIndex )
111+
{
112+
QgsDebugMsg( QString("Layer has subset index - use %1 items from subset index").arg(P->mSubsetIndex.size()) );
113+
mTestSubset = false;
114+
mMode = SubsetIndex;
115+
}
116+
117+
// Otherwise just have to scan the file
118+
if( mMode == FileScan )
119+
{
120+
QgsDebugMsg( "File will be scanned for desired features" );
121+
}
122+
123+
// If the request does not require geometry, can we avoid loading it?
124+
// We need it if we are testing geometry (ie spatial filter), or
125+
// if testing the subset expression, and it uses geometry.
126+
if ( mRequest.flags() & QgsFeatureRequest::NoGeometry &&
127+
! mTestGeometry &&
128+
! ( mTestSubset && P->mSubsetExpression->needsGeometry() ) )
129+
{
130+
QgsDebugMsg( "Feature geometries not required" );
131+
mLoadGeometry = false;
132+
}
133+
134+
QgsDebugMsg( QString("Iterator is scanning file: ") + (scanningFile() ? "Yes" : "No"));
135+
QgsDebugMsg( QString("Iterator is loading geometries: ") + (loadGeometry() ? "Yes" : "No"));
136+
QgsDebugMsg( QString("Iterator is testing geometries: ") + (testGeometry() ? "Yes" : "No"));
137+
QgsDebugMsg( QString("Iterator is testing subset: ") + (testSubset() ? "Yes" : "No"));
138+
35139
rewind();
36140
}
37141

@@ -49,8 +153,40 @@ bool QgsDelimitedTextFeatureIterator::nextFeature( QgsFeature& feature )
49153
if ( mClosed )
50154
return false;
51155

52-
bool gotFeature = P->nextFeature( feature, P->mFile, mRequest );
156+
bool gotFeature = false;
157+
if ( mMode == FileScan )
158+
{
159+
gotFeature = P->nextFeature( feature, P->mFile, this );
160+
}
161+
else
162+
{
163+
while( ! gotFeature )
164+
{
165+
qint64 fid = -1;
166+
if ( mMode == FeatureIds )
167+
{
168+
if( mNextId < mFeatureIds.size() )
169+
{
170+
fid = mFeatureIds[mNextId];
171+
}
172+
}
173+
else if( mNextId < P->mSubsetIndex.size() )
174+
{
175+
fid = P->mSubsetIndex[mNextId];
176+
}
177+
if( fid < 0 ) break;
178+
mNextId++;
179+
gotFeature = (P->setNextFeatureId( fid ) && P->nextFeature( feature, P->mFile, this ));
180+
}
181+
}
182+
183+
// CC: 2013-05-08: What is the intent of rewind/close. The following
184+
// line from previous implementation means that we cannot rewind the iterator
185+
// after reading last record? Is this correct? This line can be removed if
186+
// not.
187+
53188
if ( ! gotFeature ) close();
189+
54190
return gotFeature;
55191
}
56192

@@ -60,7 +196,14 @@ bool QgsDelimitedTextFeatureIterator::rewind()
60196
return false;
61197

62198
// Skip to first data record
63-
P->resetStream();
199+
if ( mMode == FileScan )
200+
{
201+
P->resetStream();
202+
}
203+
else
204+
{
205+
mNextId = 0;
206+
}
64207
return true;
65208
}
66209

@@ -71,6 +214,29 @@ bool QgsDelimitedTextFeatureIterator::close()
71214

72215
// tell provider that this iterator is not active anymore
73216
P->mActiveIterator = 0;
217+
mFeatureIds = QList<QgsFeatureId>();
74218
mClosed = true;
75219
return true;
76220
}
221+
222+
/**
223+
* Check to see if the point is within the selection rectangle
224+
*/
225+
bool QgsDelimitedTextFeatureIterator::wantGeometry( const QgsPoint &pt ) const
226+
{
227+
if ( ! mTestGeometry ) return true;
228+
return mRequest.filterRect().contains( pt );
229+
}
230+
231+
/**
232+
* Check to see if the geometry is within the selection rectangle
233+
*/
234+
bool QgsDelimitedTextFeatureIterator::wantGeometry( QgsGeometry *geom ) const
235+
{
236+
if ( ! mTestGeometry ) return true;
237+
238+
if ( mTestGeometryExact )
239+
return geom->intersects( mRequest.filterRect() );
240+
else
241+
return geom->boundingBox().intersects( mRequest.filterRect() );
242+
}

‎src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,20 @@
1515
#ifndef QGSDELIMITEDTEXTFEATUREITERATOR_H
1616
#define QGSDELIMITEDTEXTFEATUREITERATOR_H
1717

18+
#include <QList>
1819
#include "qgsfeatureiterator.h"
20+
#include "qgsfeature.h"
1921

2022
class QgsDelimitedTextProvider;
2123

2224
class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
2325
{
26+
enum IteratorMode
27+
{
28+
FileScan,
29+
SubsetIndex,
30+
FeatureIds
31+
};
2432
public:
2533
QgsDelimitedTextFeatureIterator( QgsDelimitedTextProvider* p, const QgsFeatureRequest& request );
2634

@@ -35,8 +43,29 @@ class QgsDelimitedTextFeatureIterator : public QgsAbstractFeatureIterator
3543
//! end of iterating: free the resources / lock
3644
virtual bool close();
3745

46+
// Flags used by nextFeature function of QgsDelimitedTextProvider
47+
bool testSubset() const { return mTestSubset; }
48+
bool testGeometry() const { return mTestGeometry; }
49+
bool loadGeometry() const { return mLoadGeometry; }
50+
bool loadSubsetOfAttributes() const { return ! mTestSubset && mRequest.flags() & QgsFeatureRequest::SubsetOfAttributes;}
51+
bool scanningFile() const { return mMode == FileScan; }
52+
53+
// Pass through attribute subset
54+
const QgsAttributeList &subsetOfAttributes() const { return mRequest.subsetOfAttributes(); }
55+
56+
// Tests whether the geometry is required, given that testGeometry is true.
57+
bool wantGeometry( const QgsPoint & point ) const;
58+
bool wantGeometry( QgsGeometry *geom ) const;
59+
3860
protected:
3961
QgsDelimitedTextProvider* P;
62+
QList<QgsFeatureId> mFeatureIds;
63+
IteratorMode mMode;
64+
long mNextId;
65+
bool mTestSubset;
66+
bool mTestGeometry;
67+
bool mTestGeometryExact;
68+
bool mLoadGeometry;
4069
};
4170

4271

‎src/providers/delimitedtext/qgsdelimitedtextfile.cpp

Lines changed: 115 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <QFile>
2323
#include <QDataStream>
2424
#include <QTextStream>
25+
#include <QFileSystemWatcher>
2526
#include <QTextCodec>
2627
#include <QStringList>
2728
#include <QRegExp>
@@ -37,15 +38,20 @@ QgsDelimitedTextFile::QgsDelimitedTextFile( QString url ) :
3738
mEncoding( "UTF-8" ),
3839
mFile( 0 ),
3940
mStream( 0 ),
41+
mUseWatcher( true ),
42+
mWatcher( 0 ),
4043
mDefinitionValid( false ),
4144
mUseHeader( true ),
4245
mDiscardEmptyFields( false ),
4346
mTrimFields( false ),
4447
mSkipLines( 0 ),
4548
mMaxFields( 0 ),
4649
mMaxNameLength( 200 ), // Don't want field names to be too unwieldy!
47-
mLineNumber( 0 ),
48-
mRecordLineNumber( 0 ),
50+
mLineNumber( -1 ),
51+
mRecordLineNumber( -1 ),
52+
mRecordNumber( -1 ),
53+
mHoldCurrentRecord( false ),
54+
mMaxRecordNumber( -1 ),
4955
mMaxFieldCount( 0 )
5056
{
5157
// The default type is CSV
@@ -71,6 +77,11 @@ void QgsDelimitedTextFile::close()
7177
delete mFile;
7278
mFile = 0;
7379
}
80+
if ( mWatcher )
81+
{
82+
delete mWatcher;
83+
mWatcher = 0;
84+
}
7485
}
7586

7687
bool QgsDelimitedTextFile::open()
@@ -92,10 +103,25 @@ bool QgsDelimitedTextFile::open()
92103
QTextCodec *codec = QTextCodec::codecForName( mEncoding.toAscii() );
93104
mStream->setCodec( codec );
94105
}
106+
mMaxRecordNumber = -1;
107+
mHoldCurrentRecord = false;
108+
if ( mWatcher ) delete mWatcher;
109+
if( mUseWatcher )
110+
{
111+
mWatcher = new QFileSystemWatcher( this );
112+
mWatcher->addPath( mFileName );
113+
connect( mWatcher, SIGNAL( fileChanged( QString ) ), this, SLOT( updateFile() ) );
114+
}
95115
}
96116
return true;
97117
}
98118

119+
void QgsDelimitedTextFile::updateFile()
120+
{
121+
close();
122+
emit( fileUpdated() );
123+
}
124+
99125
// Clear information based on current definition of file
100126
void QgsDelimitedTextFile::resetDefinition()
101127
{
@@ -126,6 +152,12 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url )
126152
mEncoding = url.queryItemValue( "encoding" );
127153
}
128154

155+
//
156+
if ( url.hasQueryItem( "useWatcher" ) )
157+
{
158+
mUseWatcher = ! url.queryItemValue( "useWatcher" ).toUpper().startsWith( 'N' );;
159+
}
160+
129161
// The default type is csv, to be consistent with the
130162
// previous implementation (except that quoting should be handled properly)
131163

@@ -181,7 +213,7 @@ bool QgsDelimitedTextFile::setFromUrl( QUrl &url )
181213
}
182214
if ( url.hasQueryItem( "skipEmptyFields" ) )
183215
{
184-
mDiscardEmptyFields = ! url.queryItemValue( "skipEmptyFields" ).toUpper().startsWith( 'N' );;
216+
mDiscardEmptyFields = ! url.queryItemValue( "skipEmptyFields" ).toUpper().startsWith( 'N' );
185217
}
186218
if ( url.hasQueryItem( "trimFields" ) )
187219
{
@@ -231,6 +263,9 @@ QUrl QgsDelimitedTextFile::url()
231263
{
232264
url.addQueryItem( "encoding", mEncoding );
233265
}
266+
267+
if( ! mUseWatcher ) url.addQueryItem( "useWatcher", "no");
268+
234269
url.addQueryItem( "type", type() );
235270
if ( mType == DelimTypeRegexp )
236271
{
@@ -277,6 +312,12 @@ void QgsDelimitedTextFile::setEncoding( QString encoding )
277312
mEncoding = encoding;
278313
}
279314

315+
void QgsDelimitedTextFile::setUseWatcher(bool useWatcher)
316+
{
317+
resetDefinition();
318+
mUseWatcher = useWatcher;
319+
}
320+
280321
QString QgsDelimitedTextFile::type()
281322
{
282323
if ( mType == DelimTypeWhitespace ) return QString( "whitespace" );
@@ -372,7 +413,7 @@ void QgsDelimitedTextFile::setDiscardEmptyFields( bool discardEmptyFields )
372413

373414
void QgsDelimitedTextFile::setFieldNames( const QStringList &names )
374415
{
375-
mFieldNames.empty();
416+
mFieldNames.clear();
376417
foreach ( QString name, names )
377418
{
378419
bool nameOk = true;
@@ -455,9 +496,47 @@ int QgsDelimitedTextFile::fieldIndex( QString name )
455496

456497
}
457498

499+
bool QgsDelimitedTextFile::setNextRecordId(long nextRecordId )
500+
{
501+
mHoldCurrentRecord = nextRecordId == mRecordLineNumber;
502+
if( mHoldCurrentRecord ) return true;
503+
return setNextLineNumber( nextRecordId );
504+
}
505+
458506
QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextRecord( QStringList &record )
459507
{
460-
return ( this->*mParser )( record );
508+
509+
record.clear();
510+
Status status = RecordOk;
511+
512+
if( mHoldCurrentRecord )
513+
{
514+
mHoldCurrentRecord = false;
515+
}
516+
else
517+
{
518+
// Invalidate the record line number, in case we get EOF
519+
mRecordLineNumber = -1;
520+
521+
// Find the first non-blank line to read
522+
QString buffer;
523+
status = nextLine( buffer, true );
524+
if ( status != RecordOk ) return status;
525+
526+
mCurrentRecord.clear();
527+
mRecordLineNumber = mLineNumber;
528+
if ( mRecordNumber >= 0 )
529+
{
530+
mRecordNumber++;
531+
if ( mRecordNumber > mMaxRecordNumber ) mMaxRecordNumber = mRecordNumber;
532+
}
533+
status = (this->*mParser )( buffer, mCurrentRecord );
534+
}
535+
if( status == RecordOk )
536+
{
537+
record.append(mCurrentRecord);
538+
}
539+
return status;
461540
}
462541

463542

@@ -469,7 +548,8 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::reset()
469548
// Reset the file pointer
470549
mStream->seek( 0 );
471550
mLineNumber = 0;
472-
mRecordLineNumber = 0;
551+
mRecordNumber = -1;
552+
mRecordLineNumber = -1;
473553

474554
// Skip header lines
475555
for ( int i = mSkipLines; i-- > 0; )
@@ -478,14 +558,15 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::reset()
478558
mLineNumber++;
479559
}
480560
// Read the column names
561+
Status result = RecordOk;
481562
if ( mUseHeader )
482563
{
483564
QStringList names;
484-
QgsDelimitedTextFile::Status result = nextRecord( names );
565+
result = nextRecord( names );
485566
setFieldNames( names );
486-
return result;
487567
}
488-
return RecordOk;
568+
if( result == RecordOk ) mRecordNumber = 0;
569+
return result;
489570
}
490571

491572
QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bool skipBlank )
@@ -509,6 +590,24 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::nextLine( QString &buffer, bo
509590
return RecordEOF;
510591
}
511592

593+
bool QgsDelimitedTextFile::setNextLineNumber( long nextLineNumber )
594+
{
595+
if ( ! mStream ) return false;
596+
if ( mLineNumber > nextLineNumber-1 )
597+
{
598+
mRecordNumber = -1;
599+
mStream->seek(0);
600+
mLineNumber = 0;
601+
}
602+
QString buffer;
603+
while( mLineNumber < nextLineNumber-1 )
604+
{
605+
if( nextLine(buffer,false) != RecordOk ) return false;
606+
}
607+
return true;
608+
609+
}
610+
512611
void QgsDelimitedTextFile::appendField( QStringList &record, QString field, bool quoted )
513612
{
514613
if ( mMaxFields > 0 && record.size() >= mMaxFields ) return;
@@ -522,16 +621,14 @@ void QgsDelimitedTextFile::appendField( QStringList &record, QString field, bool
522621
if ( !( mDiscardEmptyFields && field.isEmpty() ) ) record.append( field );
523622
}
524623
// Keep track of maximum number of non-empty fields in a record
525-
if ( record.size() > mMaxFieldCount && ! field.isEmpty() ) mMaxFieldCount = record.size();
624+
if ( record.size() > mMaxFieldCount && ! field.isEmpty() )
625+
{
626+
mMaxFieldCount = record.size();
627+
}
526628
}
527629

528-
QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fields )
630+
QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QString &buffer, QStringList &fields )
529631
{
530-
fields.clear();
531-
QString buffer;
532-
Status status = nextLine( buffer, true );
533-
if ( status != RecordOk ) return status;
534-
mRecordLineNumber = mLineNumber;
535632

536633
// If match is anchored, then only interested in records which actually match
537634
// and extract capture groups
@@ -586,16 +683,9 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QStringList &fie
586683
return RecordOk;
587684
}
588685

589-
QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QStringList &fields )
686+
QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseQuoted( QString &buffer, QStringList &fields )
590687
{
591-
fields.clear();
592-
593-
// Find the first non-blank line to read
594-
QString buffer;
595-
Status status = nextLine( buffer, true );
596-
if ( status != RecordOk ) return status;
597-
mRecordLineNumber = mLineNumber;
598-
688+
Status status = RecordOk;
599689
QString field; // String in which to accumulate next field
600690
bool escaped = false; // Next char is escaped
601691
bool quoted = false; // In quotes

‎src/providers/delimitedtext/qgsdelimitedtextfile.h

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/***************************************************************************
2-
qgsdelimitedtextparser.h - File for delimited text file
2+
qgsdelimitedtextfile.h - File for delimited text file
33
-------------------
44
begin : 2004-02-27
55
copyright : (C) 2013 by Chris Crook
@@ -15,13 +15,17 @@
1515
* *
1616
***************************************************************************/
1717

18+
#ifndef QGSDELIMITEDTEXTFILE_H
19+
#define QGSDELIMITEDTEXTFILE_H
20+
1821
#include <QStringList>
1922
#include <QRegExp>
2023
#include <QUrl>
2124

2225
class QgsFeature;
2326
class QgsField;
2427
class QFile;
28+
class QFileSystemWatcher;
2529
class QTextStream;
2630

2731

@@ -66,9 +70,11 @@ class QTextStream;
6670
// on an abstract base class in order to facilitate changing the type of the parser easily
6771
// eg in the provider dialog
6872

69-
class QgsDelimitedTextFile
73+
class QgsDelimitedTextFile : public QObject
7074
{
7175

76+
Q_OBJECT
77+
7278
public:
7379

7480
enum Status
@@ -84,7 +90,7 @@ class QgsDelimitedTextFile
8490
{
8591
DelimTypeWhitespace,
8692
DelimTypeCSV,
87-
DelimTypeRegexp,
93+
DelimTypeRegexp
8894
};
8995

9096
QgsDelimitedTextFile( QString url = QString() );
@@ -238,11 +244,22 @@ class QgsDelimitedTextFile
238244
/** Return the line number of the start of the last record read
239245
* @return linenumber The line number of the start of the record
240246
*/
241-
int recordLineNumber()
247+
int recordId()
242248
{
243249
return mRecordLineNumber;
244250
}
245251

252+
/** Set the index of the next record to return.
253+
* @param nextRecordId The id to set the next record to
254+
* @return valid True if the next record can be located
255+
*/
256+
bool setNextRecordId( long nextRecordId );
257+
258+
/** The maximum record number visited so far. After scanning the file this
259+
* serves as a record count.
260+
* @return maxRecordNumber The maximum record number
261+
*/
262+
long recordCount() { return mMaxRecordNumber; }
246263
/** Reset the file to reread from the beginning
247264
*/
248265
Status reset();
@@ -272,6 +289,22 @@ class QgsDelimitedTextFile
272289
*/
273290
static QString decodeChars( QString string );
274291

292+
/** Set to use or not use a QFileSystemWatcher to notify of changes to the file
293+
* @param useWatcher True to use a watcher, false otherwise
294+
*/
295+
296+
void setUseWatcher( bool useWatcher );
297+
298+
signals:
299+
/** Signal sent when the file is updated by another process
300+
*/
301+
void fileUpdated();
302+
303+
public slots:
304+
/** Slot used by watcher to notify of file updates
305+
*/
306+
void updateFile();
307+
275308
private:
276309

277310
/** Open the file
@@ -290,29 +323,34 @@ class QgsDelimitedTextFile
290323
void resetDefinition();
291324

292325
/** Parse regular expression delimited fields */
293-
Status parseRegexp( QStringList &fields );
326+
Status parseRegexp( QString &buffer, QStringList &fields );
294327
/** Parse quote delimited fields, where quote and escape are different */
295-
Status parseQuoted( QStringList &fields );
328+
Status parseQuoted( QString &buffer, QStringList &fields );
296329

297330
/** Return the next line from the data file. If skipBlank is true then
298331
* blank lines will be skipped - this is for compatibility with previous
299332
* delimited text parser implementation.
300333
*/
301334
Status nextLine( QString &buffer, bool skipBlank = false );
302335

336+
/** Set the next line to read from the file.
337+
*/
338+
bool setNextLineNumber( long nextLineNumber );
339+
303340
/** Utility routine to add a field to a record, accounting for trimming
304341
* and discarding, and maximum field count
305342
*/
306-
307343
void appendField( QStringList &record, QString field, bool quoted = false );
308344

309345
// Pointer to the currently selected parser
310-
Status( QgsDelimitedTextFile::*mParser )( QStringList &fields );
346+
Status( QgsDelimitedTextFile::*mParser )( QString &buffer, QStringList &fields );
311347

312348
QString mFileName;
313349
QString mEncoding;
314350
QFile *mFile;
315351
QTextStream *mStream;
352+
bool mUseWatcher;
353+
QFileSystemWatcher *mWatcher;
316354

317355
// Parameters common to parsers
318356
bool mDefinitionValid;
@@ -333,7 +371,14 @@ class QgsDelimitedTextFile
333371

334372
// Information extracted from file
335373
QStringList mFieldNames;
336-
int mLineNumber;
337-
int mRecordLineNumber;
374+
long mLineNumber;
375+
long mRecordLineNumber;
376+
long mRecordNumber;
377+
QStringList mCurrentRecord;
378+
bool mHoldCurrentRecord;
379+
// Maximum number of records (ie maximum record number visited)
380+
long mMaxRecordNumber;
338381
int mMaxFieldCount;
339382
};
383+
384+
#endif

‎src/providers/delimitedtext/qgsdelimitedtextprovider.cpp

Lines changed: 373 additions & 125 deletions
Large diffs are not rendered by default.

‎src/providers/delimitedtext/qgsdelimitedtextprovider.h

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
* *
1616
***************************************************************************/
1717

18+
#ifndef QGSDELIMITEDTEXTPROVIDER_H
19+
#define QGSDELIMITEDTEXTPROVIDER_H
1820

1921
#include "qgsvectordataprovider.h"
2022
#include "qgscoordinatereferencesystem.h"
23+
#include "qgsdelimitedtextfile.h"
2124

2225
#include <QStringList>
2326

@@ -29,8 +32,8 @@ class QFile;
2932
class QTextStream;
3033

3134
class QgsDelimitedTextFeatureIterator;
32-
class QgsDelimitedTextFile;
3335
class QgsExpression;
36+
class QgsSpatialIndex;
3437

3538
/**
3639
\class QgsDelimitedTextProvider
@@ -47,7 +50,8 @@ class QgsExpression;
4750
* Example uri = "/home/foo/delim.txt?delimiter=|"*
4851
*
4952
* For detailed information on the uri format see the QGSVectorLayer
50-
* documentation.
53+
* documentation. Note that the interpretation of the URI is split
54+
* between QgsDelimitedTextFile and QgsDelimitedTextProvider.
5155
*
5256
5357
*/
@@ -64,6 +68,13 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
6468
static QRegExp WktPrefixRegexp;
6569
static QRegExp CrdDmsRegexp;
6670

71+
enum GeomRepresentationType
72+
{
73+
GeomNone,
74+
GeomAsXy,
75+
GeomAsWkt
76+
};
77+
6778
QgsDelimitedTextProvider( QString uri = QString() );
6879

6980
virtual ~QgsDelimitedTextProvider();
@@ -102,6 +113,10 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
102113
*/
103114
virtual int capabilities() const;
104115

116+
/** Creates a spatial index on the data
117+
* @return indexCreated Returns true if a spatial index is created
118+
*/
119+
virtual bool createSpatialIndex();
105120

106121
/* Implementation of functions from QgsDataProvider */
107122

@@ -186,23 +201,30 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
186201
*/
187202
bool boundsCheck( QgsGeometry *geom );
188203

204+
private slots:
205+
206+
void onFileUpdated();
207+
189208
private:
190209

191210
static QRegExp WktZMRegexp;
192211
static QRegExp WktCrdRegexp;
193212

213+
void scanFile( bool buildIndexes );
214+
void rescanFile();
215+
void resetCachedSubset();
216+
void resetIndexes();
194217
void clearInvalidLines();
195218
void recordInvalidLine( QString message );
196-
void reportErrors( QStringList messages = QStringList() );
219+
void reportErrors( QStringList messages = QStringList(), bool showDialog = true );
197220
void resetStream();
198221
bool recordIsEmpty( QStringList &record );
199-
bool nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, const QgsFeatureRequest& request );
200-
QgsGeometry* loadGeometryWkt( const QStringList& tokens, const QgsFeatureRequest& request );
201-
QgsGeometry* loadGeometryXY( const QStringList& tokens, const QgsFeatureRequest& request );
202-
bool boundsCheck( const QgsPoint &pt, const QgsFeatureRequest& request );
203-
bool boundsCheck( QgsGeometry *geom, const QgsFeatureRequest& request );
222+
bool nextFeature( QgsFeature& feature, QgsDelimitedTextFile *file, QgsDelimitedTextFeatureIterator *iterator );
223+
QgsGeometry* loadGeometryWkt( const QStringList& tokens, QgsDelimitedTextFeatureIterator *iterator );
224+
QgsGeometry* loadGeometryXY( const QStringList& tokens, QgsDelimitedTextFeatureIterator *iterator );
204225
void fetchAttribute( QgsFeature& feature, int fieldIdx, const QStringList& tokens );
205-
void resetDataSummary();
226+
void setUriParameter( QString parameter, QString value );
227+
bool setNextFeatureId( qint64 fid ) { return mFile->setNextRecordId( (long) fid ); }
206228

207229

208230
QgsGeometry *geomFromWkt( QString &sWkt );
@@ -216,10 +238,15 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
216238
QgsDelimitedTextFile *mFile;
217239

218240
// Fields
241+
GeomRepresentationType mGeomRep;
219242
QList<int> attributeColumns;
220243
QgsFields attributeFields;
221244

222245
int mFieldCount; // Note: this includes field count for wkt field
246+
QString mWktFieldName;
247+
QString mXFieldName;
248+
QString mYFieldName;
249+
223250
int mXFieldIndex;
224251
int mYFieldIndex;
225252
int mWktFieldIndex;
@@ -246,7 +273,12 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
246273
bool mXyDms;
247274

248275
QString mSubsetString;
276+
QString mCachedSubsetString;
249277
QgsExpression *mSubsetExpression;
278+
bool mBuildSubsetIndex;
279+
QList<quintptr> mSubsetIndex;
280+
bool mUseSubsetIndex;
281+
bool mCachedUseSubsetIndex;
250282

251283
//! Storage for any lines in the file that couldn't be loaded
252284
int mMaxInvalidLines;
@@ -270,6 +302,14 @@ class QgsDelimitedTextProvider : public QgsVectorDataProvider
270302
QGis::WkbType mWkbType;
271303
QGis::GeometryType mGeometryType;
272304

305+
// Spatial index
306+
bool mBuildSpatialIndex;
307+
bool mUseSpatialIndex;
308+
bool mCachedUseSpatialIndex;
309+
QgsSpatialIndex *mSpatialIndex;
310+
273311
friend class QgsDelimitedTextFeatureIterator;
274312
QgsDelimitedTextFeatureIterator* mActiveIterator;
275313
};
314+
315+
#endif

‎tests/src/python/test_qgsdelimitedtextprovider.py

Lines changed: 253 additions & 1375 deletions
Large diffs are not rendered by default.

‎tests/src/python/test_qgsdelimitedtextprovider_wanted.py

Lines changed: 2030 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.