Skip to content

Commit

Permalink
Fix tests and type detection
Browse files Browse the repository at this point in the history
  • Loading branch information
elpaso authored and nyalldawson committed Nov 29, 2021
1 parent 062f3c5 commit 8f6ea6e
Show file tree
Hide file tree
Showing 14 changed files with 304 additions and 49 deletions.
18 changes: 18 additions & 0 deletions src/providers/delimitedtext/qgsdelimitedtextfeatureiterator.cpp
Expand Up @@ -516,6 +516,23 @@ void QgsDelimitedTextFeatureIterator::fetchAttribute( QgsFeature &feature, int f
QVariant val;
switch ( mSource->mFields.at( fieldIdx ).type() )
{
case QVariant::Bool:
{
Q_ASSERT( mSource->mFieldBooleanLiterals.contains( fieldIdx ) );
if ( value.compare( mSource->mFieldBooleanLiterals[ fieldIdx ].first, Qt::CaseSensitivity::CaseInsensitive ) == 0 )
{
val = true;
}
else if ( value.compare( mSource->mFieldBooleanLiterals[ fieldIdx ].second, Qt::CaseSensitivity::CaseInsensitive ) == 0 )
{
val = false;
}
else
{
val = QVariant( QVariant::Bool );
}
break;
}
case QVariant::Int:
{
int ivalue = 0;
Expand Down Expand Up @@ -599,6 +616,7 @@ QgsDelimitedTextFeatureSource::QgsDelimitedTextFeatureSource( const QgsDelimited
, mXyDms( p->mXyDms )
, attributeColumns( p->attributeColumns )
, mCrs( p->mCrs )
, mFieldBooleanLiterals( p->mFieldBooleanLiterals )
{
QUrl url = p->mFile->url();

Expand Down
Expand Up @@ -53,6 +53,7 @@ class QgsDelimitedTextFeatureSource final: public QgsAbstractFeatureSource
bool mXyDms;
QList<int> attributeColumns;
QgsCoordinateReferenceSystem mCrs;
QMap<int, QPair<QString, QString>> mFieldBooleanLiterals;

friend class QgsDelimitedTextFeatureIterator;
};
Expand Down
107 changes: 85 additions & 22 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.cpp
Expand Up @@ -69,6 +69,7 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( const QString &uri, const Pr
<< QgsVectorDataProvider::NativeType( tr( "Whole number (integer)" ), QStringLiteral( "integer" ), QVariant::Int, 0, 10 )
<< QgsVectorDataProvider::NativeType( tr( "Whole number (integer - 64 bit)" ), QStringLiteral( "integer64" ), QVariant::LongLong )
<< QgsVectorDataProvider::NativeType( tr( "Decimal number (double)" ), QStringLiteral( "double" ), QVariant::Double, -1, -1, -1, -1 )
<< QgsVectorDataProvider::NativeType( tr( "Boolean" ), QStringLiteral( "bool" ), QVariant::Bool, -1, -1, -1, -1 )
<< QgsVectorDataProvider::NativeType( tr( "Text, unlimited length (text)" ), QStringLiteral( "text" ), QVariant::String, -1, -1, -1, -1 )

// date type
Expand Down Expand Up @@ -158,7 +159,7 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( const QString &uri, const Pr

if ( query.hasQueryItem( QStringLiteral( "quiet" ) ) ) mShowInvalidLines = false;

// Parse and store user-defined field types
// Parse and store user-defined field types and boolean literals
const auto queryItems { query.queryItems( QUrl::ComponentFormattingOption::FullyDecoded ) };
for ( const auto &queryItem : std::as_const( queryItems ) )
{
Expand All @@ -172,6 +173,14 @@ QgsDelimitedTextProvider::QgsDelimitedTextProvider( const QString &uri, const Pr
}
}

// Parse and store custom boolean literals
if ( query.hasQueryItem( QStringLiteral( "booleanTrue" ) ) && query.hasQueryItem( QStringLiteral( "booleanFalse" ) ) )
{
mUserDefinedBooleanLiterals = qMakePair<QString, QString>(
query.queryItemValue( QStringLiteral( "booleanTrue" ), QUrl::ComponentFormattingOption::FullyDecoded ),
query.queryItemValue( QStringLiteral( "booleanFalse" ), QUrl::ComponentFormattingOption::FullyDecoded ) );
}

// Do an initial scan of the file to determine field names, types,
// geometry type (for Wkt), extents, etc. Parameter value subset.isEmpty()
// avoid redundant building indexes if we will be building a subset string,
Expand Down Expand Up @@ -425,14 +434,16 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
QList<bool> couldBeDateTime;
QList<bool> couldBeDate;
QList<bool> couldBeTime;
QList<bool> couldBeBool;

bool foundFirstGeometry = false;
QMap<int, QPair<QString, QString>> boolCandidates;
const QList<QPair<QString, QString>> boolLiterals { booleanLiterals() };

while ( true )
{
if ( feedback && feedback->isCanceled() )
{
qDebug() << "Task was canceled";
break;
}
const QgsDelimitedTextFile::Status status = mFile->nextRecord( parts );
Expand Down Expand Up @@ -621,9 +632,10 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
couldBeDateTime.append( false );
couldBeDate.append( false );
couldBeTime.append( false );
couldBeBool.append( false );
}

// If this column has been empty so far then initiallize it
// If this column has been empty so far then initialize it
// for possible types

if ( isEmpty[i] )
Expand All @@ -635,6 +647,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
couldBeDateTime[i] = true;
couldBeDate[i] = true;
couldBeTime[i] = true;
couldBeBool[i] = true;
}

if ( ! mDetectTypes )
Expand All @@ -645,6 +658,31 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
// Now test for still valid possible types for the field
// Types are possible until first record which cannot be parsed

if ( couldBeBool[i] )
{
couldBeBool[i] = false;
if ( ! boolCandidates.contains( i ) )
{
boolCandidates[ i ] = QPair<QString, QString>();
}
if ( ! boolCandidates[i].first.isEmpty() )
{
couldBeBool[i] = value.compare( boolCandidates[i].first, Qt::CaseSensitivity::CaseInsensitive ) == 0 || value.compare( boolCandidates[i].second, Qt::CaseSensitivity::CaseInsensitive ) == 0;
}
else
{
for ( const auto &bc : std::as_const( boolLiterals ) )
{
if ( value.compare( bc.first, Qt::CaseSensitivity::CaseInsensitive ) == 0 || value.compare( bc.second, Qt::CaseSensitivity::CaseInsensitive ) == 0 )
{
boolCandidates[i] = bc;
couldBeBool[i] = true;
break;
}
}
}
}

if ( couldBeInt[i] )
{
( void )value.toInt( &couldBeInt[i] );
Expand Down Expand Up @@ -725,8 +763,8 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
}
}

// In case of fast scan we exit after the first record
if ( ! forceFullScan && mReadFlags.testFlag( ReadFlag::SkipFullScan ) )
// In case of fast scan we exit after the third record (to avoid detecting booleans)
if ( ! forceFullScan && mReadFlags.testFlag( ReadFlag::SkipFullScan ) && mNumberFeatures > 2 )
{
break;
}
Expand All @@ -749,63 +787,72 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
QgsDebugMsgLevel( QStringLiteral( "Reading CSVT: %1" ).arg( mFile->fileName() ), 2 );
QStringList csvtTypes = readCsvtFieldTypes( mFile->fileName(), &csvtMessage );

for ( int i = 0; i < fieldNames.size(); i++ )
for ( int fieldIdx = 0; fieldIdx < fieldNames.size(); fieldIdx++ )
{
// Skip over WKT field ... don't want to display in attribute table
if ( i == mWktFieldIndex )
if ( fieldIdx == mWktFieldIndex )
continue;

// Add the field index lookup for the column
attributeColumns.append( i );
attributeColumns.append( fieldIdx );
QVariant::Type fieldType = QVariant::String;
QString typeName = QStringLiteral( "text" );

// User-defined types take precedence over all
if ( ! mUserDefinedFieldTypes.value( fieldNames[ i ] ).isEmpty() )
if ( ! mUserDefinedFieldTypes.value( fieldNames[ fieldIdx ] ).isEmpty() )
{
typeName = mUserDefinedFieldTypes.value( fieldNames[ i ] );
typeName = mUserDefinedFieldTypes.value( fieldNames[ fieldIdx ] );
}
else
{
if ( i < csvtTypes.size() )
if ( fieldIdx < csvtTypes.size() )
{
typeName = csvtTypes[i];
typeName = csvtTypes[fieldIdx];
}
else if ( mDetectTypes && i < couldBeInt.size() )
else if ( mDetectTypes && fieldIdx < couldBeInt.size() )
{
if ( couldBeInt[i] )
if ( couldBeBool[fieldIdx] )
{
typeName = QStringLiteral( "bool" );
}
else if ( couldBeInt[fieldIdx] )
{
typeName = QStringLiteral( "integer" );
}
else if ( couldBeLongLong[i] )
else if ( couldBeLongLong[fieldIdx] )
{
typeName = QStringLiteral( "integer64" );
}
else if ( couldBeDouble[i] )
else if ( couldBeDouble[fieldIdx] )
{
typeName = QStringLiteral( "double" );
}
else if ( couldBeDateTime[i] )
else if ( couldBeDateTime[fieldIdx] )
{
typeName = QStringLiteral( "datetime" );
}
else if ( couldBeDate[i] )
else if ( couldBeDate[fieldIdx] )
{
typeName = QStringLiteral( "date" );
}
else if ( couldBeTime[i] )
else if ( couldBeTime[fieldIdx] )
{
typeName = QStringLiteral( "time" );
}
}
}

if ( typeName == QLatin1String( "integer" ) || typeName == QLatin1String( "int8" ) )
if ( typeName == QLatin1String( "bool" ) )
{
fieldType = QVariant::Bool;
mFieldBooleanLiterals.insert( fieldIdx, boolCandidates[fieldIdx] );
}
else if ( typeName == QLatin1String( "integer" ) || typeName == QLatin1String( "int8" ) )
{
typeName = QLatin1String( "integer" );
fieldType = QVariant::Int;
}
else if ( typeName == QLatin1String( "longlong" ) || typeName == QLatin1String( "long" ) )
else if ( typeName == QLatin1String( "integer64" ) || typeName == QLatin1String( "longlong" ) || typeName == QLatin1String( "long" ) )
{
typeName = QLatin1String( "longlong" );
fieldType = QVariant::LongLong;
Expand All @@ -832,7 +879,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes, bool forceFullScan,
typeName = QStringLiteral( "text" );
}

attributeFields.append( QgsField( fieldNames[i], fieldType, typeName ) );
attributeFields.append( QgsField( fieldNames[fieldIdx], fieldType, typeName ) );
}

QgsDebugMsgLevel( "Field count for the delimited text file is " + QString::number( attributeFields.size() ), 2 );
Expand Down Expand Up @@ -1019,6 +1066,22 @@ void QgsDelimitedTextProvider::appendZM( QString &sZ, QString &sM, QgsPoint &poi
}
}

/**
 * Returns the list of (true literal, false literal) pairs that are recognised
 * as boolean values.
 *
 * The four built-in pairs always come first, in a fixed order. The
 * user-defined pair stored in mUserDefinedBooleanLiterals is appended after
 * them, but only when its "true" literal is not empty.
 */
QList<QPair<QString, QString> > QgsDelimitedTextProvider::booleanLiterals() const
{
  using LiteralPair = QPair<QString, QString>;

  // Built-in pairs; the order of insertion is the order of the returned list
  QList<LiteralPair> literalPairs;
  literalPairs << LiteralPair( QStringLiteral( "true" ), QStringLiteral( "false" ) )
               << LiteralPair( QStringLiteral( "t" ), QStringLiteral( "f" ) )
               << LiteralPair( QStringLiteral( "yes" ), QStringLiteral( "no" ) )
               << LiteralPair( QStringLiteral( "1" ), QStringLiteral( "0" ) );

  // An empty "true" literal is treated as "no user-defined pair"
  const bool hasUserDefinedPair = ! mUserDefinedBooleanLiterals.first.isEmpty();
  if ( hasUserDefinedPair )
  {
    literalPairs << mUserDefinedBooleanLiterals;
  }

  return literalPairs;
}

bool QgsDelimitedTextProvider::pointFromXY( QString &sX, QString &sY, QgsPoint &pt, const QString &decimalPoint, bool xyDms )
{
if ( ! decimalPoint.isEmpty() )
Expand Down
5 changes: 5 additions & 0 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.h
Expand Up @@ -168,6 +168,8 @@ class QgsDelimitedTextProvider final: public QgsVectorDataProvider
static bool pointFromXY( QString &sX, QString &sY, QgsPoint &point, const QString &decimalPoint, bool xyDms );
static void appendZM( QString &sZ, QString &sM, QgsPoint &point, const QString &decimalPoint );

QList<QPair<QString, QString>> booleanLiterals() const;

// mLayerValid defines whether the layer has been loaded as a valid layer
bool mLayerValid = false;
// mValid defines whether the layer is currently valid (may differ from
Expand Down Expand Up @@ -243,6 +245,9 @@ class QgsDelimitedTextProvider final: public QgsVectorDataProvider
// Store user-defined column types (i.e. types that are not automatically determined)
QgsStringMap mUserDefinedFieldTypes;

QPair<QString, QString> mUserDefinedBooleanLiterals;
QMap<int, QPair<QString, QString>> mFieldBooleanLiterals;

friend class QgsDelimitedTextFeatureIterator;
friend class QgsDelimitedTextFeatureSource;
};
Expand Down
27 changes: 17 additions & 10 deletions src/providers/delimitedtext/qgsdelimitedtextsourceselect.cpp
Expand Up @@ -138,6 +138,8 @@ void QgsDelimitedTextSourceSelect::addButtonClicked()
return;
}

cancelScanTask();

//Build the delimited text URI from the user provided information
const QString datasourceUrl { url( )};

Expand Down Expand Up @@ -506,16 +508,7 @@ void QgsDelimitedTextSourceSelect::updateFieldLists()

// Run the scan in a separate thread

// This will cancel the existing task (if any)
if ( mScanTaskId > 0 )
{
QgsDelimitedTextFileScanTask *task { qobject_cast<QgsDelimitedTextFileScanTask *>( QgsApplication::taskManager()->task( mScanTaskId ) ) };
if ( task )
{
task->cancel();
}
mScanTaskId = -1;
}
cancelScanTask();

QgsDelimitedTextFileScanTask *newTask { new QgsDelimitedTextFileScanTask( url( /* skip overriden types */ true ) ) };
mCancelButton->show();
Expand Down Expand Up @@ -932,6 +925,20 @@ QString QgsDelimitedTextSourceSelect::url( bool skipOverriddenTypes )
return QString::fromLatin1( url.toEncoded() );
}

void QgsDelimitedTextSourceSelect::cancelScanTask()
{
// This will cancel the existing task (if any)
if ( mScanTaskId > 0 )
{
QgsDelimitedTextFileScanTask *task { qobject_cast<QgsDelimitedTextFileScanTask *>( QgsApplication::taskManager()->task( mScanTaskId ) ) };
if ( task )
{
task->cancel();
}
mScanTaskId = -1;
}
}

bool QgsDelimitedTextFileScanTask::run()
{
QgsDelimitedTextProvider provider(
Expand Down
1 change: 1 addition & 0 deletions src/providers/delimitedtext/qgsdelimitedtextsourceselect.h
Expand Up @@ -122,6 +122,7 @@ class QgsDelimitedTextSourceSelect : public QgsAbstractDataSourceWidget, private
void showHelp();
void showCrsWidget();
QString url( bool skipOverriddenTypes = false );
void cancelScanTask();

public slots:
void addButtonClicked() override;
Expand Down
1 change: 1 addition & 0 deletions tests/src/python/CMakeLists.txt
Expand Up @@ -71,6 +71,7 @@ ADD_PYTHON_TEST(PyQgsDateTimeEdit test_qgsdatetimeedit.py)
ADD_PYTHON_TEST(PyQgsDateTimeStatisticalSummary test_qgsdatetimestatisticalsummary.py)
ADD_PYTHON_TEST(PyQgsDatumTransform test_qgsdatumtransforms.py)
ADD_PYTHON_TEST(PyQgsDelimitedTextProvider test_qgsdelimitedtextprovider.py)
ADD_PYTHON_TEST(PyQgsDelimitedTextProviderTypesOverride test_qgsdelimitedtextprovider_types_override.py)
ADD_PYTHON_TEST(PyQgsDistanceArea test_qgsdistancearea.py)
ADD_PYTHON_TEST(PyQgsEditFormConfig test_qgseditformconfig.py)
ADD_PYTHON_TEST(PyQgsEditWidgets test_qgseditwidgets.py)
Expand Down

0 comments on commit 8f6ea6e

Please sign in to comment.