Skip to content

Commit

Permalink
Fix type detection and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
elpaso authored and nyalldawson committed Nov 29, 2021
1 parent 8c62308 commit c1932e0
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 7 deletions.
11 changes: 7 additions & 4 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.cpp
Expand Up @@ -223,7 +223,7 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( const QString &filenam
// not allowed in OGR CSVT files. Also doesn't care if int and string fields have

strTypeList = strTypeList.toLower();
const QRegularExpression reTypeList( QRegularExpression::anchoredPattern( QStringLiteral( "^(?:\\s*(\\\"?)(?:integer|real|double|long|longlong|int8|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" ) ) );
const QRegularExpression reTypeList( QRegularExpression::anchoredPattern( QStringLiteral( "^(?:\\s*(\\\"?)(?:integer|real|double|longlong|long|int8|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" ) ) );
const QRegularExpressionMatch match = reTypeList.match( strTypeList );
if ( !match.hasMatch() )
{
Expand All @@ -237,7 +237,7 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( const QString &filenam
QgsDebugMsgLevel( QStringLiteral( "Field type string: %1" ).arg( strTypeList ), 2 );

int pos = 0;
const QRegularExpression reType( QStringLiteral( "(integer|longlong|real|double|string|date|datetime|time)" ) );
const QRegularExpression reType( QStringLiteral( R"re((int8|integer|longlong|\blong\b|real|double|string|\bdate\b|datetime|\btime\b))re" ) );
QRegularExpressionMatch typeMatch = reType.match( strTypeList, pos );
while ( typeMatch.hasMatch() )
{
Expand Down Expand Up @@ -707,6 +707,7 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
attributeFields.clear();

QString csvtMessage;
QgsDebugMsgLevel( QStringLiteral( "Reading CSVT: %1" ).arg( mFile->fileName() ), 2 );
QStringList csvtTypes = readCsvtFieldTypes( mFile->fileName(), &csvtMessage );

for ( int i = 0; i < fieldNames.size(); i++ )
Expand Down Expand Up @@ -751,12 +752,14 @@ void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
}
}

if ( typeName == QLatin1String( "integer" ) )
if ( typeName == QLatin1String( "integer" ) || typeName == QLatin1String( "int8" ) )
{
typeName = QLatin1String( "integer" );
fieldType = QVariant::Int;
}
else if ( typeName == QLatin1String( "longlong" ) )
else if ( typeName == QLatin1String( "longlong" ) || typeName == QLatin1String( "long" ) )
{
typeName = QLatin1String( "longlong" );
fieldType = QVariant::LongLong;
}
else if ( typeName == QLatin1String( "real" ) || typeName == QLatin1String( "double" ) )
Expand Down
64 changes: 63 additions & 1 deletion tests/src/python/test_qgsdelimitedtextprovider.py
Expand Up @@ -34,7 +34,7 @@

rebuildTests = 'REBUILD_DELIMITED_TEXT_TESTS' in os.environ

from qgis.PyQt.QtCore import QCoreApplication, QVariant, QUrl, QObject
from qgis.PyQt.QtCore import QCoreApplication, QVariant, QUrl, QObject, QTemporaryDir

from qgis.core import (
QgsProviderRegistry,
Expand Down Expand Up @@ -222,6 +222,8 @@ def setUpClass(cls):
"""Run before all tests"""
# toggle full ctest output to debug flaky CI test
print('CTEST_FULL_OUTPUT')
cls.tmp_dir = QTemporaryDir()
cls.tmp_path = cls.tmp_dir.path()

def layerData(self, layer, request={}, offset=0):
# Retrieve the data for a layer
Expand Down Expand Up @@ -1031,6 +1033,66 @@ def testSaturationOfWorkingBuffer(self):
finally:
del os.environ['QGIS_DELIMITED_TEXT_FILE_BUFFER_SIZE']

def _run_test(self, csv_content, csvt_content='', uri_options=''):
    """Write a temporary CSV (and optional CSVT) file and load it as a layer.

    :param csv_content: text written verbatim to the ``.csv`` file
    :param csvt_content: text written to a ``.csvt`` file sharing the CSV's
        basename; no such file is created when this is empty
    :param uri_options: query string appended to the layer URI after ``?``
        when non-empty
    :return: the QgsVectorLayer built with the ``delimitedtext`` provider
        (validity is not checked here; callers assert it)
    """

    # Per-instance counter so each call gets its own file basename
    # (presumably so a .csvt from an earlier call is never paired with a
    # later CSV -- confirm against the provider's sidecar lookup).
    # Only the "attribute not set yet" case is expected on first use, so
    # catch just AttributeError instead of swallowing every exception.
    try:
        self.__text_index += 1
    except AttributeError:
        self.__text_index = 1

    basename = 'test_type_detection_{}'.format(self.__text_index)

    csv_file = os.path.join(self.tmp_path, basename + '.csv')
    with open(csv_file, 'w+') as f:
        f.write(csv_content)

    if csvt_content:
        csvt_file = os.path.join(self.tmp_path, basename + '.csvt')
        with open(csvt_file, 'w+') as f:
            f.write(csvt_content)

    uri = 'file:///{}'.format(csv_file)
    if uri_options:
        uri += '?{}'.format(uri_options)

    vl = QgsVectorLayer(uri, 'test_{}'.format(basename), 'delimitedtext')
    return vl

def test_type_detection_csvt(self):
    """Check the field types reported for a CSV with and without a CSVT file."""

    csv_data = "f1,f2,f3,f4,f5\n1,1,1,\"1\",1\n"
    field_names = ('f1', 'f2', 'f3', 'f4', 'f5')

    # Expected (QVariant type, provider type name) pairs
    int_type = (QVariant.Int, 'integer')
    longlong_type = (QVariant.LongLong, 'longlong')
    double_type = (QVariant.Double, 'double')
    text_type = (QVariant.String, 'text')

    scenarios = [
        # CSVT lists a type for every column
        ({'csvt_content': "Integer,Longlong,Real,String,Real\n"},
         [int_type, longlong_type, double_type, text_type, double_type]),
        # Missing last field in CSVT
        ({'csvt_content': "Integer,Long,Real,String\n"},
         [int_type, longlong_type, double_type, text_type, int_type]),
        # No CSVT and detectTypes=no
        ({'uri_options': 'detectTypes=no'},
         [text_type, text_type, text_type, text_type, text_type]),
    ]

    for run_kwargs, expected_types in scenarios:
        layer = self._run_test(csv_data, **run_kwargs)
        self.assertTrue(layer.isValid())

        detected = {}
        for field in layer.fields():
            detected[field.name()] = (field.type(), field.typeName())

        self.assertEqual(detected, dict(zip(field_names, expected_types)))


if __name__ == '__main__':
unittest.main()
12 changes: 10 additions & 2 deletions tests/src/python/test_qgsdelimitedtextprovider_wanted.py
Expand Up @@ -2110,9 +2110,14 @@ def test_033_reset_subset_string():


def test_034_csvt_file():
"""In the test file we have two rows with 11 and 12 fields, the CSV lists only 11 headers:
id,description,fint,freal,fstr,fstr,fdatetime,fdate,ftime,flong,flonglong
The CSVT contains 11 field types (note "long" which is not supported but interpreted as an alias for "longlong"):
integer,string,integer,real,string,string,string,string,string,long,longlong
"""
wanted = {}
wanted['uri'] = 'file://testcsvt.csv?geomType=none&type=csv'
wanted['fieldTypes'] = ['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong']
wanted['fieldTypes'] = ['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong', 'longlong']
wanted['geometryType'] = 4
wanted['data'] = {
2: {
Expand Down Expand Up @@ -2546,9 +2551,12 @@ def test_041_no_detect_type():


def test_042_no_detect_types_csvt():
"""detectTypes is no, the types are taken from the CSVT except the last one (which is not in the CSVT
and it is not detected)"""

wanted = {}
wanted['uri'] = 'file://testcsvt.csv?geomType=none&type=csv&detectTypes=no'
wanted['fieldTypes'] = ['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text']
wanted['fieldTypes'] = ['integer', 'text', 'integer', 'double', 'text', 'text', 'text', 'text', 'text', 'longlong', 'longlong', 'text']
wanted['geometryType'] = 4
wanted['data'] = {
2: {
Expand Down

0 comments on commit c1932e0

Please sign in to comment.