Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[WFS] fix unknown expat encoding issues
  • Loading branch information
troopa81 committed Aug 19, 2021
1 parent 7955196 commit 2f6cd43
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 15 deletions.
2 changes: 0 additions & 2 deletions python/core/auto_generated/qgsgml.sip.in
Expand Up @@ -39,7 +39,6 @@ request is finished
const QString &authcfg = QString() ) /PyName=getFeaturesUri/;
%Docstring
Does the Http GET request to the wfs server
Supports only UTF-8, UTF-16, ISO-8859-1, ISO-8859-1 XML encodings.

:param uri: GML URL
:param wkbType: wkbType to retrieve
Expand All @@ -58,7 +57,6 @@ Supports only UTF-8, UTF-16, ISO-8859-1, ISO-8859-1 XML encodings.
int getFeatures( const QByteArray &data, QgsWkbTypes::Type *wkbType, QgsRectangle *extent = 0 );
%Docstring
Read from GML data. Constructor uri param is ignored
Supports only UTF-8, UTF-16, ISO-8859-1, ISO-8859-1 XML encodings.
%End

QMap<QgsFeatureId, QgsFeature * > featuresMap() const;
Expand Down
55 changes: 45 additions & 10 deletions src/core/qgsgml.cpp
Expand Up @@ -31,6 +31,8 @@
#include <QSet>
#include <QSettings>
#include <QUrl>
#include <QTextCodec>
#include <QRegularExpression>

#include "ogr_api.h"

Expand Down Expand Up @@ -321,10 +323,7 @@ QgsGmlStreamingParser::QgsGmlStreamingParser( const QString &typeName,
mTypeNameUTF8Len = strlen( mTypeNamePtr );
}

mParser = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
XML_SetUserData( mParser, this );
XML_SetElementHandler( mParser, QgsGmlStreamingParser::start, QgsGmlStreamingParser::end );
XML_SetCharacterDataHandler( mParser, QgsGmlStreamingParser::chars );
createParser();
}

static QString stripNS( const QString &string )
Expand Down Expand Up @@ -412,10 +411,7 @@ QgsGmlStreamingParser::QgsGmlStreamingParser( const QList<LayerProperties> &laye

mEndian = QgsApplication::endian();

mParser = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
XML_SetUserData( mParser, this );
XML_SetElementHandler( mParser, QgsGmlStreamingParser::start, QgsGmlStreamingParser::end );
XML_SetCharacterDataHandler( mParser, QgsGmlStreamingParser::chars );
createParser();
}


Expand Down Expand Up @@ -444,11 +440,40 @@ bool QgsGmlStreamingParser::processData( const QByteArray &data, bool atEnd )
return true;
}

bool QgsGmlStreamingParser::processData( const QByteArray &data, bool atEnd, QString &errorMsg )
bool QgsGmlStreamingParser::processData( const QByteArray &pdata, bool atEnd, QString &errorMsg )
{
if ( XML_Parse( mParser, data.data(), data.size(), atEnd ) == 0 )
QByteArray data = pdata;

if ( mCodec )
{
// convert data to UTF-8
QString strData = mCodec->toUnicode( pdata );
data = strData.toUtf8();
}

if ( XML_Parse( mParser, data, data.size(), atEnd ) == XML_STATUS_ERROR )
{
XML_Error errorCode = XML_GetErrorCode( mParser );
if ( !mCodec && errorCode == XML_ERROR_UNKNOWN_ENCODING )
{
// Specified encoding is unknown, Expat only accepts UTF-8, UTF-16, ISO-8859-1
// Try to get encoding string and convert data to utf-8
QRegularExpression reEncoding( QStringLiteral( "<?xml.*encoding=['\"]([^'\"]*)['\"].*?>" ),
QRegularExpression::CaseInsensitiveOption );
QRegularExpressionMatch match = reEncoding.match( pdata );
const QString encoding = match.hasMatch() ? match.captured( 1 ) : QString();
mCodec = !encoding.isEmpty() ? QTextCodec::codecForName( encoding.toLatin1() ) : nullptr;
if ( mCodec )
{
// recreate parser with UTF-8 encoding
XML_ParserFree( mParser );
mParser = nullptr;
createParser( QByteArrayLiteral( "UTF-8" ) );

return processData( data, atEnd, errorMsg );
}
}

errorMsg = QObject::tr( "Error: %1 on line %2, column %3" )
.arg( XML_ErrorString( errorCode ) )
.arg( XML_GetCurrentLineNumber( mParser ) )
Expand Down Expand Up @@ -1561,3 +1586,13 @@ int QgsGmlStreamingParser::totalWKBFragmentSize() const
}
return result;
}

/**
 * Instantiates the namespace-aware Expat parser and registers this object's
 * static callbacks on it.
 *
 * \param encoding encoding name handed to Expat; when empty, nullptr is passed
 *                 instead so that no explicit encoding is requested
 *
 * Must only be called while mParser is null (asserted below).
 */
void QgsGmlStreamingParser::createParser( const QByteArray &encoding )
{
  // Any previous parser has to be freed and reset by the caller beforehand
  Q_ASSERT( !mParser );

  // Translate "no encoding given" into the nullptr expected by XML_ParserCreateNS
  const char *encodingName = nullptr;
  if ( !encoding.isEmpty() )
  {
    encodingName = encoding.data();
  }

  mParser = XML_ParserCreateNS( encodingName, NS_SEPARATOR );

  // Route parser events back to this instance through the static handlers
  XML_SetCharacterDataHandler( mParser, QgsGmlStreamingParser::chars );
  XML_SetElementHandler( mParser, QgsGmlStreamingParser::start, QgsGmlStreamingParser::end );
  XML_SetUserData( mParser, this );
}
10 changes: 7 additions & 3 deletions src/core/qgsgml.h
Expand Up @@ -33,6 +33,7 @@
#include <string>

class QgsCoordinateReferenceSystem;
class QTextCodec;

#ifndef SIP_RUN

Expand Down Expand Up @@ -253,8 +254,11 @@ class CORE_EXPORT QgsGmlStreamingParser
//! Safely (if empty) pop from mode stack
ParseMode modeStackPop() { return mParseModeStack.isEmpty() ? None : mParseModeStack.pop(); }

//! Creates the Expat parser, using the specified encoding if one is given
void createParser( const QByteArray &encoding = QByteArray() );

//! Expat parser
XML_Parser mParser;
XML_Parser mParser = nullptr;

//! List of (feature, gml_id) pairs
QVector<QgsGmlFeaturePtrGmlIdPair> mFeatureList;
Expand Down Expand Up @@ -344,6 +348,8 @@ class CORE_EXPORT QgsGmlStreamingParser
std::string mGeometryString;
//! Whether we found an unhandled geometry element
bool mFoundUnhandledGeometryElement;
//! Text codec used to read data whose encoding is not supported by Expat
QTextCodec *mCodec = nullptr;
};

#endif
Expand All @@ -368,7 +374,6 @@ class CORE_EXPORT QgsGml : public QObject

/**
* Does the Http GET request to the wfs server
* Supports only UTF-8, UTF-16, ISO-8859-1, ISO-8859-1 XML encodings.
* \param uri GML URL
* \param wkbType wkbType to retrieve
* \param extent retrieved extents
Expand All @@ -387,7 +392,6 @@ class CORE_EXPORT QgsGml : public QObject

/**
* Read from GML data. Constructor uri param is ignored
* Supports only UTF-8, UTF-16, ISO-8859-1, ISO-8859-1 XML encodings.
*/
int getFeatures( const QByteArray &data, QgsWkbTypes::Type *wkbType, QgsRectangle *extent = nullptr );

Expand Down
70 changes: 70 additions & 0 deletions tests/src/core/testqgsgml.cpp
Expand Up @@ -17,6 +17,7 @@
#include "qgstest.h"
#include <QUrl>
#include <QTemporaryFile>
#include <QTextCodec>

//qgis includes...
#include <qgsgeometry.h>
Expand Down Expand Up @@ -82,6 +83,8 @@ class TestQgsGML : public QObject
void testThroughOGRGeometry_urn_EPSG_4326();
void testAccents();
void testSameTypeameAsGeomName();
void testUnknownEncoding_data();
void testUnknownEncoding();
};

const QString data1( "<myns:FeatureCollection "
Expand Down Expand Up @@ -1273,5 +1276,72 @@ void TestQgsGML::testSameTypeameAsGeomName()
delete features[0].first;
}

void TestQgsGML::testUnknownEncoding_data()
{
QTest::addColumn<QString>( "xmlHeader" );
QTest::addColumn<QByteArray>( "encoding" );

QTest::newRow( "simple quote" ) << QStringLiteral( "<?xml version='1.0' encoding='ISO-8859-15'?>" ) << QByteArrayLiteral( "ISO-8859-15" );
QTest::newRow( "double quote" ) << QStringLiteral( "<?xml version='1.0' encoding=\"ISO-8859-15\"?>" ) << QByteArrayLiteral( "ISO-8859-15" );
QTest::newRow( "UTF-8" ) << QStringLiteral( "<?xml version='1.0' encoding=\"UTF-8\"?>" ) << QByteArrayLiteral( "UTF-8" );
QTest::newRow( "No header" ) << QString() << QByteArrayLiteral( "UTF-8" );
}

void TestQgsGML::testUnknownEncoding()
{
QFETCH( QString, xmlHeader );
QFETCH( QByteArray, encoding );

QgsWkbTypes::Type wkbType;

QTextCodec *codec = QTextCodec::codecForName( encoding );

QByteArray data = codec->fromUnicode(
QStringLiteral(
"%1<myns:FeatureCollection "
"xmlns:myns='http://myns' "
"xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
"xmlns:gml='http://www.opengis.net/gml'>"
"<gml:boundedBy><gml:null>unknown</gml:null></gml:boundedBy>"
"<gml:featureMember>"
"<myns:mytypename fid='mytypename.1'>"
"<myns:strfield>price: 10€</myns:strfield>"
"<myns:mygeom>"
"<gml:Point srsName='http://www.opengis.net/gml/srs/epsg.xml#27700'>"
"<gml:coordinates decimal='.' cs=',' ts=' '>10,20</gml:coordinates>"
"</gml:Point>"
"</myns:mygeom>"
"</myns:mytypename>"
"</gml:featureMember>"
"</myns:FeatureCollection>" ).arg( xmlHeader ) );

QgsFields fields;
fields.append( QgsField( QStringLiteral( "strfield" ), QVariant::String, QStringLiteral( "string" ) ) );

{
QgsGml gmlParser( QStringLiteral( "mytypename" ), QStringLiteral( "mygeom" ), fields );
QCOMPARE( gmlParser.getFeatures( data, &wkbType ), 0 );
QMap<QgsFeatureId, QgsFeature * > featureMaps = gmlParser.featuresMap();
QCOMPARE( featureMaps.size(), 1 );
QVERIFY( featureMaps.constFind( 0 ) != featureMaps.constEnd() );
QCOMPARE( featureMaps[ 0 ]->attributes().size(), 1 );
QCOMPARE( featureMaps[0]->attribute( QStringLiteral( "strfield" ) ).toString(), QString( "price: 10€" ) );
delete featureMaps[ 0 ];
}

{
QgsGmlStreamingParser gmlParser( QStringLiteral( "mytypename" ), QStringLiteral( "mygeom" ), fields );
QCOMPARE( gmlParser.processData( data.mid( 0, data.size() / 2 ), false ), true );
QCOMPARE( gmlParser.getAndStealReadyFeatures().size(), 0 );
QCOMPARE( gmlParser.processData( data.mid( data.size() / 2 ), true ), true );
QCOMPARE( gmlParser.isException(), false );
QVector<QgsGmlStreamingParser::QgsGmlFeaturePtrGmlIdPair> features = gmlParser.getAndStealReadyFeatures();
QCOMPARE( features.size(), 1 );
QCOMPARE( features[ 0 ].first->attributes().size(), 1 );
QCOMPARE( features[ 0 ].first->attribute( QStringLiteral( "strfield" ) ).toString(), QString( "price: 10€" ) );
delete features[0].first;
}
}

QGSTEST_MAIN( TestQgsGML )
#include "testqgsgml.moc"

0 comments on commit 2f6cd43

Please sign in to comment.