Skip to content

Commit

Permalink
New class QgsStringStatisticalSummary, for calculating statistics
Browse files Browse the repository at this point in the history
on lists of strings
  • Loading branch information
nyalldawson committed May 10, 2016
1 parent ceba526 commit 0493cbf
Show file tree
Hide file tree
Showing 7 changed files with 508 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/core/core.sip
Expand Up @@ -127,6 +127,7 @@
%Include qgssnappingutils.sip
%Include qgsspatialindex.sip
%Include qgsstatisticalsummary.sip
%Include qgsstringstatisticalsummary.sip
%Include qgsstringutils.sip
%Include qgstolerance.sip
%Include qgstracer.sip
Expand Down
119 changes: 119 additions & 0 deletions python/core/qgsstringstatisticalsummary.sip
@@ -0,0 +1,119 @@
/** \ingroup core
* \class QgsStringStatisticalSummary
* \brief Calculator for summary statistics and aggregates for a list of strings.
*
* Statistics are calculated by calling @link calculate @endlink and passing a list of strings. The
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
* are calculated by default. Statistics which require slower computations are only calculated by
* specifying the statistic in the constructor or via @link setStatistics @endlink.
*
* \note Added in version 2.16
*/


class QgsStringStatisticalSummary
{
%TypeHeaderCode
#include <qgsstringstatisticalsummary.h>
%End

  public:

    //! Enumeration of flags that specify statistics to be calculated
    enum Statistic
    {
      Count, //!< Number of string values
      CountDistinct, //!< Number of distinct string values
      CountMissing, //!< Number of missing (null or empty) values
      Min, //!< Minimum string value
      Max, //!< Maximum string value
      MinimumLength, //!< Minimum length of string
      MaximumLength, //!< Maximum length of string
      All, //!< All statistics
    };
    typedef QFlags<QgsStringStatisticalSummary::Statistic> Statistics;

    /** Constructor for QgsStringStatisticalSummary
     * @param stats flags for statistics to calculate
     */
    QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats = All );

    /** Returns flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @see setStatistics
     */
    Statistics statistics() const;

    /** Sets flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @param stats flags for statistics to calculate
     * @see statistics
     */
    void setStatistics( const Statistics& stats );

    /** Resets the calculated values
     */
    void reset();

    /** Calculates summary statistics for a list of strings. Any previously
     * calculated values are discarded first.
     * @param values list of strings
     */
    void calculate( const QStringList& values );

    /** Calculates summary statistics for a list of variants. Any non-string variants will be
     * ignored. Any previously calculated values are discarded first.
     * @param values list of variants
     */
    void calculate( const QVariantList& values );

    /** Returns the value of a specified statistic
     * @param stat statistic to return
     * @returns calculated value of statistic
     */
    QVariant statistic( Statistic stat ) const;

    /** Returns the calculated count of values.
     */
    int count() const;

    /** Returns the number of distinct string values.
     * @see distinctValues()
     */
    int countDistinct() const;

    /** Returns the set of distinct string values.
     * @see countDistinct()
     */
    QSet< QString > distinctValues() const;

    /** Returns the number of missing (null or empty) string values.
     */
    int countMissing() const;

    /** Returns the minimum (non-null) string value.
     */
    QString min() const;

    /** Returns the maximum (non-null) string value.
     */
    QString max() const;

    /** Returns the minimum length of strings.
     */
    int minLength() const;

    /** Returns the maximum length of strings.
     */
    int maxLength() const;

    /** Returns the friendly display name for a statistic
     * @param statistic statistic to return name for
     */
    static QString displayName( Statistic statistic );

};

// Bitwise OR combining a single statistic with an existing set of statistic flags.
QFlags<QgsStringStatisticalSummary::Statistic> operator|(QgsStringStatisticalSummary::Statistic f1, QFlags<QgsStringStatisticalSummary::Statistic> f2);

2 changes: 2 additions & 0 deletions src/core/CMakeLists.txt
Expand Up @@ -194,6 +194,7 @@ SET(QGIS_CORE_SRCS
qgssqlexpressioncompiler.cpp
qgssqliteexpressioncompiler.cpp
qgsstatisticalsummary.cpp
qgsstringstatisticalsummary.cpp
qgsstringutils.cpp
qgstextlabelfeature.cpp
qgstolerance.cpp
Expand Down Expand Up @@ -689,6 +690,7 @@ SET(QGIS_CORE_HDRS
qgsspatialindex.h
qgssqlexpressioncompiler.h
qgsstatisticalsummary.h
qgsstringstatisticalsummary.h
qgsstringutils.h
qgstextlabelfeature.h
qgstolerance.h
Expand Down
154 changes: 154 additions & 0 deletions src/core/qgsstringstatisticalsummary.cpp
@@ -0,0 +1,154 @@
/***************************************************************************
qgsstringstatisticalsummary.cpp
-------------------------------
Date : May 2016
Copyright : (C) 2016 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/

#include "qgsstringstatisticalsummary.h"
#include <QString>
#include <QStringList>
#include <QObject>
#include <QVariant>
#include <QVariantList>
#include "limits.h"

/***************************************************************************
* This class is considered CRITICAL and any change MUST be accompanied with
* full unit tests in test_qgsstringstatisticalsummary.py.
* See details in QEP #17
****************************************************************************/

// Stores the requested statistic flags, then puts every accumulator into its
// initial state so the object is immediately usable.
QgsStringStatisticalSummary::QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats )
    : mStatistics( stats )
{
  // The accumulators are only given their starting values by reset().
  this->reset();
}

// Discards all accumulated results and restores the starting values used
// before any string has been examined.
void QgsStringStatisticalSummary::reset()
{
  // Counters
  mCount = 0;
  mCountMissing = 0;

  // Collected values
  mValues.clear();

  // Extremes: the minimum length starts at the largest int so that the first
  // examined string always lowers it; the maximum length starts at zero.
  mMinLength = INT_MAX;
  mMaxLength = 0;
  mMin.clear();
  mMax.clear();
}

void QgsStringStatisticalSummary::calculate( const QStringList& values )
{
reset();

Q_FOREACH ( const QString& string, values )
{
testString( string );
}
}

void QgsStringStatisticalSummary::calculate( const QVariantList& values )
{
reset();

Q_FOREACH ( const QVariant& variant, values )
{
if ( variant.type() == QVariant::String )
{
testString( variant.toString() );
}
}
}

// Folds a single string into the running statistics.
//
// The count, missing count and length extremes are always updated. The
// distinct value set and the minimum/maximum strings are only maintained
// when the corresponding flag is set in mStatistics.
void QgsStringStatisticalSummary::testString( const QString& string )
{
  const bool blank = string.isEmpty();
  const int length = string.length();

  mCount++;
  if ( blank )
    mCountMissing++;

  if ( mStatistics & CountDistinct )
    mValues << string;

  // Empty strings never take part in the min/max comparison. An empty
  // mMin/mMax means no candidate has been stored yet, so the first
  // non-empty string is taken as-is.
  if ( !blank )
  {
    if ( mStatistics & Min )
      mMin = mMin.isEmpty() ? string : qMin( mMin, string );

    if ( mStatistics & Max )
      mMax = mMax.isEmpty() ? string : qMax( mMax, string );
  }

  // Length extremes include empty strings.
  mMinLength = qMin( mMinLength, length );
  mMaxLength = qMax( mMaxLength, length );
}

// Returns the stored value for one statistic, wrapped in a QVariant.
// 'All' is not a single statistic, so it (like any unhandled value) yields 0.
QVariant QgsStringStatisticalSummary::statistic( QgsStringStatisticalSummary::Statistic stat ) const
{
  QVariant result( 0 );

  switch ( stat )
  {
    case Count:
      result = QVariant( mCount );
      break;
    case CountDistinct:
      result = QVariant( mValues.count() );
      break;
    case CountMissing:
      result = QVariant( mCountMissing );
      break;
    case Min:
      result = QVariant( mMin );
      break;
    case Max:
      result = QVariant( mMax );
      break;
    case MinimumLength:
      result = QVariant( mMinLength );
      break;
    case MaximumLength:
      result = QVariant( mMaxLength );
      break;
    case All:
      // keep the fallback value
      break;
  }

  return result;
}

// Returns the translated, user-facing label for a statistic.
// 'All' has no label of its own, so it (like any unhandled value) yields a
// default-constructed QString.
QString QgsStringStatisticalSummary::displayName( QgsStringStatisticalSummary::Statistic statistic )
{
  QString label;

  switch ( statistic )
  {
    case Count:
      label = QObject::tr( "Count" );
      break;
    case CountDistinct:
      label = QObject::tr( "Count (distinct)" );
      break;
    case CountMissing:
      label = QObject::tr( "Count (missing)" );
      break;
    case Min:
      label = QObject::tr( "Minimum" );
      break;
    case Max:
      label = QObject::tr( "Maximum" );
      break;
    case MinimumLength:
      label = QObject::tr( "Minimum length" );
      break;
    case MaximumLength:
      label = QObject::tr( "Maximum length" );
      break;
    case All:
      // keep the default-constructed label
      break;
  }

  return label;
}

0 comments on commit 0493cbf

Please sign in to comment.