Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
FEAT #27230 #27271 [Aggregates] Allow majority and minority statistics on non-numeric fields
  • Loading branch information
suricactus authored and nyalldawson committed Mar 30, 2020
1 parent 6e4beab commit 5ec5bc9
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 19 deletions.
16 changes: 16 additions & 0 deletions python/core/auto_generated/qgsstringstatisticalsummary.sip.in
Expand Up @@ -39,6 +39,8 @@ specifying the statistic in the constructor or via setStatistics().
MinimumLength,
MaximumLength,
MeanLength,
Minority,
Majority,
All,
};
typedef QFlags<QgsStringStatisticalSummary::Statistic> Statistics;
Expand Down Expand Up @@ -213,6 +215,20 @@ Returns the maximum length of strings.
Returns the mean length of strings.

.. versionadded:: 3.0
%End

QString minority() const;
%Docstring
Returns the least common string.

.. versionadded:: 3.14
%End

QString majority() const;
%Docstring
Returns the most common string.

.. versionadded:: 3.14
%End

static QString displayName( QgsStringStatisticalSummary::Statistic statistic );
Expand Down
2 changes: 1 addition & 1 deletion src/app/qgsstatisticalsummarydockwidget.cpp
Expand Up @@ -34,7 +34,7 @@ typedef QList< QgsStatisticalSummary::Statistic > StatsList;
typedef QList< QgsStringStatisticalSummary::Statistic > StringStatsList;
typedef QList< QgsDateTimeStatisticalSummary::Statistic > DateTimeStatsList;
Q_GLOBAL_STATIC_WITH_ARGS( StatsList, sDisplayStats, ( {QgsStatisticalSummary::Count, QgsStatisticalSummary::Sum, QgsStatisticalSummary::Mean, QgsStatisticalSummary::Median, QgsStatisticalSummary::StDev, QgsStatisticalSummary::StDevSample, QgsStatisticalSummary::Min, QgsStatisticalSummary::Max, QgsStatisticalSummary::Range, QgsStatisticalSummary::Minority, QgsStatisticalSummary::Majority, QgsStatisticalSummary::Variety, QgsStatisticalSummary::FirstQuartile, QgsStatisticalSummary::ThirdQuartile, QgsStatisticalSummary::InterQuartileRange} ) )
Q_GLOBAL_STATIC_WITH_ARGS( StringStatsList, sDisplayStringStats, ( {QgsStringStatisticalSummary::Count, QgsStringStatisticalSummary::CountDistinct, QgsStringStatisticalSummary::CountMissing, QgsStringStatisticalSummary::Min, QgsStringStatisticalSummary::Max, QgsStringStatisticalSummary::MinimumLength, QgsStringStatisticalSummary::MaximumLength} ) )
Q_GLOBAL_STATIC_WITH_ARGS( StringStatsList, sDisplayStringStats, ( {QgsStringStatisticalSummary::Count, QgsStringStatisticalSummary::CountDistinct, QgsStringStatisticalSummary::CountMissing, QgsStringStatisticalSummary::Min, QgsStringStatisticalSummary::Max, QgsStringStatisticalSummary::Minority, QgsStringStatisticalSummary::Majority, QgsStringStatisticalSummary::MinimumLength, QgsStringStatisticalSummary::MaximumLength} ) )
Q_GLOBAL_STATIC_WITH_ARGS( DateTimeStatsList, sDisplayDateTimeStats, ( {QgsDateTimeStatisticalSummary::Count, QgsDateTimeStatisticalSummary::CountDistinct, QgsDateTimeStatisticalSummary::CountMissing, QgsDateTimeStatisticalSummary::Min, QgsDateTimeStatisticalSummary::Max, QgsDateTimeStatisticalSummary::Range} ) )

#define MISSING_VALUES -1
Expand Down
8 changes: 6 additions & 2 deletions src/core/qgsaggregatecalculator.cpp
Expand Up @@ -331,6 +331,7 @@ QList<QgsAggregateCalculator::AggregateInfo> QgsAggregateCalculator::aggregates(
<< QVariant::LongLong
<< QVariant::ULongLong
<< QVariant::Double
<< QVariant::String
}
<< AggregateInfo
{
Expand All @@ -342,6 +343,7 @@ QList<QgsAggregateCalculator::AggregateInfo> QgsAggregateCalculator::aggregates(
<< QVariant::LongLong
<< QVariant::ULongLong
<< QVariant::Double
<< QVariant::String
}
<< AggregateInfo
{
Expand Down Expand Up @@ -583,15 +585,17 @@ QgsStringStatisticalSummary::Statistic QgsAggregateCalculator::stringStatFromAgg
return QgsStringStatisticalSummary::MinimumLength;
case StringMaximumLength:
return QgsStringStatisticalSummary::MaximumLength;
case Minority:
return QgsStringStatisticalSummary::Minority;
case Majority:
return QgsStringStatisticalSummary::Majority;

case Sum:
case Mean:
case Median:
case StDev:
case StDevSample:
case Range:
case Minority:
case Majority:
case FirstQuartile:
case ThirdQuartile:
case InterQuartileRange:
Expand Down
4 changes: 2 additions & 2 deletions src/core/qgsaggregatecalculator.h
Expand Up @@ -74,8 +74,8 @@ class CORE_EXPORT QgsAggregateCalculator
StDev, //!< Standard deviation of values (numeric fields only)
StDevSample, //!< Sample standard deviation of values (numeric fields only)
Range, //!< Range of values (max - min) (numeric and datetime fields only)
Minority, //!< Minority of values (numeric fields only)
Majority, //!< Majority of values (numeric fields only)
Minority, //!< Minority of values
Majority, //!< Majority of values
FirstQuartile, //!< First quartile (numeric fields only)
ThirdQuartile, //!< Third quartile (numeric fields only)
InterQuartileRange, //!< Inter quartile range (IQR) (numeric fields only)
Expand Down
31 changes: 29 additions & 2 deletions src/core/qgsstringstatisticalsummary.cpp
Expand Up @@ -44,6 +44,8 @@ void QgsStringStatisticalSummary::reset()
mMaxLength = 0;
mSumLengths = 0;
mMeanLength = 0;
mMinority = QString();
mMajority = QString();
}

void QgsStringStatisticalSummary::calculate( const QStringList &values )
Expand Down Expand Up @@ -75,6 +77,21 @@ void QgsStringStatisticalSummary::addValue( const QVariant &value )
void QgsStringStatisticalSummary::finalize()
{
mMeanLength = mSumLengths / static_cast< double >( mCount );

if ( mStatistics & Minority || mStatistics & Majority )
{
QList<int> valueCounts = mValues.values();
std::sort( valueCounts.begin(), valueCounts.end() );

if ( mStatistics & Minority )
{
mMinority = mValues.key( valueCounts.first() );
}
if ( mStatistics & Majority )
{
mMajority = mValues.key( valueCounts.last() );
}
}
}

void QgsStringStatisticalSummary::calculateFromVariants( const QVariantList &values )
Expand All @@ -89,6 +106,8 @@ void QgsStringStatisticalSummary::calculateFromVariants( const QVariantList &val
testString( variant.toString() );
}
}

finalize();
}

void QgsStringStatisticalSummary::testString( const QString &string )
Expand All @@ -98,9 +117,9 @@ void QgsStringStatisticalSummary::testString( const QString &string )
if ( string.isEmpty() )
mCountMissing++;

if ( mStatistics & CountDistinct )
if ( mStatistics & CountDistinct || mStatistics & Majority || mStatistics & Minority )
{
mValues << string;
mValues[string]++;
}
if ( mStatistics & Min )
{
Expand Down Expand Up @@ -150,6 +169,10 @@ QVariant QgsStringStatisticalSummary::statistic( QgsStringStatisticalSummary::St
return mMaxLength;
case MeanLength:
return mMeanLength;
case Minority:
return mMinority;
case Majority:
return mMajority;
case All:
return 0;
}
Expand All @@ -176,6 +199,10 @@ QString QgsStringStatisticalSummary::displayName( QgsStringStatisticalSummary::S
return QObject::tr( "Maximum length" );
case MeanLength:
return QObject::tr( "Mean length" );
case Minority:
return QObject::tr( "Minority" );
case Majority:
return QObject::tr( "Majority" );
case All:
return QString();
}
Expand Down
24 changes: 20 additions & 4 deletions src/core/qgsstringstatisticalsummary.h
Expand Up @@ -55,7 +55,9 @@ class CORE_EXPORT QgsStringStatisticalSummary
MinimumLength = 32, //!< Minimum length of string
MaximumLength = 64, //!< Maximum length of string
MeanLength = 128, //!< Mean length of strings
All = Count | CountDistinct | CountMissing | Min | Max | MinimumLength | MaximumLength | MeanLength, //!< All statistics
Minority = 256, //!< Minority of strings
Majority = 512, //!< Majority of strings
All = Count | CountDistinct | CountMissing | Min | Max | MinimumLength | MaximumLength | MeanLength | Minority | Majority, //!< All statistics
};
Q_DECLARE_FLAGS( Statistics, Statistic )

Expand Down Expand Up @@ -156,13 +158,13 @@ class CORE_EXPORT QgsStringStatisticalSummary
* Returns the number of distinct string values.
* \see distinctValues()
*/
int countDistinct() const { return mValues.count(); }
// QMap::count() is already the number of distinct keys; avoids building a temporary keys() list
int countDistinct() const { return mValues.count(); }

/**
* Returns the set of distinct string values.
* \see countDistinct()
*/
QSet< QString > distinctValues() const { return mValues; }
QSet< QString > distinctValues() const { return QSet<QString>::fromList( mValues.keys() ); }

/**
* Returns the number of missing (null) string values.
Expand Down Expand Up @@ -195,6 +197,18 @@ class CORE_EXPORT QgsStringStatisticalSummary
*/
double meanLength() const { return mMeanLength; }

/**
* Returns the least common string.
*
* The value is computed by finalize() when the Minority statistic has been requested.
* \see majority()
* \since QGIS 3.14
*/
QString minority() const { return mMinority; }

/**
* Returns the most common string.
*
* The value is computed by finalize() when the Majority statistic has been requested.
* \see minority()
* \since QGIS 3.14
*/
QString majority() const { return mMajority; }

/**
* Returns the friendly display name for a statistic
* \param statistic statistic to return name for
Expand All @@ -206,14 +220,16 @@ class CORE_EXPORT QgsStringStatisticalSummary
Statistics mStatistics;

int mCount;
QSet< QString > mValues;
QMap< QString, int > mValues;
int mCountMissing;
QString mMin;
QString mMax;
int mMinLength;
int mMaxLength;
long mSumLengths;
double mMeanLength;
QString mMinority;
QString mMajority;

void testString( const QString &string );
};
Expand Down
2 changes: 2 additions & 0 deletions tests/src/core/testqgsexpression.cpp
Expand Up @@ -1986,6 +1986,8 @@ class TestQgsExpression: public QObject
QTest::newRow( "range" ) << "range(\"col1\")" << false << QVariant( 18.0 );
QTest::newRow( "minority" ) << "minority(\"col3\")" << false << QVariant( 1 );
QTest::newRow( "majority" ) << "majority(\"col3\")" << false << QVariant( 2 );
QTest::newRow( "minority string" ) << "minority(\"col2\")" << false << QVariant( "test" );
QTest::newRow( "majority string" ) << "majority(\"col2\")" << false << QVariant( "" );
QTest::newRow( "q1" ) << "q1(\"col1\")" << false << QVariant( 2.5 );
QTest::newRow( "q3" ) << "q3(\"col1\")" << false << QVariant( 6.5 );
QTest::newRow( "iqr" ) << "iqr(\"col1\")" << false << QVariant( 4 );
Expand Down
22 changes: 14 additions & 8 deletions tests/src/python/test_qgsstringstatisticalsummary.py
Expand Up @@ -24,14 +24,14 @@ def testStats(self):
# added one-at-a-time
s = QgsStringStatisticalSummary()
self.assertEqual(s.statistics(), QgsStringStatisticalSummary.All)
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', 'aaaa', '', 'dddd']
s.calculate(strings)
s2 = QgsStringStatisticalSummary()
for string in strings:
s2.addString(string)
s2.finalize()
self.assertEqual(s.count(), 9)
self.assertEqual(s2.count(), 9)
self.assertEqual(s.count(), 10)
self.assertEqual(s2.count(), 10)
self.assertEqual(s.countDistinct(), 6)
self.assertEqual(s2.countDistinct(), 6)
self.assertEqual(set(s.distinctValues()), set(['cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', '']))
Expand All @@ -46,8 +46,12 @@ def testStats(self):
self.assertEqual(s2.minLength(), 0)
self.assertEqual(s.maxLength(), 8)
self.assertEqual(s2.maxLength(), 8)
self.assertEqual(s.meanLength(), 3.33333333333333333333333)
self.assertEqual(s2.meanLength(), 3.33333333333333333333333)
self.assertEqual(s.meanLength(), 3.4)
self.assertEqual(s2.meanLength(), 3.4)
self.assertEqual(s.minority(), 'bbbbbbbb')
self.assertEqual(s2.minority(), 'bbbbbbbb')
self.assertEqual(s.majority(), 'aaaa')
self.assertEqual(s2.majority(), 'aaaa')

#extra check for minLength without empty strings
s.calculate(['1111111', '111', '11111'])
Expand All @@ -56,14 +60,16 @@ def testStats(self):
def testIndividualStats(self):
# tests calculation of statistics one at a time, to make sure statistic calculations are not
# dependent on each other
tests = [{'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
tests = [{'stat': QgsStringStatisticalSummary.Count, 'expected': 10},
{'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
{'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
{'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
{'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
{'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
{'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
{'stat': QgsStringStatisticalSummary.MeanLength, 'expected': 3.3333333333333335},
{'stat': QgsStringStatisticalSummary.MeanLength, 'expected': 3.4},
{'stat': QgsStringStatisticalSummary.Minority, 'expected': 'bbbbbbbb'},
{'stat': QgsStringStatisticalSummary.Majority, 'expected': 'aaaa'},
]

s = QgsStringStatisticalSummary()
Expand All @@ -77,7 +83,7 @@ def testIndividualStats(self):
s3.setStatistics(t['stat'])
self.assertEqual(s.statistics(), t['stat'])

strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', 'aaaa', '', 'dddd']
s.calculate(strings)
s3.reset()
for string in strings:
Expand Down

0 comments on commit 5ec5bc9

Please sign in to comment.