Skip to content

Commit 5ec5bc9

Browse files
suricactus authored and nyalldawson committed Mar 30, 2020
FEAT #27230 #27271 [Aggregates] Allow majority and minority statistics on non-numeric fields
1 parent 6e4beab commit 5ec5bc9

8 files changed

+90
-19
lines changed
 

‎python/core/auto_generated/qgsstringstatisticalsummary.sip.in

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ specifying the statistic in the constructor or via setStatistics().
3939
MinimumLength,
4040
MaximumLength,
4141
MeanLength,
42+
Minority,
43+
Majority,
4244
All,
4345
};
4446
typedef QFlags<QgsStringStatisticalSummary::Statistic> Statistics;
@@ -213,6 +215,20 @@ Returns the maximum length of strings.
213215
Returns the mean length of strings.
214216

215217
.. versionadded:: 3.0
218+
%End
219+
220+
QString minority() const;
221+
%Docstring
222+
Returns the least common string.
223+
224+
.. versionadded:: 3.14
225+
%End
226+
227+
QString majority() const;
228+
%Docstring
229+
Returns the most common string.
230+
231+
.. versionadded:: 3.14
216232
%End
217233

218234
static QString displayName( QgsStringStatisticalSummary::Statistic statistic );

‎src/app/qgsstatisticalsummarydockwidget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ typedef QList< QgsStatisticalSummary::Statistic > StatsList;
3434
typedef QList< QgsStringStatisticalSummary::Statistic > StringStatsList;
3535
typedef QList< QgsDateTimeStatisticalSummary::Statistic > DateTimeStatsList;
3636
Q_GLOBAL_STATIC_WITH_ARGS( StatsList, sDisplayStats, ( {QgsStatisticalSummary::Count, QgsStatisticalSummary::Sum, QgsStatisticalSummary::Mean, QgsStatisticalSummary::Median, QgsStatisticalSummary::StDev, QgsStatisticalSummary::StDevSample, QgsStatisticalSummary::Min, QgsStatisticalSummary::Max, QgsStatisticalSummary::Range, QgsStatisticalSummary::Minority, QgsStatisticalSummary::Majority, QgsStatisticalSummary::Variety, QgsStatisticalSummary::FirstQuartile, QgsStatisticalSummary::ThirdQuartile, QgsStatisticalSummary::InterQuartileRange} ) )
37-
Q_GLOBAL_STATIC_WITH_ARGS( StringStatsList, sDisplayStringStats, ( {QgsStringStatisticalSummary::Count, QgsStringStatisticalSummary::CountDistinct, QgsStringStatisticalSummary::CountMissing, QgsStringStatisticalSummary::Min, QgsStringStatisticalSummary::Max, QgsStringStatisticalSummary::MinimumLength, QgsStringStatisticalSummary::MaximumLength} ) )
37+
Q_GLOBAL_STATIC_WITH_ARGS( StringStatsList, sDisplayStringStats, ( {QgsStringStatisticalSummary::Count, QgsStringStatisticalSummary::CountDistinct, QgsStringStatisticalSummary::CountMissing, QgsStringStatisticalSummary::Min, QgsStringStatisticalSummary::Max, QgsStringStatisticalSummary::Minority, QgsStringStatisticalSummary::Majority, QgsStringStatisticalSummary::MinimumLength, QgsStringStatisticalSummary::MaximumLength} ) )
3838
Q_GLOBAL_STATIC_WITH_ARGS( DateTimeStatsList, sDisplayDateTimeStats, ( {QgsDateTimeStatisticalSummary::Count, QgsDateTimeStatisticalSummary::CountDistinct, QgsDateTimeStatisticalSummary::CountMissing, QgsDateTimeStatisticalSummary::Min, QgsDateTimeStatisticalSummary::Max, QgsDateTimeStatisticalSummary::Range} ) )
3939

4040
#define MISSING_VALUES -1

‎src/core/qgsaggregatecalculator.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ QList<QgsAggregateCalculator::AggregateInfo> QgsAggregateCalculator::aggregates(
331331
<< QVariant::LongLong
332332
<< QVariant::ULongLong
333333
<< QVariant::Double
334+
<< QVariant::String
334335
}
335336
<< AggregateInfo
336337
{
@@ -342,6 +343,7 @@ QList<QgsAggregateCalculator::AggregateInfo> QgsAggregateCalculator::aggregates(
342343
<< QVariant::LongLong
343344
<< QVariant::ULongLong
344345
<< QVariant::Double
346+
<< QVariant::String
345347
}
346348
<< AggregateInfo
347349
{
@@ -583,15 +585,17 @@ QgsStringStatisticalSummary::Statistic QgsAggregateCalculator::stringStatFromAgg
583585
return QgsStringStatisticalSummary::MinimumLength;
584586
case StringMaximumLength:
585587
return QgsStringStatisticalSummary::MaximumLength;
588+
case Minority:
589+
return QgsStringStatisticalSummary::Minority;
590+
case Majority:
591+
return QgsStringStatisticalSummary::Majority;
586592

587593
case Sum:
588594
case Mean:
589595
case Median:
590596
case StDev:
591597
case StDevSample:
592598
case Range:
593-
case Minority:
594-
case Majority:
595599
case FirstQuartile:
596600
case ThirdQuartile:
597601
case InterQuartileRange:

‎src/core/qgsaggregatecalculator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ class CORE_EXPORT QgsAggregateCalculator
7474
StDev, //!< Standard deviation of values (numeric fields only)
7575
StDevSample, //!< Sample standard deviation of values (numeric fields only)
7676
Range, //!< Range of values (max - min) (numeric and datetime fields only)
77-
Minority, //!< Minority of values (numeric fields only)
78-
Majority, //!< Majority of values (numeric fields only)
77+
Minority, //!< Minority of values
78+
Majority, //!< Majority of values
7979
FirstQuartile, //!< First quartile (numeric fields only)
8080
ThirdQuartile, //!< Third quartile (numeric fields only)
8181
InterQuartileRange, //!< Inter quartile range (IQR) (numeric fields only)

‎src/core/qgsstringstatisticalsummary.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ void QgsStringStatisticalSummary::reset()
4444
mMaxLength = 0;
4545
mSumLengths = 0;
4646
mMeanLength = 0;
47+
mMinority = QString();
48+
mMajority = QString();
4749
}
4850

4951
void QgsStringStatisticalSummary::calculate( const QStringList &values )
@@ -75,6 +77,21 @@ void QgsStringStatisticalSummary::addValue( const QVariant &value )
7577
void QgsStringStatisticalSummary::finalize()
7678
{
7779
mMeanLength = mSumLengths / static_cast< double >( mCount );
80+
81+
if ( mStatistics & Minority || mStatistics & Majority )
82+
{
83+
QList<int> valueCounts = mValues.values();
84+
std::sort( valueCounts.begin(), valueCounts.end() );
85+
86+
if ( mStatistics & Minority )
87+
{
88+
mMinority = mValues.key( valueCounts.first() );
89+
}
90+
if ( mStatistics & Majority )
91+
{
92+
mMajority = mValues.key( valueCounts.last() );
93+
}
94+
}
7895
}
7996

8097
void QgsStringStatisticalSummary::calculateFromVariants( const QVariantList &values )
@@ -89,6 +106,8 @@ void QgsStringStatisticalSummary::calculateFromVariants( const QVariantList &val
89106
testString( variant.toString() );
90107
}
91108
}
109+
110+
finalize();
92111
}
93112

94113
void QgsStringStatisticalSummary::testString( const QString &string )
@@ -98,9 +117,9 @@ void QgsStringStatisticalSummary::testString( const QString &string )
98117
if ( string.isEmpty() )
99118
mCountMissing++;
100119

101-
if ( mStatistics & CountDistinct )
120+
if ( mStatistics & CountDistinct || mStatistics & Majority || mStatistics & Minority )
102121
{
103-
mValues << string;
122+
mValues[string]++;
104123
}
105124
if ( mStatistics & Min )
106125
{
@@ -150,6 +169,10 @@ QVariant QgsStringStatisticalSummary::statistic( QgsStringStatisticalSummary::St
150169
return mMaxLength;
151170
case MeanLength:
152171
return mMeanLength;
172+
case Minority:
173+
return mMinority;
174+
case Majority:
175+
return mMajority;
153176
case All:
154177
return 0;
155178
}
@@ -176,6 +199,10 @@ QString QgsStringStatisticalSummary::displayName( QgsStringStatisticalSummary::S
176199
return QObject::tr( "Maximum length" );
177200
case MeanLength:
178201
return QObject::tr( "Mean length" );
202+
case Minority:
203+
return QObject::tr( "Minority" );
204+
case Majority:
205+
return QObject::tr( "Majority" );
179206
case All:
180207
return QString();
181208
}

‎src/core/qgsstringstatisticalsummary.h

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ class CORE_EXPORT QgsStringStatisticalSummary
5555
MinimumLength = 32, //!< Minimum length of string
5656
MaximumLength = 64, //!< Maximum length of string
5757
MeanLength = 128, //!< Mean length of strings
58-
All = Count | CountDistinct | CountMissing | Min | Max | MinimumLength | MaximumLength | MeanLength, //!< All statistics
58+
Minority = 256, //!< Minority of strings
59+
Majority = 512, //!< Majority of strings
60+
All = Count | CountDistinct | CountMissing | Min | Max | MinimumLength | MaximumLength | MeanLength | Minority | Majority, //!< All statistics
5961
};
6062
Q_DECLARE_FLAGS( Statistics, Statistic )
6163

@@ -156,13 +158,13 @@ class CORE_EXPORT QgsStringStatisticalSummary
156158
* Returns the number of distinct string values.
157159
* \see distinctValues()
158160
*/
159-
int countDistinct() const { return mValues.count(); }
161+
int countDistinct() const
{
  // mValues holds one entry per distinct string, so its size is the distinct
  // count; this avoids building a temporary key list just to count it.
  return mValues.count();
}
160162

161163
/**
162164
* Returns the set of distinct string values.
163165
* \see countDistinct()
164166
*/
165-
QSet< QString > distinctValues() const { return mValues; }
167+
QSet< QString > distinctValues() const { return QSet<QString>::fromList( mValues.keys() ); }
166168

167169
/**
168170
* Returns the number of missing (null) string values.
@@ -195,6 +197,18 @@ class CORE_EXPORT QgsStringStatisticalSummary
195197
*/
196198
double meanLength() const { return mMeanLength; }
197199

200+
/**
201+
* Returns the least common string.
202+
* \since QGIS 3.14
203+
*/
204+
QString minority() const { return mMinority; }
205+
206+
/**
207+
* Returns the most common string.
208+
* \since QGIS 3.14
209+
*/
210+
QString majority() const { return mMajority; }
211+
198212
/**
199213
* Returns the friendly display name for a statistic
200214
* \param statistic statistic to return name for
@@ -206,14 +220,16 @@ class CORE_EXPORT QgsStringStatisticalSummary
206220
Statistics mStatistics;
207221

208222
int mCount;
209-
QSet< QString > mValues;
223+
QMap< QString, int > mValues;
210224
int mCountMissing;
211225
QString mMin;
212226
QString mMax;
213227
int mMinLength;
214228
int mMaxLength;
215229
long mSumLengths;
216230
double mMeanLength;
231+
QString mMinority;
232+
QString mMajority;
217233

218234
void testString( const QString &string );
219235
};

‎tests/src/core/testqgsexpression.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,8 @@ class TestQgsExpression: public QObject
19861986
QTest::newRow( "range" ) << "range(\"col1\")" << false << QVariant( 18.0 );
19871987
QTest::newRow( "minority" ) << "minority(\"col3\")" << false << QVariant( 1 );
19881988
QTest::newRow( "majority" ) << "majority(\"col3\")" << false << QVariant( 2 );
1989+
QTest::newRow( "minority string" ) << "minority(\"col2\")" << false << QVariant( "test" );
1990+
QTest::newRow( "majority string" ) << "majority(\"col2\")" << false << QVariant( "" );
19891991
QTest::newRow( "q1" ) << "q1(\"col1\")" << false << QVariant( 2.5 );
19901992
QTest::newRow( "q3" ) << "q3(\"col1\")" << false << QVariant( 6.5 );
19911993
QTest::newRow( "iqr" ) << "iqr(\"col1\")" << false << QVariant( 4 );

‎tests/src/python/test_qgsstringstatisticalsummary.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ def testStats(self):
2424
# added one-at-a-time
2525
s = QgsStringStatisticalSummary()
2626
self.assertEqual(s.statistics(), QgsStringStatisticalSummary.All)
27-
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
27+
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', 'aaaa', '', 'dddd']
2828
s.calculate(strings)
2929
s2 = QgsStringStatisticalSummary()
3030
for string in strings:
3131
s2.addString(string)
3232
s2.finalize()
33-
self.assertEqual(s.count(), 9)
34-
self.assertEqual(s2.count(), 9)
33+
self.assertEqual(s.count(), 10)
34+
self.assertEqual(s2.count(), 10)
3535
self.assertEqual(s.countDistinct(), 6)
3636
self.assertEqual(s2.countDistinct(), 6)
3737
self.assertEqual(set(s.distinctValues()), set(['cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', '']))
@@ -46,8 +46,12 @@ def testStats(self):
4646
self.assertEqual(s2.minLength(), 0)
4747
self.assertEqual(s.maxLength(), 8)
4848
self.assertEqual(s2.maxLength(), 8)
49-
self.assertEqual(s.meanLength(), 3.33333333333333333333333)
50-
self.assertEqual(s2.meanLength(), 3.33333333333333333333333)
49+
self.assertEqual(s.meanLength(), 3.4)
50+
self.assertEqual(s2.meanLength(), 3.4)
51+
self.assertEqual(s.minority(), 'bbbbbbbb')
52+
self.assertEqual(s2.minority(), 'bbbbbbbb')
53+
self.assertEqual(s.majority(), 'aaaa')
54+
self.assertEqual(s2.majority(), 'aaaa')
5155

5256
#extra check for minLength without empty strings
5357
s.calculate(['1111111', '111', '11111'])
@@ -56,14 +60,16 @@ def testStats(self):
5660
def testIndividualStats(self):
5761
# tests calculation of statistics one at a time, to make sure statistic calculations are not
5862
# dependent on each other
59-
tests = [{'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
63+
tests = [{'stat': QgsStringStatisticalSummary.Count, 'expected': 10},
6064
{'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
6165
{'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
6266
{'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
6367
{'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
6468
{'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
6569
{'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
66-
{'stat': QgsStringStatisticalSummary.MeanLength, 'expected': 3.3333333333333335},
70+
{'stat': QgsStringStatisticalSummary.MeanLength, 'expected': 3.4},
71+
{'stat': QgsStringStatisticalSummary.Minority, 'expected': 'bbbbbbbb'},
72+
{'stat': QgsStringStatisticalSummary.Majority, 'expected': 'aaaa'},
6773
]
6874

6975
s = QgsStringStatisticalSummary()
@@ -77,7 +83,7 @@ def testIndividualStats(self):
7783
s3.setStatistics(t['stat'])
7884
self.assertEqual(s.statistics(), t['stat'])
7985

80-
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
86+
strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', 'aaaa', '', 'dddd']
8187
s.calculate(strings)
8288
s3.reset()
8389
for string in strings:

0 commit comments

Comments
 (0)
Please sign in to comment.