Skip to content

Commit ab29f2d

Browse files
committed Nov 30, 2016
[processing] Use QgsStringStatisticalSummary in basic stats for strings
And also further optimise the algorithm
1 parent e272bb3 commit ab29f2d

File tree

2 files changed

+39
-60
lines changed

2 files changed

+39
-60
lines changed
 

‎python/plugins/processing/algs/qgis/BasicStatisticsStrings.py

Lines changed: 33 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@
3131

3232
from qgis.PyQt.QtGui import QIcon
3333

34+
from qgis.core import (QgsStringStatisticalSummary,
35+
QgsFeatureRequest)
36+
3437
from processing.core.GeoAlgorithm import GeoAlgorithm
3538
from processing.core.parameters import ParameterVector
3639
from processing.core.parameters import ParameterTableField
@@ -54,6 +57,8 @@ class BasicStatisticsStrings(GeoAlgorithm):
5457
EMPTY = 'EMPTY'
5558
FILLED = 'FILLED'
5659
UNIQUE = 'UNIQUE'
60+
MIN_VALUE = 'MIN_VALUE'
61+
MAX_VALUE = 'MAX_VALUE'
5762

5863
def getIcon(self):
5964
return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'basic_statistics.png'))
@@ -78,6 +83,8 @@ def defineCharacteristics(self):
7883
self.addOutput(OutputNumber(self.EMPTY, self.tr('Number of empty values')))
7984
self.addOutput(OutputNumber(self.FILLED, self.tr('Number of non-empty values')))
8085
self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values')))
86+
self.addOutput(OutputNumber(self.MIN_VALUE, self.tr('Minimum string value')))
87+
self.addOutput(OutputNumber(self.MAX_VALUE, self.tr('Maximum string value')))
8188

8289
def processAlgorithm(self, progress):
8390
layer = dataobjects.getObjectFromUri(
@@ -86,77 +93,47 @@ def processAlgorithm(self, progress):
8693

8794
outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)
8895

89-
index = layer.fields().lookupField(fieldName)
90-
91-
sumValue = 0
92-
minValue = 0
93-
maxValue = 0
94-
meanValue = 0
95-
nullValues = 0
96-
filledValues = 0
97-
98-
isFirst = True
99-
values = []
100-
101-
features = vector.features(layer)
96+
request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
97+
layer.fields())
98+
stat = QgsStringStatisticalSummary()
99+
features = vector.features(layer, request)
102100
count = len(features)
103-
total = 100.0 / count
101+
total = 100.0 / float(count)
104102
for current, ft in enumerate(features):
105-
value = ft[fieldName]
106-
if value:
107-
length = float(len(value))
108-
filledValues += 1
109-
else:
110-
nullValues += 1
111-
progress.setPercentage(int(current * total))
112-
continue
113-
114-
if isFirst:
115-
minValue = length
116-
maxValue = length
117-
isFirst = False
118-
else:
119-
if length < minValue:
120-
minValue = length
121-
if length > maxValue:
122-
maxValue = length
123-
124-
values.append(length)
125-
sumValue += length
126-
103+
stat.addValue(ft[fieldName])
127104
progress.setPercentage(int(current * total))
128105

129-
n = float(len(values))
130-
if n > 0:
131-
meanValue = sumValue / n
132-
133-
uniqueValues = vector.getUniqueValuesCount(layer, index)
106+
stat.finalize()
134107

135108
data = []
136109
data.append(self.tr('Analyzed layer: {}').format(layer.name()))
137110
data.append(self.tr('Analyzed field: {}').format(fieldName))
138-
data.append(self.tr('Minimum length: {}').format(minValue))
139-
data.append(self.tr('Maximum length: {}').format(maxValue))
140-
data.append(self.tr('Mean length: {}').format(meanValue))
141-
data.append(self.tr('Filled values: {}').format(filledValues))
142-
data.append(self.tr('NULL (missing) values: {}').format(nullValues))
143-
data.append(self.tr('Count: {}').format(count))
144-
data.append(self.tr('Unique: {}').format(uniqueValues))
111+
data.append(self.tr('Minimum length: {}').format(stat.minLength()))
112+
data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
113+
data.append(self.tr('Mean length: {}').format(stat.meanLength()))
114+
data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
115+
data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
116+
data.append(self.tr('Count: {}').format(stat.count()))
117+
data.append(self.tr('Unique: {}').format(stat.countDistinct()))
118+
data.append(self.tr('Minimum string value: {}').format(stat.min()))
119+
data.append(self.tr('Maximum string value: {}').format(stat.max()))
145120

146121
self.createHTML(outputFile, data)
147122

148-
self.setOutputValue(self.MIN_LEN, minValue)
149-
self.setOutputValue(self.MAX_LEN, maxValue)
150-
self.setOutputValue(self.MEAN_LEN, meanValue)
151-
self.setOutputValue(self.FILLED, filledValues)
152-
self.setOutputValue(self.EMPTY, nullValues)
153-
self.setOutputValue(self.COUNT, count)
154-
self.setOutputValue(self.UNIQUE, uniqueValues)
123+
self.setOutputValue(self.MIN_LEN, stat.minLength())
124+
self.setOutputValue(self.MAX_LEN, stat.maxLength())
125+
self.setOutputValue(self.MEAN_LEN, stat.meanLength())
126+
self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
127+
self.setOutputValue(self.EMPTY, stat.countMissing())
128+
self.setOutputValue(self.COUNT, stat.count())
129+
self.setOutputValue(self.UNIQUE, stat.countDistinct())
130+
self.setOutputValue(self.MIN_VALUE, stat.min())
131+
self.setOutputValue(self.MAX_VALUE, stat.max())
155132

156133
def createHTML(self, outputFile, algData):
157134
with codecs.open(outputFile, 'w', encoding='utf-8') as f:
158135
f.write('<html><head>\n')
159136
f.write('<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>\n')
160137
for s in algData:
161138
f.write('<p>' + str(s) + '</p>\n')
162-
f.write('</body></html>')
139+
f.write('</body></html>\n')

‎python/plugins/processing/tests/testdata/expected/basic_statistics_string.html

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,13 @@
22
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
33
<p>Analyzed layer: multipolys.gml</p>
44
<p>Analyzed field: Bname</p>
5-
<p>Minimum length: 4.0</p>
6-
<p>Maximum length: 4.0</p>
7-
<p>Mean length: 4.0</p>
5+
<p>Minimum length: 0</p>
6+
<p>Maximum length: 4</p>
7+
<p>Mean length: 3.0</p>
88
<p>Filled values: 3</p>
99
<p>NULL (missing) values: 1</p>
1010
<p>Count: 4</p>
1111
<p>Unique: 2</p>
12-
</body></html>
12+
<p>Minimum string value: Test</p>
13+
<p>Maximum string value: Test</p>
14+
</body></html>

0 commit comments

Comments
 (0)
Please sign in to comment.