28
28
29
29
from qgis .core import (QgsProcessingParameterFeatureSource ,
30
30
QgsStatisticalSummary ,
31
+ QgsDateTimeStatisticalSummary ,
32
+ QgsStringStatisticalSummary ,
31
33
QgsFeatureRequest ,
32
34
QgsProcessingParameterField ,
33
35
QgsProcessingParameterFeatureSink ,
36
38
QgsWkbTypes ,
37
39
QgsCoordinateReferenceSystem ,
38
40
QgsFeature ,
39
- QgsFeatureSink )
41
+ QgsFeatureSink ,
42
+ QgsProcessing ,
43
+ NULL )
40
44
from qgis .PyQt .QtCore import QVariant
41
45
from processing .algs .qgis .QgisAlgorithm import QgisAlgorithm
42
46
47
+ from collections import defaultdict
43
48
44
- class StatisticsByCategories (QgisAlgorithm ):
45
49
50
+ class StatisticsByCategories (QgisAlgorithm ):
46
51
INPUT = 'INPUT'
47
52
VALUES_FIELD_NAME = 'VALUES_FIELD_NAME'
48
53
CATEGORIES_FIELD_NAME = 'CATEGORIES_FIELD_NAME'
@@ -56,13 +61,15 @@ def __init__(self):
56
61
57
62
def initAlgorithm(self, config=None):
    """
    Registers the parameters of the algorithm, in order: the input vector
    source, the field to calculate statistics on, the field holding the
    categories, and the output sink.

    :param config: optional configuration, not read by this method
    """
    # input source, restricted to vector layer types
    source_param = QgsProcessingParameterFeatureSource(
        self.INPUT,
        self.tr('Input vector layer'),
        types=[QgsProcessing.TypeVector])
    self.addParameter(source_param)

    # no 'type' is passed for the value field, so it is not limited to
    # numeric fields here
    values_param = QgsProcessingParameterField(
        self.VALUES_FIELD_NAME,
        self.tr('Field to calculate statistics on'),
        parentLayerParameterName=self.INPUT)
    self.addParameter(values_param)

    categories_param = QgsProcessingParameterField(
        self.CATEGORIES_FIELD_NAME,
        self.tr('Field with categories'),
        parentLayerParameterName=self.INPUT,
        type=QgsProcessingParameterField.Any)
    self.addParameter(categories_param)

    sink_param = QgsProcessingParameterFeatureSink(
        self.OUTPUT,
        self.tr('Statistics by category'))
    self.addParameter(sink_param)
68
75
@@ -78,46 +85,179 @@ def processAlgorithm(self, parameters, context, feedback):
78
85
category_field_name = self .parameterAsString (parameters , self .CATEGORIES_FIELD_NAME , context )
79
86
80
87
value_field_index = source .fields ().lookupField (value_field_name )
88
+ value_field = source .fields ().at (value_field_index )
81
89
category_field_index = source .fields ().lookupField (category_field_name )
82
90
83
- features = source .getFeatures (QgsFeatureRequest ().setFlags (QgsFeatureRequest .NoGeometry ))
84
- total = 100.0 / source .featureCount () if source .featureCount () else 0
85
- values = {}
91
+ # generate output fields
92
+ fields = QgsFields ()
93
+ fields .append (source .fields ().at (category_field_index ))
94
+
95
def addField(name):
    """
    Adds a field to the output, keeping the same data type as the value_field

    :param name: name given to the appended output field
    """
    # Copy the field before renaming it: a plain assignment would only
    # alias value_field, and setName() would then rename value_field
    # itself every time this helper is called.
    field = QgsField(value_field)
    field.setName(name)
    fields.append(field)
102
+
103
+ if value_field .isNumeric ():
104
+ field_type = 'numeric'
105
+ fields .append (QgsField ('count' , QVariant .Int ))
106
+ fields .append (QgsField ('unique' , QVariant .Int ))
107
+ fields .append (QgsField ('min' , QVariant .Double ))
108
+ fields .append (QgsField ('max' , QVariant .Double ))
109
+ fields .append (QgsField ('range' , QVariant .Double ))
110
+ fields .append (QgsField ('sum' , QVariant .Double ))
111
+ fields .append (QgsField ('mean' , QVariant .Double ))
112
+ fields .append (QgsField ('median' , QVariant .Double ))
113
+ fields .append (QgsField ('stddev' , QVariant .Double ))
114
+ fields .append (QgsField ('minority' , QVariant .Double ))
115
+ fields .append (QgsField ('majority' , QVariant .Double ))
116
+ fields .append (QgsField ('q1' , QVariant .Double ))
117
+ fields .append (QgsField ('q3' , QVariant .Double ))
118
+ fields .append (QgsField ('iqr' , QVariant .Double ))
119
+ elif value_field .type () in (QVariant .Date , QVariant .Time , QVariant .DateTime ):
120
+ field_type = 'datetime'
121
+ fields .append (QgsField ('count' , QVariant .Int ))
122
+ fields .append (QgsField ('unique' , QVariant .Int ))
123
+ fields .append (QgsField ('empty' , QVariant .Int ))
124
+ fields .append (QgsField ('filled' , QVariant .Int ))
125
+ # keep same data type for these fields
126
+ addField ('min' )
127
+ addField ('max' )
128
+ else :
129
+ field_type = 'string'
130
+ fields .append (QgsField ('count' , QVariant .Int ))
131
+ fields .append (QgsField ('unique' , QVariant .Int ))
132
+ fields .append (QgsField ('empty' , QVariant .Int ))
133
+ fields .append (QgsField ('filled' , QVariant .Int ))
134
+ # keep same data type for these fields
135
+ addField ('min' )
136
+ addField ('max' )
137
+ fields .append (QgsField ('min_length' , QVariant .Int ))
138
+ fields .append (QgsField ('max_length' , QVariant .Int ))
139
+ fields .append (QgsField ('mean_length' , QVariant .Double ))
140
+
141
+ features = source .getFeatures (QgsFeatureRequest ().setFlags (QgsFeatureRequest .NoGeometry ).setSubsetOfAttributes (
142
+ [value_field_index , category_field_index ]))
143
+ total = 50.0 / source .featureCount () if source .featureCount () else 0
144
+ values = defaultdict (list )
86
145
for current , feat in enumerate (features ):
87
146
if feedback .isCanceled ():
88
147
break
89
148
90
149
feedback .setProgress (int (current * total ))
91
150
attrs = feat .attributes ()
92
151
try :
93
- value = float (attrs [value_field_index ])
152
+ if field_type == 'numeric' :
153
+ if attrs [value_field_index ] == NULL :
154
+ continue
155
+ else :
156
+ value = float (attrs [value_field_index ])
157
+ elif attrs [value_field_index ] == NULL :
158
+ value = NULL
159
+ elif field_type == 'string' :
160
+ value = str (attrs [value_field_index ])
161
+ else :
162
+ value = attrs [value_field_index ]
94
163
cat = attrs [category_field_index ]
95
- if cat not in values :
96
- values [cat ] = []
97
164
values [cat ].append (value )
98
165
except :
99
166
pass
100
167
101
- fields = QgsFields ()
102
- fields .append (source .fields ().at (category_field_index ))
103
- fields .append (QgsField ('min' , QVariant .Double ))
104
- fields .append (QgsField ('max' , QVariant .Double ))
105
- fields .append (QgsField ('mean' , QVariant .Double ))
106
- fields .append (QgsField ('stddev' , QVariant .Double ))
107
- fields .append (QgsField ('sum' , QVariant .Double ))
108
- fields .append (QgsField ('count' , QVariant .Int ))
109
-
110
168
(sink , dest_id ) = self .parameterAsSink (parameters , self .OUTPUT , context ,
111
169
fields , QgsWkbTypes .NoGeometry , QgsCoordinateReferenceSystem ())
112
170
113
- stat = QgsStatisticalSummary (QgsStatisticalSummary .Min | QgsStatisticalSummary .Max |
114
- QgsStatisticalSummary .Mean | QgsStatisticalSummary .StDevSample |
115
- QgsStatisticalSummary .Sum | QgsStatisticalSummary .Count )
171
+ if field_type == 'numeric' :
172
+ self .calcNumericStats (values , sink , feedback )
173
+ elif field_type == 'datetime' :
174
+ self .calcDateTimeStats (values , sink , feedback )
175
+ else :
176
+ self .calcStringStats (values , sink , feedback )
177
+
178
+ return {self .OUTPUT : dest_id }
179
+
180
def calcNumericStats(self, values, sink, feedback):
    """
    Writes one feature per category to the sink, holding the numeric
    summary statistics of the values collected for that category.

    Attribute order of each written feature: category, count, variety,
    min, max, range, sum, mean, median, stDev, minority, majority,
    first quartile, third quartile, interquartile range.

    :param values: mapping of category value to the list of its values
    :param sink: feature sink receiving the result features
    :param feedback: used for cancellation checks and progress reporting
    """
    summary = QgsStatisticalSummary()

    # progress reported here is offset by 50 and advances by 50/len(values)
    # per category
    step = 50.0 / len(values) if values else 0
    for index, (category, numbers) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(index * step) + 50)

        summary.calculate(numbers)
        row = [
            category,
            summary.count(),
            summary.variety(),
            summary.min(),
            summary.max(),
            summary.range(),
            summary.sum(),
            summary.mean(),
            summary.median(),
            summary.stDev(),
            summary.minority(),
            summary.majority(),
            summary.firstQuartile(),
            summary.thirdQuartile(),
            summary.interQuartileRange(),
        ]

        feature = QgsFeature()
        feature.setAttributes(row)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
122
211
123
- return {self .OUTPUT : dest_id }
212
def calcDateTimeStats(self, values, sink, feedback):
    """
    Writes one feature per category to the sink, holding the date/time
    summary statistics of the values collected for that category.

    Attribute order of each written feature: category, count, distinct
    count, missing count, count minus missing count, min, max.

    :param values: mapping of category value to the list of its values
    :param sink: feature sink receiving the result features
    :param feedback: used for cancellation checks and progress reporting
    """
    summary = QgsDateTimeStatisticalSummary()

    # progress reported here is offset by 50 and advances by 50/len(values)
    # per category
    step = 50.0 / len(values) if values else 0
    for index, (category, entries) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(index * step) + 50)

        summary.calculate(entries)
        row = [
            category,
            summary.count(),
            summary.countDistinct(),
            summary.countMissing(),
            summary.count() - summary.countMissing(),
            summary.statistic(QgsDateTimeStatisticalSummary.Min),
            summary.statistic(QgsDateTimeStatisticalSummary.Max),
        ]

        feature = QgsFeature()
        feature.setAttributes(row)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
236
+
237
def calcStringStats(self, values, sink, feedback):
    """
    Writes one feature per category to the sink, holding the string
    summary statistics of the values collected for that category.

    Attribute order of each written feature: category, count, distinct
    count, missing count, count minus missing count, min, max, minimum
    length, maximum length, mean length.

    :param values: mapping of category value to the list of its values
    :param sink: feature sink receiving the result features
    :param feedback: used for cancellation checks and progress reporting
    """
    summary = QgsStringStatisticalSummary()

    # progress reported here is offset by 50 and advances by 50/len(values)
    # per category
    step = 50.0 / len(values) if values else 0
    for index, (category, strings) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(index * step) + 50)

        summary.calculate(strings)
        row = [
            category,
            summary.count(),
            summary.countDistinct(),
            summary.countMissing(),
            summary.count() - summary.countMissing(),
            summary.min(),
            summary.max(),
            summary.minLength(),
            summary.maxLength(),
            summary.meanLength(),
        ]

        feature = QgsFeature()
        feature.setAttributes(row)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
0 commit comments