Skip to content

Commit

Permalink
Port Stats by Category to new API
Browse files Browse the repository at this point in the history
Improvements:
- keep original field type and name for category field
- add unit test
  • Loading branch information
nyalldawson committed Aug 5, 2017
1 parent adda744 commit b93be39
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 32 deletions.
5 changes: 3 additions & 2 deletions python/plugins/processing/algs/qgis/QGISAlgorithmProvider.py
Expand Up @@ -132,6 +132,7 @@
from .SpatialiteExecuteSQL import SpatialiteExecuteSQL
from .SpatialIndex import SpatialIndex
from .SplitWithLines import SplitWithLines
from .StatisticsByCategories import StatisticsByCategories
from .SumLines import SumLines
from .SymmetricalDifference import SymmetricalDifference
from .TextToFloat import TextToFloat
Expand All @@ -149,7 +150,6 @@
# from .SelectByLocation import SelectByLocation
# from .SpatialJoin import SpatialJoin
# from .GeometryConvert import GeometryConvert
# from .StatisticsByCategories import StatisticsByCategories
# from .FieldsCalculator import FieldsCalculator
# from .FieldPyculator import FieldsPyculator
# from .PointsDisplacement import PointsDisplacement
Expand Down Expand Up @@ -190,7 +190,7 @@ def getAlgs(self):
# SpatialJoin(),
# GeometryConvert(), FieldsCalculator(),
# FieldsPyculator(),
# StatisticsByCategories(),
#
# RasterLayerStatistics(), PointsDisplacement(),
# PointsFromPolygons(),
# PointsFromLines(),
Expand Down Expand Up @@ -298,6 +298,7 @@ def getAlgs(self):
SpatialiteExecuteSQL(),
SpatialIndex(),
SplitWithLines(),
StatisticsByCategories(),
SumLines(),
SymmetricalDifference(),
TextToFloat(),
Expand Down
80 changes: 50 additions & 30 deletions python/plugins/processing/algs/qgis/StatisticsByCategories.py
Expand Up @@ -26,19 +26,24 @@

__revision__ = '$Format:%H$'

from qgis.core import (QgsApplication,
QgsFeatureSink,
from qgis.core import (QgsProcessingParameterFeatureSource,
QgsStatisticalSummary,
QgsProcessingUtils)
from processing.core.outputs import OutputTable
QgsFeatureRequest,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSink,
QgsFields,
QgsField,
QgsWkbTypes,
QgsCoordinateReferenceSystem,
QgsFeature,
QgsFeatureSink)
from qgis.PyQt.QtCore import QVariant
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
from processing.core.parameters import ParameterVector
from processing.core.parameters import ParameterTableField


class StatisticsByCategories(QgisAlgorithm):

INPUT_LAYER = 'INPUT_LAYER'
INPUT = 'INPUT'
VALUES_FIELD_NAME = 'VALUES_FIELD_NAME'
CATEGORIES_FIELD_NAME = 'CATEGORIES_FIELD_NAME'
OUTPUT = 'OUTPUT'
Expand All @@ -50,16 +55,16 @@ def __init__(self):
super().__init__()

def initAlgorithm(self, config=None):
# Declares the algorithm's inputs and output: an input vector layer, a
# numeric field to compute statistics on, a category field of any type,
# and a 'Statistics by category' result.
#
# NOTE(review): this listing is a flattened diff, so two generations of the
# same declarations appear back to back. The first group (ParameterVector /
# ParameterTableField keyed on INPUT_LAYER) looks like the pre-port code and
# the second group (QgsProcessingParameter* keyed on INPUT) like its
# replacement -- confirm against the raw diff before relying on either.
self.addParameter(ParameterVector(self.INPUT_LAYER,
self.tr('Input vector layer')))
self.addParameter(ParameterTableField(self.VALUES_FIELD_NAME,
self.tr('Field to calculate statistics on'),
self.INPUT_LAYER, ParameterTableField.DATA_TYPE_NUMBER))
self.addParameter(ParameterTableField(self.CATEGORIES_FIELD_NAME,
self.tr('Field with categories'),
self.INPUT_LAYER, ParameterTableField.DATA_TYPE_ANY))
# Same three inputs expressed with the QgsProcessingParameter* classes; both
# field parameters name self.INPUT as their parent layer parameter.
self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
self.tr('Input vector layer')))
self.addParameter(QgsProcessingParameterField(self.VALUES_FIELD_NAME,
self.tr('Field to calculate statistics on'),
parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Numeric))
self.addParameter(QgsProcessingParameterField(self.CATEGORIES_FIELD_NAME,
self.tr('Field with categories'),
parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Any))

# The result is registered twice in this listing: once as an OutputTable via
# addOutput() and once as a feature sink parameter via addParameter().
self.addOutput(OutputTable(self.OUTPUT, self.tr('Statistics by category')))
self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Statistics by category')))

def name(self):
# Identifier string for this algorithm; the test definition further down
# the page refers to it as 'qgis:statisticsbycategories'.
return 'statisticsbycategories'
Expand All @@ -68,36 +73,51 @@ def displayName(self):
return self.tr('Statistics by categories')

def processAlgorithm(self, parameters, context, feedback):
# Groups the values of the VALUES_FIELD_NAME attribute by the value of the
# CATEGORIES_FIELD_NAME attribute, then emits one row per category holding
# min, max, mean, sample standard deviation, sum and count.
#
# NOTE(review): this listing is a flattened diff without +/- markers or
# indentation, so superseded and replacement statements are interleaved.
# The statements built on `layer`, getParameterValue(), `output` and
# `writer` appear to be the removed pre-port lines, and the ones built on
# `source`, parameterAsString() and `sink` their replacements (only the
# latter define the `dest_id` that is returned) -- confirm against the raw
# diff.
layer = QgsProcessingUtils.mapLayerFromString(self.getParameterValue(self.INPUT_LAYER), context)
valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)
source = self.parameterAsSource(parameters, self.INPUT, context)
value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context)

# Resolve the field names to attribute indexes once, outside the loop.
output = self.getOutputFromName(self.OUTPUT)
valuesField = layer.fields().lookupField(valuesFieldName)
categoriesField = layer.fields().lookupField(categoriesFieldName)
value_field_index = source.fields().lookupField(value_field_name)
category_field_index = source.fields().lookupField(category_field_name)

# Only attributes are read below, so the `source` request is flagged
# NoGeometry. `total` is the progress increment per feature and falls back
# to 0 when featureCount() is falsy, avoiding a division by zero.
features = QgsProcessingUtils.getFeatures(layer, context)
total = 100.0 / layer.featureCount() if layer.featureCount() else 0
features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry))
total = 100.0 / source.featureCount() if source.featureCount() else 0
# Maps each category value to the list of float values collected for it.
values = {}
for current, feat in enumerate(features):
# Stop reading features as soon as cancellation is reported; the code after
# the loop still runs on whatever was collected so far.
if feedback.isCanceled():
break

feedback.setProgress(int(current * total))
attrs = feat.attributes()
try:
# Two variants of the extraction are shown: the first stringifies the
# category with str(), the second keeps the raw attribute value as the key.
value = float(attrs[valuesField])
cat = str(attrs[categoriesField])
value = float(attrs[value_field_index])
cat = attrs[category_field_index]
if cat not in values:
values[cat] = []
values[cat].append(value)
# NOTE(review): bare `except:` silently skips any feature for which the
# block above raises (for example a value float() cannot convert), and it
# also swallows every other exception type. Consider narrowing it to the
# specific conversion errors expected here.
except:
pass

# Output schema. The first variant is a plain list of column names starting
# with a generic 'category' column; the second builds a QgsFields whose
# first entry is the category field taken from the source itself, followed
# by Double columns for the statistics and an Int column for the count.
fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
writer = output.getTableWriter(fields)
fields = QgsFields()
fields.append(source.fields().at(category_field_index))
fields.append(QgsField('min', QVariant.Double))
fields.append(QgsField('max', QVariant.Double))
fields.append(QgsField('mean', QVariant.Double))
fields.append(QgsField('stddev', QVariant.Double))
fields.append(QgsField('sum', QVariant.Double))
fields.append(QgsField('count', QVariant.Int))

# The sink is created with QgsWkbTypes.NoGeometry and a default-constructed
# QgsCoordinateReferenceSystem.
(sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
fields, QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem())

# One summary object, configured with exactly the statistics that are
# written out, is reused for every category.
stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)

for (cat, v) in list(values.items()):
# calculate() is called afresh with each category's value list before the
# results are read back.
stat.calculate(v)
# Two variants of the write: a record appended through the table writer,
# and a QgsFeature added to the sink. The value order matches the column
# order declared above.
record = [cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]
writer.addRecord(record)
f = QgsFeature()
f.setAttributes([cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()])
sink.addFeature(f, QgsFeatureSink.FastInsert)

# Result dictionary: maps the OUTPUT key to the destination id obtained from
# parameterAsSink().
return {self.OUTPUT: dest_id}
@@ -0,0 +1,45 @@
<GMLFeatureClassList>
<GMLFeatureClass>
<Name>stats_by_category</Name>
<ElementPath>stats_by_category</ElementPath>
<GeometryType>100</GeometryType>
<DatasetSpecificInfo>
<FeatureCount>3</FeatureCount>
</DatasetSpecificInfo>
<PropertyDefn>
<Name>id2</Name>
<ElementPath>id2</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>min</Name>
<ElementPath>min</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>max</Name>
<ElementPath>max</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>mean</Name>
<ElementPath>mean</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>stddev</Name>
<ElementPath>stddev</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>sum</Name>
<ElementPath>sum</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>count</Name>
<ElementPath>count</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
</GMLFeatureClass>
</GMLFeatureClassList>
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=""
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy><gml:null>missing</gml:null></gml:boundedBy>

<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.0">
<ogr:id2>2</ogr:id2>
<ogr:min>1</ogr:min>
<ogr:max>4</ogr:max>
<ogr:mean>2.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>5</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.1">
<ogr:id2>1</ogr:id2>
<ogr:min>2</ogr:min>
<ogr:max>5</ogr:max>
<ogr:mean>3.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>7</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.2">
<ogr:id2>0</ogr:id2>
<ogr:min>3</ogr:min>
<ogr:max>9</ogr:max>
<ogr:mean>6.6</ogr:mean>
<ogr:stddev>2.30217288664427</ogr:stddev>
<ogr:sum>33</ogr:sum>
<ogr:count>5</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
</ogr:FeatureCollection>
13 changes: 13 additions & 0 deletions python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml
Expand Up @@ -2489,6 +2489,19 @@ tests:
name: expected/single_to_multi.gml
type: vector

- algorithm: qgis:statisticsbycategories
name: stats by category
params:
VALUES_FIELD_NAME: id
CATEGORIES_FIELD_NAME: id2
INPUT:
name: points.gml
type: vector
results:
OUTPUT:
name: expected/stats_by_category.gml
type: vector

# - algorithm: qgis:zonalstatistics
# name: simple zonal statistics
# params:
Expand Down

0 comments on commit b93be39

Please sign in to comment.