Skip to content

Commit

Permalink
[FEATURE][processing] New raster zonal stats algorithm
Browse files Browse the repository at this point in the history
Like the vector zonal stats algorithm, but this one works with
the zones defined in another raster.

Iterates over the input rasters in blocks to be nice and
memory efficient.

From the algorithm help:

"This algorithm calculates statistics for a raster layer's
values, categorized by zones defined in another raster layer.

If the reference layer parameter is set to "Input layer",
then zones are determined by sampling the zone raster layer
value at the centroid of each pixel from the source raster
layer.

If the reference layer parameter is set to "Zones layer",
then the input raster layer will be sampled at the centroid
of each pixel from the zones raster layer.

If either the source raster layer or the zone raster layer
value is NODATA for a pixel, that pixel's value will be
skipped and not including in the calculated statistics."
  • Loading branch information
nyalldawson committed Jan 3, 2019
1 parent a7c3aad commit 5586352
Show file tree
Hide file tree
Showing 11 changed files with 468 additions and 0 deletions.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,5 @@
zone,deg2,sum,count,min,max,mean
6.00000000,0.00034562,5371074.34954071,"34562",85.50000000,243.00000000,155.40403766
4.00000000,0.00013810,1263010.46405792,"13810",86.30000305,93.90000153,91.45622477
3.00000000,0.00026125,4375224.02449036,"26125",111.69999695,232.43850708,167.47268993
1.00000000,0.00037237,4668982.96012115,"37237",85.00000000,219.19999695,125.38558316
@@ -0,0 +1,5 @@
zone,deg2,sum,count,min,max,mean
6.00000000,0.00034585,5378373.70841217,"34585",85.50000000,243.00000000,155.51174522
4.00000000,0.00013848,1266379.78169250,"13848",86.30000305,93.90000153,91.44856887
3.00000000,0.00026132,4379265.06565094,"26132",111.69999695,232.43850708,167.58246845
1.00000000,0.00037290,4677866.67560577,"37290",85.00000000,219.19999695,125.44560675
@@ -0,0 +1,5 @@
zone,m2,sum,count,min,max,mean
6.00000000,3055987.38515208,6069863.29187775,"39025",85.50000000,243.00000000,155.53781658
4.00000000,1226310.37671958,1432050.30903625,"15660",86.30000305,93.90000153,91.44637989
3.00000000,2310490.91092664,4944274.10529327,"29505",111.69999695,232.43850708,167.57410965
1.00000000,3297568.96319678,5283155.23725128,"42110",85.00000000,219.19999695,125.46082254
@@ -0,0 +1,5 @@
zone,deg2,sum,count,min,max,mean
6.00000000,0.00034562,5371074.34954071,"34562",85.50000000,243.00000000,155.40403766
4.00000000,0.00013810,1263010.46405792,"13810",86.30000305,93.90000153,91.45622477
3.00000000,0.00026125,4375224.02449036,"26125",111.69999695,232.43850708,167.47268993
1.00000000,0.00037237,4668982.96012115,"37237",85.00000000,219.19999695,125.38558316
65 changes: 65 additions & 0 deletions python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml
Expand Up @@ -6477,5 +6477,70 @@ tests:
name: expected/delete_duplicates_with_nulls.gml
type: vector

- algorithm: native:rasterlayerzonalstats
name: Raster layer zonal stats, same CRS
params:
BAND: 1
INPUT:
name: dem.tif
type: raster
ZONES:
name: custom/dem_zones.tif
type: raster
ZONES_BAND: 1
results:
OUTPUT_TABLE:
name: expected/raster_zonal_stats.csv
type: vector

- algorithm: native:rasterlayerzonalstats
name: Raster layer zonal stats, reprojected
params:
BAND: 1
INPUT:
name: dem.tif
type: raster
ZONES:
name: custom/dem_zones_crs.tif
type: raster
ZONES_BAND: 1
results:
OUTPUT_TABLE:
name: expected/raster_zonal_stats_reproj.csv
type: vector

- algorithm: native:rasterlayerzonalstats
name: Raster layer zonal stats, zones ref
params:
BAND: 1
INPUT:
name: dem.tif
type: raster
REF_LAYER: 1
ZONES:
name: custom/dem_zones.tif
type: raster
ZONES_BAND: 1
results:
OUTPUT_TABLE:
name: expected/raster_zonal_stats_zone_ref.csv
type: vector

- algorithm: native:rasterlayerzonalstats
name: Raster layer zonal stats reprojected, zones ref
params:
BAND: 1
INPUT:
name: dem.tif
type: raster
REF_LAYER: 1
ZONES:
name: custom/dem_zones_crs.tif
type: raster
ZONES_BAND: 1
results:
OUTPUT_TABLE:
name: expected/raster_zonal_stats_zone_crs_ref.csv
type: vector

# See ../README.md for a description of the file format
1 change: 1 addition & 0 deletions src/analysis/CMakeLists.txt
Expand Up @@ -76,6 +76,7 @@ SET(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmprojectpointcartesian.cpp
processing/qgsalgorithmpromotetomultipart.cpp
processing/qgsalgorithmrasterlayeruniquevalues.cpp
processing/qgsalgorithmrasterzonalstats.cpp
processing/qgsalgorithmreclassifybylayer.cpp
processing/qgsalgorithmremoveduplicatesbyattribute.cpp
processing/qgsalgorithmremoveduplicatevertices.cpp
Expand Down
294 changes: 294 additions & 0 deletions src/analysis/processing/qgsalgorithmrasterzonalstats.cpp
@@ -0,0 +1,294 @@
/***************************************************************************
qgsalgorithmrasterzonalstats.cpp
---------------------
begin : December 2018
copyright : (C) 2018 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/

/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/

#include "qgsalgorithmrasterzonalstats.h"
#include "qgsstringutils.h"
#include "qgsstatisticalsummary.h"
#include "qgsrasterprojector.h"
#include <unordered_map>

///@cond PRIVATE

QString QgsRasterLayerZonalStatsAlgorithm::name() const
{
return QStringLiteral( "rasterlayerzonalstats" );
}

QString QgsRasterLayerZonalStatsAlgorithm::displayName() const
{
return QObject::tr( "Raster layer zonal statistics" );
}

QStringList QgsRasterLayerZonalStatsAlgorithm::tags() const
{
return QObject::tr( "count,area,statistics,stats,zones,categories,minimum,maximum,mean,sum,total" ).split( ',' );
}

QString QgsRasterLayerZonalStatsAlgorithm::group() const
{
return QObject::tr( "Raster analysis" );
}

QString QgsRasterLayerZonalStatsAlgorithm::groupId() const
{
return QStringLiteral( "rasteranalysis" );
}

void QgsRasterLayerZonalStatsAlgorithm::initAlgorithm( const QVariantMap & )
{
addParameter( new QgsProcessingParameterRasterLayer( QStringLiteral( "INPUT" ),
QObject::tr( "Input layer" ) ) );
addParameter( new QgsProcessingParameterBand( QStringLiteral( "BAND" ),
QObject::tr( "Band number" ), 1, QStringLiteral( "INPUT" ) ) );
addParameter( new QgsProcessingParameterRasterLayer( QStringLiteral( "ZONES" ),
QObject::tr( "Zones layer" ) ) );
addParameter( new QgsProcessingParameterBand( QStringLiteral( "ZONES_BAND" ),
QObject::tr( "Zones band number" ), 1, QStringLiteral( "ZONES" ) ) );

std::unique_ptr< QgsProcessingParameterEnum > refParam = qgis::make_unique< QgsProcessingParameterEnum >( QStringLiteral( "REF_LAYER" ), QObject::tr( "Reference layer" ),
QStringList() << QObject::tr( "Input layer" ) << QObject::tr( "Zones layer" ), false, 0 );
refParam->setFlags( refParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
addParameter( refParam.release() );

addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT_TABLE" ),
QObject::tr( "Statistics" ), QgsProcessing::TypeVector ) );

addOutput( new QgsProcessingOutputString( QStringLiteral( "EXTENT" ), QObject::tr( "Extent" ) ) );
addOutput( new QgsProcessingOutputString( QStringLiteral( "CRS_AUTHID" ), QObject::tr( "CRS authority identifier" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "WIDTH_IN_PIXELS" ), QObject::tr( "Width in pixels" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "HEIGHT_IN_PIXELS" ), QObject::tr( "Height in pixels" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "TOTAL_PIXEL_COUNT" ), QObject::tr( "Total pixel count" ) ) );
addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NODATA_PIXEL_COUNT" ), QObject::tr( "NODATA pixel count" ) ) );
}

QString QgsRasterLayerZonalStatsAlgorithm::shortDescription() const
{
return QObject::tr( "Calculates statistics for a raster layer's values, categorized by zones defined in another raster layer." );
}

QString QgsRasterLayerZonalStatsAlgorithm::shortHelpString() const
{
return QObject::tr( "This algorithm calculates statistics for a raster layer's values, categorized by zones defined in another raster layer.\n\n"
"If the reference layer parameter is set to \"Input layer\", then zones are determined by sampling the zone raster layer value at the centroid of each pixel from the source raster layer.\n\n"
"If the reference layer parameter is set to \"Zones layer\", then the input raster layer will be sampled at the centroid of each pixel from the zones raster layer.\n\n"
"If either the source raster layer or the zone raster layer value is NODATA for a pixel, that pixel's value will be skipped and not including in the calculated statistics." );
}

QgsRasterLayerZonalStatsAlgorithm *QgsRasterLayerZonalStatsAlgorithm::createInstance() const
{
return new QgsRasterLayerZonalStatsAlgorithm();
}

bool QgsRasterLayerZonalStatsAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
{
mRefLayer = static_cast< RefLayer >( parameterAsEnum( parameters, QStringLiteral( "REF_LAYER" ), context ) );

QgsRasterLayer *layer = parameterAsRasterLayer( parameters, QStringLiteral( "INPUT" ), context );
int band = parameterAsInt( parameters, QStringLiteral( "BAND" ), context );

if ( !layer )
throw QgsProcessingException( invalidRasterError( parameters, QStringLiteral( "INPUT" ) ) );

mBand = parameterAsInt( parameters, QStringLiteral( "BAND" ), context );
if ( mBand < 1 || mBand > layer->bandCount() )
throw QgsProcessingException( QObject::tr( "Invalid band number for BAND (%1): Valid values for input raster are 1 to %2" ).arg( mBand )
.arg( layer->bandCount() ) );

mHasNoDataValue = layer->dataProvider()->sourceHasNoDataValue( band );

QgsRasterLayer *zonesLayer = parameterAsRasterLayer( parameters, QStringLiteral( "ZONES" ), context );

if ( !zonesLayer )
throw QgsProcessingException( invalidRasterError( parameters, QStringLiteral( "ZONES" ) ) );

mZonesBand = parameterAsInt( parameters, QStringLiteral( "ZONES_BAND" ), context );
if ( mZonesBand < 1 || mZonesBand > zonesLayer->bandCount() )
throw QgsProcessingException( QObject::tr( "Invalid band number for ZONES_BAND (%1): Valid values for input raster are 1 to %2" ).arg( mZonesBand )
.arg( zonesLayer->bandCount() ) );
mZonesHasNoDataValue = zonesLayer->dataProvider()->sourceHasNoDataValue( band );

mSourceDataProvider.reset( layer->dataProvider()->clone() );
mSourceInterface = mSourceDataProvider.get();
mZonesDataProvider.reset( zonesLayer->dataProvider()->clone() );
mZonesInterface = mZonesDataProvider.get();

switch ( mRefLayer )
{
case Source:
mCrs = layer->crs();
mRasterUnitsPerPixelX = layer->rasterUnitsPerPixelX();
mRasterUnitsPerPixelY = layer->rasterUnitsPerPixelY();
mLayerWidth = layer->width();
mLayerHeight = layer->height();
mExtent = layer->extent();

// add projector if necessary
if ( layer->crs() != zonesLayer->crs() )
{
mProjector = qgis::make_unique< QgsRasterProjector >();
mProjector->setInput( mZonesDataProvider.get() );
mProjector->setCrs( zonesLayer->crs(), layer->crs() );
mZonesInterface = mProjector.get();
}
break;

case Zones:
mCrs = zonesLayer->crs();
mRasterUnitsPerPixelX = zonesLayer->rasterUnitsPerPixelX();
mRasterUnitsPerPixelY = zonesLayer->rasterUnitsPerPixelY();
mLayerWidth = zonesLayer->width();
mLayerHeight = zonesLayer->height();
mExtent = zonesLayer->extent();

// add projector if necessary
if ( layer->crs() != zonesLayer->crs() )
{
mProjector = qgis::make_unique< QgsRasterProjector >();
mProjector->setInput( mSourceDataProvider.get() );
mProjector->setCrs( layer->crs(), zonesLayer->crs() );
mSourceInterface = mProjector.get();
}
break;
}

return true;
}

QVariantMap QgsRasterLayerZonalStatsAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
{
QString areaUnit = QgsUnitTypes::toAbbreviatedString( QgsUnitTypes::distanceToAreaUnit( mCrs.mapUnits() ) );

QString tableDest;
std::unique_ptr< QgsFeatureSink > sink;
if ( parameters.contains( QStringLiteral( "OUTPUT_TABLE" ) ) && parameters.value( QStringLiteral( "OUTPUT_TABLE" ) ).isValid() )
{
QgsFields outFields;
outFields.append( QgsField( QStringLiteral( "zone" ), QVariant::Double, QString(), 20, 8 ) );
outFields.append( QgsField( areaUnit.replace( QStringLiteral( "²" ), QStringLiteral( "2" ) ), QVariant::Double, QString(), 20, 8 ) );
outFields.append( QgsField( QStringLiteral( "sum" ), QVariant::Double, QString(), 20, 8 ) );
outFields.append( QgsField( QStringLiteral( "count" ), QVariant::LongLong, QString(), 20 ) );
outFields.append( QgsField( QStringLiteral( "min" ), QVariant::Double, QString(), 20, 8 ) );
outFields.append( QgsField( QStringLiteral( "max" ), QVariant::Double, QString(), 20, 8 ) );
outFields.append( QgsField( QStringLiteral( "mean" ), QVariant::Double, QString(), 20, 8 ) );

sink.reset( parameterAsSink( parameters, QStringLiteral( "OUTPUT_TABLE" ), context, tableDest, outFields, QgsWkbTypes::NoGeometry, QgsCoordinateReferenceSystem() ) );
if ( !sink )
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT_TABLE" ) ) );
}

struct StatCalculator
{
// only calculate cheap stats-- we cannot calculate stats which require holding values in memory -- because otherwise we'll end
// up trying to store EVERY pixel value from the input in memory
QgsStatisticalSummary s{ QgsStatisticalSummary::Count | QgsStatisticalSummary::Sum | QgsStatisticalSummary::Min | QgsStatisticalSummary::Max | QgsStatisticalSummary::Mean };
};
std::unordered_map<double, StatCalculator, std::hash<double>, std::equal_to<double> > zoneStats;
qgssize noDataCount = 0;

qgssize layerSize = static_cast< qgssize >( mLayerWidth ) * static_cast< qgssize >( mLayerHeight );
int maxWidth = QgsRasterIterator::DEFAULT_MAXIMUM_TILE_WIDTH;
int maxHeight = QgsRasterIterator::DEFAULT_MAXIMUM_TILE_HEIGHT;
int nbBlocksWidth = static_cast< int>( std::ceil( 1.0 * mLayerWidth / maxWidth ) );
int nbBlocksHeight = static_cast< int >( std::ceil( 1.0 * mLayerHeight / maxHeight ) );
int nbBlocks = nbBlocksWidth * nbBlocksHeight;

QgsRasterIterator iter = mRefLayer == Source ? QgsRasterIterator( mSourceInterface )
: QgsRasterIterator( mZonesInterface );
iter.startRasterRead( mRefLayer == Source ? mBand : mZonesBand, mLayerWidth, mLayerHeight, mExtent );

int iterLeft = 0;
int iterTop = 0;
int iterCols = 0;
int iterRows = 0;
QgsRectangle blockExtent;
std::unique_ptr< QgsRasterBlock > rasterBlock;
std::unique_ptr< QgsRasterBlock > zonesRasterBlock;
while ( true )
{
if ( mRefLayer == Source )
{
if ( !iter.readNextRasterPart( mBand, iterCols, iterRows, rasterBlock, iterLeft, iterTop, &blockExtent ) )
break;

zonesRasterBlock.reset( mZonesInterface->block( mZonesBand, blockExtent, iterCols, iterRows ) );
if ( !zonesRasterBlock )
continue;
}
else
{
if ( !iter.readNextRasterPart( mZonesBand, iterCols, iterRows, zonesRasterBlock, iterLeft, iterTop, &blockExtent ) )
break;

rasterBlock.reset( mSourceInterface->block( mBand, blockExtent, iterCols, iterRows ) );
if ( !rasterBlock )
continue;
}

feedback->setProgress( 100 * ( ( iterTop / maxHeight * nbBlocksWidth ) + iterLeft / maxWidth ) / nbBlocks );
if ( !rasterBlock->isValid() || rasterBlock->isEmpty() || !zonesRasterBlock->isValid() || zonesRasterBlock->isEmpty() )
continue;

for ( int row = 0; row < iterRows; row++ )
{
if ( feedback->isCanceled() )
break;

for ( int column = 0; column < iterCols; column++ )
{
if ( ( mHasNoDataValue && rasterBlock->isNoData( row, column ) ) ||
( mZonesHasNoDataValue && zonesRasterBlock->isNoData( row, column ) ) )
{
noDataCount += 1;
}
else
{
double value = rasterBlock->value( row, column );
double zone = zonesRasterBlock->value( row, column );
zoneStats[ zone ].s.addValue( value );
}
}
}
}

QVariantMap outputs;
outputs.insert( QStringLiteral( "EXTENT" ), mExtent.toString() );
outputs.insert( QStringLiteral( "CRS_AUTHID" ), mCrs.authid() );
outputs.insert( QStringLiteral( "WIDTH_IN_PIXELS" ), mLayerWidth );
outputs.insert( QStringLiteral( "HEIGHT_IN_PIXELS" ), mLayerHeight );
outputs.insert( QStringLiteral( "TOTAL_PIXEL_COUNT" ), layerSize );
outputs.insert( QStringLiteral( "NODATA_PIXEL_COUNT" ), noDataCount );

double pixelArea = mRasterUnitsPerPixelX * mRasterUnitsPerPixelY;

for ( auto it = zoneStats.begin(); it != zoneStats.end(); ++it )
{
QgsFeature f;
it->second.s.finalize();
f.setAttributes( QgsAttributes() << it->first << it->second.s.count() * pixelArea << it->second.s.sum() << it->second.s.count() <<
it->second.s.min() << it->second.s.max() << it->second.s.mean() );
sink->addFeature( f, QgsFeatureSink::FastInsert );
}
outputs.insert( QStringLiteral( "OUTPUT_TABLE" ), tableDest );

return outputs;
}


///@endcond



0 comments on commit 5586352

Please sign in to comment.