Skip to content

Commit

Permalink
[opencl] Use fast formula for hillshade
Browse files Browse the repository at this point in the history
Also optimize cl buffers
  • Loading branch information
elpaso committed Aug 8, 2018
1 parent a73bbba commit 528302c
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/analysis/raster/qgshillshadefilter.cpp
Expand Up @@ -70,6 +70,7 @@ void QgsHillshadeFilter::setLightAngle( float angle )
mSinZenithRad = std::sin( angle * static_cast<float>( M_PI ) / 180.0f );
}

<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
#ifdef HAVE_OPENCL

void QgsHillshadeFilter::addExtraRasterParams( std::vector<float> &params )
Expand All @@ -82,3 +83,28 @@ void QgsHillshadeFilter::addExtraRasterParams( std::vector<float> &params )
}

#endif
=======
void QgsHillshadeFilter::addExtraRasterParams( std::vector<float> &params )
{
float azimuthRad = -1 * mLightAzimuth * M_PI / 180.0;
float zenithRad = std::max( 0.0f, 90.0f - mLightAngle ) * M_PI / 180.0;
float cosZenithRad = std::cos( zenithRad );
float cos_az_mul_cos_alt_mul_z = std::cos( azimuthRad ) * cosZenithRad * mZFactor;
float sin_az_mul_cos_alt_mul_z = std::sin( azimuthRad ) * cosZenithRad * mZFactor;
float cos_az_mul_cos_alt_mul_z_mul_254 = 254.0 * cos_az_mul_cos_alt_mul_z;
float sin_az_mul_cos_alt_mul_z_mul_254 = 254.0 * sin_az_mul_cos_alt_mul_z;
float square_z = mZFactor * mZFactor;
float sin_altRadians_mul_254 = 254.0 * std::sin( zenithRad );

// For fast formula from GDAL DEM
params.push_back( cos_az_mul_cos_alt_mul_z_mul_254 ); // 5
params.push_back( sin_az_mul_cos_alt_mul_z_mul_254 ); // 6
params.push_back( square_z ); // 7
params.push_back( sin_altRadians_mul_254 ); // 8
/*/ Slow formula
params.push_back( azimuthRad ); // 9
params.push_back( zenithRad ); // 10
*/

}
>>>>>>> [opencl] Use fast formula for hillshade
11 changes: 11 additions & 0 deletions src/analysis/raster/qgshillshadefilter.h
Expand Up @@ -44,15 +44,21 @@ class ANALYSIS_EXPORT QgsHillshadeFilter: public QgsDerivativeFilter
void setLightAngle( float angle );

private:
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c

#ifdef HAVE_OPENCL

=======
>>>>>>> [opencl] Use fast formula for hillshade
/**
 * Returns the OpenCL program base name for this filter, which is the
 * literal "hillshade".
 */
const QString openClProgramBaseName() const override
{
  QString programBaseName = QStringLiteral( "hillshade" );
  return programBaseName;
}
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
#endif

=======
>>>>>>> [opencl] Use fast formula for hillshade
float mLightAzimuth;
float mLightAngle;
// Precalculate for speed:
Expand All @@ -67,6 +73,11 @@ class ANALYSIS_EXPORT QgsHillshadeFilter: public QgsDerivativeFilter
void addExtraRasterParams( std::vector<float> &params ) override;
#endif


// QgsNineCellFilter interface
private:

void addExtraRasterParams( std::vector<float> &params ) override;
};

#endif // QGSHILLSHADEFILTER_H
46 changes: 46 additions & 0 deletions src/analysis/raster/qgsninecellfilter.cpp
Expand Up @@ -261,21 +261,30 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
addExtraRasterParams( rasterParams );

std::size_t bufferSize( sizeof( float ) * ( xSize + 2 ) );
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
std::size_t inputSize( sizeof( float ) * ( xSize ) );
=======
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
std::size_t inputSize( sizeof( float ) * ( xSize ) );
>>>>>>> [opencl] Use fast formula for hillshade

cl::Buffer rasterParamsBuffer( queue, rasterParams.begin(), rasterParams.end(), true, false, nullptr );
cl::Buffer scanLine1Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr );
cl::Buffer scanLine2Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr );
cl::Buffer scanLine3Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr );
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
cl::Buffer *scanLineBuffer[3] = {&scanLine1Buffer, &scanLine2Buffer, &scanLine3Buffer};
cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, inputSize, nullptr, nullptr );
=======
cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, sizeof( float ) * xSize, nullptr, nullptr );
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
cl::Buffer *scanLineBuffer[3] = {&scanLine1Buffer, &scanLine2Buffer, &scanLine3Buffer};
cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, inputSize, nullptr, nullptr );
>>>>>>> [opencl] Use fast formula for hillshade

// Create a program from the kernel source
cl::Program program( QgsOpenClUtils::buildProgram( ctx, source, QgsOpenClUtils::ExceptionBehavior::Throw ) );
Expand All @@ -289,12 +298,18 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
cl::Buffer &
> ( program, "processNineCellWindow" );

<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
// Rotate buffer index
std::vector<int> rowIndex = {0, 1, 2};

=======
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
// Rotate buffer index
std::vector<int> rowIndex = {0, 1, 2};

>>>>>>> [opencl] Use fast formula for hillshade
// values outside the layer extent (if the 3x3 window is on the border) are sent to the processing method as (input) nodata values
for ( int i = 0; i < ySize; ++i )
{
Expand All @@ -310,18 +325,24 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee

if ( i == 0 )
{
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
// Fill scanline 1 with (input) nodata for the values above the first row and
// feed scanline2 with the first actual data row
=======
// Fill scanline 1 with (input) nodata for the values above the first row and feed scanline2 with the first row
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
// Fill scanline 1 with (input) nodata for the values above the first row and
// feed scanline2 with the first actual data row
>>>>>>> [opencl] Use fast formula for hillshade
for ( int a = 0; a < xSize + 2 ; ++a )
{
scanLine[a] = mInputNodataValue;
}
queue.enqueueWriteBuffer( scanLine1Buffer, CL_TRUE, 0, bufferSize, scanLine.get() );

<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
// Read scanline2: first real raster row
if ( GDALRasterIO( rasterBand, GF_Read, 0, i, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
Expand All @@ -346,14 +367,23 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
=======
// Read scanline2
if ( GDALRasterIO( rasterBand, GF_Read, 0, 0, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
=======
// Read scanline2: first real raster row
if ( GDALRasterIO( rasterBand, GF_Read, 0, i, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
>>>>>>> [opencl] Use fast formula for hillshade
{
QgsDebugMsg( "Raster IO Error" );
}
queue.enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0, bufferSize, scanLine.get() );

<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
// Read scanline3
if ( GDALRasterIO( rasterBand, GF_Read, 0, 0, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
// Read scanline3: second real raster row
if ( GDALRasterIO( rasterBand, GF_Read, 0, i + 1, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
>>>>>>> [opencl] Use fast formula for hillshade
{
QgsDebugMsg( "Raster IO Error" );
}
Expand All @@ -362,6 +392,7 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
else
{
// Normally fetch only scanLine3 and move forward one row
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
// Read scanline 3, fill the last row with nodata values if it's the last iteration
=======
Expand All @@ -370,19 +401,26 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee

// Read scanline 3
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
// Read scanline 3, fill the last row with nodata values if it's the last iteration
>>>>>>> [opencl] Use fast formula for hillshade
if ( i == ySize - 1 ) //fill the row below the bottom with nodata values
{
for ( int a = 0; a < xSize + 2; ++a )
{
scanLine[a] = mInputNodataValue;
}
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
}
else // Read line i + 1 and put it into scanline 3
// Overwrite from input, skip first and last
=======
queue.enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
=======
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
>>>>>>> [opencl] Use fast formula for hillshade
}
else // Overwrite from input, skip first and last
>>>>>>> [opencl] Reduce memory footprint and optimize
Expand All @@ -391,6 +429,7 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
{
QgsDebugMsg( "Raster IO Error" );
}
<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
}
Expand All @@ -414,6 +453,9 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
>>>>>>> Use OpenCL command queue
=======
queue.enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
=======
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
>>>>>>> [opencl] Use fast formula for hillshade
}
}
>>>>>>> [opencl] Reduce memory footprint and optimize
Expand All @@ -429,11 +471,15 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
rasterParamsBuffer
);

<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c
<<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f
queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, inputSize, resultLine.get() );
=======
queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, xSize * sizeof( float ), resultLine.get() );
>>>>>>> [opencl] Reduce memory footprint and optimize
=======
queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, inputSize, resultLine.get() );
>>>>>>> [opencl] Use fast formula for hillshade

if ( GDALRasterIO( outputRasterBand, GF_Write, 0, i, xSize, 1, resultLine.get(), xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
{
Expand Down

0 comments on commit 528302c

Please sign in to comment.