Skip to content

Commit cc3ccdf

Browse files
committedNov 1, 2014
[processing] fix Join by location alg
1 parent 9d1a7b3 commit cc3ccdf

File tree

1 file changed

+132
-174
lines changed

1 file changed

+132
-174
lines changed
 

‎python/plugins/processing/algs/qgis/SpatialJoin.py

Lines changed: 132 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -16,55 +16,36 @@
1616
* *
1717
***************************************************************************
1818
"""
19-
from processing.core.GeoAlgorithmExecutionException import GeoAlgorithmExecutionException
19+
2020
__author__ = 'Joshua Arnott'
2121
__date__ = 'October 2013'
2222
__copyright__ = '(C) 2013, Joshua Arnott'
2323
# This will get replaced with a git SHA1 when you do a git archive
2424
__revision__ = '$Format:%H$'
2525

26-
from processing.core.GeoAlgorithm import GeoAlgorithm
26+
import os
27+
2728
from PyQt4.QtCore import *
2829
from PyQt4.QtGui import *
2930
from qgis.core import *
31+
32+
from processing.core.GeoAlgorithm import GeoAlgorithm
33+
from processing.core.GeoAlgorithmExecutionException import \
34+
GeoAlgorithmExecutionException
35+
from processing.core.ProcessingLog import ProcessingLog
36+
3037
from processing.core.parameters import ParameterVector
3138
from processing.core.parameters import ParameterSelection
3239
from processing.core.parameters import ParameterString
33-
from processing.tools import dataobjects
3440
from processing.core.outputs import OutputVector
35-
from processing.tools import vector
36-
from processing.core.ProcessingLog import ProcessingLog
37-
import os
41+
from processing.tools import dataobjects, vector
3842

39-
def myself(L):
40-
#median computation
41-
nVal = len(L)
42-
if nVal == 1:
43-
return L[0]
44-
L.sort()
45-
#test for list length
46-
medianVal = 0
47-
if nVal > 1:
48-
if ( nVal % 2 ) == 0:
49-
#index begin at 0
50-
#remove 1 to index in standard median computation
51-
medianVal = 0.5 * ( (L[ (nVal) / 2 - 1]) + (L[ (nVal) / 2 ] ))
52-
else:
53-
medianVal = L[ (nVal + 1) / 2 - 1]
54-
return medianVal
5543

5644
class SpatialJoin(GeoAlgorithm):
57-
'''
58-
Join by location
59-
60-
Port of the spatial join algorithm from fTools to the Processing Toolbox.
61-
'''
62-
63-
INPUT1 = "INPUT1"
64-
INPUT2 = "INPUT2"
45+
TARGET = "TARGET"
46+
JOIN = "JOIN"
6547
SUMMARY = "SUMMARY"
6648
STATS = "STATS"
67-
GEOMETRY = "GEOMETRY"
6849
KEEP = "KEEP"
6950
OUTPUT = "OUTPUT"
7051

@@ -73,216 +54,193 @@ class SpatialJoin(GeoAlgorithm):
7354
'Take summary of intersecting features'
7455
]
7556

76-
GEOMETRYS = [
77-
'Use geometry from target layer',
78-
'Use geometry from joined layer (multipart if summary)'
79-
]
80-
8157
KEEPS = [
8258
'Only keep matching records',
8359
'Keep all records (including non-matching target records)'
8460
]
8561

86-
#===========================================================================
87-
# def getIcon(self):
88-
# return QIcon(os.path.dirname(__file__) + "/icons/join_location.png")
89-
#===========================================================================
90-
9162
def defineCharacteristics(self):
92-
self.name = "Join by location"
63+
self.name = "Join atributes by location"
9364
self.group = "Vector general tools"
94-
self.addParameter(ParameterVector(SpatialJoin.INPUT1, "Target vector layer", [ParameterVector.VECTOR_TYPE_ANY]))
95-
self.addParameter(ParameterVector(SpatialJoin.INPUT2, "Join vector layer", [ParameterVector.VECTOR_TYPE_ANY]))
96-
self.addParameter(ParameterSelection(self.SUMMARY, "Attribute summary", self.SUMMARYS, 0))
97-
self.addParameter(ParameterString(self.STATS, "Statistics for summary (comma separated)", "sum,mean,min,max,median"))
98-
self.addParameter(ParameterSelection(self.GEOMETRY, "Output geometry", self.GEOMETRYS, 0))
99-
self.addParameter(ParameterSelection(self.KEEP, "Output table", self.KEEPS, 0))
100-
self.addOutput(OutputVector(SpatialJoin.OUTPUT, "Output layer"))
65+
66+
self.addParameter(ParameterVector(self.TARGET,
67+
'Target vector layer', [ParameterVector.VECTOR_TYPE_ANY]))
68+
self.addParameter(ParameterVector(self.JOIN,
69+
'Join vector layer', [ParameterVector.VECTOR_TYPE_ANY]))
70+
self.addParameter(ParameterSelection(self.SUMMARY,
71+
'Attribute summary', self.SUMMARYS))
72+
self.addParameter(ParameterString(self.STATS,
73+
'Statistics for summary (comma separated)',
74+
'sum,mean,min,max,median'))
75+
self.addParameter(ParameterSelection(self.KEEP,
76+
'Output table', self.KEEPS))
77+
self.addOutput(OutputVector(self.OUTPUT, 'Output layer'))
10178

10279
def processAlgorithm(self, progress):
103-
progress.setText('Analysing inputs...')
80+
target = dataobjects.getObjectFromUri(
81+
self.getParameterValue(self.TARGET))
82+
join = dataobjects.getObjectFromUri(
83+
self.getParameterValue(self.JOIN))
10484

10585
summary = self.getParameterValue(self.SUMMARY) == 1
106-
sumList = self.getParameterValue(self.STATS).upper().replace(' ','').split(',')
107-
use_geom = self.getParameterValue(self.GEOMETRY)
108-
keep = self.getParameterValue(self.KEEP) == 1
86+
keep = self.getParameterValue(self.KEEP) == 0
87+
88+
sumList = self.getParameterValue(self.STATS).lower().split(',')
10989

110-
input1 = self.getParameterValue(self.INPUT1)
111-
layer1 = dataobjects.getObjectFromUri(input1)
112-
provider1 = layer1.dataProvider()
113-
fieldList1 = provider1.fields()
90+
targetProvider = target.dataProvider()
91+
joinProvider = join.dataProvider()
11492

115-
input2 = self.getParameterValue(self.INPUT2)
116-
layer2 = dataobjects.getObjectFromUri(input2)
117-
provider2 = layer2.dataProvider()
118-
fieldList2 = provider2.fields()
93+
targetFields = targetProvider.fields()
94+
joinFields = joinProvider.fields()
11995

12096
fieldList = QgsFields()
97+
12198
if not summary:
122-
fieldList2 = vector.testForUniqueness(fieldList1, fieldList2)
123-
seq = range(0, len(fieldList1) + len(fieldList2))
124-
fieldList1.extend(fieldList2)
125-
fieldList1 = dict(zip(seq, fieldList1))
99+
joinFields = vector.testForUniqueness(targetFields, joinFields)
100+
seq = range(0, len(targetFields) + len(joinFields))
101+
targetFields.extend(joinFields)
102+
targetFields = dict(zip(seq, targetFields))
126103
else:
127104
numFields = {}
128-
for j in xrange(len(fieldList2)):
129-
if fieldList2[j].type() == QVariant.Int or fieldList2[j].type() == QVariant.Double:
105+
for j in xrange(len(joinFields)):
106+
if joinFields[j].type() in [QVariant.Int, QVariant.Double]:
130107
numFields[j] = []
131108
for i in sumList:
132-
field = QgsField(i + unicode(fieldList2[j].name()), QVariant.Double, "real", 24, 16, "Summary field" )
109+
field = QgsField(i + unicode(joinFields[j].name()), QVariant.Double, '', 24, 16)
133110
fieldList.append(field)
134-
field = QgsField("COUNT", QVariant.Double, "real", 24, 0, "Summary field" )
111+
field = QgsField('count', QVariant.Double, '', 24, 16)
135112
fieldList.append(field)
136-
fieldList2 = vector.testForUniqueness(fieldList1, fieldList)
137-
fieldList1.extend(fieldList)
138-
seq = range(0, len(fieldList1))
139-
fieldList1 = dict(zip(seq, fieldList1))
113+
joinFields = vector.testForUniqueness(targetFields, fieldList)
114+
targetFields.extend(fieldList)
115+
seq = range(0, len(targetFields))
116+
targetFields = dict(zip(seq, targetFields))
140117

141-
progress.setPercentage(13)
142118
fields = QgsFields()
143-
for f in fieldList1.values():
119+
for f in targetFields.values():
144120
fields.append(f)
145-
output = self.getOutputFromName(self.OUTPUT)
146-
147-
if use_geom == 0:
148-
# from target layer
149-
crs = provider1.crs()
150-
geometry_type = provider1.geometryType()
151-
else:
152-
# from joined layer
153-
crs = provider2.crs()
154-
if summary:
155-
geometry_type = self.singleToMultiGeom(provider2.geometryType())
156-
else:
157-
geometry_type = provider2.geometryType()
158121

159-
writer = output.getVectorWriter(fields, geometry_type, crs)
122+
writer = self.getOutputFromName(self.OUTPUT).getVectorWriter(
123+
fields, targetProvider.geometryType(), targetProvider.crs())
160124

161125
inFeat = QgsFeature()
162126
outFeat = QgsFeature()
163127
inFeatB = QgsFeature()
164128
inGeom = QgsGeometry()
165129

166-
progress.setPercentage(15)
167-
start = 15.00
168-
add = 85.00 / provider1.featureCount()
169-
170-
progress.setText('Creating spatial index...')
171-
index = vector.spatialindex(layer2)
172-
progress.setText('Processing spatial join...')
173-
fit1 = provider1.getFeatures()
174-
while fit1.nextFeature(inFeat):
175-
inGeom = inFeat.geometry()
176-
atMap1 = inFeat.attributes()
177-
if use_geom == 0:
178-
outFeat.setGeometry(inGeom)
130+
index = vector.spatialindex(join)
131+
132+
133+
# cache all features from provider2 to avoid huge number
134+
# of feature requests in the inner loop
135+
mapP2 = {}
136+
features = vector.features(join)
137+
for f in features:
138+
mapP2[f.id()] = QgsFeature(f)
139+
140+
features = vector.features(target)
141+
142+
count = 0
143+
total = 100.0 / len(features)
144+
145+
for f in features:
146+
inGeom = f.geometry()
147+
atMap1 = f.attributes()
148+
outFeat.setGeometry(inGeom)
179149
none = True
180150
joinList = []
181151
if inGeom.type() == QGis.Point:
182-
joinList = index.intersects( inGeom.buffer(10,2).boundingBox() )
183-
if len(joinList) > 0: check = 0
184-
else: check = 1
152+
joinList = index.intersects(inGeom.buffer(10,2).boundingBox())
153+
if len(joinList) > 0:
154+
check = 0
155+
else:
156+
check = 1
185157
else:
186-
joinList = index.intersects( inGeom.boundingBox() )
187-
if len(joinList) > 0: check = 0
188-
else: check = 1
158+
joinList = index.intersects(inGeom.boundingBox())
159+
if len(joinList) > 0:
160+
check = 0
161+
else:
162+
check = 1
163+
189164
if check == 0:
190165
count = 0
191-
multi_feature = []
192166
for i in joinList:
193-
provider2.getFeatures( QgsFeatureRequest().setFilterFid( int(i) ) ).nextFeature( inFeatB )
167+
inFeatB = mapP2[i] # cached feature from provider2
194168
if inGeom.intersects(inFeatB.geometry()):
195-
count = count + 1
169+
count += 1
170+
none = False
196171
atMap2 = inFeatB.attributes()
197172
if not summary:
198-
# first located feature
199173
atMap = atMap1
200174
atMap2 = atMap2
201175
atMap.extend(atMap2)
202176
atMap = dict(zip(seq, atMap))
203-
if use_geom == 1:
204-
outFeat.setGeometry(inFeatB.geometry())
205-
none = False
206177
break
207178
else:
208179
for j in numFields.keys():
209180
numFields[j].append(atMap2[j])
210-
if use_geom == 0:
211-
if none:
212-
outFeat.setGeometry(inGeom)
213-
else:
214-
feature_list = self.extractAsMulti(inFeatB.geometry())
215-
multi_feature.extend(feature_list)
216-
none = False
217181
if summary and not none:
218182
atMap = atMap1
219183
for j in numFields.keys():
220184
for k in sumList:
221-
if k == "SUM": atMap.append(sum(numFields[j]))
222-
elif k == "MEAN": atMap.append(sum(numFields[j]) / count)
223-
elif k == "MIN": atMap.append(min(numFields[j]))
224-
elif k == "MEDIAN": atMap.append(myself(numFields[j]))
225-
else: atMap.append(max(numFields[j]))
185+
if k == 'sum':
186+
atMap.append(sum(self._filter_null(numFields[j])))
187+
elif k == 'mean':
188+
try:
189+
nn_count = sum(1 for _ in self._filter_null(numFields[j]))
190+
atMap.append(sum(self._filter_null(numFields[j])) / nn_count)
191+
except ZeroDivisionError:
192+
atMap.append(NULL)
193+
elif k == 'min':
194+
try:
195+
atMap.append(min(self._filter_null(numFields[j])))
196+
except ValueError:
197+
atMap.append(NULL)
198+
elif k == 'median':
199+
atMap.append(self._myself(numFields[j]))
200+
else:
201+
try:
202+
atMap.append(max(self._filter_null(numFields[j])))
203+
except ValueError:
204+
atMap.append(NULL)
205+
226206
numFields[j] = []
227207
atMap.append(count)
228208
atMap = dict(zip(seq, atMap))
229-
if use_geom == 1:
230-
outGeom = QgsGeometry(self.convertGeometry(multi_feature, geometry_type))
231-
outFeat.setGeometry(outGeom)
209+
232210
if none:
233211
outFeat.setAttributes(atMap1)
234212
else:
235213
outFeat.setAttributes(atMap.values())
214+
236215
if keep: # keep all records
237216
writer.addFeature(outFeat)
238217
else: # keep only matching records
239218
if not none:
240219
writer.addFeature(outFeat)
241-
start = start + add
242-
progress.setPercentage(start)
243-
244-
del writer
245220

246-
def singleToMultiGeom(self, wkbType):
247-
try:
248-
if wkbType in (QGis.WKBPoint, QGis.WKBMultiPoint,
249-
QGis.WKBPoint25D, QGis.WKBMultiPoint25D):
250-
return QGis.WKBMultiPoint
251-
elif wkbType in (QGis.WKBLineString, QGis.WKBMultiLineString,
252-
QGis.WKBMultiLineString25D,
253-
QGis.WKBLineString25D):
221+
count += 1
222+
progress.setPercentage(int(count * total))
254223

255-
return QGis.WKBMultiLineString
256-
elif wkbType in (QGis.WKBPolygon, QGis.WKBMultiPolygon,
257-
QGis.WKBMultiPolygon25D, QGis.WKBPolygon25D):
224+
del writer
258225

259-
return QGis.WKBMultiPolygon
260-
else:
261-
return QGis.WKBUnknown
262-
except Exception, err:
263-
pass
264-
265-
def extractAsMulti(self, geom):
266-
if geom.type() == QGis.Point:
267-
if geom.isMultipart():
268-
return geom.asMultiPoint()
226+
def _filter_null(self, vals):
227+
"""Takes an iterator of values and returns a new iterator
228+
returning the same values but skipping any NULL values"""
229+
return (v for v in vals if v != NULL)
230+
231+
def _myself(self, L):
232+
#median computation
233+
nVal = len(L)
234+
if nVal == 1:
235+
return L[0]
236+
L.sort()
237+
#test for list length
238+
medianVal = 0
239+
if nVal > 1:
240+
if ( nVal % 2 ) == 0:
241+
#index begin at 0
242+
#remove 1 to index in standard median computation
243+
medianVal = 0.5 * ( (L[ (nVal) / 2 - 1]) + (L[ (nVal) / 2 ] ))
269244
else:
270-
return [geom.asPoint()]
271-
elif geom.type() == QGis.Line:
272-
if geom.isMultipart():
273-
return geom.asMultiPolyline()
274-
else:
275-
return [geom.asPolyline()]
276-
else:
277-
if geom.isMultipart():
278-
return geom.asMultiPolygon()
279-
else:
280-
return [geom.asPolygon()]
281-
282-
def convertGeometry(self, geom_list, vType):
283-
if vType == QGis.Point:
284-
return QgsGeometry().fromMultiPoint(geom_list)
285-
elif vType == QGis.Line:
286-
return QgsGeometry().fromMultiPolyline(geom_list)
287-
else:
288-
return QgsGeometry().fromMultiPolygon(geom_list)
245+
medianVal = L[ (nVal + 1) / 2 - 1]
246+
return medianVal

0 commit comments

Comments
 (0)
Please sign in to comment.