shapemerger.py

Giovanni Manghi, 2013-04-11 05:59 AM

Download (16.8 KB)

 
1
#!/usr/bin/python
2
# shapemerger.py
3
# v.1.2 by toni @ http://gistncase.blogspot.com
4
#
5
# shapefile merger utility 
6

    
7
import os, sys, glob
8
try:
9
    from osgeo import ogr, gdal
10
except:
11
    try:
12
        import ogr, gdal
13
    except:
14
        print 'Error: shapemerge.py cannot find python OGR and GDAL modules'
15
        print '       for handling geometries. OsGeo4W python is recommended.'
16
        sys.exit(1)
17

    
18
def Usage():
19
    print
20
    print 'Shapefile merger'
21
    print 'Usage: shapemerger.py\t[-o <output.shp>] [-n] [-i] [-s <fieldname>] [-f <fieldname>]'
22
    print '\t\t\t[-e] [-2] [-r] [-t polygon|linestring|point]'
23
    print '\t\t\t<shapefiles> [-x <exclude shapefile list>]'
24
    print 
25
    print '  <shapefiles>: input shapefile list (accepts wildcards)'
26
    print
27
    print '  -o:\toutput shapefile name (default: shapemerge.shp)'
28
    print '  -n:\tdo not add fields for the source filenames and fids'
29
    print '  -i:\tdbf schemes intersection (default: union)'
30
    print '  -s:\tcustom name for the source filenames field (default: SOURCESHP)'
31
    print '  -f:\tcustom name for the source fids field (default: SOURCEFID)'
32
    print '  -e:\tuse existing source filenames and fids fields, if any'
33
    print '  -2:\tforce output geometry type to 2D'
34
    print '  -x:\tlist of shapefiles to exclude (accepts wildcards)'
35
    print '  -r:\trecursive (grabs every path of input files)'
36
    print '  -t:\tfilter input shapefiles by geometry type'
37
    sys.exit(1)
38

    
39
strCurrentPath = os.path.abspath('')
40
os.chdir(strCurrentPath)
41

    
42
strOutDirname = ''
43
strMergeName = 'shapemerge'
44
strOutBasename = 'shapemerge.shp'
45

    
46
pOgrShpDriver = ogr.GetDriverByName('Esri Shapefile')
47

    
48
lsFilenames=[]
49
lsExcludeFilenames=[]
50
lsVerifiedFilenames=[]
51
lsMergeFieldsToRetain=[]
52
lsMergeFieldNames=[]
53
lsMergeFieldOgrDefns=[]
54

    
55
dGeomTypeFilters={'POLYGON': ogr.wkbPolygon, 'AREAL': ogr.wkbPolygon, 'LINESTRING': ogr.wkbLineString, 'LINE': ogr.wkbLineString, 'POINT': ogr.wkbPoint, 'POINTS': ogr.wkbPoint}
56

    
57
bFlagExclude = 0
58
bFlagAddSourcefields = 1
59
bFlagUseExistingSfields = 0
60
bFlagIntersection = 0
61
bFlagRecursive = 0
62
bFlagGeomTFilter = 0
63
bFlag2D = 0
64

    
65
bGeomTypeRefIsSet = 0
66

    
67
strFilenameFieldname = 'SOURCESHP'
68
strFidFieldname = 'SOURCEFID'
69

    
70
i = 1
71

    
72
# parse commandline (first check for recursive flag)
73
while i < len(sys.argv): 
74
    arg = sys.argv[i]
75
    if arg.upper() == '-R':
76
        bFlagRecursive = 1
77
        del sys.argv[i]
78
        break
79
    i = i + 1
80

    
81
i = 1
82
    
83
while i < len(sys.argv): 
84
    arg = sys.argv[i]
85

    
86
    if arg.upper() == '-N':
87
        bFlagAddSourcefields = 0
88
        bFlagExclude = 0
89

    
90
    elif arg.upper() == '-I':
91
        try:
92
            strGdalVersion = int(gdal.VersionInfo()[:2])
93
        except:
94
            strGdalVersion = 0
95
        
96
        if (strGdalVersion < 19):
97
            print 'Warning: dbf schemes intersection supported only with gdal >= 1.9.0, ignoring.'
98
            bFlagIntersection = 0
99
        else:
100
            bFlagIntersection = 1
101
        
102
        bFlagExclude = 0
103

    
104
    elif arg.upper() == '-S':
105
        i = i + 1
106
        strFilenameFieldname = ''.join(sys.argv[i].split())[:10].upper() #upper, tronca al decimo, toglie spazi
107
        bFlagExclude = 0
108

    
109
    elif arg.upper() == '-F':
110
        i = i + 1
111
        strFidFieldname = ''.join(sys.argv[i].split())[:10].upper() #upper, tronca al decimo, toglie spazi
112
        bFlagExclude = 0
113
        
114
    elif arg.upper() == '-E':
115
        bFlagUseExistingSfields = 1
116
        bFlagExclude = 0
117

    
118
    elif arg.upper() == '-2':
119
        bFlag2D = 1
120
        bFlagExclude = 0
121

    
122
    elif arg.upper() == '-T':
123
        i = i + 1
124
        strGeomTFilter = (sys.argv[i]).upper()
125
        
126
        if (not (strGeomTFilter in dGeomTypeFilters)):
127
            print "Warning: invalid geometry type declared, ignoring." 
128
        else:
129
            intGeometryTypeRef = dGeomTypeFilters[strGeomTFilter]
130
            bFlagGeomTFilter = 1
131
        
132
        bFlagExclude = 0
133
        
134
    elif arg.upper() == '-X':
135
        bFlagExclude = 1
136

    
137
    elif arg.upper() == '-O':
138
        i = i + 1
139
        bFlagExclude = 0
140

    
141
        strOutDirname, strOutBasename = os.path.split(sys.argv[i])
142
        strMergeName, strExtension = os.path.splitext(strOutBasename)
143
            
144
    else:
145
        # expands wildcards: exclude, input list/recursive input list
146
        if (bFlagRecursive):
147
            strInputShapePath, strInputShapeBasename = os.path.split(arg)
148
            strInputShapePath = strInputShapePath or '.'
149
            
150
            lsInputShapeRoots=[]
151
            lsInputShapeRoots.extend(glob.glob(strInputShapePath)) #expands paths with wildcards
152

    
153
        if (bFlagExclude and bFlagRecursive):
154
            for strRootPath in lsInputShapeRoots:
155
                for strRoot,strDirs,strFiles in os.walk(strRootPath):
156
                    lsExcludeFilenames.extend ( glob.glob(strRoot + os.sep + strInputShapeBasename) )
157
        
158
        elif (bFlagExclude and (not bFlagRecursive)):
159
            lsExcludeFilenames.extend ( glob.glob(arg) )
160
        
161
        elif (bFlagRecursive):
162
            for strRootPath in lsInputShapeRoots:
163
                for strRoot,strDirs,strFiles in os.walk(strRootPath):
164
                    lsFilenames.extend ( glob.glob(strRoot + os.sep + strInputShapeBasename) )
165
        else:
166
            lsFilenames.extend ( glob.glob(arg) )
167
                
168
    i = i + 1
169

    
170
for strItem in lsExcludeFilenames:
171
    try:
172
        lsFilenames.remove(strItem)
173
    except:
174
        pass
175

    
176
if len(lsFilenames) == 0:
177
    Usage()
178
    sys.exit(1)
179

    
180
if not os.path.isdir(strOutDirname or '.'):
181
    print strOutDirname + ': invalid directory, outputting here'
182
    strOutDirname = ''
183

    
184
if os.path.isfile( os.path.join(strOutDirname, strOutBasename) ):
185
    strSuffix = ''
186
    j = 0
187
    while os.path.isfile( os.path.join(strOutDirname,strMergeName + strSuffix + '.shp') ):
188
        j = j + 1
189
        strSuffix = '_' + str(j)
190
  
191
    if (strMergeName != 'shapemerge'):
192
        print strOutBasename + ': file exists, outputting in ' + strMergeName + strSuffix + '.shp'
193
  
194
    if (j > 0):
195
        strMergeName = strMergeName + strSuffix
196
        strOutBasename = strMergeName + '.shp'  
197
  
198
# geometry type check / "source attributes" renaming
199
strSuffix = ''
200
j = 0 
201
for i,strFile in enumerate(lsFilenames):
202
    try:
203
        pOgrDatasource = pOgrShpDriver.Open(strFile)
204
        pOgrLayer = pOgrDatasource.GetLayer()
205
        pOgrLayerSchema = pOgrLayer.GetLayerDefn()
206
        
207
        if ( (not bGeomTypeRefIsSet) and (not bFlagGeomTFilter) ):
208

    
209
            # picks up the first geometry type as reference if geometry type filter is not set
210
            intGeometryTypeRef = pOgrLayer.GetGeomType()
211
            
212
            if (bFlag2D):
213
                # fetch first feature to flatten geometry and grab TypeRef
214
                pOgrFeature = pOgrLayer.GetNextFeature()
215
                pOgrFeatureGeometry = pOgrFeature.GetGeometryRef()
216
                pOgrClonedGeometry = pOgrFeatureGeometry.Clone()
217
                pOgrClonedGeometry.FlattenTo2D()
218
                intGeometryTypeRef = pOgrClonedGeometry.GetGeometryType() 
219
                #pOgrFeature.Destroy()
220
            
221
            bGeomTypeRefIsSet = i # register index of the reference shapefile
222
                
223
        #check if layers the picked geometry type (ignore 3D flag)
224
        if pOgrLayer.GetGeomType() != intGeometryTypeRef:
225
            if ((pOgrLayer.GetGeomType() == ogr.wkbPolygon) or (pOgrLayer.GetGeomType() == ogr.wkbPolygon25D)) and ((intGeometryTypeRef == ogr.wkbPolygon) or (intGeometryTypeRef == ogr.wkbPolygon25D)) \
226
                or ((pOgrLayer.GetGeomType() == ogr.wkbLineString) or (pOgrLayer.GetGeomType() == ogr.wkbLineString25D)) and ((intGeometryTypeRef == ogr.wkbLineString) or (intGeometryTypeRef == ogr.wkbLineString25D)) \
227
                or ((pOgrLayer.GetGeomType() == ogr.wkbPoint) or (pOgrLayer.GetGeomType() == ogr.wkbPoint25D)) and ((intGeometryTypeRef == ogr.wkbPoint) or (intGeometryTypeRef == ogr.wkbPoint25D)) :
228
                    pass
229
            else:
230
                if (not bFlagGeomTFilter):
231
                    #print 'Error: shapefiles must have the same geometry type.'
232
                    #print '\t' + os.path.basename(lsFilenames[0]) + ': ' + ogr.GeometryTypeToName(intGeometryTypeRef).upper()
233
                    #print '\t' + os.path.basename(lsFilenames[i]) + ': ' + ogr.GeometryTypeToName(pOgrLayer.GetGeomType()).upper() 
234
                    #sys.exit(1)
235
                    print 'Warning: ' + os.path.basename( strFile ) + ' type ' + ogr.GeometryTypeToName(pOgrLayer.GetGeomType()).upper() + ' unfits first file type ' + ogr.GeometryTypeToName(intGeometryTypeRef).upper() + ' (' + os.path.basename(lsFilenames[bGeomTypeRefIsSet]) +'), skipping.'
236
                continue
237
                
238
        if ( (not bFlagUseExistingSfields) and bFlagAddSourcefields ):
239
            # check for "source fields" in current shapefile to eventually rename them
240
            while ( (pOgrLayerSchema.GetFieldIndex(strFilenameFieldname) != -1) or (pOgrLayerSchema.GetFieldIndex(strFidFieldname) != -1) ):
241
                j = j + 1
242
                strSuffix = '_' + str(j)
243
                strFilenameFieldname = strFilenameFieldname[:(10 - len(strSuffix))] + strSuffix
244
                strFidFieldname      = strFidFieldname[:(10 - len(strSuffix))] + strSuffix
245

    
246
        lsVerifiedFilenames.append( strFile )
247
        pOgrDatasource.Destroy()
248
        
249
    except:
250
        print 'Warning: ' + strFile + ' invalid shapefile, skipping.'
251
        #continue
252

    
253
if (len(lsVerifiedFilenames) == 0):
254
    print 'Error: all the input shapefiles are invalid.'
255
    sys.exit(1)
256
else:
257
    lsFilenames = lsVerifiedFilenames
258

    
259
if (j >0):
260
    print "Warning: \"source fieldnames\" renamed to: " +  strFilenameFieldname + ", " + strFidFieldname
261

    
262
# building attributes
263
if (bFlagAddSourcefields):
264
    # "source fields" attributes
265
    lsMergeFieldOgrDefns.append( ogr.FieldDefn(strFilenameFieldname, ogr.OFTString) )
266
    lsMergeFieldOgrDefns[0].SetWidth(255)
267
    lsMergeFieldOgrDefns.append( ogr.FieldDefn(strFidFieldname, ogr.OFTInteger) )
268
    lsMergeFieldOgrDefns[1].SetWidth(9) #max one billion features! to be fixed!
269
    
270
    lsMergeFieldNames.append(strFilenameFieldname)
271
    lsMergeFieldNames.append(strFidFieldname)
272

    
273

    
274
for i,strFile in enumerate(lsFilenames):
275
    # shapefiles attributes
276
    
277
    if ( i>0 and bFlagIntersection and ( (bFlagAddSourcefields and (len(lsMergeFieldNames)==2)) or (len(lsMergeFieldNames)==0) ) ):
278
        # no more scheme intersection to do, exit loop 
279
        break
280
    
281
    strFullname = strFile
282
    strDirname, strBasename = os.path.split(strFullname)
283
    strShapename, strExtension = os.path.splitext(strBasename)
284
    
285
    pOgrShpDatasource = pOgrShpDriver.Open (strFullname)
286
    pOgrShpLayer = pOgrShpDatasource.GetLayer()
287
    pOgrShpLayerSchema = pOgrShpLayer.GetLayerDefn()
288
     
289
    for j in range( pOgrShpLayerSchema.GetFieldCount() ):
290
        # parse shapefile attributes
291
        pOgrShpFielddefn = pOgrShpLayerSchema.GetFieldDefn(j)
292
        strShpFieldname = pOgrShpFielddefn.GetNameRef().upper()
293
        
294
        try:
295
            iMergeListsFieldIdx = lsMergeFieldNames.index(strShpFieldname)
296
        except:
297
            iMergeListsFieldIdx = -1
298
 
299
        if (iMergeListsFieldIdx == -1):
300
            # field doesn't exist: create it. If flag intersection is on, parse only the first file
301
            
302
            if ( (not bFlagIntersection) or (bFlagIntersection and (i == 0)) ):
303
                
304
                pOgrFielddefn = ogr.FieldDefn( strShpFieldname, pOgrShpFielddefn.GetType() )
305
                pOgrFielddefn.SetWidth( pOgrShpFielddefn.GetWidth() )
306
                pOgrFielddefn.SetPrecision( pOgrShpFielddefn.GetPrecision() )
307
                
308
                #ONLY TEXT:
309
                #pOgrFielddefn = ogr.FieldDefn( strShpFieldname, ogr.OFTString )
310
                #pOgrFielddefn.SetWidth( pOgrShpFielddefn.GetWidth() + pOgrShpFielddefn.GetPrecision() )
311
                
312
                lsMergeFieldOgrDefns.append( pOgrFielddefn )
313
                lsMergeFieldNames.append( strShpFieldname )
314
                
315
                # add field to the retain list for intersection
316
                lsMergeFieldsToRetain.append( len(lsMergeFieldNames) - 1 )
317
        
318
        else:
319
            # field exists, check properties
320
            pOgrFielddefn = lsMergeFieldOgrDefns[iMergeListsFieldIdx]
321
            
322
            #same data type
323
            if ( pOgrFielddefn.GetType() == pOgrShpFielddefn.GetType() ):
324
            
325
                if pOgrFielddefn.GetWidth() < pOgrShpFielddefn.GetWidth():
326
                    pOgrFielddefn.SetWidth( pOgrShpFielddefn.GetWidth() )
327
            
328
                if pOgrFielddefn.GetPrecision() < pOgrShpFielddefn.GetPrecision():
329
                    pOgrFielddefn.SetPrecision( pOgrShpFielddefn.GetPrecision() )
330
                    
331
            else:
332
                # different data type: "translate" it into string
333
                pOgrFielddefn.SetType( ogr.OFTString )
334
                pOgrFielddefn.SetWidth( pOgrFielddefn.GetWidth() + pOgrFielddefn.GetPrecision() )
335
                
336
                if ( pOgrFielddefn.GetWidth() < pOgrShpFielddefn.GetWidth() + pOgrShpFielddefn.GetPrecision() ): 
337
                    pOgrFielddefn.SetWidth( pOgrShpFielddefn.GetWidth() + pOgrShpFielddefn.GetPrecision() )
338
                                        
339
            if (bFlagIntersection):
340
                # add field to the retain list for intersection
341
                lsMergeFieldsToRetain.append(iMergeListsFieldIdx)
342
            
343
    if ( bFlagIntersection ):
344
        # performing dbf intersection
345
        
346
        if (bFlagAddSourcefields):
347
            lsMergeFieldsToRetain.append( lsMergeFieldNames.index(strFilenameFieldname) )
348
            lsMergeFieldsToRetain.append( lsMergeFieldNames.index(strFidFieldname) )
349
            
350
        lsMergeFieldsToDelete = range( len(lsMergeFieldNames)-1, -1, -1) # reverse: from last index to 0
351
        
352
        for iIdx in lsMergeFieldsToRetain:
353
            try:
354
                lsMergeFieldsToDelete.remove(iIdx)
355
            except:
356
                pass
357
       
358
        for iIdx in lsMergeFieldsToDelete:
359
            # add or remove fields items
360
            del lsMergeFieldOgrDefns[iIdx]
361
            del lsMergeFieldNames[iIdx]            
362

    
363
    pOgrShpDatasource.Destroy()
364
    lsMergeFieldsToRetain = []
365

    
366
# generate the merge layer
367
strMergeShpFullname = os.path.join(strOutDirname, strOutBasename)
368

    
369
pOgrMergeShpDatasource = pOgrShpDriver.CreateDataSource( strMergeShpFullname )
370
pOgrMergeShpLayer = pOgrMergeShpDatasource.CreateLayer( strMergeName, geom_type = intGeometryTypeRef )
371

    
372
for i in lsMergeFieldOgrDefns:
373
    # create the fields
374
    pOgrMergeShpLayer.CreateField(i,1)
375

    
376
pOgrMergeShpLayerSchema = pOgrMergeShpLayer.GetLayerDefn()
377

    
378
# Loop for populating the merge shapefile
379
for i,strFile in enumerate(lsFilenames):
380
    strFullname = strFile
381
    strDirname, strBasename = os.path.split(strFullname)
382
    strShapename, strExtension = os.path.splitext(strBasename)
383
    
384
    pOgrShpDatasource = pOgrShpDriver.Open (strFullname)
385
    pOgrShpLayer = pOgrShpDatasource.GetLayer()
386
    pOgrShpLayerSchema = pOgrShpLayer.GetLayerDefn()
387
            
388
    pOgrShpLayer.ResetReading()
389
    
390
    pOgrShpFeature = pOgrShpLayer.GetNextFeature()    
391
    
392
    bFlagExistsFilenameFld = pOgrShpLayerSchema.GetFieldIndex(strFilenameFieldname)
393
    bFlagExistsFidFld = pOgrShpLayerSchema.GetFieldIndex(strFidFieldname)
394
    
395
    while pOgrShpFeature:
396
        pOgrMergeShpfeature = ogr.Feature(pOgrMergeShpLayerSchema)
397
        pOgrMergeShpfeature.SetFrom(pOgrShpFeature,1)
398
        
399
        if (bFlag2D):
400
            # performing 2d conversion
401
            pOgrMergeGeometry2d = pOgrMergeShpfeature.GetGeometryRef().Clone()
402
            pOgrMergeGeometry2d.FlattenTo2D()
403
            pOgrMergeShpfeature.SetGeometryDirectly(pOgrMergeGeometry2d)
404
        
405
        if (bFlagAddSourcefields):
406
            # "source fields" values, if any and if they should be populated
407
            if ( (bFlagUseExistingSfields and bFlagExistsFilenameFld == -1) or (not bFlagUseExistingSfields) ):
408
                #pOgrMergeShpfeature.SetField( strFilenameFieldname, strBasename )
409
                pOgrMergeShpfeature.SetField( strFilenameFieldname, strShapename)
410
            if ( (bFlagUseExistingSfields and bFlagExistsFidFld == -1) or (not bFlagUseExistingSfields) ):
411
                pOgrMergeShpfeature.SetField( strFidFieldname, pOgrShpFeature.GetFID() )
412
            
413
        pOgrMergeShpLayer.CreateFeature(pOgrMergeShpfeature)
414
        
415
        pOgrMergeShpfeature.Destroy()
416
        pOgrShpFeature.Destroy()
417
        pOgrShpFeature = pOgrShpLayer.GetNextFeature()
418
            
419
    pOgrShpDatasource.Destroy()
420

    
421
# flush all to disk before quitting
422
pOgrMergeShpDatasource.SyncToDisk()
423
pOgrMergeShpDatasource.Destroy()
424

    
425
print
426
print strMergeShpFullname
427
sys.exit(0)