45
45
QgsFeedback ,
46
46
QgsVectorLayer
47
47
)
48
- from shapely import wkb
49
48
50
49
from .. import CanceledError
51
50
from .intl import tr
@@ -83,6 +82,17 @@ def iterateWithProgress(self, it: Iterator, total: int = 0):
83
82
self .checkCanceled ()
84
83
yield n
85
84
85
def split_provider_url(self):
    """Split the layer's provider URI into its data source and parameters.

    The URI returned by ``self._layer.dataProvider().dataSourceUri()`` is
    split on ``'|'``. The first segment is returned unchanged as the
    database; the second segment is parsed as ``&``-separated ``key=value``
    pairs. Segments after the second are ignored.

    Returns:
        A ``(database, params)`` tuple, where ``params`` maps each parameter
        name to its string value.

    Raises:
        ValueError: If the URI contains no ``'|'`` separator, or if a
            parameter in the second segment has no ``'='``.
    """
    uri_parts = self._layer.dataProvider().dataSourceUri().split('|')
    if len(uri_parts) <= 1:
        raise ValueError("Could not determine table name from URI")
    database = uri_parts[0]
    lexer = shlex.shlex(uri_parts[1])
    lexer.whitespace_split = True
    lexer.whitespace = '&'
    # shlex treats '#' as the start of a comment by default, which would
    # silently truncate values such as "layername=roads#2".
    lexer.commenters = ''
    # Split on the first '=' only so values may themselves contain '='.
    params = dict(pair.split('=', 1) for pair in lexer)
    return database, params
95
+
86
96
def read_qgis (
87
97
self ,
88
98
columns : Optional [list [str ]] = None ,
@@ -92,58 +102,64 @@ def read_qgis(
92
102
filt : Optional [dict [str , Any ]] = None
93
103
) -> Union [pd .DataFrame , gpd .GeoDataFrame ]:
94
104
def prog_attributes (f : QgsFeature ):
95
- nonlocal count
96
- count += 1
97
- if count % chunksize == 0 :
98
- self .checkCanceled ()
99
- self .updateProgress (fc , count )
100
105
attrs = [f .attribute (i ) for i in indices ]
101
106
if read_geometry :
102
107
attrs .append (f .geometry ().asWkb ().data ())
103
108
return attrs
104
109
105
110
if not chunksize :
106
111
chunksize = 1
107
- fc = self ._layer .featureCount ()
108
- count = 0
112
+
109
113
fields = self ._layer .fields ()
114
+ req = QgsFeatureRequest ()
115
+ if self ._feedback :
116
+ req .setFeedback (self ._feedback )
117
+
110
118
if columns is None :
111
119
columns = fields .names ()
112
- gen = ( prog_attributes ( f ) for f in self . _layer . getFeatures ( ))
120
+ indices = range ( len ( columns ))
113
121
else :
114
122
indices = [fields .lookupField (c ) for c in columns ]
115
123
if any ((i == - 1 for i in indices )):
116
124
raise RuntimeError ("Bad fields" )
117
- req = QgsFeatureRequest ()
118
125
req .setSubsetOfAttributes (indices )
119
- if filt :
120
- filt = {f : f"{ f'{ v } ' if isinstance (v , str ) else v } " for f , v in filt .items ()}
121
- expr = f"{ 'AND' .join (f'{ f } = { v } ' for f ,v in filt .items ())} "
122
- req .setFilterExpression (expr )
123
- gen = (prog_attributes (f ) for f in self ._layer .getFeatures (req ))
126
+
127
+ if filt :
128
+ expr = f"{ ' AND ' .join (f'({ f } = { v !r} )' for f , v in filt .items ())} "
129
+ req .setFilterExpression (expr )
130
+
131
+ if order :
132
+ clause = QgsFeatureRequest .OrderByClause (order )
133
+ orderby = QgsFeatureRequest .OrderBy ([clause ])
134
+ req .setOrderBy (orderby )
124
135
125
136
if read_geometry :
126
- columns = [* columns , "geometry" ]
127
- df = pd .DataFrame (gen , columns = columns )
128
- df ['geometry' ] = df ['geometry' ].apply (wkb .loads )
129
- df = gpd .GeoDataFrame (df , geometry = "geometry" , crs = self ._layer .crs ().authid ())
137
+ columns .append ('geometry' )
138
+ df = gpd .GeoDataFrame .from_features (self ._layer .getFeatures (req ), self ._layer .crs ().authid (), columns )
130
139
else :
140
+ gen = (prog_attributes (f ) for f in self ._layer .getFeatures (req ))
131
141
df = pd .DataFrame (gen , columns = columns )
132
142
133
- if order and order in df .columns :
134
- df = df .sort_values (order ).set_index (order )
135
-
136
143
return df
137
144
138
145
def gpd_read (
139
146
self ,
140
- source ,
147
+ source = None ,
141
148
fc : int = 0 ,
142
149
chunksize : Optional [int ] = None ,
143
150
filt : Optional [dict [str , Any ]] = None ,
144
151
** kwargs
145
152
) -> gpd .GeoDataFrame :
146
153
df : gpd .GeoDataFrame = None
154
+
155
+ if source is None :
156
+ source , params = self .split_provider_url ()
157
+ if "layer" not in kwargs :
158
+ kwargs ["layer" ] = params ["layername" ]
159
+
160
+ if filt is not None :
161
+ kwargs ["where" ] = " AND " .join (f"({ f } = { v !r} )" for f , v in filt .items ())
162
+
147
163
if (fc or chunksize ):
148
164
if chunksize is None :
149
165
divisions = 10
@@ -166,10 +182,6 @@ def gpd_read(
166
182
)
167
183
else :
168
184
df = gpd .read_file (source , ** kwargs )
169
-
170
- if filt :
171
- for f , v in filt .items ():
172
- df = df [df [f ] == v ]
173
185
else :
174
186
df = gpd .read_file (source , ** kwargs )
175
187
self .updateProgress (len (df ), len (df ))
@@ -184,7 +196,8 @@ def read_layer(
184
196
order : Optional [str ] = ...,
185
197
filt : Optional [dict [str , Any ]] = ...,
186
198
read_geometry : Literal [False ] = ...,
187
- chunksize : int = ...
199
+ chunksize : int = ...,
200
+ ** kwargs
188
201
) -> pd .DataFrame :
189
202
...
190
203
@@ -195,7 +208,8 @@ def read_layer(
195
208
order : Optional [str ] = ...,
196
209
filt : Optional [dict [str , Any ]] = ...,
197
210
read_geometry : Literal [True ] = ...,
198
- chunksize : int = ...
211
+ chunksize : int = ...,
212
+ ** kwargs
199
213
) -> gpd .GeoDataFrame :
200
214
...
201
215
@@ -205,7 +219,8 @@ def read_layer(
205
219
order = None ,
206
220
filt = None ,
207
221
read_geometry = True ,
208
- chunksize = 0
222
+ chunksize = 0 ,
223
+ ** kwargs
209
224
) -> Union [pd .DataFrame , gpd .GeoDataFrame ]:
210
225
def makeSqlQuery ():
211
226
nonlocal filt
@@ -216,7 +231,7 @@ def makeSqlQuery():
216
231
cols = "," .join (columns )
217
232
if read_geometry and (g := self .getGeometryColumn (self ._layer )):
218
233
cols += f",{ g } "
219
- sql = f"SELECT { cols } from { self .getTableName (self ._layer )} "
234
+ sql = f"SELECT { cols } FROM { self .getTableName (self ._layer )} "
220
235
if filt or self ._layer .subsetString ():
221
236
filters = []
222
237
if filt :
@@ -248,21 +263,15 @@ def makeSqlQuery():
248
263
249
264
if self ._layer .storageType () in ("GPKG" , "OpenFileGDB" ):
250
265
if read_geometry :
251
- uri_parts = self ._layer .dataProvider ().dataSourceUri ().split ('|' )
252
- if len (uri_parts ) <= 1 :
253
- raise ValueError ("Could not determine table name from URI" )
254
- database = uri_parts [0 ]
255
- lexer = shlex .shlex (uri_parts [1 ])
256
- lexer .whitespace_split = True
257
- lexer .whitespace = '&'
258
- params = dict (pair .split ('=' , 1 ) for pair in lexer )
259
- df = self .gpd_read (database , self ._layer .featureCount (), chunksize ,
260
- layer = params ['layername' ], columns = columns )
266
+ database , params = self .split_provider_url ()
267
+ df = self .gpd_read (database , self ._layer .featureCount (), chunksize , filt ,
268
+ layer = params ['layername' ], columns = columns , ** kwargs )
261
269
if order :
262
270
df = df .set_index (order ).sort_index ()
263
271
else :
264
272
with self ._connectSqlOgrSqlite (self ._layer .dataProvider ()) as db :
265
- df = pd .read_sql (makeSqlQuery (), db , index_col = order , columns = columns , chunksize = chunksize )
273
+ df = pd .read_sql (makeSqlQuery (), db , index_col = order ,
274
+ columns = columns , chunksize = chunksize , ** kwargs )
266
275
if isinstance (df , Iterator ):
267
276
df = pd .concat (self .iterateWithProgress (df , total ))
268
277
elif self ._layer .dataProvider ().name () in ('spatialite' , 'SQLite' ):
@@ -272,7 +281,7 @@ def makeSqlQuery():
272
281
shlex .split (re .sub (r' \(\w+\)' , '' , self ._layer .dataProvider ().dataSourceUri (True )))
273
282
)
274
283
df = self .gpd_read (params ['dbname' ], self ._layer .featureCount (),
275
- chunksize , layer = params ['table' ], columns = columns )
284
+ chunksize , layer = params ['table' ], columns = columns , ** kwargs )
276
285
if order :
277
286
df = df .set_index (order ).sort_index ()
278
287
else :
@@ -288,15 +297,17 @@ def makeSqlQuery():
288
297
db ,
289
298
self .getGeometryColumn (self ._layer ),
290
299
index_col = order ,
291
- chunksize = chunksize
300
+ chunksize = chunksize ,
301
+ ** kwargs
292
302
)
293
303
else :
294
304
df = pd .read_sql (
295
305
makeSqlQuery (),
296
306
db ,
297
307
index_col = order ,
298
308
columns = columns ,
299
- chunksize = chunksize
309
+ chunksize = chunksize ,
310
+ ** kwargs
300
311
)
301
312
302
313
if isinstance (df , Iterator ):
@@ -307,7 +318,8 @@ def makeSqlQuery():
307
318
self ._layer .featureCount (),
308
319
columns = columns ,
309
320
chunksize = chunksize ,
310
- read_geometry = read_geometry
321
+ read_geometry = read_geometry ,
322
+ ** kwargs
311
323
)
312
324
if order :
313
325
df = df .set_index (order ).sort_index ()
@@ -333,11 +345,12 @@ def makeSqlQuery():
333
345
delimiter = delimiter ,
334
346
header = header ,
335
347
usecols = usecols ,
336
- chunksize = chunksize
348
+ chunksize = chunksize ,
349
+ ** kwargs
337
350
)
338
351
df = pd .concat (self .iterateWithProgress (reader .get_chunk (), total ))
339
352
else :
340
- df = pd .read_csv (uri_parts .path , delimiter = delimiter , header = header , usecols = usecols )
353
+ df = pd .read_csv (uri_parts .path , delimiter = delimiter , header = header , usecols = usecols , ** kwargs )
341
354
if header is None :
342
355
if len (columns ) == len (df .columns ):
343
356
df .columns = columns
0 commit comments