Skip to content

Commit c4bd595

Browse files
authored
Merge pull request #347 from ropensci/encoding_UTF8
Set encoding to UTF-8 for tags and user names
2 parents 88ba939 + 2ff8526 commit c4bd595

9 files changed

+49
-13
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: osmdata
22
Title: Import 'OpenStreetMap' Data as Simple Features or Spatial Objects
3-
Version: 0.2.5.018
3+
Version: 0.2.5.019
44
Authors@R: c(
55
person("Mark", "Padgham", , "[email protected]", role = c("aut", "cre")),
66
person("Bob", "Rudis", role = "aut"),

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Improved `get_bb(..., format_out = "sf_polygon")` to return full metadata
1212
along with geometries (#338 thanks to @RegularnaMatrica)
1313
- Mention key-only feature requests in README (#342 thanks to @joostschouppe)
14+
- Set encoding to UTF-8 for tags and user names (#347)
1415

1516

1617
0.2.5

R/get-osmdata-df.R

+13-7
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ osmdata_data_frame <- function (q,
7575
colClasses = "character", # osm_id doesn't fit in integer
7676
check.names = FALSE,
7777
comment.char = "",
78-
stringsAsFactors = stringsAsFactors
78+
stringsAsFactors = stringsAsFactors,
79+
encoding = "UTF-8"
7980
)
8081
} else if (isTRUE (obj$meta$query_type == "adiff")) {
8182
datetime_from <- obj$meta$datetime_from
@@ -122,13 +123,15 @@ xml_to_df <- function (doc, stringsAsFactors = FALSE) {
122123

123124
tags <- mapply (function (i, k) {
124125
i <- i [, k, drop = FALSE] # remove osm_id column if exists
126+
out <- matrix (
127+
NA_character_,
128+
nrow = nrow (i), ncol = length (keys),
129+
dimnames = list (NULL, keys)
130+
)
131+
out <- enc2utf8 (out)
125132
out <- data.frame (
126-
matrix (
127-
nrow = nrow (i), ncol = length (keys),
128-
dimnames = list (NULL, keys)
129-
),
130-
stringsAsFactors = stringsAsFactors,
131-
check.names = FALSE
133+
out,
134+
stringsAsFactors = stringsAsFactors, check.names = FALSE
132135
)
133136
out [, names (i)] <- i
134137
return (out)
@@ -214,6 +217,7 @@ xml_adiff_to_df <- function (doc,
214217
tags_u <- xml2::xml_find_all (osm_actions, xpath = ".//tag")
215218
col_names <- sort (unique (xml2::xml_attr (tags_u, attr = "k")))
216219
m <- matrix (
220+
NA_character_,
217221
nrow = length (osm_obj), ncol = length (col_names),
218222
dimnames = list (NULL, col_names)
219223
)
@@ -225,6 +229,7 @@ xml_adiff_to_df <- function (doc,
225229
tagV <- vapply (tag, function (x) x, FUN.VALUE = character (2))
226230
m [i, tagV [1, ]] <- tagV [2, ]
227231
}
232+
m <- enc2utf8 (m)
228233

229234
osm_type <- xml2::xml_name (osm_obj)
230235
osm_id <- xml2::xml_attr (osm_obj, "id")
@@ -325,6 +330,7 @@ get_meta_from_xml <- function (osm_obj) {
325330
osm_uid = xml2::xml_attr (osm_obj, attr = "uid"),
326331
osm_user = xml2::xml_attr (osm_obj, attr = "user")
327332
)
333+
out$osm_user <- enc2utf8 (out$osm_user)
328334

329335
} else {
330336
out <- matrix (nrow = length (osm_obj), ncol = 0)

R/get-osmdata-sc.R

+6
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ osmdata_sc <- function (q, doc, quiet = TRUE) {
7575
overpass_version = temp$obj$meta$overpass_version
7676
)
7777

78+
has_tags <- c ("nodes", "relation_properties", "object")
79+
obj [has_tags] <- lapply(obj [has_tags], function (x) {
80+
x [, c ("key", "value")] <- setenc_utf8 (x [, c ("key", "value")])
81+
x
82+
})
83+
7884
if (!missing (q)) {
7985
if (!is.character (q)) {
8086
obj$meta$bbox <- q$bbox

R/get-osmdata-sf.R

+3-2
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,9 @@ osmdata_sf <- function (q, doc, quiet = TRUE, stringsAsFactors = FALSE) { # noli
6262
if (!"osm_id" %in% names (res$polygons_kv)[1]) {
6363
res <- fill_kv (res, "polygons_kv", "polygons", stringsAsFactors)
6464
}
65-
kv_df <- grep ("_kv$", names (res))
66-
res[kv_df] <- fix_columns_list (res[kv_df])
65+
kv_df <- grep ("_kv$", names (res)) # objects with tags
66+
res [kv_df] <- fix_columns_list (res[kv_df])
67+
res [kv_df] <- lapply (res [kv_df], setenc_utf8)
6768

6869
if (missing (q)) {
6970
obj$bbox <- paste (res$bbox, collapse = " ")

R/get-osmdata-sp.R

+5-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,11 @@ osmdata_sp <- function (q, doc, quiet = TRUE) {
7171
obj$osm_multipolygons <- res$multipolygons
7272

7373
osm_items <- grep ("^osm_", names (obj))
74-
obj[osm_items] <- fix_columns_list (obj[osm_items])
74+
obj [osm_items] <- fix_columns_list (obj [osm_items])
75+
obj [osm_items] <- lapply (obj [osm_items], function (x) {
76+
x@data <- setenc_utf8 (x@data)
77+
x
78+
})
7579
class (obj) <- c (class (obj), "osmdata_sp")
7680

7781
return (obj)

R/get-osmdata.R

+16
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,19 @@ get_center_from_cpp_output <- function (res, what = "points") {
344344

345345
return (as.data.frame (this))
346346
}
347+
348+
349+
#' Set encoding to UTF-8
#'
#' Runs `enc2utf8()` over the text held in `x` and leaves everything else
#' untouched.
#'
#' @param x a data.frame or a list. A bare character vector or matrix (or a
#' bare factor) is also accepted and is converted directly, keeping its
#' attributes.
#'
#' @return `x` with all the columns or items of type character converted with
#' `enc2utf8()`. Factor columns or items get their levels converted, so the
#' result does not depend on whether text is stored as character or factor.
#' @noRd
setenc_utf8 <- function (x) {

    # Convert one column / item; anything that is not text is returned as is.
    to_utf8 <- function (y) {
        if (is.character (y)) {
            y <- enc2utf8 (y)
        } else if (is.factor (y)) {
            # The text of a factor lives in its levels, not in its codes.
            levels (y) <- enc2utf8 (levels (y))
        }
        y
    }

    # Sub-assigning a list into an atomic vector would turn `x` into a list,
    # so non-list input is converted in a single call instead.
    if (!is.list (x)) {
        return (to_utf8 (x))
    }

    text_cols <- which (vapply (
        x,
        function (y) is.character (y) || is.factor (y),
        FUN.VALUE = logical (1)
    ))
    x [text_cols] <- lapply (x [text_cols], to_utf8)

    return (x)
}

R/getbb.R

+2
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ getbb <- function (place_name,
229229
)
230230

231231
if (format_out == "data.frame") {
232+
utf8cols <- c ("licence", "name", "display_name")
233+
obj [, utf8cols] <- setenc_utf8 (obj [, utf8cols])
232234
return (obj)
233235
}
234236

codemeta.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111
"codeRepository": "https://github.com/ropensci/osmdata/",
1212
"issueTracker": "https://github.com/ropensci/osmdata/issues",
1313
"license": "https://spdx.org/licenses/GPL-3.0",
14-
"version": "0.2.5.018",
14+
"version": "0.2.5.019",
1515
"programmingLanguage": {
1616
"@type": "ComputerLanguage",
1717
"name": "R",
1818
"url": "https://r-project.org"
1919
},
20-
"runtimePlatform": "R version 4.3.1 (2023-06-16)",
20+
"runtimePlatform": "R version 4.4.1 (2024-06-14)",
2121
"provider": {
2222
"@id": "https://cran.r-project.org",
2323
"@type": "Organization",

0 commit comments

Comments
 (0)