Skip to content
This repository was archived by the owner on Jun 30, 2023. It is now read-only.

Commit

Permalink
Merge pull request #56 from cjbarrie/barrie-branch
Browse files Browse the repository at this point in the history
Allow more than one string as query; update vignettes; add build_user…
  • Loading branch information
cjbarrie authored Apr 29, 2021
2 parents 830165f + 021537f commit 29b7898
Show file tree
Hide file tree
Showing 20 changed files with 507 additions and 14 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(bind_tweet_jsons)
export(bind_user_jsons)
export(build_query)
export(build_user_query)
export(get_all_tweets)
export(get_bbox_tweets)
export(get_country_tweets)
Expand Down
4 changes: 4 additions & 0 deletions R/build_queryv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ build_query <- function(query,
has_geo = FALSE,
lang= NULL) {

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

if (isTRUE(is_retweet) & isTRUE(is_reply)) {
stop("A tweet cannot be both a retweet and a reply")
}
Expand Down
196 changes: 196 additions & 0 deletions R/build_user_queryv2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#' Build user tweet queries
#'
#' Build one query string per element of `users` by appending the selected search operators to it; the result can then be input to main \code{\link{get_all_tweets}} function as query parameter.
#'
#' @param users string or character vector, user handles from which to collect data
#' @param is_retweet If `TRUE`, only retweets will be returned; if `FALSE` retweets will not be returned; if `NULL` (default) no retweet filter is added
#' @param is_reply If `TRUE`, only reply tweets will be returned
#' @param is_quote If `TRUE`, only quote tweets will be returned
#' @param place string, name of place e.g. "London"
#' @param country string, name of country as ISO alpha-2 code e.g. "GB"
#' @param point_radius numeric, a vector of three values in the order longitude, latitude, and point radius distance (in miles); cannot be combined with `bbox`
#' @param bbox numeric, a vector of four bounding box coordinates in the order west longitude, south latitude, east longitude, north latitude; cannot be combined with `point_radius`
#' @param geo_query If `TRUE` user will be prompted (once per call) to enter relevant information for bounding box or point radius geo buffers
#' @param remove_promoted If `TRUE`, tweets created for promotion only on ads.twitter.com are removed
#' @param has_hashtags If `TRUE`, only tweets containing hashtags will be returned
#' @param has_cashtags If `TRUE`, only tweets containing cashtags will be returned
#' @param has_links If `TRUE`, only tweets containing links and media will be returned
#' @param has_mentions If `TRUE`, only tweets containing mentions will be returned
#' @param has_media If `TRUE`, only tweets containing a recognized media object, such as a photo, GIF, or video, as determined by Twitter will be returned
#' @param has_images If `TRUE`, only tweets containing a recognized URL to an image will be returned
#' @param has_videos If `TRUE`, only tweets containing native Twitter videos, uploaded directly to Twitter will be returned
#' @param has_geo If `TRUE`, only tweets containing Tweet-specific geolocation data provided by the Twitter user will be returned
#' @param lang string, a single BCP 47 language identifier e.g. "fr"
#'
#' @return An object of the same length and type as `users` in which every element is the corresponding element of `users` followed by the selected operators, separated by single spaces. If no operator is selected the elements are returned unchanged (as character).
#' @export
#'
#' @examples
#' \dontrun{
#' users <- c("cbarrie", "justin_ct_ho")
#' users <- build_user_query(users, is_retweet = FALSE, has_media = TRUE, lang = "en")
#' }
build_user_query <- function(users,
                             is_retweet = NULL,
                             is_reply = FALSE,
                             is_quote = FALSE,
                             place = NULL,
                             country = NULL,
                             point_radius = NULL,
                             bbox = NULL,
                             geo_query = FALSE,
                             remove_promoted = FALSE,
                             has_hashtags = FALSE,
                             has_cashtags = FALSE,
                             has_links = FALSE,
                             has_mentions = FALSE,
                             has_media = FALSE,
                             has_images = FALSE,
                             has_videos = FALSE,
                             has_geo = FALSE,
                             lang = NULL) {

  # Argument checks do not depend on the individual user, so run them once
  # up front instead of on every loop iteration.
  if (isTRUE(is_retweet) && isTRUE(is_reply)) {
    stop("A tweet cannot be both a retweet and a reply")
  }
  if (isTRUE(is_quote) && isTRUE(is_reply)) {
    stop("A tweet cannot be both a quote tweet and a reply")
  }
  # point_radius and bbox are numeric vectors, so isTRUE() on them is never
  # TRUE; "was it supplied" has to be tested with is.null().
  if (!is.null(point_radius) && !is.null(bbox)) {
    stop("Select either point radius or bounding box")
  }
  if (!is.null(point_radius) && length(point_radius) != 3) {
    stop("point_radius must contain longitude, latitude and radius")
  }
  if (!is.null(bbox) && length(bbox) != 4) {
    stop("bbox must contain four coordinates")
  }

  # Build a point_radius operator. While the radius is not a number or is
  # above 25, ask for a new one; outside an interactive session readline()
  # cannot supply one, so fail with a clear error instead.
  point_radius_operator <- function(lon, lat, radius, prompt) {
    radius_num <- suppressWarnings(as.numeric(radius))
    while (is.na(radius_num) || radius_num > 25) {
      if (!interactive()) {
        stop("Radius must be a number no greater than 25 miles")
      }
      cat("Radius must be less than 25 miles")
      radius <- readline(prompt)
      radius_num <- suppressWarnings(as.numeric(radius))
    }
    paste0("point_radius:[", paste(lon, lat, paste0(radius, "mi")), "]")
  }

  # Ask which geo buffer to use (a single menu() call) and read its
  # coordinates. Returns NULL when the user leaves the menu without choosing.
  prompt_geo_operator <- function() {
    choice <- menu(c("Point radius", "Bounding box"),
                   title = "Which geo buffer type type do you want?")
    if (choice == 1) {
      lon <- readline("What is longitude? ")
      lat <- readline("What is latitude? ")
      radius <- readline("What is radius? ")
      point_radius_operator(lon, lat, radius, prompt = "What is radius? ")
    } else if (choice == 2) {
      west <- readline("What is west longitude? ")
      south <- readline("What is south latitude? ")
      east <- readline("What is east longitude? ")
      north <- readline("What is north latitude? ")
      paste0("bounding_box:[", paste(west, south, east, north), "]")
    } else {
      NULL
    }
  }

  # Operators in the order they are appended to each query. An `if` without
  # `else` yields NULL when its condition is FALSE, and c() drops NULLs.
  filters <- c(
    if (isTRUE(is_retweet)) "is:retweet",
    if (isFALSE(is_retweet)) "-is:retweet",
    if (isTRUE(is_reply)) "is:reply",
    if (isTRUE(is_quote)) "is:quote",
    if (!is.null(place)) paste0("place:", place),
    if (!is.null(country)) paste0("place_country:", country),
    if (isTRUE(geo_query)) prompt_geo_operator(),
    if (!is.null(point_radius)) {
      point_radius_operator(point_radius[1], point_radius[2], point_radius[3],
                            prompt = "Input new radius: ")
    },
    if (!is.null(bbox)) {
      paste0("bounding_box:[", paste(bbox, collapse = " "), "]")
    },
    if (isTRUE(remove_promoted)) "-is:nullcast",
    if (isTRUE(has_hashtags)) "has:hashtags",
    if (isTRUE(has_cashtags)) "has:cashtags",
    if (isTRUE(has_links)) "has:links",
    if (isTRUE(has_mentions)) "has:mentions",
    if (isTRUE(has_media)) "has:media",
    if (isTRUE(has_images)) "has:images",
    if (isTRUE(has_videos)) "has:videos",
    if (isTRUE(has_geo)) "has:geo",
    if (!is.null(lang)) paste0("lang:", lang)
  )
  suffix <- paste(filters, collapse = " ")

  # NOTE(review): each handle is used verbatim as the start of its query; no
  # `from:` operator is prepended here -- confirm that is what callers expect.
  for (i in seq_along(users)) {
    users[[i]] <- if (length(filters) > 0) {
      paste(users[[i]], suffix)
    } else {
      paste(users[[i]])
    }
  }
  users
}
4 changes: 4 additions & 0 deletions R/get_all_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ get_all_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
Expand Down
4 changes: 4 additions & 0 deletions R/get_bbox_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ get_bbox_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
w <- bbox[1]
x <- bbox[2]
Expand Down
4 changes: 4 additions & 0 deletions R/get_country_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ get_country_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
cntparam <- paste0("place_country:", country)
df <-
Expand Down
4 changes: 4 additions & 0 deletions R/get_geo_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ get_geo_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
Expand Down
4 changes: 4 additions & 0 deletions R/get_image_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ get_image_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
Expand Down
4 changes: 4 additions & 0 deletions R/get_lang_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ get_lang_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
langparam <- paste0("lang:", lang)
df <-
Expand Down
6 changes: 5 additions & 1 deletion R/get_media_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,14 @@ get_media_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
q = paste0('has:media ', query),
q = paste(query, 'has:media'),
n = 500,
start_time = start_tweets,
end_time = end_tweets,
Expand Down
6 changes: 5 additions & 1 deletion R/get_mentions_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,14 @@ get_mentions_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
q = paste0('has:mentions ', query),
q = paste(query, 'has:mentions'),
n = 500,
start_time = start_tweets,
end_time = end_tweets,
Expand Down
4 changes: 4 additions & 0 deletions R/get_place_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ get_place_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
placeparam <- paste0("place:", place)
df <-
Expand Down
4 changes: 4 additions & 0 deletions R/get_radius_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ get_radius_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
x <- radius[1]
y <- radius[2]
Expand Down
6 changes: 5 additions & 1 deletion R/get_video_tweetsv2.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,14 @@ get_video_tweets <-
toknum <- 0
ntweets <- 0

if(isTRUE(length(query) >1)) {
query <- paste(query, collapse = " OR ")
}

while (!is.null(nextoken)) {
df <-
get_tweets(
q = paste0('has:videos ', query),
q = paste(query, 'has:videos'),
n = 500,
start_time = start_tweets,
end_time = end_tweets,
Expand Down
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,28 @@ tweets <-
```

Note that the "AND" operator is implicit when specifying more than one character string in the query. See [here](https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query) for information on building queries for search tweets.
Alternatively, we can specify a character vector comprising several elements. For example, if we wanted to search multiple hashtags, we could specify a query as follows:

Thus, when searching for all elements of a character string, a call may look like:
```{r}
bearer_token <- "" # Insert bearer token
htagquery <- c("#BLM", "#BlackLivesMatter", "#GeorgeFloyd")
tweets <-
get_all_tweets(
htagquery,
"2020-01-01T00:00:00Z",
"2020-01-05T00:00:00Z",
bearer_token
)
```

This achieves the same result as typing out `OR` between our strings.


Note that the "AND" operator is implicit when specifying more than one character string in the query. See [here](https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query) for information on building queries for search tweets. Thus, when searching for all elements of a character string, a call may look like:

```{r}
Expand Down
Loading

0 comments on commit 29b7898

Please sign in to comment.