Merge pull request #56 from cjbarrie/barrie-branch

Allow more than one string as query; update vignettes; add build_user…
cjbarrie · Apr 29, 2021 · 29b7898 · 29b7898
2 parents 830165f + 021537f
commit 29b7898
Show file tree

Hide file tree

Showing 20 changed files with 507 additions and 14 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,7 @@
 export(bind_tweet_jsons)
 export(bind_user_jsons)
 export(build_query)
+export(build_user_query)
 export(get_all_tweets)
 export(get_bbox_tweets)
 export(get_country_tweets)

diff --git a/R/build_queryv2.R b/R/build_queryv2.R
@@ -49,6 +49,10 @@ build_query <- function(query,
                         has_geo = FALSE,
                         lang= NULL) {
 
+  if(isTRUE(length(query) >1)) {
+    query <- paste(query, collapse = " OR ")
+  }
+
   if (isTRUE(is_retweet) & isTRUE(is_reply)) {
     stop("A tweet cannot be both a retweet and a reply")
   }

diff --git a/R/build_user_queryv2.R b/R/build_user_queryv2.R
@@ -0,0 +1,196 @@
+#' Build user query
+#'
+#' Build one tweet query per user according to targeted parameters, can then be input to main \code{\link{get_all_tweets}} function as query parameter.
+#' The same set of query operators is appended to every element of \code{users}.
+#'
+#' @param users string or character vector, user handles from which to collect data
+#' @param is_retweet If `TRUE`, only retweets will be returned; if `FALSE` retweets will not be returned;
+#'   if `NULL` (the default) no retweet filter is added
+#' @param is_reply If `TRUE`, only reply tweets will be returned
+#' @param is_quote If `TRUE`, only quote tweets will be returned
+#' @param place string, name of place e.g. "London"
+#' @param country string, name of country as ISO alpha-2 code e.g. "GB"
+#' @param point_radius numeric, a vector of longitude, latitude, and point radius distance (in miles),
+#'   in that order
+#' @param bbox numeric, a vector of four bounding box coordinates: west longitude, south latitude,
+#'   east longitude, north latitude
+#' @param geo_query If `TRUE` user will be prompted to enter relevant information for bounding box or
+#'   point radius geo buffers
+#' @param remove_promoted If `TRUE`, tweets created for promotion only on ads.twitter.com are removed
+#' @param has_hashtags If `TRUE`, only tweets containing hashtags will be returned
+#' @param has_cashtags If `TRUE`, only tweets containing cashtags will be returned
+#' @param has_links If `TRUE`, only tweets containing links and media will be returned
+#' @param has_mentions If `TRUE`, only tweets containing mentions will be returned
+#' @param has_media If `TRUE`, only tweets containing a recognized media object, such as a photo, GIF,
+#'   or video, as determined by Twitter will be returned
+#' @param has_images If `TRUE`, only tweets containing a recognized URL to an image will be returned
+#' @param has_videos If `TRUE`, only tweets containing native Twitter videos, uploaded directly to
+#'   Twitter will be returned
+#' @param has_geo If `TRUE`, only tweets containing Tweet-specific geolocation data provided by the
+#'   Twitter user will be returned
+#' @param lang string, a single BCP 47 language identifier e.g. "fr"
+#'
+#' @return \code{users}, with each element replaced by the original string followed by the requested
+#'   query operators, separated by single spaces
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' users <- c("cbarrie", "justin_ct_ho")
+#' users <- build_user_query(users, is_retweet = FALSE, has_media = TRUE, lang = "en")
+#' }
+build_user_query <- function(users,
+                             is_retweet = NULL,
+                             is_reply = FALSE,
+                             is_quote = FALSE,
+                             place = NULL,
+                             country = NULL,
+                             point_radius = NULL,
+                             bbox = NULL,
+                             geo_query = FALSE,
+                             remove_promoted = FALSE,
+                             has_hashtags = FALSE,
+                             has_cashtags = FALSE,
+                             has_links = FALSE,
+                             has_mentions = FALSE,
+                             has_media = FALSE,
+                             has_images = FALSE,
+                             has_videos = FALSE,
+                             has_geo = FALSE,
+                             lang = NULL) {
+
+  # Validate mutually exclusive options once, before any query is built.
+  if (isTRUE(is_retweet) && isTRUE(is_reply)) {
+    stop("A tweet cannot be both a retweet and a reply")
+  }
+
+  if (isTRUE(is_quote) && isTRUE(is_reply)) {
+    stop("A tweet cannot be both a quote tweet and a reply")
+  }
+
+  # point_radius and bbox are numeric vectors, so check whether they were
+  # supplied; isTRUE() is never TRUE for them.
+  if (!is.null(point_radius) && !is.null(bbox)) {
+    stop("Select either point radius or bounding box")
+  }
+
+  # Nothing to build for an empty input.
+  if (length(users) == 0) {
+    return(users)
+  }
+
+  # TRUE when `radius` parses as a number that is at most 25.
+  radius_ok <- function(radius) {
+    value <- suppressWarnings(as.numeric(radius))
+    !is.na(value) && value <= 25
+  }
+
+  # Every operator is independent of the user, so collect them once and
+  # append the same string to each element of `users` at the end.
+  ops <- character()
+
+  if (isTRUE(is_retweet)) {
+    ops <- c(ops, "is:retweet")
+  }
+
+  if (isFALSE(is_retweet)) {
+    ops <- c(ops, "-is:retweet")
+  }
+
+  if (isTRUE(is_reply)) {
+    ops <- c(ops, "is:reply")
+  }
+
+  if (isTRUE(is_quote)) {
+    ops <- c(ops, "is:quote")
+  }
+
+  if (!is.null(place)) {
+    ops <- c(ops, paste0("place:", place))
+  }
+
+  if (!is.null(country)) {
+    ops <- c(ops, paste0("place_country:", country))
+  }
+
+  if (isTRUE(geo_query)) {
+    # Ask once and branch on the answer; the menu is not shown a second time.
+    choice <- menu(c("Point radius", "Bounding box"),
+                   title = "Which geo buffer type type do you want?")
+
+    if (choice == 1) {
+      lon <- readline("What is longitude? ")
+      lat <- readline("What is latitude? ")
+      radius <- readline("What is radius? ")
+
+      # Keep asking until the answer is a number no greater than 25.
+      while (!radius_ok(radius)) {
+        cat("Radius must be less than 25 miles")
+        radius <- readline("What is radius? ")
+      }
+
+      geo <- paste(lon, lat, paste0(radius, "mi"))
+      ops <- c(ops, paste0("point_radius:[", geo, "]"))
+    } else if (choice == 2) {
+      west <- readline("What is west longitude? ")
+      south <- readline("What is south latitude? ")
+      east <- readline("What is east longitude? ")
+      north <- readline("What is north latitude? ")
+
+      geo <- paste(west, south, east, north)
+      ops <- c(ops, paste0("bounding_box:[", geo, "]"))
+    }
+  }
+
+  if (!is.null(point_radius)) {
+    radius <- point_radius[3]
+
+    # readline() cannot collect a replacement outside an interactive
+    # session, so fail with a clear message instead of prompting.
+    if (!radius_ok(radius) && !interactive()) {
+      stop("Radius must be less than 25 miles")
+    }
+
+    while (!radius_ok(radius)) {
+      cat("Radius must be less than 25 miles")
+      radius <- readline("Input new radius: ")
+    }
+
+    geo <- paste(point_radius[1], point_radius[2], paste0(radius, "mi"))
+    ops <- c(ops, paste0("point_radius:[", geo, "]"))
+  }
+
+  if (!is.null(bbox)) {
+    geo <- paste(bbox[1], bbox[2], bbox[3], bbox[4])
+    ops <- c(ops, paste0("bounding_box:[", geo, "]"))
+  }
+
+  # Simple on/off filters, listed in the order they are appended.
+  # Each name is the operator text; each value says whether it was requested.
+  flags <- c(
+    "-is:nullcast" = isTRUE(remove_promoted),
+    "has:hashtags" = isTRUE(has_hashtags),
+    "has:cashtags" = isTRUE(has_cashtags),
+    "has:links" = isTRUE(has_links),
+    "has:mentions" = isTRUE(has_mentions),
+    "has:media" = isTRUE(has_media),
+    "has:images" = isTRUE(has_images),
+    "has:videos" = isTRUE(has_videos),
+    "has:geo" = isTRUE(has_geo)
+  )
+  ops <- c(ops, names(flags)[flags])
+
+  if (!is.null(lang)) {
+    ops <- c(ops, paste0("lang:", lang))
+  }
+
+  # Assign through `users[]` so the container and any names are kept.
+  # With no operators the users come back as plain character strings.
+  if (length(ops) > 0) {
+    users[] <- paste(users, paste(ops, collapse = " "))
+  } else {
+    users[] <- paste(users)
+  }
+
+  users
+}
diff --git a/R/get_all_tweetsv2.R b/R/get_all_tweetsv2.R
@@ -81,6 +81,10 @@ get_all_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(

diff --git a/R/get_bbox_tweetsv2.R b/R/get_bbox_tweetsv2.R
@@ -89,6 +89,10 @@ get_bbox_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       w <- bbox[1]
       x <- bbox[2]

diff --git a/R/get_country_tweetsv2.R b/R/get_country_tweetsv2.R
@@ -88,6 +88,10 @@ get_country_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       cntparam <- paste0("place_country:", country)
       df <-

diff --git a/R/get_geo_tweetsv2.R b/R/get_geo_tweetsv2.R
@@ -84,6 +84,10 @@ get_geo_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(

diff --git a/R/get_image_tweetsv2.R b/R/get_image_tweetsv2.R
@@ -82,6 +82,10 @@ get_image_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(

diff --git a/R/get_lang_tweetsv2.R b/R/get_lang_tweetsv2.R
@@ -88,6 +88,10 @@ get_lang_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       langparam <- paste0("lang:", lang)
       df <-

diff --git a/R/get_media_tweetsv2.R b/R/get_media_tweetsv2.R
@@ -82,10 +82,14 @@ get_media_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(
-          q = paste0('has:media ', query),
+          q = paste(query, 'has:media'),
           n = 500,
           start_time = start_tweets,
           end_time = end_tweets,

diff --git a/R/get_mentions_tweetsv2.R b/R/get_mentions_tweetsv2.R
@@ -82,10 +82,14 @@ get_mentions_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(
-          q = paste0('has:mentions ', query),
+          q = paste(query, 'has:mentions'),
           n = 500,
           start_time = start_tweets,
           end_time = end_tweets,

diff --git a/R/get_place_tweetsv2.R b/R/get_place_tweetsv2.R
@@ -88,6 +88,10 @@ get_place_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       placeparam <- paste0("place:", place)
       df <-

diff --git a/R/get_radius_tweetsv2.R b/R/get_radius_tweetsv2.R
@@ -89,6 +89,10 @@ get_radius_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       x <- radius[1]
       y <- radius[2]

diff --git a/R/get_video_tweetsv2.R b/R/get_video_tweetsv2.R
@@ -83,10 +83,14 @@ get_video_tweets <-
     toknum <- 0
     ntweets <- 0
 
+    if(isTRUE(length(query) >1)) {
+      query <- paste(query, collapse = " OR ")
+    }
+
     while (!is.null(nextoken)) {
       df <-
         get_tweets(
-          q = paste0('has:videos ', query),
+          q = paste(query, 'has:videos'),
           n = 500,
           start_time = start_tweets,
           end_time = end_tweets,

diff --git a/README.md b/README.md
@@ -92,9 +92,28 @@ tweets <-
 
 ```
 
-Note that the "AND" operator is implicit when specifying more than one character string in the query. See [here](https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query) for information on building queries for search tweets. 
+Alternatively, we can specify a character vector comprising several elements. For example, if we wanted to search multiple hashtags, we could specify a query as follows:
 
-Thus, when searching for all elements of a character string, a call may look like:
+```{r}
+
+bearer_token <- "" # Insert bearer token
+
+htagquery <- c("#BLM", "#BlackLivesMatter", "#GeorgeFloyd")
+
+tweets <-
+  get_all_tweets(
+    htagquery,
+    "2020-01-01T00:00:00Z",
+    "2020-01-05T00:00:00Z",
+    bearer_token
+  )
+
+```
+
+This will achieve the same thing as typing out `OR` between our strings.
+
+
+Note that the "AND" operator is implicit when specifying more than one character string in the query. See [here](https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query) for information on building queries for search tweets. Thus, when searching for all elements of a character string, a call may look like:
 
 ```{r}