This vignette provides a quick tour of the R package rtweet: Collecting Twitter Data.
Search for up to 18,000 (non-retweeted) tweets containing the rstats hashtag.
## collect up to 18,000 original (non-retweet) statuses tagged #rstats
rt <- search_tweets(q = "#rstats", n = 18000, include_rts = FALSE)

## print the returned tweets data
rt

## print the users data that accompanies the tweets
users_data(rt)

## draw a time series of the tweets (if ggplot2 is installed)
ts_plot(rt)
Quickly visualize the frequency of tweets over time using `ts_plot()`.
## plot tweet counts in three-hour bins, then restyle and label the figure
ts_plot(rt, by = "3 hours") +
  ggplot2::theme_minimal() +
  ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) +
  ggplot2::labs(
    x = NULL,
    y = NULL,
    title = "Frequency of #rstats Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
Twitter rate limits cap the number of search results returned to 18,000 every 15 minutes. To request more than that, simply set `retryonratelimit = TRUE` and rtweet will wait for rate limit resets for you.
## request 250,000 tweets containing the word "data"; retryonratelimit = TRUE
## is passed so the request can exceed a single rate-limit window
rt <- search_tweets(q = "data", n = 250000, retryonratelimit = TRUE)
Search by geo-location—for example, find 10,000 tweets in the English language sent from the United States.
## search for 10,000 English-language tweets sent from the US
rt <- search_tweets(
  "lang:en", geocode = lookup_coords("usa"), n = 10000
)

## create lat/lng variables using all available tweet and profile geo-location data
rt <- lat_lng(rt)

## plot state boundaries with zero margins; par() returns the previous
## settings, which are kept so they can be restored after the map is drawn
old_par <- par(mar = c(0, 0, 0, 0))
maps::map("state", lwd = .25)

## plot lat and lng points onto state map
with(rt, points(lng, lat, pch = 20, cex = .75, col = rgb(0, .3, .7, .75)))

## restore the graphical parameters that were in effect before plotting
par(old_par)
Randomly sample (approximately 1%) from the live stream of all tweets.
Stream all geo-enabled tweets from London for 60 seconds.
## open a 60-second stream limited to the coordinates looked up for London, UK
rt <- stream_tweets(
  q = lookup_coords("london, uk"),
  timeout = 60
)
Stream all tweets mentioning realDonaldTrump or Trump for a week.
Retrieve a list of all the accounts a user follows.
Retrieve a list of the accounts following a user.
## fetch the user IDs of 75,000 accounts that follow CNN
cnn_flw <- get_followers(user = "cnn", n = 75000)

## look up account-level data for each of those follower IDs
cnn_flw_data <- lookup_users(users = cnn_flw[["user_id"]])
Or if you really want ALL of their followers:
Get the most recent 3,200 tweets from cnn, BBCWorld, and foxnews.
## get up to 3,200 of the most recent tweets from each news account
tmls <- get_timelines(user = c("cnn", "BBCWorld", "foxnews"), n = 3200)

## keep tweets created after 2017-10-29, group them by account, and plot
## the daily tweet frequency for each account
tmls %>%
  dplyr::filter(created_at > "2017-10-29") %>%
  dplyr::group_by(screen_name) %>%
  ts_plot(by = "days", trim = 1L) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    legend.title = ggplot2::element_blank(),
    legend.position = "bottom",
    plot.title = ggplot2::element_text(face = "bold")
  ) +
  ggplot2::labs(
    x = NULL,
    y = NULL,
    title = "Frequency of Twitter statuses posted by news organization",
    subtitle = "Twitter status (tweet) counts aggregated by day from October/November 2017",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
Get the 3,000 most recently favorited statuses by JK Rowling.
Search for 1,000 users with the rstats hashtag in their profile bios.
## screen names of the accounts to look up (user IDs are accepted as well)
users <- c(
  "KimKardashian", "justinbieber", "taylorswift13",
  "espn", "JoelEmbiid", "cstonehoops", "KUHoops",
  "upshotnyt", "fivethirtyeight", "hadleywickham",
  "cnn", "foxnews", "msnbc", "maddow", "seanhannity",
  "potus", "epa", "hillaryclinton", "realdonaldtrump",
  "natesilver538", "ezraklein", "annecoulter"
)
famous_tweeters <- lookup_users(users = users)

## print the users data
famous_tweeters

## extract the most recent tweets data from the looked-up accounts
tweets_data(famous_tweeters)