Examples #
Question 1: Which user received the most likes in total? #
SELECT
username
FROM youtube_tweets
GROUP BY username
ORDER BY sum(like_count) DESC
LIMIT 1;
Question 2: Who (username) created the most tweets in our panel? #
SELECT
username
FROM youtube_tweets
GROUP BY username
ORDER BY count(tweet_count) DESC
LIMIT 1;
Question 3: Who (username) created the most tweets overall? #
SELECT
username
FROM youtube_tweets
GROUP BY username
ORDER BY max(tweet_count) DESC
LIMIT 1;
Question 4: How many distinct users posted on December 24th? #
SELECT count(distinct username)
FROM youtube_tweets
WHERE created_at::DateTime::Date = '2022-12-24';
Question 5: How many tweets mention U2? #
SELECT count(distinct id)
FROM youtube_tweets
WHERE text like '%U2%';
Question 6: How many tweets mention Wham? #
SELECT count(distinct id)
FROM youtube_tweets
WHERE text ilike '%wham%';
Question 7: On which day do people write the most about “WHAM”? #
SELECT
created_at::Datetime::Date as date,
count(*)
FROM youtube_tweets
WHERE text ilike '%wham%'
GROUP BY date
ORDER BY count(*) DESC
LIMIT 1
Question 8: How many tweets receive more retweets than likes? #
SELECT count(*)
FROM youtube_tweets
WHERE retweet_count > like_count;
Question 9: What is the proportion of tweets that receive more retweets than likes? #
SELECT avg((retweet_count > like_count)::INTEGER) FROM youtube_tweets;
or …
SELECT (SELECT count(*) FROM youtube_tweets WHERE retweet_count > like_count)/(SELECT count(*) FROM youtube_tweets);
or …
SELECT avg(IF(retweet_count > like_count, 1, 0)) FROM youtube_tweets;
Question 10: Are there, on average, more replies than retweets?; return the proportion! #
SELECT avg(IF(reply_count > retweet_count, 1, 0)) FROM youtube_tweets; -- or...
SELECT avg(IF(reply_count > retweet_count, 1, 0)) > 0.5 FROM youtube_tweets; -- or...
SELECT mode(reply_count > retweet_count) FROM youtube_tweets;