Table of contents
JOIN
: to combine tables by key value (the common information between two tables)
Left Join
select * from users u
left join point_users p
on u.user_id = p.user_id;
(Inner) Join
select * from users u
inner join point_users p
on u.user_id = p.user_id;
The order that queries are executed
from -> join -> select
UNION
: to combine two SELECTs
(
select 'July' as month, c.title, c2.week, count(*) as cnt from checkins c2
inner join courses c on c2.course_id = c.course_id
inner join orders o on o.user_id = c2.user_id
where o.created_at < '2020-08-01'
group by c2.course_id, c2.week
order by c2.course_id, c2.week
)
union all
(
select 'August' as month, c.title, c2.week, count(*) as cnt from checkins c2
inner join courses c on c2.course_id = c.course_id
inner join orders o on o.user_id = c2.user_id
where o.created_at > '2020-08-01'
group by c2.course_id, c2.week
order by c2.course_id, c2.week
)
Subquery
: query inside the query, in order to make the queries look simple
select u.user_id, u.name, u.email from users u
inner join orders o on u.user_id = o.user_id
where o.payment_method = 'kakaopay'
-- Subquery
select u.user_id, u.name, u.email from users u
where u.user_id in (
select user_id from orders
where payment_method = 'kakaopay'
)
Subquery in WHERE
select * from users u
where u.user_id in (
select o.user_id from orders o
where o.payment_method = 'kakaopay'
);
-- Execution Order:
-- from -> subquery -> where .. in -> print
Subquery in SELECT
select c.checkin_id, c.user_id, c.likes,
(select avg(likes) from checkins c2
where c2.user_id = c.user_id) as avg_like_user
from checkins c;
-- Execution Order:
-- outer select executed as reading data one by one -> inner select executed while outer select reading one data -> print at the same time
Subquery in FROM (most common)
select pu.user_id, t1.avg_like, pu.point from point_users pu
inner join (
select user_id, round(avg(likes),1) as avg_like from checkins
group by user_id
) t1 on pu.user_id = t1.user_id
-- Execution Order:
-- inner select as a table -> outer select
WITH
select
c.title,
b.cnt_checkins,
a.cnt_total,
round(b.cnt_checkins/a.cnt_total, 4) as ratio
from
(
select course_id, count(*) as cnt_total from orders
group by course_id
) a
inner join
(
select course_id, count(distinct(user_id)) as cnt_checkins from checkins
group by course_id
) b
on a.course_id = b.course_id
inner join courses c on c.course_id = a.course_id
with table1 as (
select course_id, count(distinct(user_id)) as cnt_checkins from checkins
group by course_id
), table2 as (
select course_id, count(*) as cnt_total from orders
group by course_id
)
select c.title,
a.cnt_checkins,
b.cnt_total,
(a.cnt_checkins/b.cnt_total) as ratio
from table1 a inner join table2 b on a.course_id = b.course_id
inner join courses c on a.course_id = c.course_id
SQL Grammar
String Type Data
SUBSTRING_INDEX(string, 'key', index): splitting string
select user_id, email, SUBSTRING_INDEX(email, '@', -1) from users
SUBSTRING(string, start_pos, num_of_str): printing substring
select SUBSTRING(created_at, 1, 10) as date, count(*) as num_orders
from orders
group by date
Case Statement
select pu.point_user_id, pu.point,
(CASE when pu.point > 10000 then 'Great!'
else 'Keep up!' END) as msg
from point_users pu;
Practical Use of CASE, using a subquery
select pu.point_user_id, pu.point,
(CASE when pu.point >= 10000 then 'Over 10,000'
when pu.point >= 5000 then 'Over 5,000'
else 'Under 5,000' END) as level
from point_users pu
select level, count(*) as cnt from (
select pu.point_user_id, pu.point,
(CASE when pu.point >= 10000 then 'Over 10,000'
when pu.point >= 5000 then 'Over 5,000'
else 'Under 5,000' END) as level
from point_users pu
) a
group by level
-- Same way by using WITH
with table1 as (
select pu.point_user_id, pu.point,
(CASE when pu.point >= 10000 then 'Over 10,000'
when pu.point >= 5000 then 'Over 5,000'
else 'Under 5,000' END) as level
from point_users pu
)
select level, count(*) as cnt from table1
group by level
Practice Problem
-- find the average and sample variance
select
quartet,
round(avg(x), 2) as x_mean,
round(variance (x), 2) as x_var,
round(avg(y), 2) as y_mean,
round(variance (y), 2) as y_var
from
points
group by
quartet;