[TIL] Database MySQL - JOIN, UNION, subquery, WITH, SQL Grammar

03/21/23

·

4 min read

[TIL] Database MySQL - JOIN, UNION, subquery, WITH, SQL Grammar

JOIN

: to combine tables by key value (the common information between two tables)

Left Join

select * from users u
left join point_users p
on u.user_id = p.user_id;

(Inner) Join

select * from users u
inner join point_users p
on u.user_id = p.user_id;

The order that queries are executed

from -> join -> select

UNION

: to combine two SELECTs

(
    select 'July' as month, c.title, c2.week, count(*) as cnt from checkins c2
    inner join courses c on c2.course_id = c.course_id
    inner join orders o on o.user_id = c2.user_id
    where o.created_at < '2020-08-01'
    group by c2.course_id, c2.week
  order by c2.course_id, c2.week
)
union all
(
    select 'August' as month, c.title, c2.week, count(*) as cnt from checkins c2
    inner join courses c on c2.course_id = c.course_id
    inner join orders o on o.user_id = c2.user_id
    where o.created_at > '2020-08-01'
    group by c2.course_id, c2.week
  order by c2.course_id, c2.week
)

Subquery

: query inside the query, in order to make the queries look simple

select u.user_id, u.name, u.email from users u
inner join orders o on u.user_id = o.user_id
where o.payment_method = 'kakaopay'
-- Subquery
select u.user_id, u.name, u.email from users u
where u.user_id in (
    select user_id from orders
    where payment_method = 'kakaopay'
)

Subquery in WHERE

select * from users u
where u.user_id in (
    select o.user_id from orders o 
    where o.payment_method = 'kakaopay'
);

-- Execution Order:
-- from -> subquery -> where .. in -> print

Subquery in SELECT

select c.checkin_id, c.user_id, c.likes, 
    (select avg(likes) from checkins c2
    where c2.user_id = c.user_id) as avg_like_user
from checkins c;

-- Execution Order:
-- outer select executed as reading data one by one -> inner select executed while outer select reading one data -> print at the same time

Subquery in FROM (most common)

select pu.user_id, t1.avg_like, pu.point from point_users pu
inner join (
    select user_id, round(avg(likes),1) as avg_like from checkins
    group by user_id
) t1 on pu.user_id = t1.user_id

-- Execution Order:
-- inner select as a table -> outer select

WITH

select 
    c.title, 
    b.cnt_checkins, 
    a.cnt_total, 
    round(b.cnt_checkins/a.cnt_total, 4) as ratio 
from
(
    select course_id, count(*) as cnt_total from orders
    group by course_id
) a
inner join 
(
    select course_id, count(distinct(user_id)) as cnt_checkins from checkins
    group by course_id
) b
on a.course_id = b.course_id
inner join courses c on c.course_id = a.course_id
with table1 as (
    select course_id, count(distinct(user_id)) as cnt_checkins from checkins
    group by course_id
), table2 as (
    select course_id, count(*) as cnt_total from orders
    group by course_id
)
select c.title,
       a.cnt_checkins,
       b.cnt_total,
       (a.cnt_checkins/b.cnt_total) as ratio
from table1 a inner join table2 b on a.course_id = b.course_id
inner join courses c on a.course_id = c.course_id

SQL Grammar

String Type Data

SUBSTRING_INDEX(string, 'key', index): splitting string

select user_id, email, SUBSTRING_INDEX(email, '@', -1) from users

SUBSTRING(string, start_pos, num_of_str): printing substring

select SUBSTRING(created_at, 1, 10) as date, count(*) as num_orders 
from orders
group by date

Case Statement

select pu.point_user_id, pu.point, 
       (CASE when pu.point > 10000 then 'Great!'
             else 'Keep up!' END) as msg  
from point_users pu;

Practical Use of CASE, using a subquery

select pu.point_user_id, pu.point,
    (CASE when pu.point >= 10000 then 'Over 10,000'
          when pu.point >= 5000 then 'Over 5,000'
          else 'Under 5,000' END) as level
from point_users pu

select level, count(*) as cnt from (
    select pu.point_user_id, pu.point, 
        (CASE when pu.point >= 10000 then 'Over 10,000'
              when pu.point >= 5000 then 'Over 5,000'
              else 'Under 5,000' END) as level
      from point_users pu
) a
group by level

-- Same way by using WITH
with table1 as (
    select pu.point_user_id, pu.point, 
        (CASE when pu.point >= 10000 then 'Over 10,000'
              when pu.point >= 5000 then 'Over 5,000'
              else 'Under 5,000' END) as level
      from point_users pu
)

select level, count(*) as cnt from table1
group by level

Practice Problem

-- find the average and sample variance
select
  quartet,
  round(avg(x), 2) as x_mean,
  round(variance (x), 2) as x_var,
  round(avg(y), 2) as y_mean,
  round(variance (y), 2) as y_var
from
  points
group by
  quartet;