[SQL] Intermediate SQL(3)
SQL Intermediate(3)
- Correlated Subquery
- Nested Subquery
- Common Table Expression(CTE)
- 비교
import sqlite3
%load_ext sql
%sql sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
'Connected: @/Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite'
Correlated Subquery
- Difference with simple Subquery
- Dependent on the main query to execute
- evaluted in each loop(significantly slows down query runtime)
- average number of goals scored in each country
%%sql
SELECT
c.name AS country,
(SELECT
AVG(m.home_team_goal + m.away_team_goal)
FROM Match AS m
WHERE m.country_id = c.id) AS avg_goals
FROM country as c
GROUP BY country;
-- 위와 같음
-- SELECT
-- c.name AS country,
-- AVG(m.home_team_goal + m.away_team_goal) AS avg_goals
-- FROM country as c
-- LEFT JOIN Match as m
-- ON c.id = m.country_id
-- GROUP BY country
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country | avg_goals |
---|---|
Belgium | 2.8015046296296298 |
England | 2.710526315789474 |
France | 2.443092105263158 |
Germany | 2.9015522875816995 |
Italy | 2.6168379184620485 |
Netherlands | 3.0808823529411766 |
Poland | 2.425 |
Portugal | 2.534600389863548 |
Scotland | 2.633771929824561 |
Spain | 2.767105263157895 |
Switzerland | 2.929676511954993 |
- get matches with scores that are 3 times above the average score for each country
%%sql
SELECT
main.country_id,
main.date,
main.home_team_goal,
main.away_team_goal
FROM match AS main
WHERE
(home_team_goal + away_team_goal) >
(SELECT AVG((sub.home_team_goal + sub.away_team_goal) * 3)
FROM Match AS sub
WHERE main.country_id = sub.country_id);
- get matches with scores that are equal to the highest scoring match for each country, in each season
- highest scoring: (home_team_goal + away_team_goal)
%%sql
SELECT
main.country_id,
main.date,
main.home_team_goal,
main.away_team_goal
FROM match AS main
WHERE
(home_team_goal + away_team_goal) =
(SELECT MAX(sub.home_team_goal + sub.away_team_goal)
FROM match AS sub
WHERE main.country_id = sub.country_id
AND main.season = sub.season);
Nested Subquery
-
can be either simple or correlated subquery
-
get the highest total number of goals in each season, overall, and during July across all seasons
%%sql
SELECT
season,
MAX(home_team_goal + away_team_goal) AS max_goals,
(SELECT MAX(home_team_goal + away_team_goal) FROM Match) AS overall_max_goals,
(SELECT MAX(home_team_goal + away_team_goal)
FROM Match
WHERE id IN (
SELECT id FROM Match WHERE EXTRACT(MONTH FROM date) = 07)) AS july_max_goals
FROM Match
GROUP BY season;
- get average number of matches per season where a team scored 5 or more goals by country
%%sql
SELECT
c.name AS country,
AVG(matches) AS avg_seasonal_high_scores
FROM country AS c
LEFT JOIN (
SELECT country_id,
season,
COUNT(id) AS matches
FROM (
SELECT country_id, season, id
FROM match
WHERE home_team_goal >= 5 OR away_team_goal >= 5) AS inner_s
GROUP BY country_id, season) AS outer_s
ON c.id = outer_s.country_id
GROUP BY country;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country | avg_seasonal_high_scores |
---|---|
Belgium | 9.571428571428571 |
England | 14.5 |
France | 8.0 |
Germany | 13.75 |
Italy | 8.5 |
Netherlands | 20.125 |
Poland | 5.857142857142857 |
Portugal | 8.625 |
Scotland | 7.125 |
Spain | 19.125 |
Switzerland | 8.0 |
Common Table Expression(CTE)
-
method to make code readable
-
get number of matches with more than 10 total goals in each country
%%sql
WITH match_list AS (
SELECT
country_id,
id
FROM match
WHERE (home_team_goal + away_team_goal) >= 10)
SELECT
l.name AS league,
COUNT(match_list.id) AS matches
FROM league AS l
LEFT JOIN match_list ON l.id = match_list.country_id
GROUP BY l.name;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
league | matches |
---|---|
Belgium Jupiler League | 0 |
England Premier League | 4 |
France Ligue 1 | 1 |
Germany 1. Bundesliga | 1 |
Italy Serie A | 0 |
Netherlands Eredivisie | 2 |
Poland Ekstraklasa | 0 |
Portugal Liga ZON Sagres | 0 |
Scotland Premier League | 1 |
Spain LIGA BBVA | 5 |
Switzerland Super League | 0 |
비교
- get all match’s date, home_team_name, away_team_name and each team’s goal
Simple Query
%%sql
SELECT
m.date,
home.hometeam,
away.awayteam,
m.home_team_goal,
m.away_team_goal
FROM Match AS m
LEFT JOIN (
SELECT m1.id, t1.team_long_name AS hometeam
FROM Match m1
LEFT JOIN Team t1
ON m1.home_team_api_id = t1.team_api_id) AS home
ON m.id = home.id
LEFT JOIN (
SELECT m2.id, t2.team_long_name AS awayteam
FROM Match m2
LEFT JOIN Team t2
ON m2.away_team_api_id = t2.team_api_id ) AS away
ON m.id = away.id
LIMIT 10;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date | hometeam | awayteam | home_team_goal | away_team_goal |
---|---|---|---|---|
2008-08-17 00:00:00 | KRC Genk | Beerschot AC | 1 | 1 |
2008-08-16 00:00:00 | SV Zulte-Waregem | Sporting Lokeren | 0 | 0 |
2008-08-16 00:00:00 | KSV Cercle Brugge | RSC Anderlecht | 0 | 3 |
2008-08-17 00:00:00 | KAA Gent | RAEC Mons | 5 | 0 |
2008-08-16 00:00:00 | FCV Dender EH | Standard de Liège | 1 | 3 |
2008-09-24 00:00:00 | KV Mechelen | Club Brugge KV | 1 | 1 |
2008-08-16 00:00:00 | KSV Roeselare | KV Kortrijk | 2 | 2 |
2008-08-16 00:00:00 | Tubize | Royal Excel Mouscron | 1 | 2 |
2008-08-16 00:00:00 | KVC Westerlo | Sporting Charleroi | 1 | 0 |
2008-11-01 00:00:00 | Club Brugge KV | KV Kortrijk | 4 | 1 |
Correlated Subquery
%%sql
SELECT
m.date,
(SELECT team_long_name
FROM team AS t
WHERE t.team_api_id = m.home_team_api_id) AS hometeam,
(SELECT team_long_name
FROM team AS t
WHERE t.team_api_id = m.away_team_api_id) AS awayteam,
m.home_team_goal,
m.away_team_goal
FROM Match AS m
LIMIT 10;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date | hometeam | awayteam | home_team_goal | away_team_goal |
---|---|---|---|---|
2008-08-17 00:00:00 | KRC Genk | Beerschot AC | 1 | 1 |
2008-08-16 00:00:00 | SV Zulte-Waregem | Sporting Lokeren | 0 | 0 |
2008-08-16 00:00:00 | KSV Cercle Brugge | RSC Anderlecht | 0 | 3 |
2008-08-17 00:00:00 | KAA Gent | RAEC Mons | 5 | 0 |
2008-08-16 00:00:00 | FCV Dender EH | Standard de Liège | 1 | 3 |
2008-09-24 00:00:00 | KV Mechelen | Club Brugge KV | 1 | 1 |
2008-08-16 00:00:00 | KSV Roeselare | KV Kortrijk | 2 | 2 |
2008-08-16 00:00:00 | Tubize | Royal Excel Mouscron | 1 | 2 |
2008-08-16 00:00:00 | KVC Westerlo | Sporting Charleroi | 1 | 0 |
2008-11-01 00:00:00 | Club Brugge KV | KV Kortrijk | 4 | 1 |
Common Table Expression
%%sql
WITH home AS (
SELECT m.id,
m.date,
t.team_long_name AS hometeam,
m.home_team_goal
FROM Match AS m
LEFT JOIN Team AS t
ON m.home_team_api_id = t.team_api_id),
away AS (
SELECT m.id,
m.date,
t.team_long_name AS awayteam,
m.away_team_goal
FROM match AS m
LEFT JOIN team AS t
ON m.away_team_api_id = t.team_api_id)
SELECT
home.date,
home.hometeam,
away.awayteam,
home.home_team_goal,
away.away_team_goal
FROM home
INNER JOIN away
ON home.id = away.id
LIMIT 10;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date | hometeam | awayteam | home_team_goal | away_team_goal |
---|---|---|---|---|
2008-08-17 00:00:00 | KRC Genk | Beerschot AC | 1 | 1 |
2008-08-16 00:00:00 | SV Zulte-Waregem | Sporting Lokeren | 0 | 0 |
2008-08-16 00:00:00 | KSV Cercle Brugge | RSC Anderlecht | 0 | 3 |
2008-08-17 00:00:00 | KAA Gent | RAEC Mons | 5 | 0 |
2008-08-16 00:00:00 | FCV Dender EH | Standard de Liège | 1 | 3 |
2008-09-24 00:00:00 | KV Mechelen | Club Brugge KV | 1 | 1 |
2008-08-16 00:00:00 | KSV Roeselare | KV Kortrijk | 2 | 2 |
2008-08-16 00:00:00 | Tubize | Royal Excel Mouscron | 1 | 2 |
2008-08-16 00:00:00 | KVC Westerlo | Sporting Charleroi | 1 | 0 |
2008-11-01 00:00:00 | Club Brugge KV | KV Kortrijk | 4 | 1 |