[SQL] Intermediate SQL(2)
SQL Intermediate(2)
- Simple Subquery
- Subquery in
WHERE
- Subquery in
FROM
- Subquery in
SELECT
- Subquery Everywhere
- Subquery in
- 정확한 데이터를 얻었는지 항상 확인, 특히 filtering 주의
import sqlite3
%load_ext sql
%sql sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
'Connected: @/Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite'
Subquery in WHERE
- list of teams that scored 8 or more goals in a home match
%%sql
SELECT
team_long_name,
team_short_name
FROM Team
WHERE team_api_id IN
(SELECT home_team_api_id
FROM Match
WHERE home_team_goal >= 8);
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
team_long_name | team_short_name |
---|---|
Chelsea | CHE |
Southampton | SOU |
Tottenham Hotspur | TOT |
Real Madrid CF | REA |
FC Barcelona | BAR |
PSV | PSV |
SL Benfica | BEN |
FC Bayern Munich | BMU |
Celtic | CEL |
Manchester United | MUN |
Subquery in FROM
-
alias 해줘야 함
-
matches with 10 or more total goals by country
%%sql
SELECT
c.name AS country_name,
COUNT(sub.id) AS matches
FROM Country AS c
INNER JOIN (
SELECT country_id, id
FROM Match
WHERE (home_team_goal + away_team_goal) >= 10) AS sub
ON c.id = sub.country_id
GROUP BY country_name;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country_name | matches |
---|---|
England | 4 |
France | 1 |
Germany | 1 |
Netherlands | 2 |
Scotland | 1 |
Spain | 5 |
- number of matches that scored 10 or more goals in a home match by countries
%%sql
SELECT
c.name AS country_name,
COUNT(sub.id) AS matches
FROM Country AS c
INNER JOIN (
SELECT country_id, id
FROM Match
WHERE (home_team_goal + away_team_goal) >= 10) AS sub
ON c.id = sub.country_id
GROUP BY country_name;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country_name | matches |
---|---|
England | 4 |
France | 1 |
Germany | 1 |
Netherlands | 2 |
Scotland | 1 |
Spain | 5 |
Subquery in SELECT
- avg_goals by league and overall avg_goals in 2013/2014 seasons
%%sql
SELECT
l.name AS league,
ROUND(AVG(m.home_team_goal + m.away_team_goal), 2) AS avg_goals,
(SELECT ROUND(AVG(home_team_goal + away_team_goal), 2)
FROM match
WHERE season = "2013/2014") AS overall_avg
FROM league AS l
LEFT JOIN match AS m
ON l.country_id = m.country_id
WHERE season = '2013/2014'
GROUP BY league;
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
league | avg_goals | overall_avg |
---|---|---|
Belgium Jupiler League | 2.5 | 2.77 |
England Premier League | 2.77 | 2.77 |
France Ligue 1 | 2.46 | 2.77 |
Germany 1. Bundesliga | 3.16 | 2.77 |
Italy Serie A | 2.72 | 2.77 |
Netherlands Eredivisie | 3.2 | 2.77 |
Poland Ekstraklasa | 2.64 | 2.77 |
Portugal Liga ZON Sagres | 2.37 | 2.77 |
Scotland Premier League | 2.75 | 2.77 |
Spain LIGA BBVA | 2.75 | 2.77 |
Switzerland Super League | 2.89 | 2.77 |
- avg_goals by league and difference of overall avg_goals - avg_goals in 2013/2014 season
%%sql
SELECT
l.name AS league,
ROUND(AVG(m.home_team_goal + m.away_team_goal),2) AS avg_goals,
ROUND(AVG(m.home_team_goal + m.away_team_goal) -
(SELECT AVG(home_team_goal + away_team_goal)
FROM match
WHERE season = '2013/2014'),2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id = m.country_id
WHERE season = '2013/2014'
GROUP BY l.name;
Subquery Everywhere
- avg_goal and overall_goal by stage in 2012/2013 season
- only display the results where avg_goal of the stage is higher than the overall average
%%sql
SELECT
s.stage,
ROUND(s.avg_goals,2) AS avg_goals,
ROUND((SELECT AVG(home_team_goal + away_team_goal)
FROM Match
WHERE season = '2012/2013'),2) AS overall_avg
FROM
(SELECT
stage,
AVG(home_team_goal + away_team_goal) AS avg_goals
FROM Match
WHERE season = '2012/2013'
GROUP BY stage) AS s
WHERE
s.avg_goals > (SELECT AVG(home_team_goal + away_team_goal)
FROM match WHERE season = '2012/2013');
* sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
stage | avg_goals | overall_avg |
---|---|---|
3 | 2.83 | 2.77 |
4 | 2.8 | 2.77 |
6 | 2.78 | 2.77 |
8 | 3.09 | 2.77 |
10 | 2.96 | 2.77 |
11 | 2.92 | 2.77 |
12 | 3.23 | 2.77 |
17 | 2.85 | 2.77 |
20 | 2.96 | 2.77 |
21 | 2.9 | 2.77 |
23 | 3.01 | 2.77 |
27 | 2.8 | 2.77 |
30 | 2.87 | 2.77 |
31 | 3.06 | 2.77 |
33 | 3.1 | 2.77 |
36 | 2.9 | 2.77 |
38 | 3.17 | 2.77 |