[SQL] Intermediate SQL(3)

4 minute read


SQL Intermediate(3)

  • Correlated Subquery
  • Nested Subquery
  • Common Table Expression(CTE)
  • 비교

import sqlite3
%load_ext sql
%sql sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
'Connected: @/Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite'

Correlated Subquery

  • Difference with simple Subquery
    • Dependent on the main query to execute
    • evaluted in each loop(significantly slows down query runtime)
  • average number of goals scored in each country
%%sql
SELECT 
	c.name AS country,
	(SELECT
		AVG(m.home_team_goal + m.away_team_goal) 
	 FROM Match AS m
	 WHERE m.country_id = c.id) AS avg_goals
FROM country as c
GROUP BY country;

-- 위와 같음
-- SELECT 
--	  c.name AS country,
--	  AVG(m.home_team_goal + m.away_team_goal) AS avg_goals
-- FROM country as c
-- LEFT JOIN Match as m
--	  ON c.id = m.country_id
-- GROUP BY country
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country avg_goals
Belgium 2.8015046296296298
England 2.710526315789474
France 2.443092105263158
Germany 2.9015522875816995
Italy 2.6168379184620485
Netherlands 3.0808823529411766
Poland 2.425
Portugal 2.534600389863548
Scotland 2.633771929824561
Spain 2.767105263157895
Switzerland 2.929676511954993
  • get matches with scores that are 3 times above the average score for each country
%%sql
SELECT 
	main.country_id,
    main.date,
    main.home_team_goal, 
    main.away_team_goal
FROM match AS main
WHERE
	(home_team_goal + away_team_goal) > 
        (SELECT AVG((sub.home_team_goal + sub.away_team_goal) * 3)
         FROM Match AS sub
         WHERE main.country_id = sub.country_id);
  • get matches with scores that are equal to the highest scoring match for each country, in each season
  • highest scoring: (home_team_goal + away_team_goal)
%%sql
SELECT 
	main.country_id,
    main.date,
    main.home_team_goal,
    main.away_team_goal
FROM match AS main
WHERE 
	(home_team_goal + away_team_goal) =
        (SELECT MAX(sub.home_team_goal + sub.away_team_goal)
         FROM match AS sub
         WHERE main.country_id = sub.country_id
               AND main.season = sub.season);

Nested Subquery

  • can be either simple or correlated subquery

  • get the highest total number of goals in each season, overall, and during July across all seasons

%%sql
SELECT
	season,
    MAX(home_team_goal + away_team_goal) AS max_goals,
   (SELECT MAX(home_team_goal + away_team_goal) FROM Match) AS overall_max_goals,
   (SELECT MAX(home_team_goal + away_team_goal) 
    FROM Match
    WHERE id IN (
          SELECT id FROM Match WHERE EXTRACT(MONTH FROM date) = 07)) AS july_max_goals
FROM Match
GROUP BY season;
  • get average number of matches per season where a team scored 5 or more goals by country
%%sql
SELECT
	c.name AS country,
	AVG(matches) AS avg_seasonal_high_scores
FROM country AS c
LEFT JOIN (
  SELECT country_id, 
  	     season,
         COUNT(id) AS matches
  FROM (
    SELECT country_id, season, id
	FROM match
	WHERE home_team_goal >= 5 OR away_team_goal >= 5) AS inner_s
  GROUP BY country_id, season) AS outer_s
ON c.id = outer_s.country_id
GROUP BY country;
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
country avg_seasonal_high_scores
Belgium 9.571428571428571
England 14.5
France 8.0
Germany 13.75
Italy 8.5
Netherlands 20.125
Poland 5.857142857142857
Portugal 8.625
Scotland 7.125
Spain 19.125
Switzerland 8.0

Common Table Expression(CTE)

  • method to make code readable

  • get number of matches with more than 10 total goals in each country

%%sql
WITH match_list AS (
    SELECT 
  		country_id, 
  		id
    FROM match
    WHERE (home_team_goal + away_team_goal) >= 10)

SELECT
    l.name AS league,
    COUNT(match_list.id) AS matches
FROM league AS l
LEFT JOIN match_list ON l.id = match_list.country_id
GROUP BY l.name;
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
league matches
Belgium Jupiler League 0
England Premier League 4
France Ligue 1 1
Germany 1. Bundesliga 1
Italy Serie A 0
Netherlands Eredivisie 2
Poland Ekstraklasa 0
Portugal Liga ZON Sagres 0
Scotland Premier League 1
Spain LIGA BBVA 5
Switzerland Super League 0

비교

  • get all match’s date, home_team_name, away_team_name and each team’s goal

Simple Query

%%sql
SELECT 
	m.date,
	home.hometeam,
	away.awayteam,
	m.home_team_goal,
	m.away_team_goal 
FROM Match AS m
LEFT JOIN (
		SELECT m1.id, t1.team_long_name AS hometeam
		FROM Match m1
		LEFT JOIN Team t1
			ON m1.home_team_api_id = t1.team_api_id) AS home
	ON m.id = home.id
LEFT JOIN (
		SELECT m2.id, t2.team_long_name AS awayteam
		FROM Match m2
		LEFT JOIN Team t2
			ON m2.away_team_api_id = t2.team_api_id ) AS away
	ON m.id = away.id
LIMIT 10;
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date hometeam awayteam home_team_goal away_team_goal
2008-08-17 00:00:00 KRC Genk Beerschot AC 1 1
2008-08-16 00:00:00 SV Zulte-Waregem Sporting Lokeren 0 0
2008-08-16 00:00:00 KSV Cercle Brugge RSC Anderlecht 0 3
2008-08-17 00:00:00 KAA Gent RAEC Mons 5 0
2008-08-16 00:00:00 FCV Dender EH Standard de Liège 1 3
2008-09-24 00:00:00 KV Mechelen Club Brugge KV 1 1
2008-08-16 00:00:00 KSV Roeselare KV Kortrijk 2 2
2008-08-16 00:00:00 Tubize Royal Excel Mouscron 1 2
2008-08-16 00:00:00 KVC Westerlo Sporting Charleroi 1 0
2008-11-01 00:00:00 Club Brugge KV KV Kortrijk 4 1

Correlated Subquery

%%sql
SELECT
    m.date,
    (SELECT team_long_name
     FROM team AS t
     WHERE t.team_api_id = m.home_team_api_id) AS hometeam,
    (SELECT team_long_name
     FROM team AS t
     WHERE t.team_api_id = m.away_team_api_id) AS awayteam,
     m.home_team_goal,
     m.away_team_goal
FROM Match AS m
LIMIT 10;
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date hometeam awayteam home_team_goal away_team_goal
2008-08-17 00:00:00 KRC Genk Beerschot AC 1 1
2008-08-16 00:00:00 SV Zulte-Waregem Sporting Lokeren 0 0
2008-08-16 00:00:00 KSV Cercle Brugge RSC Anderlecht 0 3
2008-08-17 00:00:00 KAA Gent RAEC Mons 5 0
2008-08-16 00:00:00 FCV Dender EH Standard de Liège 1 3
2008-09-24 00:00:00 KV Mechelen Club Brugge KV 1 1
2008-08-16 00:00:00 KSV Roeselare KV Kortrijk 2 2
2008-08-16 00:00:00 Tubize Royal Excel Mouscron 1 2
2008-08-16 00:00:00 KVC Westerlo Sporting Charleroi 1 0
2008-11-01 00:00:00 Club Brugge KV KV Kortrijk 4 1

Common Table Expression

%%sql
WITH home AS (
  SELECT m.id, 
  		 m.date, 
  		 t.team_long_name AS hometeam, 
  		 m.home_team_goal
  FROM Match AS m
  LEFT JOIN Team AS t 
  	ON m.home_team_api_id = t.team_api_id),
away AS (
  SELECT m.id, 
  		 m.date, 
  		 t.team_long_name AS awayteam, 
  		 m.away_team_goal
  FROM match AS m
  LEFT JOIN team AS t 
  	ON m.away_team_api_id = t.team_api_id)
SELECT 
	home.date,
    home.hometeam,
    away.awayteam,
    home.home_team_goal,
    away.away_team_goal
FROM home
INNER JOIN away
ON home.id = away.id
LIMIT 10;
 * sqlite:////Users/jisu/Dropbox_Carl/Dropbox/JISU/Data/Jupyter_Projects/blog/data_engineer/sql/3_intermediate_sql/database.sqlite
Done.
date hometeam awayteam home_team_goal away_team_goal
2008-08-17 00:00:00 KRC Genk Beerschot AC 1 1
2008-08-16 00:00:00 SV Zulte-Waregem Sporting Lokeren 0 0
2008-08-16 00:00:00 KSV Cercle Brugge RSC Anderlecht 0 3
2008-08-17 00:00:00 KAA Gent RAEC Mons 5 0
2008-08-16 00:00:00 FCV Dender EH Standard de Liège 1 3
2008-09-24 00:00:00 KV Mechelen Club Brugge KV 1 1
2008-08-16 00:00:00 KSV Roeselare KV Kortrijk 2 2
2008-08-16 00:00:00 Tubize Royal Excel Mouscron 1 2
2008-08-16 00:00:00 KVC Westerlo Sporting Charleroi 1 0
2008-11-01 00:00:00 Club Brugge KV KV Kortrijk 4 1

Tags:

Categories:

Updated: