# Load necessary packages
library(readxl)
library(tidyverse)
library(lubridate)
library(dplyr)
library(zoo)
library(ggplot2)
library(tseries)

Warning message:
"package 'readxl' was built under R version 4.3.3"
Warning message:
"package 'tidyverse' was built under R version 4.3.3"
Warning message:
"package 'ggplot2' was built under R version 4.3.3"
Warning message:
"package 'tidyr' was built under R version 4.3.3"
Warning message:
"package 'readr' was built under R version 4.3.3"
Warning message:
"package 'purrr' was built under R version 4.3.1"
Warning message:
"package 'dplyr' was built under R version 4.3.3"
Warning message:
"package 'forcats' was built under R version 4.3.1"
Warning message:
"package 'lubridate' was built under R version 4.3.3"
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Warning message:
"package 'zoo' was built under R version 4.3.1"

Attaching package: 'zoo'


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric


Warning message:
"package 'tseries' was built under R version 4.3.1"
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo

# Set Excel file path
file_path <- "your_dataset.xlsx"  # path hidden for privacy

# Define sheet names to load
sheet_names <- c("HPI-SA-GTA", "On-afterTaxIncom", "BankRate-BOC", "PolicyRate-BOC", "Prate-FRED")

# Load all sheets into a list
all_data <- lapply(sheet_names, function(sheet) {
  read_excel(file_path, sheet = sheet)
})

# Name each sheet properly
names(all_data) <- sheet_names

Warning message:
"Expecting numeric in B1609 / R1609C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B1807 / R1807C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B1869 / R1869C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B1870 / R1870C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B1874 / R1874C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B2025 / R2025C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B2068 / R2068C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B2130 / R2130C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B2134 / R2134C2: got 'Bank holiday'"
Warning message:
"Expecting numeric in B2394 / R2394C2: got 'Bank holiday'"

HPI_GTA <- all_data[["HPI-SA-GTA"]]
On_ATincome <- all_data[["On-afterTaxIncom"]]
BankRate <- all_data[["BankRate-BOC"]]
policy_rate <- all_data[["PolicyRate-BOC"]]
Prate <- all_data[["Prate-FRED"]]

# View HPI sheet
View(all_data[["HPI-SA-GTA"]])

# Rename income column for clarity
On_ATincome <- On_ATincome %>% rename(M_AfterTincome = `M-AfterTincome`)

# Create a monthly date sequence for HPI data
HPI_GTA$date <- seq(as.Date("2005-01-01"), by = "month", length.out = nrow(HPI_GTA))

# Convert On_ATincome to proper date format (yearly start)
On_ATincome$date <- as.Date(paste0(On_ATincome$Years, "-01-01"))

# Fill missing HPI values using last observation carried forward
HPI_GTA$Composite_HPI_SA <- na.locf(HPI_GTA$Composite_HPI_SA, na.rm = FALSE)
HPI_GTA$Composite_Benchmark_SA <- na.locf(HPI_GTA$Composite_Benchmark_SA, na.rm = FALSE)

# Check if any NA values remain
sum(is.na(HPI_GTA$Composite_HPI_SA))
sum(is.na(On_ATincome$M_AfterTincome))
sum(is.na(HPI_GTA$Composite_Benchmark_SA))

str (On_ATincome)

tibble [47 × 3] (S3: tbl_df/tbl/data.frame)
 $ Years         : num [1:47] 1976 1977 1978 1979 1980 ...
 $ M_AfterTincome: num [1:47] 64400 65500 65600 65100 67000 65600 62100 59700 61800 63200 ...
 $ date          : Date[1:47], format: "1976-01-01" "1977-01-01" ...

# Assign monthly dates to HPI data
HPI_GTA$date <- seq(as.Date("2005-01-01"), by = "month", length.out = nrow(HPI_GTA))

# Convert Year column in On_ATincome to Date (annual)
On_ATincome$date <- as.Date(paste0(On_ATincome$Years, "-01-01"))

# Confirm the structure
str(HPI_GTA$date)
str(On_ATincome$date)

 Date[1:236], format: "2005-01-01" "2005-02-01" "2005-03-01" "2005-04-01" "2005-05-01" ...
 Date[1:47], format: "1976-01-01" "1977-01-01" "1978-01-01" "1979-01-01" "1980-01-01" ...

library(zoo)

# Fill missing values in HPI and Benchmark columns
HPI_GTA$Composite_HPI_SA <- na.locf(HPI_GTA$Composite_HPI_SA, na.rm = FALSE)
HPI_GTA$Composite_Benchmark_SA <- na.locf(HPI_GTA$Composite_Benchmark_SA, na.rm = FALSE)

# Check that no missing values remain
sum(is.na(HPI_GTA$Composite_HPI_SA))
sum(is.na(On_ATincome$M_AfterTincome))
sum(is.na(HPI_GTA$Composite_Benchmark_SA))

library(dplyr)
library(lubridate)

# Aggregate Composite HPI to yearly average
HPI_year_rate <- HPI_GTA %>%
  group_by(Year = year(date)) %>%
  summarize(Yearly_Average = mean(Composite_HPI_SA, na.rm = TRUE))

# Aggregate Benchmark Index to yearly average
Benchmark_year_rate <- HPI_GTA %>%
  group_by(Year = year(date)) %>%
  summarize(Yearly_Average = mean(Composite_Benchmark_SA, na.rm = TRUE))

# Convert Year to Date for plotting
HPI_year_rate$Year <- as.Date(paste0(HPI_year_rate$Year, "-01-01"))
Benchmark_year_rate$Year <- as.Date(paste0(Benchmark_year_rate$Year, "-01-01"))

# Calculate scaling factor to align income with HPI visually
income_scale_factor <- max(HPI_GTA$Composite_HPI_SA, na.rm = TRUE) / 
  max(On_ATincome$M_AfterTincome, na.rm = TRUE)

# Plot with dual axes
ggplot() +
  # HPI line and points
  geom_point(data = HPI_GTA, aes(x = date, y = Composite_HPI_SA), 
             color = "blue", size = 1) +
  geom_line(data = HPI_GTA, aes(x = date, y = Composite_HPI_SA), 
            color = "blue", alpha = 0.5) +

  # Income line and points scaled to match HPI range
  geom_point(data = On_ATincome, aes(x = date, y = M_AfterTincome * income_scale_factor), 
             color = "red", size = 3) +
  geom_line(data = On_ATincome, aes(x = date, y = M_AfterTincome * income_scale_factor), 
            color = "red", linetype = "dashed") +

  # Format axis and labels
  scale_x_date(date_labels = "%Y", date_breaks = "1 year",
               limits = c(as.Date("2005-01-01"), Sys.Date())) +
  scale_y_continuous(name = "Housing Price Index (Blue)",
                     sec.axis = sec_axis(~ . / income_scale_factor, 
                                         name = "Median After-Tax Income (Red)")) +

  labs(title = "HPI and Median After-Tax Income Over Time",
       x = "Year") +
  theme_minimal()

Warning message:
"Removed 29 rows containing missing values or values outside the scale range (`geom_point()`)."
Warning message:
"Removed 29 rows containing missing values or values outside the scale range (`geom_line()`)."

# Aggregate HPI to yearly average first
HPI_yearly <- HPI_GTA %>%
  group_by(Year = year(date)) %>%
  summarize(HPI = mean(Composite_HPI_SA, na.rm = TRUE))

# Prepare income as yearly with proper Year column
Income_yearly <- On_ATincome %>%
  mutate(Year = year(date)) %>%
  group_by(Year) %>%
  summarize(Income = mean(M_AfterTincome, na.rm = TRUE))

# Merge to ensure same years
growth_data <- inner_join(HPI_yearly, Income_yearly, by = "Year")

# Normalize using 2005 as base year
base_HPI <- growth_data$HPI[growth_data$Year == 2005]
base_income <- growth_data$Income[growth_data$Year == 2005]

growth_data <- growth_data %>%
  mutate(HPI_indexed = (HPI / base_HPI) * 100,
         Income_indexed = (Income / base_income) * 100)

# Plot
ggplot(growth_data, aes(x = Year)) +
  geom_line(aes(y = HPI_indexed, color = "HPI (Housing Price Index)"), size = 1.2) +
  geom_line(aes(y = Income_indexed, color = "After-Tax Income"), linetype = "dashed", size = 1.2) +
  scale_color_manual(values = c("HPI (Housing Price Index)" = "blue", "After-Tax Income" = "red")) +
  labs(title = "Indexed Growth of HPI vs After-Tax Income (Base = 2005)",
       y = "Indexed Value (2005 = 100)", x = "Year", color = "Legend") +
  theme_minimal()

# Calculate affordability ratio (HPI / Income)
growth_data <- growth_data %>%
  mutate(Affordability_Ratio = HPI / Income)

# Plot the affordability ratio over time
ggplot(growth_data, aes(x = Year, y = Affordability_Ratio)) +
  geom_line(color = "darkgreen", size = 1.2) +
  labs(title = "Affordability Ratio Over Time: HPI / After-Tax Income",
       x = "Year",
       y = "Affordability Ratio",
       caption = "Higher values indicate lower affordability") +
  theme_minimal()

# Trim income_ts to match HPI range (start in 2005)
income_ts_short <- window(income_ts, start = 2005)

# Set base values from 2005
base_HPI <- hpi_yearly$Composite_HPI_SA[1]
base_income <- income_ts_short[1]

# Compute indexed (normalized) values
hpi_yearly$HPI_indexed <- (hpi_yearly$Composite_HPI_SA / base_HPI) * 100
income_indexed <- (income_ts_short / base_income) * 100

# First check how many rows you have in HPI yearly data
length(hpi_yearly$Composite_HPI_SA)  # let's say this returns 20

# Then subset income_ts accordingly (starting from 2005)
income_ts_short <- window(income_ts, start = 2005, end = 2005 + 19)

# Normalize to base year
base_HPI <- hpi_yearly$Composite_HPI_SA[1]
base_income <- income_ts_short[1]

hpi_yearly$HPI_indexed <- (hpi_yearly$Composite_HPI_SA / base_HPI) * 100
income_indexed <- (income_ts_short / base_income) * 100

# Create a unified data frame
indexed_data <- data.frame(
  Year = time(income_ts_short),        # Year vector (e.g., 2005 to 2024)
  HPI_indexed = hpi_yearly$HPI_indexed,
  Income_indexed = income_indexed
)

ggplot(indexed_data, aes(x = Year)) +
  geom_line(aes(y = HPI_indexed, color = "HPI Indexed"), size = 1) +
  geom_line(aes(y = Income_indexed, color = "Income Indexed"), size = 1, linetype = "dashed") +
  labs(title = "Indexed Growth: HPI vs Median After-Tax Income (Base = 2005)",
       y = "Indexed Value (Base = 100)", color = "Legend") +
  theme_minimal()

Don't know how to automatically pick scale for object of type <ts>. Defaulting to continuous.

Date	Composite_HPI_SA	Single_Family_HPI_SA	One_Storey_HPI_SA	Two_Storey_HPI_SA	Townhouse_HPI_SA	Apartment_HPI_SA	Composite_Benchmark_SA	Single_Family_Benchmark_SA	One_Storey_Benchmark_SA	Two_Storey_Benchmark_SA	Townhouse_Benchmark_SA	Apartment_Benchmark_SA
<dttm>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
2005-01-01	100.0	100.0	100.0	100.0	100.0	100.0	315800	363900	307500	383300	212000	189100
2005-02-01	100.3	100.4	100.6	100.2	100.1	100.3	316600	365200	309400	384100	212200	189600
2005-03-01	100.8	100.9	100.8	100.8	100.6	101.0	318300	367200	309900	386500	213300	191000
2005-04-01	101.2	101.3	101.7	101.1	100.8	101.2	319500	368700	312700	387400	213600	191400
2005-05-01	101.7	101.8	101.8	101.7	101.2	102.1	321300	370600	313000	389900	214600	193100
2005-06-01	102.4	102.5	102.1	102.4	101.6	102.8	323300	372900	314100	392400	215300	194400
2005-07-01	102.9	103.1	102.6	103.0	102.0	102.9	325000	375100	315400	394800	216200	194500
2005-08-01	103.5	103.8	103.8	103.5	102.4	103.4	327000	377600	319200	396700	217100	195500
2005-09-01	103.7	103.8	103.6	103.6	102.8	103.8	327500	377800	318600	397000	218000	196300
2005-10-01	104.5	104.8	104.7	104.5	103.6	104.0	330100	381200	322100	400400	219600	196600
2005-11-01	105.1	105.2	105.3	104.9	103.9	104.8	331800	382900	323800	402200	220200	198200
2005-12-01	105.4	105.4	105.3	105.2	104.7	105.7	333000	383700	323700	403300	222000	199900
2006-01-01	106.2	106.3	106.6	105.9	105.1	105.9	335300	386900	327700	406100	222800	200300
2006-02-01	106.6	106.7	106.8	106.4	105.8	107.1	336800	388400	328500	407700	224200	202500
2006-03-01	107.2	107.4	107.9	106.9	105.9	106.9	338600	390900	331900	409900	224500	202100
2006-04-01	107.8	108.0	108.3	107.5	106.6	107.5	340300	392900	333000	412100	225900	203200
2006-05-01	108.1	108.4	109.2	107.8	106.8	107.8	341500	394300	335700	413100	226500	203800
2006-06-01	108.2	108.4	108.8	107.9	107.5	107.7	341700	394600	334600	413700	227800	203700
2006-07-01	108.3	108.4	108.8	107.9	107.6	108.5	342000	394400	334500	413500	228200	205200
2006-08-01	108.4	108.5	108.5	108.1	107.8	108.8	342300	394900	333500	414500	228600	205800
2006-09-01	108.9	109.0	109.2	108.5	108.6	109.6	343900	396600	335700	415700	230200	207300
2006-10-01	108.7	108.8	109.0	108.3	108.6	109.7	343400	395800	335200	415000	230300	207500
2006-11-01	109.2	109.3	109.9	108.7	109.3	109.9	344800	397800	337900	416800	231800	207800
2006-12-01	109.7	109.8	110.7	109.1	109.6	110.5	346400	399500	340300	418300	232400	208900
2007-01-01	109.8	109.8	110.5	109.1	110.2	111.3	346800	399400	339900	418200	233600	210400
2007-02-01	111.2	111.4	111.9	110.9	110.8	111.7	351200	405400	344000	425000	235000	211200
2007-03-01	111.9	112.1	112.7	111.5	111.6	112.9	353500	407900	346600	427200	236600	213500
2007-04-01	112.7	112.8	112.9	112.3	112.1	113.9	355800	410600	347200	430500	237700	215300
2007-05-01	113.3	113.4	113.6	112.9	112.7	115.0	357800	412800	349300	432900	238900	217400
2007-06-01	114.2	114.3	114.7	113.7	113.4	116.4	360800	416000	352700	435900	240500	220100
⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮
2022-03-01	403.9	421.2	427.1	418.1	441.9	405.7	1275600	1532800	1313400	1602500	936800	767100
2022-04-01	393.8	409.0	412.5	406.5	435.0	403.0	1243700	1488300	1268400	1558100	922300	762100
2022-05-01	383.6	396.5	396.5	394.8	426.6	398.6	1211400	1442800	1219200	1513300	904400	753800
2022-06-01	371.5	382.8	378.8	382.1	416.7	389.2	1173200	1393100	1164900	1464400	883400	735900
2022-07-01	362.0	371.6	367.0	370.8	409.7	384.0	1143300	1352100	1128500	1421300	868500	726100
2022-08-01	355.6	364.4	359.8	362.6	397.9	380.3	1123100	1326000	1106300	1390000	843600	719100
2022-09-01	352.6	361.9	352.8	361.5	394.1	375.1	1113500	1317000	1084800	1385500	835500	709400
2022-10-01	349.9	358.8	351.8	357.6	391.2	372.6	1104900	1305600	1081700	1370800	829400	704600
2022-11-01	348.3	357.6	350.9	356.0	388.3	369.3	1099900	1301200	1079000	1364400	823100	698400
2022-12-01	345.8	354.8	347.9	354.2	383.3	367.8	1092000	1291200	1069900	1357700	812700	695600
2023-01-01	340.9	349.9	343.5	350.4	380.1	364.6	1076600	1273200	1056300	1343200	805800	689400
2023-02-01	339.0	349.1	345.5	349.3	375.2	358.0	1070600	1270300	1062400	1339000	795400	676900
2023-03-01	340.6	352.0	350.1	351.6	378.3	356.4	1075700	1281100	1076700	1347500	802000	673900
2023-04-01	347.5	360.8	358.0	360.2	384.2	357.7	1097300	1312800	1101000	1380800	814500	676500
2023-05-01	356.3	370.4	367.3	369.7	392.1	364.7	1125200	1348000	1129300	1417100	831200	689700
2023-06-01	362.4	376.9	373.1	376.1	396.2	372.5	1144500	1371700	1147400	1441600	839900	704400
2023-07-01	364.7	379.7	376.1	378.6	398.8	375.0	1151700	1381600	1156600	1451200	845500	709200
2023-08-01	363.2	377.9	372.0	376.8	402.1	374.9	1147100	1375100	1143900	1444400	852500	708900
2023-09-01	360.1	374.3	369.4	373.1	402.1	375.3	1137200	1362100	1135900	1430000	852500	709700
2023-10-01	354.6	368.6	363.2	367.5	398.2	370.0	1119900	1341500	1116700	1408800	844100	699700
2023-11-01	349.4	363.1	357.8	361.8	390.0	366.9	1103300	1321300	1100300	1386700	826900	693900
2023-12-01	345.6	358.7	352.6	358.1	388.3	363.6	1091500	1305200	1084300	1372700	823300	687500
2024-01-01	343.4	357.1	353.5	357.0	383.4	359.9	1084500	1299400	1087000	1368400	812900	680600
2024-02-01	344.1	359.1	355.0	359.6	383.7	356.7	1086700	1306900	1091600	1378200	813500	674600
2024-03-01	344.2	360.3	354.4	361.2	382.3	355.6	1087000	1311200	1089900	1384400	810500	672500
2024-04-01	344.8	361.7	357.4	362.0	382.2	355.8	1088900	1316100	1098900	1387700	810200	672800
2024-05-01	343.5	360.6	356.8	360.8	381.4	353.8	1084900	1312400	1097100	1382800	808600	669100
2024-06-01	344.3	361.3	358.5	361.0	382.8	354.4	1087400	1314800	1102400	1383600	811500	670100
2024-07-01	344.7	361.6	357.3	361.4	381.3	354.6	1088600	1315800	1098600	1385300	808300	670500
2024-08-01	344.8	361.7	358.4	361.1	379.6	353.4	1089000	1316300	1102000	1384000	804700	668300

Housing Affordability Gap in the GTA¶

Step 1: Load Required Libraries¶

Step 2: Import Excel Data¶

Step 3: Extract Individual Sheets¶

Step 4: Clean and Prepare the Data¶

Step 4: Create Date Columns¶

Step 5: Handle Missing Values (Forward Fill)¶

Step 6: Aggregate Monthly Housing Data to Yearly Averages¶

Step 7: Plot HPI vs Median After-Tax Income (Dual Y-Axis)¶

Step 8 : Normalize Yearly HPI and Income (Base Year = 2005)¶

Step 9: Calculate and Visualize the Affordability Ratio (HPI / Income)¶

📉 Interpretation: The Growing Gap Between Income and Housing Prices¶

🔺 Affordability has sharply worsened:¶

⚠️ Critical surges in 2015–2017 and 2020–2023:¶

📉 Income hasn’t kept pace:¶