第2週

12/14(TH) 所得と富の不平等の現状1

       所得と富の不平等の現状2

講義では、第2週、第3週と World Inequality Report 2022 を使って、所得と富の不平等について議論します。

12/19(TU) Rでデータサイエンス2:人口の少子高齢化  [Main]・[授業]

People- Population dynamics

総人口 Population, total:SP.POP.TOTL [Link]

出生率(千人)Birth rate, crude (per 1,000 people):SP.DYN.CBRT.IN [Link]

死亡率(千人)Death rate, crude (per 1,000 people):SP.DYN.CDRT.IN [Link]

若年扶養比率 Age dependency ratio, young (% of working-age population):SP.POP.DPND.YG [Link]

高齢扶養比率 Age dependency ratio, old (% of working-age population):SP.POP.DPND.OL [Link to Metadata]

内容

トピック:人口の少子高齢化

library(tidyverse)
library(WDI)
# Download five World Bank population indicators through the WDI package.
# The names on the left are the column names used in the rest of these notes.
df_pop <- WDI(
  indicator = c(
    pop        = "SP.POP.TOTL",    # Population, total
    birth_rate = "SP.DYN.CBRT.IN", # Birth rate, crude (per 1,000 people)
    death_rate = "SP.DYN.CDRT.IN", # Death rate, crude (per 1,000 people)
    young      = "SP.POP.DPND.YG", # Age dependency ratio, young
    old        = "SP.POP.DPND.OL"  # Age dependency ratio, old
  )
)

# Keep a local CSV copy, then reload the data from that copy.
write_csv(df_pop, "data/pop.csv")
df_pop <- read_csv(file = "data/pop.csv")
Rows: 16758 Columns: 9── Column specification ───────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): country, iso2c, iso3c
dbl (6): year, pop, birth_rate, death_rate, young, old
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the reloaded data to check its contents.
df_pop

それぞれ十分なデータがあるかチェック

# Reshape the five indicator columns into long format: one row per
# country-year-indicator, with the indicator name in `name` and its value in
# `value`. The columns are selected by name rather than by position (5:9) so
# the reshape does not silently pick the wrong columns if the layout changes.
df_pop_long <- df_pop |>
  pivot_longer(
    cols = c(pop, birth_rate, death_rate, young, old),
    names_to = "name",
    values_to = "value"
  )

# Count the non-missing values of each indicator per year, newest year first.
df_pop_long |>
  drop_na(value) |>
  group_by(year, name) |>
  summarize(num = n()) |>
  arrange(desc(year))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Line chart of the number of non-missing values per year, one line per
# indicator.
df_pop_long |>
  drop_na(value) |>
  group_by(year, name) |>
  summarize(num = n()) |>
  ggplot(mapping = aes(x = year, y = num, colour = name)) +
  geom_line() +
  labs(title = "各指標の年毎のデータ数", y = "データ数", x = "年")
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

253から265の国・地域のデータがあり、問題なし

# Same five indicators as before, this time with extra = TRUE so that WDI also
# returns its additional country-level columns.
df_pop_extra <- WDI(
  indicator = c(
    pop        = "SP.POP.TOTL",
    birth_rate = "SP.DYN.CBRT.IN",
    death_rate = "SP.DYN.CDRT.IN",
    young      = "SP.POP.DPND.YG",
    old        = "SP.POP.DPND.OL"
  ),
  extra = TRUE
)

# Keep a local CSV copy, then reload the data from that copy.
write_csv(df_pop_extra, "data/pop_extra.csv")
df_pop_extra <- read_csv(file = "data/pop_extra.csv")

# Two views of the structure: base R str() and the tidyverse glimpse().
str(df_pop_extra)
glimpse(df_pop_extra)

いくつかの文字ベクトルの内容確認

# List the distinct values found in each of the classification columns.
df_pop_extra |>
  select(region, income, lending, status) |>
  lapply(unique)

# Collect the iso2c codes of rows whose region is "Aggregates" or missing.
REGION <- df_pop_extra |>
  filter(region == "Aggregates" | is.na(region)) |>
  distinct(iso2c) |>
  pull(iso2c)
length(REGION)

他の指定の仕方と比較

# Same selection written with %in% and a single value (missing regions are
# not included this time).
REGION1 <- df_pop_extra |>
  filter(region %in% c("Aggregates")) |>
  distinct(iso2c) |>
  pull(iso2c)
length(REGION1)

# Check that a plain == comparison yields exactly the same codes.
df_pop_extra |>
  filter(region == "Aggregates") |>
  distinct(iso2c) |>
  pull(iso2c) |>
  identical(REGION1)

# Show which country names the codes in REGION correspond to.
df_pop |>
  filter(iso2c %in% REGION) |>
  distinct(country, iso2c)

Czechia, CZ, Not classified, Viet Nam, VN

# Look at all country/code pairs, then at the rows for CZ and VN.
df_pop_extra |>
  distinct(country, iso2c)
df_pop_extra |>
  filter(iso2c %in% c("CZ", "VN"))

# Remove CZ and VN from REGION and sort the remaining codes.
length(REGION)
REGION <- sort(REGION[!(REGION %in% c("CZ", "VN"))])
length(REGION)
REGION

# Entries whose code is in REGION, followed by those whose code is not.
df_pop |>
  filter(iso2c %in% REGION) |>
  distinct(country, iso2c)
df_pop |>
  filter(!(iso2c %in% REGION)) |>
  distinct(country, iso2c) |>
  arrange(country)

# Print REGION as an R expression, then its length.
dput(REGION)
length(REGION)
# Distinct classification values among the rows that are not in REGION.
df_pop_extra |>
  filter(!(iso2c %in% REGION)) |>
  select(region, income, lending, status) |>
  lapply(unique)

# The same list, printed as an R expression.
df_pop_extra |>
  filter(!(iso2c %in% REGION)) |>
  select(region, income, lending, status) |>
  lapply(unique) |>
  dput()

# Rows outside REGION that have no region value.
df_pop_extra |>
  filter(!(iso2c %in% REGION), is.na(region)) |>
  distinct(country, iso2c)

# Rows outside REGION whose income group is "Not classified" or missing.
df_pop_extra |>
  filter(
    !(iso2c %in% REGION),
    income == "Not classified" | is.na(income)
  ) |>
  distinct(country, iso2c, income)

# Rows outside REGION whose lending category is "Not classified" or missing.
df_pop_extra |>
  filter(
    !(iso2c %in% REGION),
    lending == "Not classified" | is.na(lending)
  ) |>
  distinct(country, iso2c, lending)

World Bank Country and Lending Groups [Link]

How does the World Bank classify countries? [Link]

World Bank Group country classifications by income level for FY24 (July 1, 2023- June 30, 2024) [Link]

Operational lending categories

Economies are also divided into IDA, IBRD, and Blend categories based on the operational policies of the World Bank. International Development Association (IDA) countries are those with low per capita incomes that lack the financial ability to borrow from the International Bank for Reconstruction and Development (IBRD). Blend countries are eligible for IDA loans but are also eligible for IBRD loans because they are financially creditworthy.

# Income group and lending category recorded for China in each year.
df_pop_extra |>
  filter(country == "China") |>
  select(country, year, income, lending)

総人口

総人口 Population, total:SP.POP.TOTL [Link]

# Total population over time for the "World" row.
df_pop |>
  filter(country == "World") |>
  ggplot(mapping = aes(x = year, y = pop)) +
  geom_line() +
  labs(title = "世界の人口")

# Total population over time for Japan.
df_pop |>
  filter(country == "Japan") |>
  ggplot(mapping = aes(x = year, y = pop)) +
  geom_line() +
  labs(title = "日本の人口")
# Total population over time for the country named in COUNTRY.
# The filter now uses COUNTRY; it was hard-coded to "France", so the chart
# showed French data under a title that says Germany (ドイツ).
# When changing COUNTRY, update the title below to match.
COUNTRY <- "Germany"
df_pop |>
  filter(country == COUNTRY) |>
  ggplot(aes(year, pop)) +
  geom_line() +
  labs(title = "ドイツの人口")
# 2022 rows whose code is not in REGION, ordered from largest population down.
df_pop |>
  filter(!(iso2c %in% REGION), year == 2022) |>
  arrange(desc(pop))

# The first ten rows of that ranking.
df_pop |>
  filter(!(iso2c %in% REGION), year == 2022) |>
  arrange(desc(pop)) |>
  slice_head(n = 10)

# iso2c codes of the first eleven rows of the ranking.
pop_top11 <- df_pop |>
  filter(!(iso2c %in% REGION), year == 2022) |>
  arrange(desc(pop)) |>
  slice_head(n = 11) |>
  pull(iso2c)
pop_top11
dput(pop_top11)

# Population over time for those eleven codes, one line per code.
df_pop |>
  filter(iso2c %in% pop_top11) |>
  ggplot(mapping = aes(x = year, y = pop, colour = iso2c)) +
  geom_line()
# Population over time for the seven named regions, one line per region.
df_pop |>
  filter(
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  ggplot(mapping = aes(x = year, y = pop, colour = country)) +
  geom_line()

# iso2c codes of the same seven regions, printed as an R expression.
df_pop |>
  filter(
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  distinct(iso2c) |>
  pull(iso2c) |>
  dput()

# The seven regions in 2022, ordered from largest population down.
df_pop |>
  filter(
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  filter(year == 2022) |>
  arrange(desc(pop))
# Population over time for the codes in pop_top11 except CN and IN,
# coloured by country name.
df_pop |>
  filter(iso2c %in% pop_top11, !(iso2c %in% c("CN", "IN"))) |>
  ggplot(mapping = aes(x = year, y = pop, colour = country)) +
  geom_line()

# Same chart coloured by iso2c, with the legend entries following the order
# of pop_top11.
df_pop |>
  filter(iso2c %in% pop_top11, !(iso2c %in% c("CN", "IN"))) |>
  ggplot(
    mapping = aes(x = year, y = pop, colour = factor(iso2c, levels = pop_top11))
  ) +
  geom_line() +
  labs(colour = "iso2c")

出生率と死亡率

出生率(千人)Birth rate, crude (per 1,000 people):SP.DYN.CBRT.IN [Link]

死亡率(千人)Death rate, crude (per 1,000 people):SP.DYN.CDRT.IN [Link]

# Birth and death rates over time for the "World" row.
df_pop_long |>
  filter(name %in% c("birth_rate", "death_rate"), country == "World") |>
  ggplot(mapping = aes(x = year, y = value, colour = name)) +
  geom_line()

# Birth and death rates for six selected countries:
# colour marks the country, line type marks the indicator.
df_pop_long |>
  filter(
    name %in% c("birth_rate", "death_rate"),
    iso2c %in% c("BD", "BR", "CN", "ID", "NG", "JP")
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line()

# The same chart for seven codes given by iso2c.
df_pop_long |>
  filter(
    name %in% c("birth_rate", "death_rate"),
    iso2c %in% c("Z4", "Z7", "ZJ", "ZQ", "XU", "8S", "ZG")
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line()
# Birth rate against death rate in 2020 for rows outside REGION that have a
# classified income group, coloured by income group.
df_pop_extra |>
  filter(
    !(iso2c %in% REGION),
    year == 2020,
    !is.na(income),
    income != "Not classified"
  ) |>
  drop_na(birth_rate, death_rate) |>
  ggplot(mapping = aes(x = birth_rate, y = death_rate, colour = income)) +
  geom_point()

# The same scatter plot for 1960, 1990 and 2020, one panel per year.
df_pop_extra |>
  filter(
    !(iso2c %in% REGION),
    year %in% c(1960, 1990, 2020),
    !is.na(income),
    income != "Not classified"
  ) |>
  drop_na(birth_rate, death_rate) |>
  ggplot(mapping = aes(x = birth_rate, y = death_rate, colour = income)) +
  geom_point() +
  facet_wrap(~ year) +
  theme(legend.position = "bottom") +
  labs(colour = "")

Google Public Data Explorer: Link

扶養家族の労働人口に対する割合

若年扶養比率 Age dependency ratio, young (% of working-age population):SP.POP.DPND.YG [Link]

Age dependency ratio, young, is the ratio of younger dependents–people younger than 15–to the working-age population–those ages 15-64. Data are shown as the proportion of dependents per 100 working-age population.

年齢別扶養比率(若年)は、15歳未満の扶養家族の、15歳から64歳までの生産年齢人口に対する比率である。データは、生産年齢人口100人当たりの扶養家族の割合で示されている。

高齢扶養比率 Age dependency ratio, old (% of working-age population):SP.POP.DPND.OL [Link to Metadata]

Age dependency ratio, old, is the ratio of older dependents–people older than 64–to the working-age population–those ages 15-64. Data are shown as the proportion of dependents per 100 working-age population.

年齢別扶養比率(高齢)は、生産年齢人口(15~64歳)に対する高齢扶養家族(65歳以上)の比率。データは、生産年齢人口100人当たりの扶養家族の割合で示されている。

# Young and old dependency ratios over time for the "World" row.
df_pop_long |>
  filter(name %in% c("young", "old"), country == "World") |>
  ggplot(mapping = aes(x = year, y = value, colour = name)) +
  geom_line()

# Both ratios for six selected countries:
# colour marks the country, line type marks the indicator.
df_pop_long |>
  filter(
    name %in% c("young", "old"),
    iso2c %in% c("BD", "BR", "CN", "ID", "NG", "JP")
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line()

# Young against old ratio in 2020 for rows outside REGION that have a
# classified income group, coloured by income group.
df_pop_extra |>
  filter(
    !(iso2c %in% REGION),
    year == 2020,
    !is.na(income),
    income != "Not classified"
  ) |>
  drop_na(young, old) |>
  ggplot(mapping = aes(x = young, y = old, colour = income)) +
  geom_point()

# The same scatter plot restricted to the Sub-Saharan Africa region.
df_pop_extra |>
  filter(region == "Sub-Saharan Africa", year == 2020) |>
  drop_na(young, old) |>
  ggplot(mapping = aes(x = young, y = old, colour = income)) +
  geom_point()

# Both ratios over time for seven further countries.
df_pop_long |>
  filter(
    name %in% c("young", "old"),
    iso2c %in% c("US", "GB", "CN", "DE", "FR", "JP", "IN")
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line()
# Young and old dependency ratios for the seven named regions in one panel:
# colour marks the region, line type marks the indicator.
df_pop_long |>
  filter(
    name %in% c("young", "old"),
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line() +
  labs(
    title = "地域別の労働人口に対する高齢・若年扶養率(%)",
    subtitle = "実線:高齢者、点線:若年者",
    x = "",
    colour = "",
    linetype = ""
  )

# The same data with one panel per region and the legend hidden.
df_pop_long |>
  filter(
    name %in% c("young", "old"),
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line() +
  facet_wrap(~country) +
  theme(legend.position = "none") +
  labs(
    title = "地域別の労働人口に対する高齢・若年扶養率(%)",
    subtitle = "実線:高齢者、点線:若年者",
    x = "",
    y = ""
  )

The default figure size is fig.width = 7 and fig.height = 5.

# One panel per region, arranged in a grid of 2 rows and 4 columns, with the
# legend hidden.
df_pop_long |>
  filter(
    name %in% c("young", "old"),
    country %in% c(
      "South Asia", "Europe & Central Asia", "Middle East & North Africa",
      "East Asia & Pacific", "Sub-Saharan Africa",
      "Latin America & Caribbean", "North America"
    )
  ) |>
  ggplot(
    mapping = aes(x = year, y = value, colour = country, linetype = name)
  ) +
  geom_line() +
  facet_wrap(~country, nrow = 2, ncol = 4) +
  theme(legend.position = "none") +
  labs(
    title = "地域別の労働人口に対する高齢・若年扶養率(%)",
    subtitle = "実線:高齢者、点線:若年者",
    x = "",
    y = ""
  )
