1月9日(火)のジニ指数の演習ファイルをもとに、1月11日(木)の講義に関係したグラフをいくつか作成し、以下に提示します。基本的に、木曜日の講義を聴きながら作成したものです。
所得または消費の分配(Distribution of income or consumption)
GINI 指数 (世界銀行推計):SI.POV.GINI [Link]
下位 10% が占める所得シェア:SI.DST.FRST.10 [Link]
下位 20% が占める所得シェア:SI.DST.FRST.20 [Link]
2番目の 20% が占める所得シェア:SI.DST.02ND.20 [Link]
3番目の 20% が占める所得シェア:SI.DST.03RD.20 [Link]
4番目の 20% が占める所得シェア:SI.DST.04TH.20 [Link]
上位 20% が占める所得シェア:SI.DST.05TH.20 [Link]
上位 10% が占める所得シェア:SI.DST.10TH.10 [Link]
library(tidyverse)
library(WDI)
library(DescTools)
地域情報を利用するために、extra = TRUE
を加えました。
# Download the Gini index together with the income-share indicators.
# The names of the indicator vector become the column names of the result;
# `extra = TRUE` is passed so that the additional country columns
# (such as `region`) are included.
df_gini_extra <- WDI(
  indicator = c(
    gini = "SI.POV.GINI",
    "0-10" = "SI.DST.FRST.10",
    "0-20" = "SI.DST.FRST.20",
    "20-40" = "SI.DST.02ND.20",
    "40-60" = "SI.DST.03RD.20",
    "60-80" = "SI.DST.04TH.20",
    "80-100" = "SI.DST.05TH.20",
    "90-100" = "SI.DST.10TH.10"
  ),
  extra = TRUE
)
何度もダウンロードしなくて良いように、保存したものを読み込みます。
# Cache the downloaded data on disk so that it does not have to be fetched
# again, then read the cached copy back in.
# write_csv() does not create missing directories, so make sure "data/"
# exists first (a no-op when the directory is already there).
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(df_gini_extra, "data/gini_extra.csv")
df_gini_extra <- read_csv("data/gini_extra.csv")
Rows: 16758 Columns: 20
── Column specification ─────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): country, iso2c, iso3c, region, capital, income, lending
dbl (11): year, gini, 0-10, 0-20, 20-40, 40-60, 60-80, 80-100, 90-100, longitude, latitude
lgl (1): status
date (1): lastupdated
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the data as read from the cache.
df_gini_extra

# Keep the identifying columns plus the contiguous column range running from
# `gini` through `region`.
df_gini_extra <- select(df_gini_extra, country, iso2c, year, gini:region)

# Rows that have a Gini value.
df_gini_extra |>
  drop_na(gini)

# The same rows sorted from the highest Gini value down, reduced to the
# distinct country / year / gini / region combinations.
df_gini_extra |>
  drop_na(gini) |>
  arrange(desc(gini)) |>
  distinct(country, year, gini, region)
# Most recent observation that has a Gini value, one per country, sorted from
# the highest Gini value down.
df_gini_extra_recent <- df_gini_extra |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  # Drop the per-country grouping so that it does not leak into later steps
  # that reuse this table.
  ungroup() |>
  arrange(desc(gini))
df_gini_extra_recent
pivot_longer(cols, names_to = "", values_to = "")
については後日あらためて説明します。ここでは、所得階層別の列(0-10 から 90-100 まで)を縦長の形式に変換し、階層の名前を levels
という名の列に、その値を value
という名の列に並べます。
# Reshape the income-share columns into long form: the name of each share
# column goes to `levels`, the share itself to `value`.
df_gini_extra_long <- pivot_longer(
  df_gini_extra,
  cols = `0-10`:`90-100`,
  names_to = "levels",
  values_to = "value"
)
df_gini_extra_long

# Long-form rows for Japan that have a Gini value.
df_gini_extra_long |>
  filter(country == "Japan") |>
  drop_na(gini) |>
  distinct(country, year, gini, levels, value)
# Countries compared in the bar charts that follow; the order of this vector
# is also used as the display order on the x axis.
COUNTRIES_D <- c("Japan", "United States", "South Africa")

# Most recent row with a Gini value for each of these countries
# (Gini index plus all income-share columns).
df_gini_extra |>
  select(country, year, gini:`90-100`) |>
  filter(country %in% COUNTRIES_D) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year))
# Dodged bar chart of every income-share bracket for the three countries,
# using each country's most recent year that has a Gini value.
df_gini_extra_long |>
  filter(country %in% COUNTRIES_D) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  ggplot(
    aes(x = factor(country, levels = COUNTRIES_D), y = value, fill = levels)
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  # Value labels sit just above each bar; the dodge width matches the bars.
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(
    title = "Distribution of Wealth in Three Countries in Recent Year",
    x = ""
  )
# Same chart as above, but without the two decile brackets so that only the
# five quintile brackets remain.
df_gini_extra_long |>
  filter(country %in% COUNTRIES_D) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  filter(!levels %in% c("0-10", "90-100")) |>
  ggplot(
    aes(x = factor(country, levels = COUNTRIES_D), y = value, fill = levels)
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(
    title = "Distribution of Wealth in Three Countries in Recent Year",
    x = ""
  )
考察:0-10, 90-100 はなくても良いかもしれない。
# Dodged bar chart of every income-share bracket for the three countries,
# restricted to the year 2010.
df_gini_extra_long |>
  filter(country %in% COUNTRIES_D, year == 2010) |>
  drop_na(gini) |>
  ggplot(
    aes(x = factor(country, levels = COUNTRIES_D), y = value, fill = levels)
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(title = "Distribution of Wealth in Three Countries in 2010", x = "")
考察:データが限られているので、年を揃えるのは難しい。最新のデータのみを使う
# Quintile brackets only (the two decile brackets are excluded), taken from
# each country's most recent year that has a Gini value.
df_gini_extra_long |>
  filter(country %in% COUNTRIES_D) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  filter(!levels %in% c("0-10", "90-100")) |>
  ggplot(
    aes(x = factor(country, levels = COUNTRIES_D), y = value, fill = levels)
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(
    title = "Distribution of Wealth in Three Countries in Recent Year",
    x = ""
  )
Derivation of the Lorenz curve and Gini coefficient for global income in 2011 [リンク]
# Build the points of the Lorenz curve: the cumulative income share at every
# tenth of the population, computed from each country's most recent income
# shares. The columns `0` ... `100` hold the cumulative share at that
# population percentage.
# The 30 / 50 / 70 points add half of the next quintile to the previous
# cumulative value; the 90 point is the total of all quintiles minus the
# top decile.
df_gini_calc_recent <- df_gini_extra_recent |>
  mutate(
    `0` = 0,
    `10` = `0-10`,
    `20` = `0-20`,
    `30` = `20` + `20-40` / 2,
    `40` = `20` + `20-40`,
    `50` = `40` + `40-60` / 2,
    `60` = `40` + `40-60`,
    `70` = `60` + `60-80` / 2,
    `80` = `60` + `60-80`,
    `90` = `80` + `80-100` - `90-100`,
    `100` = 100
  ) |>
  select(-c(`0-10`:`90-100`)) # drop the share columns that are no longer needed

# Rows without any missing value.
# (The original referred to `df_gini_calc`, which is never defined.)
df_gini_calc_recent |> drop_na()

# Long form: one row per country and population percentage, with the
# percentage converted from the column name to a number.
df_gini_calc_recent_long <- df_gini_calc_recent |>
  pivot_longer(
    `0`:`100`,
    names_to = "classes",
    values_to = "cumulative_share"
  ) |>
  mutate(classes = as.numeric(classes))

# Rows without any missing value.
# (The original referred to `df_gini_calc_long`, which is never defined.)
df_gini_calc_recent_long |> drop_na()
# Lorenz curve for Japan, drawn against the red diagonal from (0, 0) to
# (100, 100).
df_gini_calc_recent_long |>
  filter(country == "Japan") |>
  ggplot() +
  geom_line(aes(classes, cumulative_share)) +
  # The diagonal is a fixed reference line, not data: annotate() draws it
  # once, whereas geom_segment(aes(<constants>)) drew one copy per data row.
  annotate("segment", x = 0, y = 0, xend = 100, yend = 100, colour = "red") +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 100, by = 20))
# Disabled in the original and left disabled here: a text annotation at
# x = 10, y = 80 intended to show the Gini value.
ジニ指数の降順
# Most recent observation that has a Gini value, one per country, sorted from
# the highest Gini value down.
df_gini_extra_recent <- df_gini_extra |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  # Drop the per-country grouping so that it does not leak into later steps
  # that reuse this table.
  ungroup() |>
  arrange(desc(gini))
df_gini_extra_recent
# Names of the 30 countries with the highest most-recent Gini value,
# highest first.
top30gini <- df_gini_extra_recent |>
  arrange(desc(gini)) |>
  head(30) |>
  pull(country)

# Bar chart of those countries in the same order, coloured by region.
df_gini_extra_recent |>
  filter(country %in% top30gini) |>
  ggplot(aes(x = factor(country, levels = top30gini), y = gini, fill = region)) +
  geom_col() +
  theme(
    # Rotate the country names so that they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
    legend.position = "top"
  ) +
  labs(title = "Top 30 Countries of Recent Gini Index", x = "")
考察:二地域に限られています
箱ひげ図とは? [リンク]
# Box plot of the most recent Gini values per region, excluding rows without
# a region and the "Aggregates" pseudo-region.
df_gini_extra_recent |>
  drop_na(region) |>
  filter(region != "Aggregates") |>
  ggplot(aes(gini, region, fill = region)) +
  geom_boxplot() +
  # The fill legend repeats the y axis, so hide it. "none" is the documented
  # value for this; the empty string used before is not a valid position.
  theme(legend.position = "none")
# Five countries examined together; the order of this vector is used as the
# legend and facet order in the plots that follow.
SOUTH_AFRICA_FIVE <- c(
  "South Africa", "Namibia", "Eswatini", "Botswana", "Lesotho"
)

# Gini index over time for the five countries.
df_gini_extra |>
  filter(country %in% SOUTH_AFRICA_FIVE) |>
  drop_na(gini) |>
  # `levels =` is spelled out in full; the original `level =` only worked
  # through partial argument matching.
  ggplot(aes(year, gini, col = factor(country, levels = SOUTH_AFRICA_FIVE))) +
  geom_line() +
  labs(title = "Gini Index of Five Countries", col = "From gini top")
# Income share of every bracket over time, one panel per country.
df_gini_extra_long |>
  filter(country %in% SOUTH_AFRICA_FIVE) |>
  drop_na(value) |>
  ggplot(aes(year, value, col = levels)) +
  geom_line() +
  # `levels =` is spelled out in full; the original `level =` only worked
  # through partial argument matching.
  facet_wrap(~ factor(country, levels = SOUTH_AFRICA_FIVE)) +
  labs(title = "Change of ratio of each level")
早くからデータがある国のジニ指数が低いように見える
データの数も少ないので、最新のデータのみに限る
# Most recent rows for the five selected countries.
df_gini_extra_recent |>
  filter(country %in% SOUTH_AFRICA_FIVE)
SOUTH_AFRICA_FIVE
の国の順番を、Gini
指数の大きい順に並べておく
# Lorenz curves of the five countries, drawn against the red diagonal from
# (0, 0) to (100, 100).
df_gini_calc_recent_long |>
  filter(country %in% SOUTH_AFRICA_FIVE) |>
  ggplot() +
  geom_line(
    aes(
      classes, cumulative_share,
      col = factor(country, levels = SOUTH_AFRICA_FIVE)
    )
  ) +
  # The diagonal is a fixed reference line: annotate() draws it once in red.
  # The original mapped `col = country` on the segment as well, but the
  # constant colour overrode that mapping and the line was drawn per row.
  annotate("segment", x = 0, y = 0, xend = 100, yend = 100, colour = "red") +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 100, by = 20)) +
  labs(col = "From gini top")
# Dodged bar chart of the quintile brackets (the two decile brackets are
# excluded) for the five countries, using each country's most recent year
# that has a Gini value.
df_gini_extra_long |>
  filter(country %in% SOUTH_AFRICA_FIVE) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  filter(!levels %in% c("0-10", "90-100")) |>
  ggplot(
    aes(
      x = factor(country, levels = SOUTH_AFRICA_FIVE),
      y = value,
      fill = levels
    )
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(
    title = "Distribution of Wealth in Five Countries in Recent Year",
    x = ""
  )
# Scatter plot of the Gini index against the `80-100` share, coloured by
# region, with a single linear fit across all points.
df_gini_extra_recent |>
  filter(region != "Aggregates") |>
  drop_na(`80-100`) |>
  ggplot(aes(x = gini, y = `80-100`)) +
  geom_point(aes(colour = region)) +
  geom_smooth(method = "lm", formula = "y ~ x")
考察:かなり強い正の相関があり、回帰直線がかなり適合している。
# Countries examined together in this part; the order of this vector is used
# as the legend and facet order in the plots that follow.
CHOSEN_GINI_COUNTRIES <- c("Suriname", "Belize", "Brazil", "Colombia")

# Gini index over time for the chosen countries.
df_gini_extra |>
  filter(country %in% CHOSEN_GINI_COUNTRIES) |>
  drop_na(gini) |>
  # `levels =` is spelled out in full; the original `level =` only worked
  # through partial argument matching.
  ggplot(
    aes(year, gini, col = factor(country, levels = CHOSEN_GINI_COUNTRIES))
  ) +
  geom_line() +
  labs(title = "Gini Index of Chosen Countries", col = "From gini top")
# Income share of every bracket over time, one panel per country.
df_gini_extra_long |>
  filter(country %in% CHOSEN_GINI_COUNTRIES) |>
  drop_na(value) |>
  ggplot(aes(year, value, col = levels)) +
  geom_line() +
  # `levels =` is spelled out in full; the original `level =` only worked
  # through partial argument matching.
  facet_wrap(~ factor(country, levels = CHOSEN_GINI_COUNTRIES)) +
  labs(title = "Change of ratio of each level")
考察:
データの数も少ないので、最新のデータのみに限る
# Most recent rows for the chosen countries.
df_gini_extra_recent |>
  filter(country %in% CHOSEN_GINI_COUNTRIES)
考察:
CHOSEN_GINI_COUNTRIES
の国の順番を、Gini
指数の大きい順に並べておく
# Lorenz curves of the chosen countries, drawn against the red diagonal from
# (0, 0) to (100, 100).
df_gini_calc_recent_long |>
  filter(country %in% CHOSEN_GINI_COUNTRIES) |>
  ggplot() +
  geom_line(
    aes(
      classes, cumulative_share,
      col = factor(country, levels = CHOSEN_GINI_COUNTRIES)
    )
  ) +
  # The diagonal is a fixed reference line: annotate() draws it once in red.
  # The original mapped `col = country` on the segment as well, but the
  # constant colour overrode that mapping and the line was drawn per row.
  annotate("segment", x = 0, y = 0, xend = 100, yend = 100, colour = "red") +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 100, by = 20)) +
  labs(col = "From top gini")
考察:
# Dodged bar chart of the quintile brackets (the two decile brackets are
# excluded) for the chosen countries, using each country's most recent year
# that has a Gini value; rows with a missing share are left out.
df_gini_extra_long |>
  filter(country %in% CHOSEN_GINI_COUNTRIES) |>
  drop_na(gini) |>
  group_by(country) |>
  filter(year == max(year)) |>
  filter(!levels %in% c("0-10", "90-100")) |>
  drop_na(value) |>
  ggplot(
    aes(
      x = factor(country, levels = CHOSEN_GINI_COUNTRIES),
      y = value,
      fill = levels
    )
  ) +
  geom_col(position = "dodge", colour = "black", linewidth = 0.1) +
  geom_text(
    aes(group = levels, label = value),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(
    title = "Distribution of Wealth in Chosen Countries in Recent Year",
    x = ""
  )
考察: