From 559762182856bc28cf97e50b81281c001c1d8c5b Mon Sep 17 00:00:00 2001 From: Reese Hirota <122996073+rhirotacouncil@users.noreply.github.com> Date: Thu, 9 Nov 2023 15:32:24 -0500 Subject: [PATCH] 2021 update --- code/02_agency_demo_improv.Rmd | 10 +- code/04_updated_python_visual.ipynb | 538 ++++++++++++++++---- visuals/agency_diversity_distance_2021.html | 71 +++ 3 files changed, 522 insertions(+), 97 deletions(-) create mode 100644 visuals/agency_diversity_distance_2021.html diff --git a/code/02_agency_demo_improv.Rmd b/code/02_agency_demo_improv.Rmd index 9423a77..0438983 100644 --- a/code/02_agency_demo_improv.Rmd +++ b/code/02_agency_demo_improv.Rmd @@ -243,7 +243,7 @@ percent_plot <- cp %>% filter(agency_name =='DSNY',gender2!="Other") %>% ggplot(aes(x = data_year, y = pct.chg_middle)) + facet_wrap(~gender2, nrow=1) + geom_hline(yintercept=0, size = 0.5, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("Percent Change") + xlab("Year") + ylab("Percent") + easy_plot_title_size(14) @@ -255,7 +255,7 @@ relative_plot <- cp %>% filter(agency_name =='DSNY',gender2!="Other") %>% scale_color_discrete(name = "Year") + theme_nycc(facet=TRUE) + ggtitle("Relative Difference") + xlab("Year") + ylab("Number") + easy_plot_title_size(14) -final <- grid.arrange(percent_plot, relative_plot, ncol=2, top = textGrob("DSNY: Gender Diversity (2018-2020)",gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) +final <- grid.arrange(percent_plot, relative_plot, ncol=2, top = textGrob("DSNY: Gender Diversity (2018-2021)",gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) ggsave(plot = final, "../visuals/DSNY_diversity_by-gender.png") ``` @@ -275,7 +275,7 @@ percent_plot <- cp %>% filter(agency_name =='DOP') %>% ggplot(aes(x = data_year, y = pct.chg_middle)) + facet_wrap(~race_ethnicity, nrow=1) + geom_hline(yintercept=0, size = 0.25, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("DOP Race") + xlab("Year") + ylab("Percent") + easy_plot_title_size(14) @@ -283,11 +283,11 @@ relative_plot <- cp %>% filter(agency_name =='DOP') %>% ggplot(aes(x = data_year, y = diff_middle)) + facet_wrap(~race_ethnicity, nrow=1) + geom_hline(yintercept=0, size = 0.25, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)),aes(label = round(diff_middle, 0))) + # diff_middle rounded to 0 to make more sense + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)),aes(label = round(diff_middle, 0))) + # diff_middle rounded to 0 to make more sense scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("DOP Race") + xlab("Year") + ylab("Relative Difference") + easy_plot_title_size(14) -final2 <- grid.arrange(percent_plot, relative_plot, nrow=2, top = textGrob("DOP: Racial Diversity (2018-2020)", gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) +final2 <- grid.arrange(percent_plot, relative_plot, nrow=2, top = textGrob("DOP: Racial Diversity (2018-2021)", gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) ggsave(plot = final2, "../visuals/DOP_diversity_by-race.png") ``` diff --git a/code/04_updated_python_visual.ipynb b/code/04_updated_python_visual.ipynb index 11ba049..c5194b9 100644 --- a/code/04_updated_python_visual.ipynb +++ b/code/04_updated_python_visual.ipynb @@ -16,33 +16,31 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# ----- reading in 2018, 2019, 2020 data from DCAS worker profile\n", - "dat = pd.read_csv('../data/input/agency_diversity_2018.csv')\n", - "dat_19 = pd.read_csv('../data/input/agency_diversity_2019.csv')\n", - "dat_20 = pd.read_csv('../data/input/agency_diversity_2020.csv')\n", + "# dat = pd.read_csv('../data/input/agency_diversity_2018.csv')\n", + "# dat_19 = pd.read_csv('../data/input/agency_diversity_2019.csv')\n", + "# dat_20 = pd.read_csv('../data/input/agency_diversity_2020.csv')\n", + "dat_21 = pd.read_csv('../data/input/agency_diversity_2021.csv')\n", "\n", "# ----- from ACS 2018 1-year\n", "# note for self: labor_force not filtered\n", - "dat_city = [52,48,32,22,29,14,3] # f, m, white, black, hisp, asian, sor\n", + "# dat_city = [52,48,32,22,29,14,3] # f, m, white, black, hisp, asian, sor\n", "# # using ACS 2019 for BOTH 2019 & 2020 \n", "# # 2020 not fully released due to pandemic\n", "# dat_city_19 = [52,48,32,22,29,14,3]\n", "# dat_city_20 = [52,48,32,22,29,14,3]\n", + "dat_city = [52,48,31,20,29,14,5] # 2021\n", "\n", - "# --- ** TO DO: add ACS data for 2019, 2020 ** ---\n", - "# dat_city_19=[]\n", - "# dat_city_20=[]\n", - "\n", - "columns=dat.columns" + "columns=dat_21.columns" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -50,70 +48,44 @@ "output_type": "stream", "text": [ " Agency # Employees %Female %Male %White %Black %Hispanic %Asian \\\n", - "0 DCAS 2386 38 62 29 35 24 10 \n", - "1 DOITT 1528 39 61 37 27 14 19 \n", - "2 MAYORALTY 1230 58 42 44 20 16 16 \n", - "3 BOE 765 48 52 31 30 15 4 \n", - "4 NYCERS 453 61 39 31 36 9 22 \n", + "0 DCAS 2298 39 61 26 34 25 12 \n", + "1 DOITT 1673 38 62 35 26 15 20 \n", + "2 MAYORALTY 1243 59 41 42 20 16 18 \n", + "3 BOE 946 48 52 31 31 18 4 \n", + "4 NYCERS 484 61 39 30 35 11 22 \n", ".. ... ... ... ... ... ... ... ... \n", - "67 FDNY 17526 11 89 66 13 17 4 \n", - "68 DOC 12680 42 58 14 57 22 5 \n", - "69 DOP 1053 69 31 14 64 16 3 \n", - "70 DOI 376 54 46 42 25 15 13 \n", - "71 NYCEM 194 48 52 57 14 12 9 \n", - "\n", - " %SOR check gender ... d3_race d4_gender d4_race d3_gender_norm \\\n", - "0 3 100 ... 14.798649 0.141414 0.269128 32.558140 \n", - "1 4 100 ... 17.349352 0.131062 0.408857 30.232558 \n", - "2 5 100 ... 18.027756 0.060606 0.405535 13.953488 \n", - "3 20 100 ... 25.495098 0.040000 0.891293 9.302326 \n", - "4 2 100 ... 25.729361 0.091547 0.602895 20.930233 \n", - ".. ... ... ... ... ... ... ... \n", - "67 1 100 ... 38.535698 0.475032 0.960253 95.348837 \n", - "68 2 100 ... 40.987803 0.100361 0.822641 23.255814 \n", - "69 2 100 ... 48.774994 0.177843 1.007812 39.534884 \n", - "70 5 100 ... 17.606817 0.020072 0.402092 4.651163 \n", - "71 8 100 ... 32.062439 0.040000 0.794846 9.302326 \n", + "67 FDNY 17564 12 88 62 13 19 4 \n", + "68 DOC 10372 44 56 12 59 22 5 \n", + "69 DOP 1084 69 31 11 63 20 3 \n", + "70 DOI 341 55 45 43 26 16 10 \n", + "71 NYCEM 206 50 49 53 18 11 10 \n", "\n", - " d3_race_norm d3_norm_high d3_gender_norm_high d3_race_norm_high \\\n", - "0 16.773531 False False False \n", - "1 22.360975 False False False \n", - "2 23.847056 False False False \n", - "3 40.204647 False False False \n", - "4 40.717812 False False False \n", - ".. ... ... ... ... \n", - "67 68.770746 True True False \n", - "68 74.142207 False False False \n", - "69 91.200445 False False True \n", - "70 22.924966 False False False \n", - "71 54.590743 False False False \n", + " %SOR check gender check eth/race \n", + "0 3 100 100 \n", + "1 4 100 100 \n", + "2 4 100 100 \n", + "3 16 100 100 \n", + "4 2 100 100 \n", + ".. ... ... ... \n", + "67 1 100 99 \n", + "68 2 100 100 \n", + "69 2 100 99 \n", + "70 5 100 100 \n", + "71 8 99 100 \n", "\n", - " d3_gender_race_norm_high Median_Salary \n", - "0 False 60552 \n", - "1 False 87731 \n", - "2 False 78000 \n", - "3 False 45461 \n", - "4 False 55011 \n", - ".. ... ... \n", - "67 True 85292 \n", - "68 False 78090 \n", - "69 True 61210 \n", - "70 False 67906 \n", - "71 False 76000 \n", - "\n", - "[72 rows x 31 columns]\n" + "[72 rows x 11 columns]\n" ] } ], "source": [ "# print(columns)\n", "# print(dat_20.columns)\n", - "print(dat)" + "print(dat_21)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -139,18 +111,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "dat = calculate(dat)\n", - "dat_19 = calculate(dat_19)\n", - "dat_20 = calculate(dat_20)" + "# dat = calculate(dat)\n", + "# dat_19 = calculate(dat_19)\n", + "# dat_20 = calculate(dat_20)\n", + "dat_21 = calculate(dat_21)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -165,40 +138,421 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "dat = calc_high(dat)\n", - "dat_19 = calc_high(dat_19)\n", - "dat_20 = calc_high(dat_20)" + "# dat = calc_high(dat)\n", + "# dat_19 = calc_high(dat_19)\n", + "# dat_20 = calc_high(dat_20)\n", + "dat_21 = calc_high(dat_21)" ] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Agency | \n", + "# Employees | \n", + "%Female | \n", + "%Male | \n", + "%White | \n", + "%Black | \n", + "%Hispanic | \n", + "%Asian | \n", + "%SOR | \n", + "check gender | \n", + "... | \n", + "d3_race | \n", + "d4_gender | \n", + "d4_race | \n", + "d3_gender_norm | \n", + "d3_race_norm | \n", + "d3_norm_high | \n", + "d3_gender_norm_high | \n", + "d3_race_norm_high | \n", + "d3_gender_race_norm_high | \n", + "Median_Salary | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "DCAS | \n", + "2298 | \n", + "39 | \n", + "61 | \n", + "26 | \n", + "34 | \n", + "25 | \n", + "12 | \n", + "3 | \n", + "100 | \n", + "... | \n", + "15.652476 | \n", + "0.131062 | \n", + "0.373988 | \n", + "30.232558 | \n", + "18.536471 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "65913 | \n", + "
1 | \n", + "DOITT | \n", + "1673 | \n", + "38 | \n", + "62 | \n", + "35 | \n", + "26 | \n", + "15 | \n", + "20 | \n", + "4 | \n", + "100 | \n", + "... | \n", + "16.881943 | \n", + "0.141414 | \n", + "0.398402 | \n", + "32.558140 | \n", + "21.214165 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "94533 | \n", + "
2 | \n", + "MAYORALTY | \n", + "1243 | \n", + "59 | \n", + "41 | \n", + "42 | \n", + "20 | \n", + "16 | \n", + "18 | \n", + "4 | \n", + "100 | \n", + "... | \n", + "17.521415 | \n", + "0.070857 | \n", + "0.337842 | \n", + "16.279070 | \n", + "22.606891 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "86476 | \n", + "
3 | \n", + "BOE | \n", + "946 | \n", + "48 | \n", + "52 | \n", + "31 | \n", + "31 | \n", + "18 | \n", + "4 | \n", + "16 | \n", + "100 | \n", + "... | \n", + "21.517435 | \n", + "0.040000 | \n", + "0.764547 | \n", + "9.302326 | \n", + "31.309944 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "50000 | \n", + "
4 | \n", + "NYCERS | \n", + "484 | \n", + "61 | \n", + "39 | \n", + "30 | \n", + "35 | \n", + "11 | \n", + "22 | \n", + "2 | \n", + "100 | \n", + "... | \n", + "24.959968 | \n", + "0.091547 | \n", + "0.694957 | \n", + "20.930233 | \n", + "38.807543 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "64802 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
67 | \n", + "FDNY | \n", + "17564 | \n", + "12 | \n", + "88 | \n", + "62 | \n", + "13 | \n", + "19 | \n", + "4 | \n", + "1 | \n", + "100 | \n", + "... | \n", + "35.014283 | \n", + "0.459559 | \n", + "0.988005 | \n", + "93.023256 | \n", + "60.705143 | \n", + "True | \n", + "True | \n", + "False | \n", + "True | \n", + "85292 | \n", + "
68 | \n", + "DOC | \n", + "10372 | \n", + "44 | \n", + "56 | \n", + "12 | \n", + "59 | \n", + "22 | \n", + "5 | \n", + "2 | \n", + "100 | \n", + "... | \n", + "44.955534 | \n", + "0.080128 | \n", + "0.987521 | \n", + "18.604651 | \n", + "82.356497 | \n", + "False | \n", + "False | \n", + "True | \n", + "True | \n", + "85292 | \n", + "
69 | \n", + "DOP | \n", + "1084 | \n", + "69 | \n", + "31 | \n", + "11 | \n", + "63 | \n", + "20 | \n", + "3 | \n", + "2 | \n", + "100 | \n", + "... | \n", + "49.598387 | \n", + "0.177843 | \n", + "1.126783 | \n", + "39.534884 | \n", + "92.468310 | \n", + "False | \n", + "False | \n", + "True | \n", + "True | \n", + "58106 | \n", + "
70 | \n", + "DOI | \n", + "341 | \n", + "55 | \n", + "45 | \n", + "43 | \n", + "26 | \n", + "16 | \n", + "10 | \n", + "5 | \n", + "100 | \n", + "... | \n", + "19.104973 | \n", + "0.030148 | \n", + "0.374076 | \n", + "6.976744 | \n", + "26.055770 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "73886 | \n", + "
71 | \n", + "NYCEM | \n", + "206 | \n", + "50 | \n", + "49 | \n", + "53 | \n", + "18 | \n", + "11 | \n", + "10 | \n", + "8 | \n", + "99 | \n", + "... | \n", + "28.930952 | \n", + "0.014959 | \n", + "0.580986 | \n", + "3.677067 | \n", + "47.456071 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "81410 | \n", + "
72 rows × 31 columns
\n", + "