From 559762182856bc28cf97e50b81281c001c1d8c5b Mon Sep 17 00:00:00 2001 From: Reese Hirota <122996073+rhirotacouncil@users.noreply.github.com> Date: Thu, 9 Nov 2023 15:32:24 -0500 Subject: [PATCH] 2021 update --- code/02_agency_demo_improv.Rmd | 10 +- code/04_updated_python_visual.ipynb | 538 ++++++++++++++++---- visuals/agency_diversity_distance_2021.html | 71 +++ 3 files changed, 522 insertions(+), 97 deletions(-) create mode 100644 visuals/agency_diversity_distance_2021.html diff --git a/code/02_agency_demo_improv.Rmd b/code/02_agency_demo_improv.Rmd index 9423a77..0438983 100644 --- a/code/02_agency_demo_improv.Rmd +++ b/code/02_agency_demo_improv.Rmd @@ -243,7 +243,7 @@ percent_plot <- cp %>% filter(agency_name =='DSNY',gender2!="Other") %>% ggplot(aes(x = data_year, y = pct.chg_middle)) + facet_wrap(~gender2, nrow=1) + geom_hline(yintercept=0, size = 0.5, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("Percent Change") + xlab("Year") + ylab("Percent") + easy_plot_title_size(14) @@ -255,7 +255,7 @@ relative_plot <- cp %>% filter(agency_name =='DSNY',gender2!="Other") %>% scale_color_discrete(name = "Year") + theme_nycc(facet=TRUE) + ggtitle("Relative Difference") + xlab("Year") + ylab("Number") + easy_plot_title_size(14) -final <- grid.arrange(percent_plot, relative_plot, ncol=2, top = textGrob("DSNY: Gender Diversity (2018-2020)",gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) +final <- grid.arrange(percent_plot, relative_plot, ncol=2, top = textGrob("DSNY: Gender Diversity (2018-2021)",gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) ggsave(plot = final, "../visuals/DSNY_diversity_by-gender.png") ``` @@ -275,7 +275,7 @@ percent_plot <- cp %>% filter(agency_name =='DOP') %>% ggplot(aes(x = data_year, y = pct.chg_middle)) + facet_wrap(~race_ethnicity, nrow=1) + geom_hline(yintercept=0, size = 0.25, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)), aes(label = round(pct.chg_middle, digits=2))) + # pct.chg_middle rounded to 3 decimal places scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("DOP Race") + xlab("Year") + ylab("Percent") + easy_plot_title_size(14) @@ -283,11 +283,11 @@ relative_plot <- cp %>% filter(agency_name =='DOP') %>% ggplot(aes(x = data_year, y = diff_middle)) + facet_wrap(~race_ethnicity, nrow=1) + geom_hline(yintercept=0, size = 0.25, color="grey") + - geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020)),aes(label = round(diff_middle, 0))) + # diff_middle rounded to 0 to make more sense + geom_point(aes(color=data_year), show.legend = FALSE) + geom_label_repel(data = ~ subset(., data_year %in% c(2019,2020,2021)),aes(label = round(diff_middle, 0))) + # diff_middle rounded to 0 to make more sense scale_color_discrete() + theme_nycc(facet=TRUE) + ggtitle("DOP Race") + xlab("Year") + ylab("Relative Difference") + easy_plot_title_size(14) -final2 <- grid.arrange(percent_plot, relative_plot, nrow=2, top = textGrob("DOP: Racial Diversity (2018-2020)", gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) +final2 <- grid.arrange(percent_plot, relative_plot, nrow=2, top = textGrob("DOP: Racial Diversity (2018-2021)", gp=gpar(fontsize=19, fontfamily="Georgia", font=2))) ggsave(plot = final2, "../visuals/DOP_diversity_by-race.png") ``` diff --git a/code/04_updated_python_visual.ipynb b/code/04_updated_python_visual.ipynb index 11ba049..c5194b9 100644 --- a/code/04_updated_python_visual.ipynb +++ b/code/04_updated_python_visual.ipynb @@ -16,33 +16,31 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# ----- reading in 2018, 2019, 2020 data from DCAS worker profile\n", - "dat = pd.read_csv('../data/input/agency_diversity_2018.csv')\n", - "dat_19 = pd.read_csv('../data/input/agency_diversity_2019.csv')\n", - "dat_20 = pd.read_csv('../data/input/agency_diversity_2020.csv')\n", + "# dat = pd.read_csv('../data/input/agency_diversity_2018.csv')\n", + "# dat_19 = pd.read_csv('../data/input/agency_diversity_2019.csv')\n", + "# dat_20 = pd.read_csv('../data/input/agency_diversity_2020.csv')\n", + "dat_21 = pd.read_csv('../data/input/agency_diversity_2021.csv')\n", "\n", "# ----- from ACS 2018 1-year\n", "# note for self: labor_force not filtered\n", - "dat_city = [52,48,32,22,29,14,3] # f, m, white, black, hisp, asian, sor\n", + "# dat_city = [52,48,32,22,29,14,3] # f, m, white, black, hisp, asian, sor\n", "# # using ACS 2019 for BOTH 2019 & 2020 \n", "# # 2020 not fully released due to pandemic\n", "# dat_city_19 = [52,48,32,22,29,14,3]\n", "# dat_city_20 = [52,48,32,22,29,14,3]\n", + "dat_city = [52,48,31,20,29,14,5] # 2021\n", "\n", - "# --- ** TO DO: add ACS data for 2019, 2020 ** ---\n", - "# dat_city_19=[]\n", - "# dat_city_20=[]\n", - "\n", - "columns=dat.columns" + "columns=dat_21.columns" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -50,70 +48,44 @@ "output_type": "stream", "text": [ " Agency # Employees %Female %Male %White %Black %Hispanic %Asian \\\n", - "0 DCAS 2386 38 62 29 35 24 10 \n", - "1 DOITT 1528 39 61 37 27 14 19 \n", - "2 MAYORALTY 1230 58 42 44 20 16 16 \n", - "3 BOE 765 48 52 31 30 15 4 \n", - "4 NYCERS 453 61 39 31 36 9 22 \n", + "0 DCAS 2298 39 61 26 34 25 12 \n", + "1 DOITT 1673 38 62 35 26 15 20 \n", + "2 MAYORALTY 1243 59 41 42 20 16 18 \n", + "3 BOE 946 48 52 31 31 18 4 \n", + "4 NYCERS 484 61 39 30 35 11 22 \n", ".. ... ... ... ... ... ... ... ... \n", - "67 FDNY 17526 11 89 66 13 17 4 \n", - "68 DOC 12680 42 58 14 57 22 5 \n", - "69 DOP 1053 69 31 14 64 16 3 \n", - "70 DOI 376 54 46 42 25 15 13 \n", - "71 NYCEM 194 48 52 57 14 12 9 \n", - "\n", - " %SOR check gender ... d3_race d4_gender d4_race d3_gender_norm \\\n", - "0 3 100 ... 14.798649 0.141414 0.269128 32.558140 \n", - "1 4 100 ... 17.349352 0.131062 0.408857 30.232558 \n", - "2 5 100 ... 18.027756 0.060606 0.405535 13.953488 \n", - "3 20 100 ... 25.495098 0.040000 0.891293 9.302326 \n", - "4 2 100 ... 25.729361 0.091547 0.602895 20.930233 \n", - ".. ... ... ... ... ... ... ... \n", - "67 1 100 ... 38.535698 0.475032 0.960253 95.348837 \n", - "68 2 100 ... 40.987803 0.100361 0.822641 23.255814 \n", - "69 2 100 ... 48.774994 0.177843 1.007812 39.534884 \n", - "70 5 100 ... 17.606817 0.020072 0.402092 4.651163 \n", - "71 8 100 ... 32.062439 0.040000 0.794846 9.302326 \n", + "67 FDNY 17564 12 88 62 13 19 4 \n", + "68 DOC 10372 44 56 12 59 22 5 \n", + "69 DOP 1084 69 31 11 63 20 3 \n", + "70 DOI 341 55 45 43 26 16 10 \n", + "71 NYCEM 206 50 49 53 18 11 10 \n", "\n", - " d3_race_norm d3_norm_high d3_gender_norm_high d3_race_norm_high \\\n", - "0 16.773531 False False False \n", - "1 22.360975 False False False \n", - "2 23.847056 False False False \n", - "3 40.204647 False False False \n", - "4 40.717812 False False False \n", - ".. ... ... ... ... \n", - "67 68.770746 True True False \n", - "68 74.142207 False False False \n", - "69 91.200445 False False True \n", - "70 22.924966 False False False \n", - "71 54.590743 False False False \n", + " %SOR check gender check eth/race \n", + "0 3 100 100 \n", + "1 4 100 100 \n", + "2 4 100 100 \n", + "3 16 100 100 \n", + "4 2 100 100 \n", + ".. ... ... ... \n", + "67 1 100 99 \n", + "68 2 100 100 \n", + "69 2 100 99 \n", + "70 5 100 100 \n", + "71 8 99 100 \n", "\n", - " d3_gender_race_norm_high Median_Salary \n", - "0 False 60552 \n", - "1 False 87731 \n", - "2 False 78000 \n", - "3 False 45461 \n", - "4 False 55011 \n", - ".. ... ... \n", - "67 True 85292 \n", - "68 False 78090 \n", - "69 True 61210 \n", - "70 False 67906 \n", - "71 False 76000 \n", - "\n", - "[72 rows x 31 columns]\n" + "[72 rows x 11 columns]\n" ] } ], "source": [ "# print(columns)\n", "# print(dat_20.columns)\n", - "print(dat)" + "print(dat_21)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -139,18 +111,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "dat = calculate(dat)\n", - "dat_19 = calculate(dat_19)\n", - "dat_20 = calculate(dat_20)" + "# dat = calculate(dat)\n", + "# dat_19 = calculate(dat_19)\n", + "# dat_20 = calculate(dat_20)\n", + "dat_21 = calculate(dat_21)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -165,40 +138,421 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "dat = calc_high(dat)\n", - "dat_19 = calc_high(dat_19)\n", - "dat_20 = calc_high(dat_20)" + "# dat = calc_high(dat)\n", + "# dat_19 = calc_high(dat_19)\n", + "# dat_20 = calc_high(dat_20)\n", + "dat_21 = calc_high(dat_21)" ] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Agency# Employees%Female%Male%White%Black%Hispanic%Asian%SORcheck gender...d3_raced4_genderd4_raced3_gender_normd3_race_normd3_norm_highd3_gender_norm_highd3_race_norm_highd3_gender_race_norm_highMedian_Salary
0DCAS22983961263425123100...15.6524760.1310620.37398830.23255818.536471FalseFalseFalseFalse65913
1DOITT16733862352615204100...16.8819430.1414140.39840232.55814021.214165FalseFalseFalseFalse94533
2MAYORALTY12435941422016184100...17.5214150.0708570.33784216.27907022.606891FalseFalseFalseFalse86476
3BOE9464852313118416100...21.5174350.0400000.7645479.30232631.309944FalseFalseFalseFalse50000
4NYCERS4846139303511222100...24.9599680.0915470.69495720.93023338.807543FalseFalseFalseFalse64802
..................................................................
67FDNY17564128862131941100...35.0142830.4595590.98800593.02325660.705143TrueTrueFalseTrue85292
68DOC10372445612592252100...44.9555340.0801280.98752118.60465182.356497FalseFalseTrueTrue85292
69DOP1084693111632032100...49.5983870.1778431.12678339.53488492.468310FalseFalseTrueTrue58106
70DOI3415545432616105100...19.1049730.0301480.3740766.97674426.055770FalseFalseFalseFalse73886
71NYCEM206504953181110899...28.9309520.0149590.5809863.67706747.456071FalseFalseFalseFalse81410
\n", + "

72 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Agency # Employees %Female %Male %White %Black %Hispanic %Asian \\\n", + "0 DCAS 2298 39 61 26 34 25 12 \n", + "1 DOITT 1673 38 62 35 26 15 20 \n", + "2 MAYORALTY 1243 59 41 42 20 16 18 \n", + "3 BOE 946 48 52 31 31 18 4 \n", + "4 NYCERS 484 61 39 30 35 11 22 \n", + ".. ... ... ... ... ... ... ... ... \n", + "67 FDNY 17564 12 88 62 13 19 4 \n", + "68 DOC 10372 44 56 12 59 22 5 \n", + "69 DOP 1084 69 31 11 63 20 3 \n", + "70 DOI 341 55 45 43 26 16 10 \n", + "71 NYCEM 206 50 49 53 18 11 10 \n", + "\n", + " %SOR check gender ... d3_race d4_gender d4_race d3_gender_norm \\\n", + "0 3 100 ... 15.652476 0.131062 0.373988 30.232558 \n", + "1 4 100 ... 16.881943 0.141414 0.398402 32.558140 \n", + "2 4 100 ... 17.521415 0.070857 0.337842 16.279070 \n", + "3 16 100 ... 21.517435 0.040000 0.764547 9.302326 \n", + "4 2 100 ... 24.959968 0.091547 0.694957 20.930233 \n", + ".. ... ... ... ... ... ... ... \n", + "67 1 100 ... 35.014283 0.459559 0.988005 93.023256 \n", + "68 2 100 ... 44.955534 0.080128 0.987521 18.604651 \n", + "69 2 100 ... 49.598387 0.177843 1.126783 39.534884 \n", + "70 5 100 ... 19.104973 0.030148 0.374076 6.976744 \n", + "71 8 99 ... 28.930952 0.014959 0.580986 3.677067 \n", + "\n", + " d3_race_norm d3_norm_high d3_gender_norm_high d3_race_norm_high \\\n", + "0 18.536471 False False False \n", + "1 21.214165 False False False \n", + "2 22.606891 False False False \n", + "3 31.309944 False False False \n", + "4 38.807543 False False False \n", + ".. ... ... ... ... \n", + "67 60.705143 True True False \n", + "68 82.356497 False False True \n", + "69 92.468310 False False True \n", + "70 26.055770 False False False \n", + "71 47.456071 False False False \n", + "\n", + " d3_gender_race_norm_high Median_Salary \n", + "0 False 65913 \n", + "1 False 94533 \n", + "2 False 86476 \n", + "3 False 50000 \n", + "4 False 64802 \n", + ".. ... ... \n", + "67 True 85292 \n", + "68 True 85292 \n", + "69 True 58106 \n", + "70 False 73886 \n", + "71 False 81410 \n", + "\n", + "[72 rows x 31 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# ---- used to find marker points\n", "# print(dat_19)\n", - "# dat_20[dat_20['d3_gender_norm']>45]" + "# dat_20[dat_20['d3_gender_norm']>45]\n", + "# dat_21[dat_21['d3_gender_norm']>45]\n", + "dat_21" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# --- ** TO DO: add salary data for 2019, 2020 ** ---\n", "# print(dat)\n", "# MEDIAN SALARIES DONT MATCH ?????????\n", - "dat = dat.assign(Median_Salary = [\"60552\", \"87731\", \"78000\", \"45461\", \"55011\", \"70543\", \"79484\", \"64378\", \"55590\", \"46000\", \"73000\", \"117420\", \"70154\", \"73903\", \"44319\", \"112516\", \"56872\", \"69901\", \"59482\", \"86392\", \"82264\", \"91792\", \"73939\", \"77000\", \"64475\", \"76191\", \"64139\", \"61349\", \"70868\", \"65850\", \"62246\", \"46698\", \"58987\", \"61800\", \"61800\", \"61824\", \"70514\", \"82278\", \"107022\", \"59963\", \"46689\", \"65073\", \"57590\", \"46737\", \"75000\", \"68000\", \"77318\", \"78877\", \"69632\", \"65568\", \"75691\", \"75000\", \"70959\", \"72000\", \"61800\", \"62698\", \"65000\", \"60276\", \"78446\", \"64460\", \"69593\", \"64796\", \"65625\", \"78000\", \"76275\", \"80829\", \"85292\", \"85292\", \"78090\", \"61210\", \"67906\", \"76000\"])\n", - "dat_19 = dat_19.assign(Median_Salary = [\"61336\", \"91499\", \"83436\", \"38463\", \"57295\", \"72839\", \"79862\", \"64640\", \"55798\", \"49025\", \"71001\", \"123237\", \"73148\", \"74396\", \"44675\", \"117468\", \"59502\", \"71491\", \"60000\", \"96081\", \"91499\", \"93728\", \"84053\", \"64418\", \"66205\", \"77387\", \"64139\", \"64202\", \"70000\", \"55000\", \"67500\", \"56432\", \"60167\", \"47135\", \"63939\", \"63946\", \"66398\", \"69498\", \"114752\", \"60788\", \"50763\", \"62577\", \"57426\", \"51993\", \"72874\", \"70152\", \"77318\", \"80449\", \"67624\", \"67792\", \"81872\", \"76484\", \"73986\", \"72000\", \"69000\", \"67800\", \"72904\", \"93067\", \"82209\", \"69228\", \"72957\", \"65292\", \"68444\", \"78000\", \"79568\", \"79150\", \"79802\", \"85292\", \"85292\", \"56658\", \"72079\", \"81350\"])\n", - "dat_20 = dat_20.assign(Median_Salary = [\"65062\", \"94244\", \"85000\", \"45000\", \"59014\", \"76600\", \"80982\", \"66511\", \"56072\", \"46474\", \"77488\", \"135000\", \"74842\", \"79174\", \"53712\", \"120875\", \"61093\", \"76303\", \"64062\", \"99275\", \"90812\", \"91899\", \"92482\", \"66950\", \"70450\", \"89273\", \"66024\", \"64888\", \"72100\", \"55000\", \"70324\", \"53816\", \"61521\", \"50635\", \"66400\", \"65864\", \"73829\", \"71506\", \"113300\", \"62000\", \"52433\", \"64890\", \"60327\", \"55853\", \"77362\", \"74650\", \"77318\", \"83905\", \"70094\", \"69826\", \"85646\", \"77768\", \"75591\", \"72500\", \"69000\", \"67000\", \"73722\", \"100357\", \"81000\", \"69245\", \"70000\", \"67275\", \"70497\", \"83164\", \"91563\", \"76587\", \"81266\", \"85292\", \"85292\", \"58106\", \"73886\", \"81410\"])\n", - "index = dat.index\n", - "# print(index)" + "# dat = dat.assign(Median_Salary = [\"60552\", \"87731\", \"78000\", \"45461\", \"55011\", \"70543\", \"79484\", \"64378\", \"55590\", \"46000\", \"73000\", \"117420\", \"70154\", \"73903\", \"44319\", \"112516\", \"56872\", \"69901\", \"59482\", \"86392\", \"82264\", \"91792\", \"73939\", \"77000\", \"64475\", \"76191\", \"64139\", \"61349\", \"70868\", \"65850\", \"62246\", \"46698\", \"58987\", \"61800\", \"61800\", \"61824\", \"70514\", \"82278\", \"107022\", \"59963\", \"46689\", \"65073\", \"57590\", \"46737\", \"75000\", \"68000\", \"77318\", \"78877\", \"69632\", \"65568\", \"75691\", \"75000\", \"70959\", \"72000\", \"61800\", \"62698\", \"65000\", \"60276\", \"78446\", \"64460\", \"69593\", \"64796\", \"65625\", \"78000\", \"76275\", \"80829\", \"85292\", \"85292\", \"78090\", \"61210\", \"67906\", \"76000\"])\n", + "# dat_19 = dat_19.assign(Median_Salary = [\"61336\", \"91499\", \"83436\", \"38463\", \"57295\", \"72839\", \"79862\", \"64640\", \"55798\", \"49025\", \"71001\", \"123237\", \"73148\", \"74396\", \"44675\", \"117468\", \"59502\", \"71491\", \"60000\", \"96081\", \"91499\", \"93728\", \"84053\", \"64418\", \"66205\", \"77387\", \"64139\", \"64202\", \"70000\", \"55000\", \"67500\", \"56432\", \"60167\", \"47135\", \"63939\", \"63946\", \"66398\", \"69498\", \"114752\", \"60788\", \"50763\", \"62577\", \"57426\", \"51993\", \"72874\", \"70152\", \"77318\", \"80449\", \"67624\", \"67792\", \"81872\", \"76484\", \"73986\", \"72000\", \"69000\", \"67800\", \"72904\", \"93067\", \"82209\", \"69228\", \"72957\", \"65292\", \"68444\", \"78000\", \"79568\", \"79150\", \"79802\", \"85292\", \"85292\", \"56658\", \"72079\", \"81350\"])\n", + "# dat_20 = dat_20.assign(Median_Salary = [\"65062\", \"94244\", \"85000\", \"45000\", \"59014\", \"76600\", \"80982\", \"66511\", \"56072\", \"46474\", \"77488\", \"135000\", \"74842\", \"79174\", \"53712\", \"120875\", \"61093\", \"76303\", \"64062\", \"99275\", \"90812\", \"91899\", \"92482\", \"66950\", \"70450\", \"89273\", \"66024\", \"64888\", \"72100\", \"55000\", \"70324\", \"53816\", \"61521\", \"50635\", \"66400\", \"65864\", \"73829\", \"71506\", \"113300\", \"62000\", \"52433\", \"64890\", \"60327\", \"55853\", \"77362\", \"74650\", \"77318\", \"83905\", \"70094\", \"69826\", \"85646\", \"77768\", \"75591\", \"72500\", \"69000\", \"67000\", \"73722\", \"100357\", \"81000\", \"69245\", \"70000\", \"67275\", \"70497\", \"83164\", \"91563\", \"76587\", \"81266\", \"85292\", \"85292\", \"58106\", \"73886\", \"81410\"])\n", + "dat_21 = dat_21.assign(Median_Salary = [\"65913\", \"94533\", \"86476\", \"50000\", \"64802\", \"78054\", \"82730\", \"65655\", \"65297\", \"46474\", \"73275\", \"129217\", \"72437\", \"79746\", \"58028\", \"121423\", \"61093\", \"78197\", \"64434\", \"107129\", \"88380\", \"94831\", \"98388\", \"90573\", \"72562\", \"80000\", \"68000\", \"68563\", \"75296\", \"69500\", \"69762\", \"53816\", \"62598\", \"67828\", \"67828\", \"69656\", \"83900\", \"89890\", \"115043\", \"65000\", \"54064\", \"70554\", \"60327\", \"58741\", \"80000\", \"75053\", \"83465\", \"86251\", \"74289\", \"69826\", \"85847\", \"79953\", \"76206\", \"72500\", \"70300\", \"69604\", \"73049\", \"66950\", \"83140\", \"69245\", \"71722\", \"72712\", \"70497\", \"79370\", \"79568\", \"86830\", \"85292\", \"85292\", \"92073\", \"58343\", \"75824\", \"83791\"])\n", + "index = dat_21.index\n", + "# print(index)\n", + "\n", + "# double check median salaries from past 4 years" ] }, { @@ -224,16 +578,16 @@ " 'Median_Salary': 'Median Salary',\n", " 'd3_gender_race_norm_high': ''})\n", " # ---- 2019 highlights: add LPC, remove IBO ----\n", - " fig.add_trace(go.Scatter(x=[44.186047],y=[84.970717], mode ='markers',\n", - " marker_symbol = 'circle',\n", - " marker_size = dat[columns[4]],\n", - " marker_color = '#611919',\n", - " hoverinfo='none'))\n", - " fig.add_trace(go.Scatter(x=[11.627907],y=[75.258282], mode ='markers',\n", - " marker_symbol = 'circle',\n", - " marker_size = dat[columns[4]],\n", - " marker_color = '#193161',\n", - " hoverinfo='none'))\n", + " # fig.add_trace(go.Scatter(x=[44.186047],y=[84.970717], mode ='markers',\n", + " # marker_symbol = 'circle',\n", + " # marker_size = dat[columns[4]],\n", + " # marker_color = '#611919',\n", + " # hoverinfo='none'))\n", + " # fig.add_trace(go.Scatter(x=[11.627907],y=[75.258282], mode ='markers',\n", + " # marker_symbol = 'circle',\n", + " # marker_size = dat[columns[4]],\n", + " # marker_color = '#193161',\n", + " # hoverinfo='none'))\n", " # ---- 2020 highlights: add FDNYPF, remove DA-SI\n", " # fig.add_trace(go.Scatter(x=[48.837209],y=[83.206688], mode ='markers',\n", " # marker_symbol = 'circle',\n", @@ -255,7 +609,7 @@ " fig.add_annotation(xref=\"x domain\", yref=\"y domain\", x=0.005,\n", " y=0.98, text=\"Farthest\", showarrow=False)\n", " fig.add_annotation(xref=\"x domain\", yref=\"y domain\",\n", - " x=0.99, y=0.9, text=\"2019 NYC Population:
Female = 52%
Black = 22%
Hispanic = 29%
Asian = 14%
SOR = 3%\", showarrow=False)\n", + " x=0.99, y=0.9, text=\"2021 NYC Population:
Female = 52%
Black = 20%
Hispanic = 29%
Asian = 14%
SOR = 5%\", showarrow=False)\n", " # fig.update_layout(showlegend=False)\n", " newnames = set()\n", " fig.for_each_trace(\n", @@ -299,7 +653,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.10.9" }, "orig_nbformat": 4 }, diff --git a/visuals/agency_diversity_distance_2021.html b/visuals/agency_diversity_distance_2021.html new file mode 100644 index 0000000..bae7b66 --- /dev/null +++ b/visuals/agency_diversity_distance_2021.html @@ -0,0 +1,71 @@ + + + +
+
+ + \ No newline at end of file