DataViz in R | 04. Bar Chart Multiple Responses

Target result

http://www.datavisualisation-r.com/pdf/barcharts_multiple_all_2.pdf

In the previous bar charts, each bar aligns to the left of x-axis. We will highlight the difference between positive and negative responses by dividing each bar into two halves from a central axis. This may obscure the comparison of the length between the bars. However, this is a chart that represents a total of 100% for each bar, and the lengths of the bars are equal, so we can omit it.

#Load neccessary lib
library(ggplot2)
library(dplyr)
#Setting width and height
theme_set(theme_minimal())
options(repr.plot.width=10, repr.plot.height=6)

#Load data
ZA4800_DE <- readRDS("./myData/EVS_2008/germany.Rda")
ZA4800_DE <- ZA4800_DE[,-1]
head(ZA4800_DE)

A tibble: 6 × 7
v159	v160	v161	v162	v163	v164	v165
<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>
4	1	4	3	3	4	3
3	2	2	2	2	4	4
1	3	3	3	1	1	1
1	4	2	3	1	1	2
3	1	1	2	3	2	3
2	2	2	2	2	2	1

#Load library HAVEN for reading hvn_* type
library(haven)
#Data wrangling by library TIDYR
library(tidyr)

evs <- ZA4800_DE %>%
    pivot_longer(cols=everything(), names_to = "Question", values_to = "Answer") %>%
    mutate(Anstype = factor(Answer, levels = c(-2, -1, 1, 2, 3, 4), 
                            labels = c("n.a./don't know", "n.a./don't know", "agree strongly", "agree", "disagree", "disagree strongly"))) %>%
    #the default .add=FALSE group_by() will override existing groups.
    group_by(Question, Anstype, .add=T) %>%
    #Add summarized column after grouping
    summarize(Count = n())

[1m[22m`summarise()` has grouped output by 'Question'. You can override using the `.groups` argument.

head(evs,10)

A grouped_df: 10 × 3
Question	Anstype	Count
<chr>	<fct>	<int>
v159	n.a./don't know	67
v159	agree strongly	803
v159	agree	782
v159	disagree	311
v159	disagree strongly	112
v160	n.a./don't know	104
v160	agree strongly	332
v160	agree	647
v160	disagree	655
v160	disagree strongly	337

#Load question description
#These descriptions have already been incorporated into label of "Question"
#However, it's short and not suitable for presentation purpose

Quesdesc = c("v159" = "A working mother can establish just as warm and\nsecure an environment as a non-working mother", 
             "v160" = "A pre-school child is likely to suffer if\nhis or her mother is working",
             "v161" = "A job is alright, but what most women\nreally want is a home and children",
             "v162" = "Being a housewife is just as fulfilling as\nworking",
             "v163" = "Having a job is the best way for a woman\nto be independent",
             "v164" = "Both the husband and wife should contribute\nto the family income",
             "v165" = "In general, fathers are as well suited to\nlook after their children as women")

#Create custom color vector based on origin graph(using eye-dropper)
color_evs <- c("n.a./don't know" = "#bebebe", 
               "agree strongly" = "#00d0e2", 
               "agree" = "#6ddde1", 
               "disagree" = "#ff8aee", 
               "disagree strongly" = "#ff00d2")

Initially, I have no idea about the key word to search how to draw this. It seems like a “likert plot”, however, the many links I found using another package (HH). The more appropriate term is “diverging stacked bar” Try with this one: https://rfortherestofus.com/2021/10/diverging-bar-chart/

The trick here is turn the negative answers to negative percentage value

Bonus: A very interesting article that I found, in which they compare the diverging stacked bar with others https://blog.datawrapper.de/divergingbars/

#We need to create a percentage column, in which the negative percentage represents negative answer
evs_614 <- evs %>%
           group_by(Question, .add=T) %>%
           mutate(percent_answers = Count / sum(Count)) %>%
           mutate(plot_perc = ifelse(Anstype %in% c("disagree", "disagree strongly"), percent_answers, -percent_answers))

head(evs_614)

A grouped_df: 6 × 5
Question	Anstype	Count	percent_answers	plot_perc
<chr>	<fct>	<int>	<dbl>	<dbl>
v159	n.a./don't know	67	0.03228916	-0.03228916
v159	agree strongly	803	0.38698795	-0.38698795
v159	agree	782	0.37686747	-0.37686747
v159	disagree	311	0.14987952	0.14987952
v159	disagree strongly	112	0.05397590	0.05397590
v160	n.a./don't know	104	0.05012048	-0.05012048

ggplot(evs_614, aes(x=plot_perc, y=Question)) +
    geom_col(mapping=aes(fill=Anstype))

png

#Looks good. Now we change the order of level using forcats fct_relevel

library(forcats)
levels(evs_614$Anstype)

'n.a./don\'t know''agree strongly''agree''disagree''disagree strongly'

evs_614 %>%
    mutate(Anstype = fct_relevel(Anstype, 'agree strongly', 'agree', 'n.a./don\'t know', 'disagree strongly','disagree')) %>%
    ggplot(aes(x=plot_perc, y=Question)) +
    geom_col(mapping=aes(fill=Anstype)) +
    #Change the order of legend accordingly
    scale_fill_manual(values=color_evs,
                      limits=c('agree strongly', 'agree', 'n.a./don\'t know', 'disagree strongly','disagree'))

png

Next, I want the n.a tag goes to the very left of the plot.

The original idea in R-base is using multiple layers: one for the neutral group, one for the negative answers, one for the positive answers and the fourth for the gap between them.

#add a geom_col layer and move to x=-1 using position_nudge
#The gap is too big, so I reordered it
#To avoid changing "negative answer" to "positive percent" and have unneccessary confusion
#I decided to reverse the x-axis

evs_614 %>%
    mutate(Anstype = fct_relevel(Anstype, 'agree strongly', 'agree', 'n.a./don\'t know', 'disagree strongly','disagree')) %>%
    ggplot(aes(x=plot_perc, y=Question)) +
    #subset to create geom_col for non-neutral group
    geom_col(data=subset(evs_614, Anstype != "n.a./don't know"), mapping=aes(fill=Anstype)) +
    #subset to create geom_col for neutral group
    geom_col(data=subset(evs_614, Anstype == "n.a./don't know"), mapping=aes(x=-plot_perc, fill=Anstype),
             position = position_nudge(-1)) +
    #Change the order of legend accordingly
    scale_fill_manual(values=color_evs,
                     limits=c('agree strongly', 'agree', 'n.a./don\'t know', 'disagree strongly','disagree')) +
    #Reverse x-axis and also label them
    scale_x_reverse(breaks = seq(-0.6, 0.8, 0.2), labels= function(x) round(abs(x)*100,0))

png

In above graph, subset function does perfectly, except for the levels new subset need to be reordered again.

In addition, the order of question is also wrong direction. We try to reorder it based on the number of total negative answer (i.e disagree + disagree strongly).

Found the idea for solution in SO

#Data wrangling and reorder all levels
#reorder: reorder based on levels of another column
#But before it, we need to identify which is the correct order of level using relevel

evs_614X <-    
    evs_614 %>%
    mutate(Anstype = fct_relevel(Anstype, 'agree strongly', 'agree', 'n.a./don\'t know', 'disagree strongly','disagree')) %>%
    group_by(Question) %>%
    #Be aware of "Count" in the below bracket is the name of column, not a function
    mutate(num_positive = sum(Count[Anstype %in% c('disagree strongly','disagree')])) %>%
    #ungroup to clear the grouping in reorder Question 
    ungroup() %>%
    mutate(Question = fct_reorder(Question, num_positive))

head(evs_614X)

A tibble: 6 × 6
Question	Anstype	Count	percent_answers	plot_perc	num_positive
<fct>	<fct>	<int>	<dbl>	<dbl>	<int>
v159	n.a./don't know	67	0.03228916	-0.03228916	423
v159	agree strongly	803	0.38698795	-0.38698795	423
v159	agree	782	0.37686747	-0.37686747	423
v159	disagree	311	0.14987952	0.14987952	423
v159	disagree strongly	112	0.05397590	0.05397590	423
v160	n.a./don't know	104	0.05012048	-0.05012048	992

#Create our plot

plot_614 <- evs_614X %>%
    ggplot(aes(x=plot_perc, y=Question)) +
    #subset to create geom_col for non-neutral group
    geom_col(data=subset(evs_614X, Anstype != "n.a./don't know"), mapping=aes(fill=Anstype)) +
    #subset to create geom_col for neutral group
    geom_col(data=subset(evs_614X, Anstype == "n.a./don't know"), mapping=aes(x=-plot_perc, fill=Anstype),
             position = position_nudge(-1)) +
    #Add zero point segment, to distingush positive and negative answer
    geom_segment(aes(x=0, y=0.25, xend=0, yend=+7.75), color="#6ca6cd", linewidth=0.75) +
    #Change the order of legend accordingly
    scale_fill_manual(values=color_evs,
                     limits=c('n.a./don\'t know', 'agree strongly', 'agree', 'disagree','disagree strongly')) +
    #Reverse x-axis and also label them
    scale_x_continuous(breaks = seq(-0.8, 0.6, 0.2), labels= function(x) round(abs(x)*100,0)) +
    #mapping label of y axis to description
    scale_y_discrete(labels=Quesdesc) +
    #add annotate
    annotate("text", x=-1, y=8, label = "N=2,075", hjust=0) +
    annotate("text", x=0.7, y=8, label="all values in percent", hjust=1, fontface="italic") +
    #avoid clipping through the annotate
    coord_cartesian(clip="off") +
    #edit the labels
    labs(x=NULL, y=NULL,
         title="It is often said that attitudes towards gender roles are changing",
         caption="Source: European Values Study 2008 Germany, ZA4800. www.gesis.org.") +
    #changing theme
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.caption = element_text(face="italic"),
          plot.title.position = "plot",
          plot.title = element_text(margin=margin(b=20)),
          legend.position = "bottom",
          legend.title = element_blank(),
          legend.spacing.x = unit(0.5, "cm"),)
                       
plot_614

png

#Changing the font
library(extrafont)
theme_set(theme_minimal(base_family = "Lato Light"))
plot_614 + 
    theme(plot.title = element_text(family="Lato Black"))

png

Final result in svg

ggsave("6.1.4 Bar Chart for mulres - Advance.svg", last_plot(), device=svg, bg="white", width = 20, height = 12, units="cm")

Barchart Likert

DataViz in R | 04. Bar Chart Multiple Responses - Likert Plot

Target result

Final result in svg