DataViz in R | 03B. Bar Chart Multiple Responses and Questions

library(ggplot2)
library(viridis)
library(dplyr)
theme_set(theme_minimal())

# Read the Germany extract of the EVS 2008 survey that was prepared beforehand
evs <- readRDS(file = "./myData/EVS_2008/germany.Rda")
# Preview the first six respondents
head(evs, n = 6L)

A tibble: 6 × 8
v106	v159	v160	v161	v162	v163	v164	v165
<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>	<hvn_lbll>
2	4	1	4	3	3	4	3
2	3	2	2	2	2	4	4
-3	1	3	3	3	1	1	1
-3	1	4	2	3	1	1	2
-3	3	1	1	2	3	2	3
2	2	2	2	2	2	2	1

# haven and labelled give access to the value/variable labels stored in the data
library(haven)
library(labelled)
# Show the start of one labelled column together with its value labels
head(evs[["v159"]], n = 6L)

<labelled<double>[6]>: working mother warm relationship with children (Q48A)
[1] 4 3 1 1 3 2

Labels:
 value              label
    -5      other missing
    -4 question not asked
    -3     not applicable
    -2          no answer
    -1         don't know
     1     agree strongly
     2              agree
     3           disagree
     4  disagree strongly

# List the value labels of every column: the seven items v159-v165 share one
# coding scheme that runs from -5 to 4
labelled::val_labels(evs)

$v106 other missing -5 question not asked -4 not applicable -3 no answer -2 don't know -1 roman catholic 1 protestant 2 free church/ non-conformist/ evangelical 3 jew 4 muslim 5 hindu 6 buddhist 7 orthodox 8 other 9

$v159 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v160 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v161 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v162 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v163 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v164 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v165 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

Data wrangling

The negative values (from -5 to -1) are defined as missing values and are therefore not taken into account in statistical calculations. However, the answers "don't know" (dk) and "no answer" (na) still give us some insight, because the question was actually asked and the respondent reacted to it.

As a result, we will discard the values from -5 to -3 (other missing, question not asked, and not applicable) and merge no answer (-2) and don't know (-1) into a single answer category.

# For this example we drop column "v106" (the respondent's religious
# denomination).
# The column is removed by name rather than by position so the cell is safe to
# re-run: any_of() simply ignores the column once it is gone, whereas
# evs[, -1] would silently remove v159 on a second run.

evs <- dplyr::select(evs, -any_of("v106"))
head(evs)

A tibble: 6 × 7
v159	v160	v161	v162	v163	v164	v165
<dbl+lbl>	<dbl+lbl>	<dbl+lbl>	<dbl+lbl>	<dbl+lbl>	<dbl+lbl>	<dbl+lbl>
4	1	4	3	3	4	3
3	2	2	2	2	4	4
1	3	3	3	1	1	1
1	4	2	3	1	1	2
3	1	1	2	3	2	3
2	2	2	2	2	2	1

# Step 1: reshape from "wide" (one column per question) to "long"
# (one row per question/answer pair) with tidyr::pivot_longer().
# pivot_longer() only reshapes, it does not aggregate the values, so the
# long table has exactly 2075 x 7 = 14,525 rows, as the dim() calls confirm.

library(tidyr)
pivot_evs_longer <- evs %>%
    pivot_longer(
        cols = everything(),
        names_to = "Question",
        values_to = "Answer"
    )
head(pivot_evs_longer)
dim(evs)
dim(pivot_evs_longer)

A tibble: 6 × 2
Question	Answer
<chr>	<dbl+lbl>
v159	4
v160	1
v161	4
v162	3
v163	3
v164	4

2075
7

14525
2

# Step 2: summarise the answers per question with pivot_wider().
# Every answer code becomes its own column, values_fn = length counts the
# rows falling into each cell, and values_fill = 0 replaces the NA of answer
# codes that a question never received.
# https://stackoverflow.com/questions/28873057/sum-across-multiple-columns-with-dplyr

pivot_evs <- pivot_evs_longer %>%
    pivot_wider(
        names_from = "Answer",
        values_from = "Answer",
        values_fn = length,
        values_fill = 0
    )
pivot_evs

A tibble: 7 × 7
Question	4	1	3	2	-1	-2
<chr>	<int>	<int>	<int>	<int>	<int>	<int>
v159	112	803	311	782	67	0
v160	337	332	655	647	101	3
v161	477	163	769	523	138	5
v162	490	174	708	553	147	3
v163	43	851	205	909	64	3
v164	52	772	205	985	55	6
v165	70	568	447	887	96	7

# Add the row total and one "<code>_%" percentage column per answer code
# (approach suggested by ChatGPT; the result is only printed, not stored)

pivot_evs %>%
    mutate(
        Total = rowSums(select(., -Question)),
        across(
            -c(Question, Total),
            ~ .x / Total * 100,
            .names = "{.col}_%"
        )
    )

A tibble: 7 × 14
Question	4	1	3	2	-1	-2	Total	4_%	1_%	3_%	2_%	-1_%	-2_%
<chr>	<int>	<int>	<int>	<int>	<int>	<int>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
v159	112	803	311	782	67	0	2075	5.397590	38.698795	14.987952	37.68675	3.228916	0.0000000
v160	337	332	655	647	101	3	2075	16.240964	16.000000	31.566265	31.18072	4.867470	0.1445783
v161	477	163	769	523	138	5	2075	22.987952	7.855422	37.060241	25.20482	6.650602	0.2409639
v162	490	174	708	553	147	3	2075	23.614458	8.385542	34.120482	26.65060	7.084337	0.1445783
v163	43	851	205	909	64	3	2075	2.072289	41.012048	9.879518	43.80723	3.084337	0.1445783
v164	52	772	205	985	55	6	2075	2.506024	37.204819	9.879518	47.46988	2.650602	0.2891566
v165	70	568	447	887	96	7	2075	3.373494	27.373494	21.542169	42.74699	4.626506	0.3373494

# Show the variable (question) label attached to each column
labelled::var_label(evs)

$v159 ‘working mother warm relationship with children (Q48A)’

$v160 ‘pre-school child suffers with working mother (Q48B)’

$v161 ‘women really want home and children (Q48C)’

$v162 ‘being housewife as fulfilling as paid job (Q48D)’

$v163 ‘job best way for independence women (Q48E)’

$v164 ‘husband+wife contribute to household income (Q48F)’

$v165 ‘fathers as well suited to look after children as mothers (Q48G)‘

# The stored variable labels are terse, so we supply the full question wording.
# The vector is keyed by column name; "\n" marks where the axis label wraps.
# Reminder: columns whose names are numeric codes must be quoted with
# backticks (e.g. `1`); last_col() is another way to address columns.

Quesdesc <- c(
    v159 = "A working mother can establish just as warm and\nsecure an environment as a non-working mother",
    v160 = "A pre-school child is likely to suffer if\nhis or her mother is working",
    v161 = "A job is alright, but what most women\nreally want is a home and children",
    v162 = "Being a housewife is just as fulfilling as\nworking",
    v163 = "Having a job is the best way for a woman\nto be independent",
    v164 = "Both the husband and wife should contribute\nto the family income",
    v165 = "In general, fathers are as well suited to\nlook after their children as women"
)

# Attach the question wording and convert the answer counts to percentages.
pivot_evs <- pivot_evs %>%
    mutate(
        # Look the wording up by question code instead of relying on the rows
        # being in the same order as the Quesdesc vector. `.env$` forces the
        # lookup vector to be used even when a Quesdesc column already exists.
        Quesdesc = unname(.env$Quesdesc[Question]),
        # Sum only the count columns (named after the numeric answer codes),
        # so the cell still works when it is re-run on the enriched table.
        Total = rowSums(select(., matches("^-?[0-9]+$"))),
        `Agree strongly` = round(`1` / Total * 100, 2),
        `Agree` = round(`2` / Total * 100, 2),
        `Disagree` = round(`3` / Total * 100, 2),
        `Disagree strongly` = round(`4` / Total * 100, 2)
    ) %>%
    # A second mutate() is needed because `.` still refers to the data frame
    # that entered the first mutate(), which has no percentage columns yet.
    # The share is taken as the remainder to 100, so the five displayed
    # percentages of a question always add up to exactly 100.
    mutate(
        `n.a./don't know` =
            100 - rowSums(select(., `Agree strongly`:`Disagree strongly`))
    )

pivot_evs

A tibble: 7 × 14
Question	4	1	3	2	-1	-2	Quesdesc	Total	Agree strongly	Agree	Disagree	Disagree strongly	n.a./don't know
<chr>	<int>	<int>	<int>	<int>	<int>	<int>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
v159	112	803	311	782	67	0	A working mother can establish just as warm and secure an environment as a non-working mother	2075	38.70	37.69	14.99	5.40	3.22
v160	337	332	655	647	101	3	A pre-school child is likely to suffer if his or her mother is working	2075	16.00	31.18	31.57	16.24	5.01
v161	477	163	769	523	138	5	A job is alright, but what most women really want is a home and children	2075	7.86	25.20	37.06	22.99	6.89
v162	490	174	708	553	147	3	Being a housewife is just as fulfilling as working	2075	8.39	26.65	34.12	23.61	7.23
v163	43	851	205	909	64	3	Having a job is the best way for a woman to be independent	2075	41.01	43.81	9.88	2.07	3.23
v164	52	772	205	985	55	6	Both the husband and wife should contribute to the family income	2075	37.20	47.47	9.88	2.51	2.94
v165	70	568	447	887	96	7	In general, fathers are as well suited to look after their children as women	2075	27.37	42.75	21.54	3.37	4.97

Target result

http://www.datavisualisation-r.com/pdf/barcharts_multiple_all.pdf

This is a stacked bar for multiple variables in the dataframe. So basically the dataset should be in the “longer” form. Let’s start

library(forcats)

# Instead of pivoting the twice-pivoted pivot_evs back to long form, we build
# the plotting data from the raw answers, shaped for our need.
# summarise() prints a friendly "`summarise()` has grouped output by" message
# because the result stays grouped by Question and Answer; see
# https://stackoverflow.com/questions/62140483/how-to-interpret-dplyr-message-summarise-regrouping-output-by-x-override

evs_613 <- evs %>%
    pivot_longer(cols = everything(), names_to = "Question", values_to = "Answer") %>%
    # "no answer" (-2) and "don't know" (-1) share one label, so both codes
    # end up in the same factor level
    mutate(Anstype = factor(Answer, levels = c(-2, -1, 1, 2, 3, 4),
                            labels = c("n.a./don't know", "n.a./don't know", "agree strongly", "agree", "disagree", "disagree strongly"))) %>%
    # Codes outside the levels above (-5 to -3: other missing, question not
    # asked, not applicable) turn into NA; drop them so they can never show up
    # as an extra "NA" segment in the stacked bars
    filter(!is.na(Anstype)) %>%
    # evs carries no earlier grouping, so .add is not needed here
    group_by(Question, Answer, Anstype) %>%
    # One row per question/answer code with the number of respondents
    summarize(Count = n())

`summarise()` has grouped output by 'Question', 'Answer'. You can override using the `.groups` argument.

# Preview the first six rows of the plotting data
head(evs_613, n = 6L)

A grouped_df: 6 × 4
Question	Answer	Anstype	Count
<chr>	<dbl+lbl>	<fct>	<int>
v159	-1	n.a./don't know	67
v159	1	agree strongly	803
v159	2	agree	782
v159	3	disagree	311
v159	4	disagree strongly	112
v160	-2	n.a./don't know	3

# Size of the rendered plot in the notebook
options(repr.plot.width = 10, repr.plot.height = 6)

# First draft: one 100% stacked bar per question.
# scale_fill_viridis() needs discrete = TRUE because the fill is a factor.
ggplot(evs_613, aes(x = Count, y = Question, fill = Anstype)) +
    geom_col(position = "fill") +
    scale_fill_viridis(discrete = TRUE, option = "plasma")

png

# Fill colours sampled from the original chart with an eye-dropper,
# keyed by answer category
color_613 <- setNames(
    c("#bebebe", "#00d0e2", "#6ddde1", "#ff8aee", "#ff00d2"),
    c("n.a./don't know", "agree strongly", "agree", "disagree", "disagree strongly")
)

#Result - seem easy?

# The sample size shown in the chart and the height of the annotation row are
# derived from the data, so they stay correct if the extract or the set of
# questions changes (for this data: "N=2,075" and 7 questions + 0.75).
n_label <- paste0("N=", format(nrow(evs), big.mark = ","))
note_y <- length(unique(evs_613$Question)) + 0.75

ggplot(evs_613, aes(x = Count, y = Question)) +
    # 100% stacked bars; reverse the stacking so the order matches the legend
    geom_col(mapping = aes(fill = Anstype), position = position_fill(reverse = TRUE)) +
    # annotations just above the top bar: sample size left, unit note right
    annotate("text", x = 0, y = note_y, label = n_label, hjust = 0) +
    annotate("text", x = 1, y = note_y, label = "all values in percent", hjust = 1, fontface = "italic") +
    # manual fill colours
    scale_fill_manual(values = color_613) +
    # show the full question wording on the y axis, first question on top
    scale_y_discrete(labels = Quesdesc, limits = rev) +
    # position = "fill" yields proportions in [0, 1]; label them as 0-100
    scale_x_continuous(breaks = seq(0, 1, 0.2), labels = function(x) x * 100) +
    # titles and source note
    labs(x = NULL, y = NULL,
         title = "It is often said that attitudes towards gender roles are changing",
         caption = "Source: European Values Study 2008 Germany, ZA4800. www.gesis.org.") +
    # strip the grid and move the legend above the plot
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.caption = element_text(face = "italic"),
          plot.title.position = "plot",
          legend.position = "top",
          legend.title = element_blank()) +
    guides(fill = guide_legend(title.position = "right",
                               label.position = "left",
                               label.hjust = 0
                              )) +
    # let the annotations be drawn outside the panel area
    coord_cartesian(clip = "off")

png