DataViz in R | 03B. Bar Chart Multiple Responses and Questions
Published on Apr 29, 2023
ggplot

Continuing to work with the EVS data from the previous post.

DataViz in R | 03B. Bar Chart Multiple Responses and Questions
library(ggplot2)
library(viridis)
library(dplyr)
# Use the minimal theme for every ggplot drawn from here on
ggplot2::theme_set(ggplot2::theme_minimal())
# Read the previously extracted EVS 2008 Germany subset from disk
evs <- readRDS(file = "./myData/EVS_2008/germany.Rda")
head(x = evs)
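A tibble: 6 × 8
v106 | v159 | v160 | v161 | v162 | v163 | v164 | v165
<hvn_lbll> | <hvn_lbll> | <hvn_lbll> | <hvn_lbll> | <hvn_lbll> | <hvn_lbll> | <hvn_lbll> | <hvn_lbll>
2 | 4 | 1 | 4 | 3 | 3 | 4 | 3
2 | 3 | 2 | 2 | 2 | 2 | 4 | 4
-3 | 1 | 3 | 3 | 3 | 1 | 1 | 1
-3 | 1 | 4 | 2 | 3 | 1 | 1 | 2
-3 | 3 | 1 | 1 | 2 | 3 | 2 | 3
2 | 2 | 2 | 2 | 2 | 2 | 2 | 1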
# haven and labelled are used to read the value/variable labels stored with
# the survey columns
library(haven)
library(labelled)
# First entries of item v159, printed together with its value labels
head(evs[["v159"]])
<labelled<double>[6]>: working mother warm relationship with children (Q48A)
[1] 4 3 1 1 3 2

Labels:
 value              label
    -5      other missing
    -4 question not asked
    -3     not applicable
    -2          no answer
    -1         don't know
     1     agree strongly
     2              agree
     3           disagree
     4  disagree strongly
# List the value labels of every column: the seven attitude items (v159-v165)
# all share the same coding, from -5 to 4
labelled::val_labels(evs)
$v106 other missing -5 question not asked -4 not applicable -3 no answer -2 don't know -1 roman catholic 1 protestant 2 free church/ non-conformist/ evangelical 3 jew 4 muslim 5 hindu 6 buddhist 7 orthodox 8 other 9

$v159 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v160 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v161 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v162 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v163 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v164 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

$v165 other missing -5 question not asked -4 not applicable -3 no answer -2 don’t know -1 agree strongly 1 agree 2 disagree 3 disagree strongly 4

Data wrangling

The negative values (from -5 to -1) are defined as missing values and are therefore not taken into account in statistical calculations. However, the answers dk (don’t know) and na (no answer) still carry some information, because the question was actually asked and the respondent did react to it.

As a result, we will remove the negative values from -5 to -3 (other missing, question not asked, and not applicable) and combine no answer and don't know into a single answer category.

# For this example we drop the column "v106" (the respondent's religious
# denomination) and keep only the seven attitude items.
# The column is removed by name rather than by position so that the step is
# idempotent: re-running the cell cannot silently drop another column once
# v106 is already gone.
evs <- select(evs, -any_of("v106"))
head(evs)
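A tibble: 6 × 7
v159 | v160 | v161 | v162 | v163 | v164 | v165
<dbl+lbl> | <dbl+lbl> | <dbl+lbl> | <dbl+lbl> | <dbl+lbl> | <dbl+lbl> | <dbl+lbl>
4 | 1 | 4 | 3 | 3 | 4 | 3
3 | 2 | 2 | 2 | 2 | 4 | 4
1 | 3 | 3 | 3 | 1 | 1 | 1
1 | 4 | 2 | 3 | 1 | 1 | 2
3 | 1 | 1 | 2 | 3 | 2 | 3
2 | 2 | 2 | 2 | 2 | 2 | 1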
# Step 1: reshape the data from "wide" to "long" format (few columns, many
# rows) with tidyr::pivot_longer().
# Keep in mind that pivot_longer() only reshapes, it does not summarise:
# as dim() confirms, the long data frame has exactly 2075 x 7 = 14,525 rows.

library(tidyr)
pivot_evs_longer <- evs %>%
    pivot_longer(
        cols = everything(),
        names_to = "Question",
        values_to = "Answer"
    )
head(pivot_evs_longer)
dim(evs)
dim(pivot_evs_longer)
A tibble: 6 × 2
Question | Answer
<chr> | <dbl+lbl>
v159 | 4
v160 | 1
v161 | 4
v162 | 3
v163 | 3
v164 | 4
  1. 2075
  2. 7
  1. 14525
  2. 2
# Step 2: summarise with pivot_wider(). values_fn = length turns every
# Question/Answer cell into the number of times that answer occurs, and
# values_fill = 0 is used for combinations that never occur (otherwise NA).
# https://stackoverflow.com/questions/28873057/sum-across-multiple-columns-with-dplyr

pivot_evs <- pivot_evs_longer %>%
    pivot_wider(
        names_from = "Answer",
        values_from = "Answer",
        values_fn = length,
        values_fill = 0
    )
pivot_evs
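A tibble: 7 × 7
Question | 4 | 1 | 3 | 2 | -1 | -2
<chr> | <int> | <int> | <int> | <int> | <int> | <int>
v159 | 112 | 803 | 311 | 782 | 67 | 0
v160 | 337 | 332 | 655 | 647 | 101 | 3
v161 | 477 | 163 | 769 | 523 | 138 | 5
v162 | 490 | 174 | 708 | 553 | 147 | 3
v163 | 43 | 851 | 205 | 909 | 64 | 3
v164 | 52 | 772 | 205 | 985 | 55 | 6
v165 | 70 | 568 | 447 | 887 | 96 | 7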
# Add a row total and one "<answer>_%" column per answer count.
# (Solution suggested by ChatGPT from OpenAI; it works well here.)

pivot_evs %>%
    # `.` is the incoming data frame: sum every column except Question
    mutate(Total = rowSums(select(., -Question))) %>%
    # share of each answer count in the row total, in percent
    mutate(across(-c(Question, Total), ~ .x / Total * 100, .names = "{.col}_%"))
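A tibble: 7 × 14
Question | 4 | 1 | 3 | 2 | -1 | -2 | Total | 4_% | 1_% | 3_% | 2_% | -1_% | -2_%
<chr> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl>
v159 | 112 | 803 | 311 | 782 | 67 | 0 | 2075 | 5.397590 | 38.698795 | 14.987952 | 37.68675 | 3.228916 | 0.0000000
v160 | 337 | 332 | 655 | 647 | 101 | 3 | 2075 | 16.240964 | 16.000000 | 31.566265 | 31.18072 | 4.867470 | 0.1445783
v161 | 477 | 163 | 769 | 523 | 138 | 5 | 2075 | 22.987952 | 7.855422 | 37.060241 | 25.20482 | 6.650602 | 0.2409639
v162 | 490 | 174 | 708 | 553 | 147 | 3 | 2075 | 23.614458 | 8.385542 | 34.120482 | 26.65060 | 7.084337 | 0.1445783
v163 | 43 | 851 | 205 | 909 | 64 | 3 | 2075 | 2.072289 | 41.012048 | 9.879518 | 43.80723 | 3.084337 | 0.1445783
v164 | 52 | 772 | 205 | 985 | 55 | 6 | 2075 | 2.506024 | 37.204819 | 9.879518 | 47.46988 | 2.650602 | 0.2891566
v165 | 70 | 568 | 447 | 887 | 96 | 7 | 2075 | 3.373494 | 27.373494 | 21.542169 | 42.74699 | 4.626506 | 0.3373494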
# Show the variable label (short question text) stored with each column
labelled::var_label(evs)

$v159 ‘working mother warm relationship with children (Q48A)’

$v160 ‘pre-school child suffers with working mother (Q48B)’

$v161 ‘women really want home and children (Q48C)’

$v162 ‘being housewife as fulfilling as paid job (Q48D)’

$v163 ‘job best way for independence women (Q48E)’

$v164 ‘husband+wife contribute to household income (Q48F)’

$v165 ‘fathers as well suited to look after children as mothers (Q48G)’

# Attach the full question wording and the percentage of each answer category.
# Notes:
# - a column whose name is a number (e.g. 1 or -2) must be wrapped in backticks;
# - the wording is looked up by question code, so it stays correct even if the
#   row order of pivot_evs changes;
# - Total is computed from the answer-count columns only, so the cell can be
#   re-run after pivot_evs has gained its extra (non-numeric) columns.

Quesdesc <- c("v159" = "A working mother can establish just as warm and\nsecure an environment as a non-working mother", 
              "v160" = "A pre-school child is likely to suffer if\nhis or her mother is working",
              "v161" = "A job is alright, but what most women\nreally want is a home and children",
              "v162" = "Being a housewife is just as fulfilling as\nworking",
              "v163" = "Having a job is the best way for a woman\nto be independent",
              "v164" = "Both the husband and wife should contribute\nto the family income",
              "v165" = "In general, fathers are as well suited to\nlook after their children as women")

pivot_evs <- pivot_evs %>%
    mutate(
        # .env$ forces the lookup in the named vector defined above, even when a
        # column called Quesdesc already exists from a previous run
        Quesdesc = unname(.env$Quesdesc[Question]),
        # `.` is the data frame entering this mutate(); sum only the count columns
        Total = rowSums(select(., any_of(c("-2", "-1", "1", "2", "3", "4")))),
        "Agree strongly" = round(`1` / Total * 100, 2),
        "Agree" = round(`2` / Total * 100, 2),
        "Disagree" = round(`3` / Total * 100, 2),
        "Disagree strongly" = round(`4` / Total * 100, 2)
    ) %>%
    # A second mutate() is needed because `.` inside select() refers to the data
    # entering the call, so the four percentage columns must already exist.
    # The n.a./don't know share is the remainder, which makes the five
    # percentages of each row add up to exactly 100.
    mutate("n.a./don't know" = 100 - rowSums(select(., all_of(c("Agree strongly", "Agree",
                                                               "Disagree", "Disagree strongly")))))
pivot_evs
A tibble: 7 × 14
Question | 4 | 1 | 3 | 2 | -1 | -2 | Quesdesc | Total | Agree strongly | Agree | Disagree | Disagree strongly | n.a./don't know
<chr> | <int> | <int> | <int> | <int> | <int> | <int> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl>
v159 | 112 | 803 | 311 | 782 | 67 | 0 | A working mother can establish just as warm and secure an environment as a non-working mother | 2075 | 38.70 | 37.69 | 14.99 | 5.40 | 3.22
v160 | 337 | 332 | 655 | 647 | 101 | 3 | A pre-school child is likely to suffer if his or her mother is working | 2075 | 16.00 | 31.18 | 31.57 | 16.24 | 5.01
v161 | 477 | 163 | 769 | 523 | 138 | 5 | A job is alright, but what most women really want is a home and children | 2075 | 7.86 | 25.20 | 37.06 | 22.99 | 6.89
v162 | 490 | 174 | 708 | 553 | 147 | 3 | Being a housewife is just as fulfilling as working | 2075 | 8.39 | 26.65 | 34.12 | 23.61 | 7.23
v163 | 43 | 851 | 205 | 909 | 64 | 3 | Having a job is the best way for a woman to be independent | 2075 | 41.01 | 43.81 | 9.88 | 2.07 | 3.23
v164 | 52 | 772 | 205 | 985 | 55 | 6 | Both the husband and wife should contribute to the family income | 2075 | 37.20 | 47.47 | 9.88 | 2.51 | 2.94
v165 | 70 | 568 | 447 | 887 | 96 | 7 | In general, fathers are as well suited to look after their children as women | 2075 | 27.37 | 42.75 | 21.54 | 3.37 | 4.97

Target result

http://www.datavisualisation-r.com/pdf/barcharts_multiple_all.pdf

The target is a stacked bar chart covering several variables of the data frame at once, so the dataset needs to be in the “longer” form. Let’s start.

library(forcats)
# Instead of pivoting the twice-pivoted pivot_evs back to long form, build the
# plotting data from the raw answers.
# summarise() on data grouped by several variables prints the friendly message
# "`summarise()` has grouped output by ..." unless `.groups` is given; see
# https://stackoverflow.com/questions/62140483/how-to-interpret-dplyr-message-summarise-regrouping-output-by-x-override

evs_613 <- evs %>%
    pivot_longer(cols = everything(), names_to = "Question", values_to = "Answer") %>%
    # Recode the numeric answers into a factor. -2 (no answer) and -1 (don't know)
    # share one label and therefore collapse into a single level; answers outside
    # the listed levels become NA.
    mutate(Anstype = factor(Answer, levels = c(-2, -1, 1, 2, 3, 4), 
                            labels = c("n.a./don't know", "n.a./don't know", "agree strongly", "agree", "disagree", "disagree strongly"))) %>%
    # optional: reorder the questions in reverse order
    #mutate(Question = fct_reorder(Question, .desc=T))
    group_by(Question, Answer, Anstype) %>%
    # One row per question/answer with its number of respondents.
    # .groups = "drop" returns an ungrouped tibble, so later steps do not
    # inherit the grouping and no regrouping message is printed.
    summarize(Count = n(), .groups = "drop")
`summarise()` has grouped output by 'Question', 'Answer'. You can override using the `.groups` argument.
# Preview the first six rows of the plotting data
head(evs_613, n = 6L)
A grouped_df: 6 × 4
Question | Answer | Anstype | Count
<chr> | <dbl+lbl> | <fct> | <int>
v159 | -1 | n.a./don't know | 67
v159 | 1 | agree strongly | 803
v159 | 2 | agree | 782
v159 | 3 | disagree | 311
v159 | 4 | disagree strongly | 112
v160 | -2 | n.a./don't know | 3
# Width and height of the figure rendered in the notebook
options(repr.plot.width = 10, repr.plot.height = 6)
# First draft: one bar per question, answer counts stacked and normalised to
# proportions. viridis needs discrete = TRUE because the fill is a factor.
ggplot(data = evs_613, mapping = aes(x = Count, y = Question, fill = Anstype)) +
    geom_col(position = "fill") +
    scale_fill_viridis(discrete = TRUE, option = "plasma")

png

# Custom colour per answer category, picked from the original chart with an
# eye-dropper; the names match the levels of Anstype
color_613 <- c("n.a./don't know" = "#bebebe", 
               "agree strongly" = "#00d0e2", 
               "agree" = "#6ddde1", 
               "disagree" = "#ff8aee", 
               "disagree strongly" = "#ff00d2")

# Final chart: 100% stacked horizontal bars, one bar per question
ggplot(evs_613, aes(x = Count, y = Question)) +
    # reverse the stacking order inside each bar
    geom_col(mapping = aes(fill = Anstype), position = position_fill(reverse = TRUE)) +
    # notes placed above the topmost (7th) bar, left- and right-aligned
    annotate("text", x = 0, y = 7.75, label = "N=2,075", hjust = 0) +
    annotate("text", x = 1, y = 7.75, label = "all values in percent", hjust = 1, fontface = "italic") +
    # manual fill colours
    scale_fill_manual(values = color_613) +
    # replace the question codes on the y axis by their wording and flip the order
    scale_y_discrete(labels = Quesdesc, limits = rev) +
    # x axis: breaks every 20 points, proportions 0-1 shown as 0-100
    scale_x_continuous(breaks = seq(0, 1, 0.2), labels = function(x) x * 100) +
    # titles and caption
    labs(x = NULL, y = NULL,
         title = "It is often said that attitudes towards gender roles are changing",
         caption = "Source: European Values Study 2008 Germany, ZA4800. www.gesis.org.") +
    # theme tweaks (no trailing comma: an empty last argument is not accepted by
    # every function/version and is easy to break when editing)
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.caption = element_text(face = "italic"),
          plot.title.position = "plot",
          legend.position = "top",
          legend.title = element_blank()) +
    guides(fill = guide_legend(title.position = "right", 
                               label.position = "left", 
                               label.hjust = 0
                              )) +
    # do not clip drawing to the panel area, so text near its edge is not cut off
    coord_cartesian(clip = "off")

png