在数据框架中,用达到总最大值所需的余数来补充数值,这样当将值和余数组合在一起时,就会形成长度一致的线条组合。
max_amount <- max(word_data$amount) word_data$remainder <- (max_amount - word_data$amount) 0.2
每个英雄角色仅提取5个关键词。
word_data_top5 <- word_data %>% group_by(character) %>% arrange(desc(amount)) %>% slice(1:5) %>% ungroup()
将“amount”和“remaining”的格式进行转换
确保每个角色有两个长条;一个用于显示amount,另一个用于选择结束位置。
这会将“amount”和“remaining”折叠成一个名为“variable”的列,指示它是哪个值,另一列“value”包含每个值中的数字。
word_data_top5_m <- melt(word_data_top5, measure.vars = c("amount","remainder"))
将这些条形图放在有序因素中,与在数据融合中相反。否则,“amount”和“remainder”将在图上以相反的顺序显示。
word_data_top5_m$variable2 <- factor(word_data_top5_m$variable, levels = rev(levels(word_data_top5_m$variable)))
每个角色仅仅显示五个词汇
注意角色名称的版本问题,例如采用“black_panther”而不是“Black Panther”
plot_char <- function(character_name){ # example: character_name = "black_panther" # plot details that we might want to fiddle with # thickness of lines between bars bar_outline_size <- 0.5 # transparency of lines between bars bar_outline_alpha <- 0.25 # # The function takes the simple character name, # but here, we convert it to the pretty name, # because we'll want to use that on the plot. pretty_character_name <- convert_simple_to_pretty(character_name) # Get the image for this character, # from the list of all images. temp_image <- all_images[character_name] # Make a data frame for only this character temp_data <- word_data_top5_m %>% dplyr::filter(character == character_name) %>% mutate(character = character_name) # order the words by frequency # First, make an ordered vector of the most common words # for this character ordered_words <- temp_data %>% mutate(word = as.character(word)) %>% dplyr::filter(variable == "amount") %>% arrange(value) %>% `[[`(., "word") # order the words in a factor, # so that they plot in this order, # rather than alphabetical order temp_data$word = factor(temp_data$word, levels = ordered_words) # Get the max value, # so that the image scales out to the end of the longest bar max_value <- max(temp_data$value) fill_colors <- c(`remainder` = "white", `value` = "white") # Make a grid object out of the character's image character_image <- rasterGrob(all_images[[character_name]], width = unit(1,"npc"), height = unit(1,"npc")) # make the plot for this character output_plot <- ggplot(temp_data) aes(x = word, y = value, fill = variable2) # add image # draw it completely bottom to top (x), # and completely from left to the the maximum log-odds value (y) # note that x and y are flipped here, # in prep for the coord_flip() annotation_custom(character_image, xmin = -Inf, xmax = Inf, ymin = 0, ymax = max_value) geom_bar(stat = "identity", color = alpha("white", bar_outline_alpha), size = bar_outline_size, width = 1) scale_fill_manual(values = fill_colors) theme_classic() coord_flip(expand = FALSE) # use a facet strip, # to serve as a title, but with color facet_grid(. ~ character, labeller = labeller(character = character_labeler)) # figure out color swatch for the facet strip fill # using character name to index the color palette # color= NA means there's no outline color. theme(strip.background = element_rect(fill = character_palette[character_name], color = NA)) # other theme elements theme(strip.text.x = element_text(size = rel(1.15), color = "white"), text = element_text(family = "Franklin"), legend.position = "none", panel.grid = element_blank(), axis.text.x = element_text(size = rel(0.8))) # omit the axis title for the individual plot, # because we'll have one for the entire ensemble theme(axis.title = element_blank()) return(output_plot) }
单个角色是如何设置?
sample_plot <- plot_char("black_panther") theme(axis.title = element_text()) # x lab is still declared as y lab # because of coord_flip() ylab(plot_x_axis_text) sample_plot
横轴为什么这么特殊?因为随着数值的增加,条形图会变得越来越高,因此需要转换刻度。
如下所示☟
logit2prob <- function(logit){ odds <- exp(logit) prob <- odds / (1 odds) return(prob) }
…这就是这个轴的样子:
logit2prob(seq(0, 2.5, 0.5))
## [1] 0.5000000 0.6224593 0.7310586 0.8175745 0.8807971 0.9241418
注意该列表中连续项之间的递减差异:
diff(logit2prob(seq(0, 2.5, 0.5)))
## [1] 0.12245933 0.10859925 0.08651590 0.06322260 0.04334474
好了,可以进行下一项了:探讨一些细节,并把上面设置的函数应用到所有角色的列表中,并把所有的结果放入一个列表中。
all_plots <- lapply(character_names, plot_char)
从图片中提取标题
get_axis_grob <- function(plot_to_pick, which_axis){ # plot_to_pick <- sample_plot tmp <- ggplot_gtable(ggplot_build(plot_to_pick)) # tmp$grobs # find the grob that looks like # it would be the x axis axis_x_index <- which(sapply(tmp$grobs, function(x){ # for all the grobs, # return the index of the one # where you can find the text # "axis.title.x" or "axis.title.y" # based on input argument `which_axis` grepl(paste0("axis.title.",which_axis), x)} )) axis_grob <- tmp$grobs[[axis_x_index]] return(axis_grob) }
提取轴标题
px_axis_x <- get_axis_grob(sample_plot, "x") px_axis_y <- get_axis_grob(sample_plot, "y")
下面是如何使用提取出来的坐标轴:
grid.newpage() grid.draw(px_axis_x)
# grid.draw(px_axis_y)
汇总所有的英雄
big_plot <- arrangeGrob(grobs = all_plots)
加入图注,注意图和坐标轴的比例关系
big_plot_w_x_axis_title <- arrangeGrob(big_plot, px_axis_x, heights = c(10,1)) grid.newpage() grid.draw(big_plot_w_x_axis_title)