最新消息:Welcome to the puzzle paradise for programmers! Here, a well-designed puzzle awaits you. From code logic puzzles to algorithmic challenges, each level is closely centered on the programmer's expertise and skills. Whether you're a novice programmer or an experienced tech guru, you'll find your own challenges on this site. In the process of solving puzzles, you can not only exercise your thinking skills, but also deepen your understanding and application of programming knowledge. Come to start this puzzle journey full of wisdom and challenges, with many programmers to compete with each other and show your programming wisdom! Translated with DeepL.com (free version)

r - Alluvial plots using ggplot2 - highlight certain pairings - Stack Overflow

matteradmin5PV0评论

I have plotted an alluvial plot using ggplot2, however I cannot seem to figure out how to colour only the most frequent pair "CAGGFNYQLIW" from the variable "CTaa_alpha" which is paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: a tibble (class "tbl_df") with 50 rows and three columns.
#   CTaa_alpha, CTaa_beta - character sequences forming one pairing per row
#                           (presumably CDR3 alpha/beta chains, judging by the
#                           axis labels used in the plot - confirm)
#   n                     - integer count for that pairing
# Rows are listed in non-increasing order of `n`, so row 1 is the most
# frequent pairing.
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Two-axis alluvial plot: one flow per row of `a`, running from its
# CTaa_alpha stratum to its CTaa_beta stratum, with flow height given by `n`.
ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # NOTE: inside aes(), "green" is a constant data value rather than a colour,
  # so every flow falls into the same fill group and the actual colour is
  # whatever scale_fill_viridis_d() assigns to that single level.
  geom_alluvium(aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its category name.
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_fill_viridis_d() +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy", and I would like to be able to highlight specific pairings (whether that is row 1, the most frequent pairing, or row 10, the 10th most frequent pairing). Any insights would be welcome!

I have plotted an alluvial plot using ggplot2, however I cannot seem to figure out how to colour only the most frequent pair "CAGGFNYQLIW" from the variable "CTaa_alpha" which is paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: a tibble (class "tbl_df") with 50 rows and three columns.
#   CTaa_alpha, CTaa_beta - character sequences forming one pairing per row
#                           (presumably CDR3 alpha/beta chains, judging by the
#                           axis labels used in the plot - confirm)
#   n                     - integer count for that pairing
# Rows are listed in non-increasing order of `n`, so row 1 is the most
# frequent pairing.
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Two-axis alluvial plot: one flow per row of `a`, running from its
# CTaa_alpha stratum to its CTaa_beta stratum, with flow height given by `n`.
ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # NOTE: inside aes(), "green" is a constant data value rather than a colour,
  # so every flow falls into the same fill group and the actual colour is
  # whatever scale_fill_viridis_d() assigns to that single level.
  geom_alluvium(aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its category name.
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_fill_viridis_d() +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy", and I would like to be able to highlight specific pairings (whether that is row 1, the most frequent pairing, or row 10, the 10th most frequent pairing). Any insights would be welcome!

Share Improve this question edited Nov 18, 2024 at 21:31 stefan 128k6 gold badges38 silver badges77 bronze badges asked Nov 18, 2024 at 21:20 Zoya QaiyumZoya Qaiyum 998 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 5

To highlight some of the categories, you can map a condition to the fill aesthetic and then set your desired colors using scale_fill_manual. For example, to highlight the top 3 categories you can do:

library(ggplot2)
library(ggalluvial)

# Highlight Top 3
# Take the three rows with the largest `n` and identify each pairing by BOTH
# sequences (alpha + beta). Matching on CTaa_alpha alone would also highlight
# any other row that merely shares its alpha sequence with a top pairing.
# To highlight one specific pairing instead, pick its row, e.g.
# `.top <- a[10, ]` for the 10th most frequent pairing.
.top <- head(a[order(a$n, decreasing = TRUE), ], 3)
.highlight <- paste(.top$CTaa_alpha, .top$CTaa_beta)

ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # The fill is a logical per row: TRUE for highlighted pairings, FALSE for
  # everything else. The key is built exactly as for `.highlight` above.
  geom_alluvium(
    aes(fill = paste(CTaa_alpha, CTaa_beta) %in% .highlight)
  ) +
  geom_stratum() +
  # Label each stratum box with its category name.
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  # Name the colours so they are matched to the logical levels by name, not by
  # position; otherwise TRUE would get the grey if it were the only level
  # present (e.g. when every row is highlighted).
  scale_fill_manual(
    values = c("FALSE" = "grey65", "TRUE" = "steelblue")
  ) +
  theme_classic() +
  theme(legend.position = "none")

Post a comment

comment list (0)

  1. No comments so far