admin管理员组

文章数量:1391925

I have a data frame that looks like this :

# A tibble: 6 × 8
      N     S    DG     P locus replicate      MLE_s_BwS      MLE_s_WF
  <int> <dbl> <int> <int> <int>     <int>          <dbl>         <dbl>
1   100     0     1     1     0         0        -0.174        -0.183 
2   100     0     1     1     1         0         0.143         0.143 
3   100     0     1     1     2         0        -0.0758       -0.0758
4   100     0.1   10    1     3         0        -0.141        -0.141 
5   100     0.1   10    1     4         0         0.102         0.102 
6   100     0.1   10    1     5         0         0.102         0.102

I use ggplot to plot the column MLE_s_BwS against MLE_s_WF when DG == 1 and when DG == 10 (two different plots).

# Scatter plot of MLE_s_BwS (y) against MLE_s_WF (x) using only the rows of
# estimations_pivoted where DG == 1; points are coloured by S.
my_plot_DG1 <- ggplot(
    estimations_pivoted[estimations_pivoted$DG == 1,],
    aes(x = MLE_s_WF, y = MLE_s_BwS)
  ) +
  geom_point(aes(col = as.factor(S)), alpha = 0.5) +
  # Unnamed values are assigned to the levels of as.factor(S) in level order.
  scale_color_manual(values = c("purple2", "yellow")) +
  # No slope/intercept is supplied, so geom_abline() uses its defaults
  # (intercept 0, slope 1), i.e. the y = x reference line.
  geom_abline(linetype = "dotted") +
  # x_lim and y_lim are not defined in this snippet; they must exist already.
  xlim(x_lim) + ylim(y_lim) +
  theme_minimal()

I am trying to draw two horizontal and two vertical lines: in each case, one line for the mean of MLE_s[BwS/WF] when S == 0 and another when S == 0.1.

I can't find a way to do this with `aes()`. The only approach I can think of is:

# Hand-written approach: one horizontal line per S value, placed at the mean of
# MLE_s_BwS over the rows with DG == 1 and that S, and coloured to match the
# manual point colours. Presumably each call is meant to be added to the plot
# with `+`; as written they are two standalone expressions.
geom_hline(yintercept = mean(estimations_pivoted[estimations_pivoted$DG == 1 & estimations_pivoted$S == 0,]$MLE_s_BwS), col = "purple2")
geom_hline(yintercept = mean(estimations_pivoted[estimations_pivoted$DG == 1 & estimations_pivoted$S == 0.1,]$MLE_s_BwS), col = "yellow")

But this looks inelegant. Is there a simpler, more elegant way of doing this?

I have a data frame that looks like this :

# A tibble: 6 × 8
      N     S    DG     P locus replicate      MLE_s_BwS      MLE_s_WF
  <int> <dbl> <int> <int> <int>     <int>          <dbl>         <dbl>
1   100     0     1     1     0         0        -0.174        -0.183 
2   100     0     1     1     1         0         0.143         0.143 
3   100     0     1     1     2         0        -0.0758       -0.0758
4   100     0.1   10    1     3         0        -0.141        -0.141 
5   100     0.1   10    1     4         0         0.102         0.102 
6   100     0.1   10    1     5         0         0.102         0.102

I use ggplot to plot the column MLE_s_BwS against MLE_s_WF when DG == 1 and when DG == 10 (two different plots).

# Scatter plot of MLE_s_BwS (y) against MLE_s_WF (x) using only the rows of
# estimations_pivoted where DG == 1; points are coloured by S.
my_plot_DG1 <- ggplot(
    estimations_pivoted[estimations_pivoted$DG == 1,],
    aes(x = MLE_s_WF, y = MLE_s_BwS)
  ) +
  geom_point(aes(col = as.factor(S)), alpha = 0.5) +
  # Unnamed values are assigned to the levels of as.factor(S) in level order.
  scale_color_manual(values = c("purple2", "yellow")) +
  # No slope/intercept is supplied, so geom_abline() uses its defaults
  # (intercept 0, slope 1), i.e. the y = x reference line.
  geom_abline(linetype = "dotted") +
  # x_lim and y_lim are not defined in this snippet; they must exist already.
  xlim(x_lim) + ylim(y_lim) +
  theme_minimal()

I am trying to draw two horizontal and two vertical lines: in each case, one line for the mean of MLE_s[BwS/WF] when S == 0 and another when S == 0.1.

I can't find a way to do this with `aes()`. The only approach I can think of is:

# Hand-written approach: one horizontal line per S value, placed at the mean of
# MLE_s_BwS over the rows with DG == 1 and that S, and coloured to match the
# manual point colours. Presumably each call is meant to be added to the plot
# with `+`; as written they are two standalone expressions.
geom_hline(yintercept = mean(estimations_pivoted[estimations_pivoted$DG == 1 & estimations_pivoted$S == 0,]$MLE_s_BwS), col = "purple2")
geom_hline(yintercept = mean(estimations_pivoted[estimations_pivoted$DG == 1 & estimations_pivoted$S == 0.1,]$MLE_s_BwS), col = "yellow")

But this looks inelegant. Is there a simpler, more elegant way of doing this?

Share Improve this question edited Mar 13 at 13:19 Kiffikiffe asked Mar 13 at 13:14 KiffikiffeKiffikiffe 1318 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 1

With ggplot it is generally best to manipulate and summarize the data first.

Create a data frame with the mean values for each group.

# Example data, built column by column. The first three rows have S = 0 and
# DG = 1; the remaining four have S = 0.1 and DG = 10. The final row repeats
# the locus-5 observation, so the table has 7 rows.
df <- tibble::tibble(
  N = rep(100L, 7L),
  S = rep(c(0, 0.1), times = c(3L, 4L)),
  DG = rep(c(1L, 10L), times = c(3L, 4L)),
  P = rep(1L, 7L),
  locus = c(0L, 1L, 2L, 3L, 4L, 5L, 5L),
  replicate = rep(0L, 7L),
  MLE_s_BwS = c(-0.174, 0.143, -0.0758, -0.141, 0.102, 0.102, 0.102),
  MLE_s_WF = c(-0.183, 0.143, -0.0758, -0.141, 0.102, 0.102, 0.102)
)


library(tidyverse)

# Mean of each estimator within every (S, DG) combination. `.by` groups for
# this call only, so the result comes back ungrouped.
mean_lines <- summarise(
  df,
  mean_s_BwS = mean(MLE_s_BwS),
  mean_s_WF = mean(MLE_s_WF),
  .by = c(S, DG)
)

# Show the summary table.
mean_lines
#> # A tibble: 2 × 4
#>       S    DG mean_s_BwS mean_s_WF
#>   <dbl> <int>      <dbl>     <dbl>
#> 1   0       1    -0.0356   -0.0386
#> 2   0.1    10     0.0412    0.0412

When creating the plot you can take advantage of facet functions to avoid needing to write separate code for each value of DG.

# Scatter of MLE_s_BwS against MLE_s_WF, one facet per DG value, with the
# per-(S, DG) means from `mean_lines` overlaid as horizontal (BwS) and
# vertical (WF) lines.
ggplot(data = df,
       aes(x = MLE_s_WF, y = MLE_s_BwS)) +
  geom_point(aes(color = factor(S)), alpha = 0.5) +
  scale_color_manual(values = c("purple2", "yellow")) +
  # Dotted y = x reference line (geom_abline() defaults).
  geom_abline(linetype = "dotted") +
  # Colour each mean line by S so it matches the points it summarises;
  # otherwise the lines for different S values in one facet are all drawn in
  # the default colour and cannot be told apart. The lines are kept out of the
  # legend so its keys remain plain points.
  geom_hline(data = mean_lines,
             aes(yintercept = mean_s_BwS, color = factor(S)),
             show.legend = FALSE) +
  geom_vline(data = mean_lines,
             aes(xintercept = mean_s_WF, color = factor(S)),
             show.legend = FALSE) +
  # facet_wrap() filters every layer by DG, including the mean-line layers,
  # because `mean_lines` carries a DG column.
  facet_wrap(~DG) +
  labs(color = 'S') +
  theme_minimal()

本文标签: r, Draw line on mean of column depending on factor, Stack Overflow