Disclaimer

Breeding values are estimated from data

Read Data

This is the notebook developed in class. It shows how genotype frequencies can be determined using R.

We want to read a dataset with genotype and phenotype information.

s_gen_phen_data <- "https://charlotte-ngs.github.io/lbgfs2022/data/p1_mrk_one_locus.csv"
library(readr)
tbl_gen_phen <- read_delim(file = s_gen_phen_data,
                           delim = ";",
                           col_types = cols(
                             ID = col_integer(),
                             Genotype = col_integer(),
                             Phenotype = col_double()
                           ))
head(tbl_gen_phen)

Descriptive statistics

library(dplyr)
tbl_gen_phen %>%
  group_by(Genotype) %>%
  summarise(geno_count = n()) %>%
  summarise(geno_freq = geno_count / sum(geno_count))
NA

Genotypic Values

Regression of phenotypes on Genotypes using homozygous genotypes

tbl_hom_gen <- tbl_gen_phen %>%
  filter(Genotype < 3)
head(tbl_hom_gen)
tbl_hom_gen %>%
  group_by(Genotype) %>%
  summarise(geno_count = n())

Regression model, with or without intercept? Compute the mean phenotypic value for all animals with Genotype \(0\)

tbl_hom_gen0 <- tbl_hom_gen %>%
  filter(Genotype < 1)
mean(tbl_hom_gen0$Phenotype)
[1] 0.1384745

Plot the data

library(ggplot2)
p_g0 <- ggplot(data = tbl_hom_gen, mapping = aes(x = Genotype, y = Phenotype)) +
  geom_point()
p_g0

From the plot and from the mean of the animals with genotype \(0\), it becomes clear that we need to fit a regression with intercept

lm_geno_val <- lm(Phenotype ~ Genotype, data = tbl_hom_gen)
(sry_geno_val <- summary(lm_geno_val))

Call:
lm(formula = Phenotype ~ Genotype, data = tbl_hom_gen)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.1200 -0.6895  0.0200  0.7068  3.8270 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.13847    0.17801   0.778    0.437
Genotype     0.04542    0.08966   0.507    0.613

Residual standard error: 1.007 on 2182 degrees of freedom
Multiple R-squared:  0.0001176, Adjusted R-squared:  -0.0003407 
F-statistic: 0.2566 on 1 and 2182 DF,  p-value: 0.6125

Add the regression line to the plot

p_g0_line <- p_g0 + 
  geom_abline(slope = sry_geno_val$coefficients["Genotype", "Estimate"],
              intercept = sry_geno_val$coefficients["(Intercept)", "Estimate"])
p_g0_line

The genotypic value \(a\) is

(n_geno_a <- sry_geno_val$coefficients["Genotype", "Estimate"])
[1] 0.04541911

The genotypic values for heterozygous animals

tbl_het_gen <- tbl_gen_phen %>%
  filter(Genotype > 2)
head(tbl_het_gen)
n_geno_d <- mean(tbl_het_gen$Phenotype) - sry_geno_val$coefficients["(Intercept)", "Estimate"] - n_geno_a
n_geno_d
[1] -0.05784937
tbl_het_gen$Genotype <- 1L
head(tbl_het_gen)
tbl_gen_all <- bind_rows(tbl_hom_gen, tbl_het_gen)
head(tbl_gen_all)
p_g_all <- ggplot(data = tbl_gen_all, mapping = aes(x=Genotype,  y=Phenotype))+
  geom_point()+ 
  geom_abline(slope = sry_geno_val$coefficients["Genotype", "Estimate"],
              intercept = sry_geno_val$coefficients["(Intercept)", "Estimate"])
p_g_all

Now that the genotypic values \(a\) and \(d\) are computed, we can do the transformations of the phenotypic values such that the three genotypes have the correct genotypic value

tbl_trans_gen <- tbl_gen_all
tbl_trans_gen$Phenotype <- tbl_trans_gen$Phenotype - sry_geno_val$coefficients["(Intercept)", "Estimate"] - n_geno_a

To check the result of the transformation, we can compute the means of the transformed phenotypic values

tbl_trans_gen %>%
  group_by(Genotype) %>%
  summarise(mean_phen = mean(Phenotype))
LS0tCnRpdGxlOiAiRXN0aW1hdGUgU2luZ2xlIExvY3VzIEJyZWVkaW5nIFZhbHVlcyBmcm9tIERhdGEiCmRhdGU6ICIyMDIyLTEwLTA3IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCiMjIERpc2NsYWltZXIKQnJlZWRpbmcgdmFsdWVzIGFyZSBlc3RpbWF0ZWQgZnJvbSBkYXRhCgoKIyMgUmVhZCBEYXRhClRoaXMgaXMgdGhlIG5vdGVib29rIGRldmVsb3BlZCBpbiBjbGFzcy4gSXQgc2hvd3MgaG93IGdlbm90eXBlIGZyZXF1ZW5jaWVzIGNhbiBiZSBkZXRlcm1pbmVkIHVzaW5nIFIuCgpXZSB3YW50IHRvIHJlYWQgYSBkYXRhc2V0IHdpdGggZ2Vub3R5cGUgYW5kIHBoZW5vdHlwZSBpbmZvcm1hdGlvbi4gCgpgYGB7cn0Kc19nZW5fcGhlbl9kYXRhIDwtICJodHRwczovL2NoYXJsb3R0ZS1uZ3MuZ2l0aHViLmlvL2xiZ2ZzMjAyMi9kYXRhL3AxX21ya19vbmVfbG9jdXMuY3N2IgpsaWJyYXJ5KHJlYWRyKQp0YmxfZ2VuX3BoZW4gPC0gcmVhZF9kZWxpbShmaWxlID0gc19nZW5fcGhlbl9kYXRhLAogICAgICAgICAgICAgICAgICAgICAgICAgICBkZWxpbSA9ICI7IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sX3R5cGVzID0gY29scygKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBJRCA9IGNvbF9pbnRlZ2VyKCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgR2Vub3R5cGUgPSBjb2xfaW50ZWdlcigpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIFBoZW5vdHlwZSA9IGNvbF9kb3VibGUoKQogICAgICAgICAgICAgICAgICAgICAgICAgICApKQpoZWFkKHRibF9nZW5fcGhlbikKYGBgCgoKRGVzY3JpcHRpdmUgc3RhdGlzdGljcwoKYGBge3J9CmxpYnJhcnkoZHBseXIpCnRibF9nZW5fcGhlbiAlPiUKICBncm91cF9ieShHZW5vdHlwZSkgJT4lCiAgc3VtbWFyaXNlKGdlbm9fY291bnQgPSBuKCkpICU+JQogIHN1bW1hcmlzZShnZW5vX2ZyZXEgPSBnZW5vX2NvdW50IC8gc3VtKGdlbm9fY291bnQpKQoKYGBgCgoKIyMgR2Vub3R5cGljIFZhbHVlcwpSZWdyZXNzaW9uIG9mIHBoZW5vdHlwZXMgb24gR2Vub3R5cGVzIHVzaW5nIGhvbW96eWdvdXMgZ2Vub3R5cGVzCgpgYGB7cn0KdGJsX2hvbV9nZW4gPC0gdGJsX2dlbl9waGVuICU+JQogIGZpbHRlcihHZW5vdHlwZSA8IDMpCmhlYWQodGJsX2hvbV9nZW4pCmBgYAoKYGBge3J9CnRibF9ob21fZ2VuICU+JQogIGdyb3VwX2J5KEdlbm90eXBlKSAlPiUKICBzdW1tYXJpc2UoZ2Vub19jb3VudCA9IG4oKSkKYGBgCgpSZWdyZXNzaW9uIG1vZGVsLCB3aXRoIG9yIHdpdGhvdXQgaW50ZXJjZXB0PyBDb21wdXRlIHRoZSBtZWFuIHBoZW5vdHlwaWMgdmFsdWUgZm9yIGFsbCBhbmltYWxzIHdpdGggR2Vub3R5cGUgJDAkCgpgYGB7cn0KdGJsX2hvbV9nZW4wIDwtIHRibF9ob21fZ2VuICU+JQogIGZpbHRlcihHZW5vdHlwZSA8IDEpCm1lYW4odGJsX2hvbV9nZW4wJFBoZW5vdHlwZSkKYGBgCgpQbG90IHRoZSBkYXRhCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpwX2cwIDwtIGdncGxvdChkYXRhID0gdGJsX2hvbV9nZW4sIG1hcHBpbmcgPSBhZXMoeCA9IEdlbm90eXBlLCB5ID0gUGhlbm90eXBlKSkgKwogIGdlb21fcG9pbnQoKQpwX2cwCmBgYAoKCkZyb20gdGhlIHBsb3QgYW5kIGZyb20gdGhlIG1lYW4gb2YgdGhlIGFuaW1hbHMgd2l0aCBnZW5vdHlwZSAkMCQsIGl0IGJlY29tZXMgY2xlYXIgdGhhdCB3ZSBuZWVkIHRvIGZpdCBhIHJlZ3Jlc3Npb24gd2l0aCBpbnRlcmNlcHQKCmBgYHtyfQpsbV9nZW5vX3ZhbCA8LSBsbShQaGVub3R5cGUgfiBHZW5vdHlwZSwgZGF0YSA9IHRibF9ob21fZ2VuKQooc3J5X2dlbm9fdmFsIDwtIHN1bW1hcnkobG1fZ2Vub192YWwpKQpgYGAKCkFkZCB0aGUgcmVncmVzc2lvbiBsaW5lIHRvIHRoZSBwbG90CgpgYGB7cn0KcF9nMF9saW5lIDwtIHBfZzAgKyAKICBnZW9tX2FibGluZShzbG9wZSA9IHNyeV9nZW5vX3ZhbCRjb2VmZmljaWVudHNbIkdlbm90eXBlIiwgIkVzdGltYXRlIl0sCiAgICAgICAgICAgICAgaW50ZXJjZXB0ID0gc3J5X2dlbm9fdmFsJGNvZWZmaWNpZW50c1siKEludGVyY2VwdCkiLCAiRXN0aW1hdGUiXSkKcF9nMF9saW5lCmBgYAoKVGhlIGdlbm90eXBpYyB2YWx1ZSAkYSQgaXMKCmBgYHtyfQoobl9nZW5vX2EgPC0gc3J5X2dlbm9fdmFsJGNvZWZmaWNpZW50c1siR2Vub3R5cGUiLCAiRXN0aW1hdGUiXSkKYGBgCgpUaGUgZ2Vub3R5cGljIHZhbHVlcyBmb3IgaGV0ZXJvenlnb3VzIGFuaW1hbHMKCmBgYHtyfQp0YmxfaGV0X2dlbiA8LSB0YmxfZ2VuX3BoZW4gJT4lCiAgZmlsdGVyKEdlbm90eXBlID4gMikKaGVhZCh0YmxfaGV0X2dlbikKYGBgCgpgYGB7cn0Kbl9nZW5vX2QgPC0gbWVhbih0YmxfaGV0X2dlbiRQaGVub3R5cGUpIC0gc3J5X2dlbm9fdmFsJGNvZWZmaWNpZW50c1siKEludGVyY2VwdCkiLCAiRXN0aW1hdGUiXSAtIG5fZ2Vub19hCm5fZ2Vub19kCmBgYAoKCmBgYHtyfQp0YmxfaGV0X2dlbiRHZW5vdHlwZSA8LSAxTApoZWFkKHRibF9oZXRfZ2VuKQpgYGAKCmBgYHtyfQp0YmxfZ2VuX2FsbCA8LSBiaW5kX3Jvd3ModGJsX2hvbV9nZW4sIHRibF9oZXRfZ2VuKQpoZWFkKHRibF9nZW5fYWxsKQpgYGAKCmBgYHtyfQpwX2dfYWxsIDwtIGdncGxvdChkYXRhID0gdGJsX2dlbl9hbGwsIG1hcHBpbmcgPSBhZXMoeD1HZW5vdHlwZSwgIHk9UGhlbm90eXBlKSkrCiAgZ2VvbV9wb2ludCgpKyAKICBnZW9tX2FibGluZShzbG9wZSA9IHNyeV9nZW5vX3ZhbCRjb2VmZmljaWVudHNbIkdlbm90eXBlIiwgIkVzdGltYXRlIl0sCiAgICAgICAgICAgICAgaW50ZXJjZXB0ID0gc3J5X2dlbm9fdmFsJGNvZWZmaWNpZW50c1siKEludGVyY2VwdCkiLCAiRXN0aW1hdGUiXSkKcF9nX2FsbAoKYGBgCgoKTm93IHRoYXQgdGhlIGdlbm90eXBpYyB2YWx1ZXMgJGEkIGFuZCAkZCQgYXJlIGNvbXB1dGVkLCB3ZSBjYW4gZG8gdGhlIHRyYW5zZm9ybWF0aW9ucyBvZiB0aGUgcGhlbm90eXBpYyB2YWx1ZXMgc3VjaCB0aGF0IHRoZSB0aHJlZSBnZW5vdHlwZXMgaGF2ZSB0aGUgY29ycmVjdCBnZW5vdHlwaWMgdmFsdWUKCmBgYHtyfQp0YmxfdHJhbnNfZ2VuIDwtIHRibF9nZW5fYWxsCnRibF90cmFuc19nZW4kUGhlbm90eXBlIDwtIHRibF90cmFuc19nZW4kUGhlbm90eXBlIC0gc3J5X2dlbm9fdmFsJGNvZWZmaWNpZW50c1siKEludGVyY2VwdCkiLCAiRXN0aW1hdGUiXSAtIG5fZ2Vub19hCmBgYAoKVG8gY2hlY2sgdGhlIHJlc3VsdCBvZiB0aGUgdHJhbnNmb3JtYXRpb24sIHdlIGNhbiBjb21wdXRlIHRoZSBtZWFucyBvZiB0aGUgdHJhbnNmb3JtZWQgcGhlbm90eXBpYyB2YWx1ZXMKCmBgYHtyfQp0YmxfdHJhbnNfZ2VuICU+JQogIGdyb3VwX2J5KEdlbm90eXBlKSAlPiUKICBzdW1tYXJpc2UobWVhbl9waGVuID0gbWVhbihQaGVub3R5cGUpKQpgYGAKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCg==