#small functions to calculate tajima based on the wiki page

##for a,b,c,e functions n is the number of chr in the sample
a1 <- function(n) sum(1/(1:(n-1)))
a2 <- function(n) sum(1/(1:(n-1))^2)

b1 <- function(n) (n+1)/(3*(n-1))
b2 <- function(n) (2*(n^2+n+3))/(9*n*(n-1))

c1 <- function(n) b1(n)-1/a1(n)
c2 <- function(n) b2(n)-(n+2)/(a1(n)*n)+a2(n)/a1(n)^2

e1 <- function(n) c1(n)/a1(n)
e2 <- function(n) c2(n)/(a1(n)^2+a2(n))


##assumes the sfs contains the 2 invariable categories. lengths should be 2*n+1,n is number of diploid samples
pair <- function(sfs,n){
    if(length(sfs)!=2*n+1)
        stop("Problem with dims")
    scal <- (0:(2*n))*((2*n):0)
    sum(sfs*scal/choose(2*n,2))
}


pair.fold <- function(sfs,n){
    if(length(sfs)!=2*n+1)
        stop("Problem with dims")
    scal <- (0:(2*n))*((2*n):0)
    sum(sfs*scal/choose(2*n,2))
}


##input is thetaD and number of segregating sites
tajima.d <- function(pairwise,S,n){
    top <- pairwise-S/a1(n)
    bot <- sqrt(e1(n)*S+e2(n)*S*(S-1))
    top/bot
}

tajima.d2 <- function(pairwise,S,n){
    S <- thetaW*a1(n)
    top <- pairwise-thetaW
    bot <- sqrt(e1(n)*S+e2(n)*S*(S-1))
    top/bot
}


##tajima.d(pair(b,20),sum(b),n=40)