Code to annotate our P. evermanni reference files (the P. evermanni transcriptome and genome) with GO information

1 Transcriptome

1.1 Retrieve transcriptome fasta file

We’ll be using the P. evermanni genes fasta file constructed using the P. evermanni gff and scaffold fasta, stored here. It is also accessible on the deep-dive genomic resources page, which includes links to the relevant code.

# Download the P. evermanni CDS FASTA and save it as ../../data/Porites_evermanni_CDS.fasta.
# NOTE(review): -k turns off TLS certificate verification for this request.
curl https://gannet.fish.washington.edu/kdurkin1/deep-dive/E-Peve/data/Porites_evermanni_CDS.fasta \
-k \
> ../../data/Porites_evermanni_CDS.fasta

Let’s check the file

# Preview the first three lines of the CDS FASTA
cds_fasta="../../data/Porites_evermanni_CDS.fasta"
printf '%s\n' "First few lines:"
head -n 3 "$cds_fasta"

# Count the lines that contain ">" (one per FASTA header)
printf '\n%s\n' "How many sequences are there?"
grep -c '>' "$cds_fasta"
## First few lines:
## >Parent=Peve_00000001 Porites_evermani_scaffold_1:3106-3444,4283-4488
## TTACTGCTTCAGTATGTGAATTTCGATGGTGGCTTGACCGGAGTTAGACATGGCCCCCCTTGCTCGGAGTCCCGATCCCAAATCTCTCTCGTGCCAGTAGTTATCTCCTTTGAAGGGATTATCGTAATACATCCGGTACCAAAGATTGTAGTTGGCGCGTCTTTTACCACTGTAAAGCCAAACATCAAACCAGTTGCTGTACCAGTTGTAGTCAAATGGAACGGAATACATAACAGCAAGTGTTTTATCGATGTGTGGGATGTAGTATGTTAATACTCCTACTGCGCCTCTCGCAACGGGCCCCGCAGTTTTTCGTGCGCCGTAAAGCAAAGCTGTGCCTGAAGAAACATCATGAGGCAAAACACGATTTGACGTCCCTGAATAAAAATATATGTTGACTGCTCTCCATTTATATCCACTTTCGTTATCAACACCAATGGCGACCTTGCGGCTTATGCTACCAAGGGTGTTTAAAATTGTTGTGAGAATGCCCAAGCCAAGTTGAGCACCGCTGATGACAGCACCAGCGTCAGCTAAAATTTT
## >Parent=Peve_00000002 Porites_evermani_scaffold_1:424478-425361,426180-426735,427012-427140,427664-427724,428641-429034
## 
## How many sequences are there?
## 40389

For simplicity, let’s reduce the sequence names to just the unique identifier “Parent=Peve_########”

# Load the CDS sequences from the FASTA file
fasta_file <- "../../data/Porites_evermanni_CDS.fasta"  # Replace with the name of your FASTA file
sequences <- readDNAStringSet(fasta_file)

# Keep only the leading "Parent=Peve_########" identifier of each sequence name;
# names that do not match the pattern are left unchanged
id_pattern <- "^(Parent=Peve_\\d+).*"
short_names <- gsub(id_pattern, "\\1", names(sequences))
names(sequences) <- short_names
head(names(sequences))
## [1] "Parent=Peve_00000001" "Parent=Peve_00000002" "Parent=Peve_00000003"
## [4] "Parent=Peve_00000004" "Parent=Peve_00000005" "Parent=Peve_00000006"
# Length of every sequence, wrapped in a one-column data frame for plotting
sequence_lengths <- width(sequences)
sequence_lengths_df <- data.frame(Length = sequence_lengths)

# Histogram of sequence lengths (bin width of 1)
length_histogram <- ggplot(data = sequence_lengths_df, mapping = aes(x = Length)) +
  geom_histogram(binwidth = 1, color = "black", fill = "blue", alpha = 0.75) +
  labs(
    title = "Histogram of Sequence Lengths",
    x = "Sequence Length",
    y = "Frequency"
  ) +
  theme_minimal()
length_histogram

# Five-number summary plus mean of the lengths
summary(sequence_lengths_df)
##      Length     
##  Min.   :  102  
##  1st Qu.:  519  
##  Median :  933  
##  Mean   : 1338  
##  3rd Qu.: 1617  
##  Max.   :63597
# Calculate base composition: alphabetFrequency() returns one row per sequence;
# with baseOnly = TRUE the columns are A, C, G, T and "other"
base_composition <- alphabetFrequency(sequences, baseOnly = TRUE)

# Sum the per-sequence counts so that each base is drawn as a single bar.
# Plotting the per-sequence rows directly overplots one bar per sequence at the
# same x position, so the chart would not show the overall composition.
base_totals <- colSums(base_composition)
base_composition_melted <- data.frame(
  Base = factor(names(base_totals), levels = names(base_totals)),
  Count = as.numeric(base_totals)
)

# Plot base composition bar chart using ggplot2
# ("other" gets an explicit colour so that it is not left without a fill value)
ggplot(base_composition_melted, aes(x = Base, y = Count, fill = Base)) +
  geom_col(color = "black") +
  labs(title = "Base Composition",
       x = "Base",
       y = "Count") +
  theme_minimal() +
  scale_fill_manual(values = c("A" = "green", "C" = "blue", "G" = "yellow", "T" = "red", "other" = "grey50"))

# Count CG motifs in a single sequence.
#
# sequence: one sequence, as a character string or an object that
#           as.character() can convert to one.
# Returns the number of non-overlapping "CG" occurrences as an integer
# (0 when the motif is absent).
count_cg_motifs <- function(sequence) {
  cg_motif <- "CG"
  match_starts <- gregexpr(cg_motif, as.character(sequence), fixed = TRUE)[[1]]
  # gregexpr() reports "no match" as a single -1, so counting the length of the
  # result would give 1 for a sequence without any CG; count real positions only.
  sum(match_starts > 0L)
}

# Apply the CG counter to every sequence and collect the results in a data frame
cg_motifs_counts <- sapply(sequences, count_cg_motifs)
cg_motifs_counts_df <- data.frame(CG_Count = cg_motifs_counts)

# Histogram of the per-sequence CG counts (bin width of 1)
cg_histogram <- ggplot(data = cg_motifs_counts_df, mapping = aes(x = CG_Count)) +
  geom_histogram(binwidth = 1, color = "black", fill = "blue", alpha = 0.75) +
  labs(
    title = "Distribution of CG Motifs",
    x = "Number of CG Motifs",
    y = "Frequency"
  ) +
  theme_minimal()
cg_histogram

1.2 Database Creation

1.2.1 Obtain Fasta (UniProt/Swiss-Prot)

# Download the Swiss-Prot FASTA into ../../data, rename it with a release label,
# and decompress it (-k keeps the compressed copy alongside the FASTA).
# NOTE(review): the URL points at "current_release", so the download only matches
# the r2023_04 label if it was fetched while that release was current — confirm before re-running.
cd ../../data
curl -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
mv uniprot_sprot.fasta.gz uniprot_sprot_r2023_04.fasta.gz
gunzip -k uniprot_sprot_r2023_04.fasta.gz

1.2.2 Making the database

# Build a protein BLAST database (-dbtype prot) from the decompressed Swiss-Prot FASTA.
/home/shared/ncbi-blast-2.11.0+/bin/makeblastdb \
-in ../../data/uniprot_sprot_r2023_04.fasta \
-dbtype prot \
-out ../../blastdb/uniprot_sprot_r2023_04

1.3 Running Blastx

# Run blastx with the CDS FASTA as query against the Swiss-Prot database,
# writing tabular output (-outfmt 6) with an e-value cutoff of 1E-20 on 20 threads.
# NOTE(review): -max_target_seqs 1 reportedly does not guarantee the single best hit
# per query — confirm this is acceptable for the annotation.
/home/shared/ncbi-blast-2.11.0+/bin/blastx \
-query ../../data/Porites_evermanni_CDS.fasta \
-db ../../blastdb/uniprot_sprot_r2023_04 \
-out ../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx.tab \
-evalue 1E-20 \
-num_threads 20 \
-max_target_seqs 1 \
-outfmt 6
# Preview the first two BLAST records
blast_tab="../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx.tab"
printf '%s\n' "First few lines:"
head -n 2 "$blast_tab"

# Report how many lines the BLAST output contains
printf '\n%s\n' "Number of lines in output:"
wc -l "$blast_tab"
## First few lines:
## Parent=Peve_00000001 sp|P61915|ACTPC_ACTTE   57.714  175 72  2   528 7   40  213 2.22e-58    184
## Parent=Peve_00000002 sp|Q569C3|UBP1_RAT  48.000  200 94  5   1287    703 407 601 1.43e-45    177
## Number of lines in output:
## 25243 ../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx.tab

1.4 Joining Blast table with annotations.

1.4.1 Prepping Blast table for easy join

# Replace every "|" with a tab so the parts of the "sp|accession|name" field
# become separate columns
blast_tab="../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx.tab"
blast_sep="../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx_sep.tab"
tr '|' '\t' < "$blast_tab" > "$blast_sep"

# Show the first reformatted record
head -n 1 "$blast_sep"
## Parent=Peve_00000001 sp  P61915  ACTPC_ACTTE 57.714  175 72  2   528 7   40  213 2.22e-58    184

1.4.2 Read the Blast table and UniProt annotation table into R

# Load the tab-separated BLAST hits (no header row, so columns are named V1, V2, ...)
bltabl <- read.csv(
  "../output/02-Peve-reference-annotation/Porites_evermanni_CDS-uniprot_blastx_sep.tab",
  header = FALSE,
  sep = '\t'
)

# Load the tab-separated UniProt annotation table (first row holds the column names)
spgo <- read.csv(
  "https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab",
  header = TRUE,
  sep = '\t'
)

# Display settings shared by both preview tables
preview_options <- list(scrollX = TRUE, scrollY = "400px", scrollCollapse = TRUE, paging = FALSE)

# Show the first rows of each table
datatable(head(bltabl), options = preview_options)
datatable(head(spgo), options = preview_options)
# Attach the UniProt annotations to every BLAST hit by matching the accession
# column of the BLAST table (V3) to the UniProt "Entry" column, then keep V1, V3,
# V13 and the annotation columns. Hits without a matching entry keep NA annotations.
# The join is computed once and reused for both the displayed table and the export.
annot_tab <-
  left_join(bltabl, spgo,  by = c("V3" = "Entry")) %>%
  select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs)

# Interactive view of the joined table
datatable(annot_tab)

# Export the joined table as tab-separated text; col.names = NA writes an empty
# header cell above the row-name column so header and rows stay aligned
write.table(annot_tab, file = "../output/02-Peve-reference-annotation/Porites_evermanni_CDS-IDmapping-2024_09_04.tab", sep = "\t",
            row.names = TRUE, col.names = NA)
# Show the first three lines of the exported ID-mapping table.
head -n 3 ../output/02-Peve-reference-annotation/Porites_evermanni_CDS-IDmapping-2024_09_04.tab
# Tally how often each value of the chosen column occurs in annot_tab
# (the joined table is used directly instead of re-reading a file from disk)
column_name <- "Organism"  # Replace with the name of the column of interest
string_counts_df <- as.data.frame(table(annot_tab[[column_name]]))
colnames(string_counts_df) <- c("String", "Count")

# Rank the values by frequency and keep the ten most common
count_rank <- order(string_counts_df$Count, decreasing = TRUE)
string_counts_df <- string_counts_df[count_rank, ]
top_10_strings <- head(string_counts_df, n = 10)

# Horizontal bar chart of the ten most common values, legend hidden
top_hits_plot <- ggplot(
  data = top_10_strings,
  mapping = aes(x = reorder(String, -Count), y = Count, fill = String)
) +
  geom_col(position = "dodge", color = "black") +
  labs(
    title = "Top 10 Species hits",
    x = column_name,
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()
top_hits_plot

# Rename the `Gene.Ontology..biological.process.` column to `Biological_Process`
colnames(annot_tab)[colnames(annot_tab) == "Gene.Ontology..biological.process."] <- "Biological_Process"

# Separate the `Biological_Process` column into individual biological processes.
# as.character() keeps strsplit() working if the column was read in as a factor.
data_separated <- unlist(strsplit(as.character(annot_tab$Biological_Process), split = ";"))

# Trim whitespace, then drop missing and empty entries so that blanks are not
# tallied as if they were a biological process
data_separated <- trimws(data_separated)
data_separated <- data_separated[!is.na(data_separated) & nzchar(data_separated)]
if (length(data_separated) == 0) {
  stop("No biological process annotations found in annot_tab", call. = FALSE)
}

# Count the occurrences of each biological process, most frequent first
process_counts <- table(data_separated)
process_counts <- data.frame(Biological_Process = names(process_counts), Count = as.integer(process_counts))
process_counts <- process_counts[order(-process_counts$Count), ]

# Select the 20 most predominant biological processes; head() returns fewer rows
# (instead of NA-filled rows) when fewer than 20 distinct processes exist
top_20_processes <- head(process_counts, n = 20)

# Create a color palette for the bars
bar_colors <- rainbow(nrow(top_20_processes))

# Create a vertical bar plot with a different color for each bar; the bars are left
# unlabelled because the process names are drawn in the separate legend image below
barplot(top_20_processes$Count, names.arg = rep("", nrow(top_20_processes)), col = bar_colors,
        ylim = c(0, max(top_20_processes$Count) * 1.25),
        main = "Occurrences of the 20 Most Predominant Biological Processes", xlab = "Biological Process", ylab = "Count")

# Create a separate plot for the legend and write it to a PNG file
png("../output/02-Peve-reference-annotation/GOlegend.png", width = 800, height = 600)
par(mar = c(0, 0, 0, 0))
plot.new()
legend("center", legend = top_20_processes$Biological_Process, fill = bar_colors, cex = 1, title = "Biological Processes")
dev.off()
## png 
##   2
# Embed the legend image GOlegend.png in the rendered document.
knitr::include_graphics("../output/02-Peve-reference-annotation/GOlegend.png")

# Delete the legend image file from the output directory.
rm ../output/02-Peve-reference-annotation/GOlegend.png
LS0tCnRpdGxlOiAiMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbiIKYXV0aG9yOiAiS2F0aGxlZW4gRHVya2luIgpkYXRlOiAiMjAyNC0wOS0wNCIKYWx3YXlzX2FsbG93X2h0bWw6IHRydWUKb3V0cHV0OiAKICBib29rZG93bjo6aHRtbF9kb2N1bWVudDI6CiAgICB0aGVtZTogY29zbW8KICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGdpdGh1Yl9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2RlcHRoOiAzCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGh0bWxfcHJldmlldzogdHJ1ZSAKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShrbml0cikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoa2FibGVFeHRyYSkKbGlicmFyeShEVCkKbGlicmFyeShCaW9zdHJpbmdzKQpsaWJyYXJ5KHRtKQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgZWNobyA9IFRSVUUsICAgICAgICAgIyBEaXNwbGF5IGNvZGUgY2h1bmtzCiAgZXZhbCA9IEZBTFNFLCAgICAgICAgICMgRXZhbHVhdGUgY29kZSBjaHVua3MKICB3YXJuaW5nID0gRkFMU0UsICAgICAjIEhpZGUgd2FybmluZ3MKICBtZXNzYWdlID0gRkFMU0UsICAgICAjIEhpZGUgbWVzc2FnZXMKICBmaWcud2lkdGggPSA2LCAgICAgICAjIFNldCBwbG90IHdpZHRoIGluIGluY2hlcwogIGZpZy5oZWlnaHQgPSA0LCAgICAgICMgU2V0IHBsb3QgaGVpZ2h0IGluIGluY2hlcwogIGZpZy5hbGlnbiA9ICJjZW50ZXIiICMgQWxpZ24gcGxvdHMgdG8gdGhlIGNlbnRlcgopCmBgYAoKQ29kZSB0byBhbm5vdGF0ZSBvdXIgKlAuIGV2ZXJtYW5uaSogcmVmZXJlbmNlIGZpbGVzICh0aGUgKlAuZXZlcm1hbm5pKiB0cmFuc2NyaXB0b21lIGFuZCBnZW5vbWUpIHdpdGggR08gaW5mb3JtYXRpb24KCiMgVHJhbnNjcmlwdG9tZQojIyBSZXRyaWV2ZSB0cmFuc2NyaXB0b21lIGZhc3RhIGZpbGUKCldlJ2xsIGJlIHVzaW5nIHRoZSAqUC4gZXZlcm1hbm5pKiBnZW5lcyBmYXN0YSBmaWxlIGNvbnN0cnVjdGVkIHVzaW5nIHRoZSAqUC4gZXZlcm1hbm5pKiBnZmYgYW5kIHNjYWZmb2xkIGZhc3RhLCBzdG9yZWQgW2hlcmVdKGh0dHBzOi8vZ2FubmV0LmZpc2gud2FzaGluZ3Rvbi5lZHUva2R1cmtpbjEvZGVlcC1kaXZlL0UtUGV2ZS9kYXRhL1Bvcml0ZXNfZXZlcm1hbm5pX0NEUy5mYXN0YSkuIEFjY2Vzc2libGUgb24gdGhlIGBkZWVwLWRpdmVgIFtnZW5vbWljIHJlc291cmNlcyBwYWdlXShodHRwczovL2dpdGh1Yi5jb20vdXJvbC1lNS9kZWVwLWRpdmUvd2lraS9TcGVjaWVzLUNoYXJhY3RlcmlzdGljcy1hbmQtR2Vub21pYy1SZXNvdXJjZXMjZ2Vub21pYy1yZXNvdXJjZXMpLCBpbmNsdWRpbmcgbGlua3MgdG8gcmVsZXZhbnQgY29kZS4KCmBgYHtyIGRvd25sb2FkLXRyYW5zY3JpcHRvbWUsIGVuZ2luZT0nYmFzaCd9CmN1cmwgaHR0
cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9rZHVya2luMS9kZWVwLWRpdmUvRS1QZXZlL2RhdGEvUG9yaXRlc19ldmVybWFubmlfQ0RTLmZhc3RhIFwKLWsgXAo+IC4uLy4uL2RhdGEvUG9yaXRlc19ldmVybWFubmlfQ0RTLmZhc3RhCmBgYAoKTGV0J3MgY2hlY2sgdGhlIGZpbGUKCmBgYHtyIHRyYW5zY3JpcHRvbWUtdmlldy1xdWVyeSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQplY2hvICJGaXJzdCBmZXcgbGluZXM6IgpoZWFkIC0zIC4uLy4uL2RhdGEvUG9yaXRlc19ldmVybWFubmlfQ0RTLmZhc3RhCgplY2hvICIiCmVjaG8gIkhvdyBtYW55IHNlcXVlbmNlcyBhcmUgdGhlcmU/IgpncmVwIC1jICI+IiAuLi8uLi9kYXRhL1Bvcml0ZXNfZXZlcm1hbm5pX0NEUy5mYXN0YQpgYGAKCkZvciBzaW1wbGljaXR5LCBsZXQncyByZWR1Y2UgdGhlIHNlcXVlbmNlIG5hbWVzIHRvIGp1c3QgdGhlIHVuaXF1ZSBpZGVudGlmaWVyICJQYXJlbnQ9UGV2ZV8jIyMjIyMjIyIKCgpgYGB7ciB0cmFuc2NyaXB0b21lLXNlcWxlbmd0aC1oaXN0b2dyYW0sIGV2YWw9VFJVRX0KIyBSZWFkIEZBU1RBIGZpbGUKZmFzdGFfZmlsZSA8LSAiLi4vLi4vZGF0YS9Qb3JpdGVzX2V2ZXJtYW5uaV9DRFMuZmFzdGEiICAjIFJlcGxhY2Ugd2l0aCB0aGUgbmFtZSBvZiB5b3VyIEZBU1RBIGZpbGUKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgRm9yIHNpbXBsaWNpdHksIGxldCdzIHJlZHVjZSB0aGUgc2VxdWVuY2UgbmFtZXMgdG8ganVzdCB0aGUgdW5pcXVlIGlkZW50aWZpZXIgIlBhcmVudD1QZXZlXyMjIyMjIyMjIgpuYW1lcyhzZXF1ZW5jZXMpIDwtIGdzdWIoIl4oUGFyZW50PVBldmVfXFxkKykuKiIsICJcXDEiLCBuYW1lcyhzZXF1ZW5jZXMpKQpoZWFkKG5hbWVzKHNlcXVlbmNlcykpCgojIENhbGN1bGF0ZSBzZXF1ZW5jZSBsZW5ndGhzCnNlcXVlbmNlX2xlbmd0aHMgPC0gd2lkdGgoc2VxdWVuY2VzKQoKIyBDcmVhdGUgYSBkYXRhIGZyYW1lCnNlcXVlbmNlX2xlbmd0aHNfZGYgPC0gZGF0YS5mcmFtZShMZW5ndGggPSBzZXF1ZW5jZV9sZW5ndGhzKQoKIyBQbG90IGhpc3RvZ3JhbSB1c2luZyBnZ3Bsb3QyCmdncGxvdChzZXF1ZW5jZV9sZW5ndGhzX2RmLCBhZXMoeCA9IExlbmd0aCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEsIGNvbG9yID0gImJsYWNrIiwgZmlsbCA9ICJibHVlIiwgYWxwaGEgPSAwLjc1KSArCiAgbGFicyh0aXRsZSA9ICJIaXN0b2dyYW0gb2YgU2VxdWVuY2UgTGVuZ3RocyIsCiAgICAgICB4ID0gIlNlcXVlbmNlIExlbmd0aCIsCiAgICAgICB5ID0gIkZyZXF1ZW5jeSIpICsKICB0aGVtZV9taW5pbWFsKCkKCnN1bW1hcnkoc2VxdWVuY2VfbGVuZ3Roc19kZikKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLUFDR1QtY29tcG9zaXRpb24sIGV2YWw9VFJVRX0KCiMgQ2FsY3VsYXRlIGJhc2UgY29tcG9zaXRpb24KYmFzZV9jb21wb3NpdGlvbiA8LSBhbHBoYWJldEZyZXF1ZW5jeShzZXF1
ZW5jZXMsIGJhc2VPbmx5ID0gVFJVRSkKCiMgQ29udmVydCB0byBkYXRhIGZyYW1lIGFuZCByZXNoYXBlIGZvciBnZ3Bsb3QyCmJhc2VfY29tcG9zaXRpb25fZGYgPC0gYXMuZGF0YS5mcmFtZShiYXNlX2NvbXBvc2l0aW9uKQpiYXNlX2NvbXBvc2l0aW9uX2RmJElEIDwtIHJvd25hbWVzKGJhc2VfY29tcG9zaXRpb25fZGYpCmJhc2VfY29tcG9zaXRpb25fbWVsdGVkIDwtIHJlc2hhcGUyOjptZWx0KGJhc2VfY29tcG9zaXRpb25fZGYsIGlkLnZhcnMgPSAiSUQiLCB2YXJpYWJsZS5uYW1lID0gIkJhc2UiLCB2YWx1ZS5uYW1lID0gIkNvdW50IikKCiMgUGxvdCBiYXNlIGNvbXBvc2l0aW9uIGJhciBjaGFydCB1c2luZyBnZ3Bsb3QyCmdncGxvdChiYXNlX2NvbXBvc2l0aW9uX21lbHRlZCwgYWVzKHggPSBCYXNlLCB5ID0gQ291bnQsIGZpbGwgPSBCYXNlKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiQmFzZSBDb21wb3NpdGlvbiIsCiAgICAgICB4ID0gIkJhc2UiLAogICAgICAgeSA9ICJDb3VudCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGMoIkEiID0gImdyZWVuIiwgIkMiID0gImJsdWUiLCAiRyIgPSAieWVsbG93IiwgIlQiID0gInJlZCIpKQpgYGAKCgpgYGB7ciB0cmFuc2NyaXB0b21lLWNnLW1vdGlmcywgZXZhbD1UUlVFfQoKIyBDb3VudCBDRyBtb3RpZnMgaW4gZWFjaCBzZXF1ZW5jZQpjb3VudF9jZ19tb3RpZnMgPC0gZnVuY3Rpb24oc2VxdWVuY2UpIHsKICBjZ19tb3RpZiA8LSAiQ0ciCiAgcmV0dXJuKGxlbmd0aChncmVnZXhwcihjZ19tb3RpZiwgc2VxdWVuY2UsIGZpeGVkID0gVFJVRSlbWzFdXSkpCn0KCmNnX21vdGlmc19jb3VudHMgPC0gc2FwcGx5KHNlcXVlbmNlcywgY291bnRfY2dfbW90aWZzKQoKIyBDcmVhdGUgYSBkYXRhIGZyYW1lCmNnX21vdGlmc19jb3VudHNfZGYgPC0gZGF0YS5mcmFtZShDR19Db3VudCA9IGNnX21vdGlmc19jb3VudHMpCgojIFBsb3QgQ0cgbW90aWZzIGRpc3RyaWJ1dGlvbiB1c2luZyBnZ3Bsb3QyCmdncGxvdChjZ19tb3RpZnNfY291bnRzX2RmLCBhZXMoeCA9IENHX0NvdW50KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY29sb3IgPSAiYmxhY2siLCBmaWxsID0gImJsdWUiLCBhbHBoYSA9IDAuNzUpICsKICBsYWJzKHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBDRyBNb3RpZnMiLAogICAgICAgeCA9ICJOdW1iZXIgb2YgQ0cgTW90aWZzIiwKICAgICAgIHkgPSAiRnJlcXVlbmN5IikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCiMjIERhdGFiYXNlIENyZWF0aW9uCgojIyMgT2J0YWluIEZhc3RhIChVbmlQcm90L1N3aXNzLVByb3QpCgpgYGB7ciBkb3dubG9hZC1VbmlQU3dpc3NQLWRhdGEsIGVuZ2luZT0nYmFzaCd9CmNkIC4uLy4uL2RhdGEKY3VybCAtTyBodHRwczovL2Z0cC51bmlwcm90Lm9yZy9wdWIvZGF0
YWJhc2VzL3VuaXByb3QvY3VycmVudF9yZWxlYXNlL2tub3dsZWRnZWJhc2UvY29tcGxldGUvdW5pcHJvdF9zcHJvdC5mYXN0YS5negptdiB1bmlwcm90X3Nwcm90LmZhc3RhLmd6IHVuaXByb3Rfc3Byb3RfcjIwMjNfMDQuZmFzdGEuZ3oKZ3VuemlwIC1rIHVuaXByb3Rfc3Byb3RfcjIwMjNfMDQuZmFzdGEuZ3oKYGBgCgojIyMgTWFraW5nIHRoZSBkYXRhYmFzZQoKYGBge3IgbWFrZS1VbmlQU3dpc3NQLWJsYXN0ZGIsIGVuZ2luZT0nYmFzaCd9Ci9ob21lL3NoYXJlZC9uY2JpLWJsYXN0LTIuMTEuMCsvYmluL21ha2VibGFzdGRiIFwKLWluIC4uLy4uL2RhdGEvdW5pcHJvdF9zcHJvdF9yMjAyM18wNC5mYXN0YSBcCi1kYnR5cGUgcHJvdCBcCi1vdXQgLi4vLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzA0CmBgYAoKCiMjIFJ1bm5pbmcgQmxhc3R4CgpgYGB7ciB0cmFuc2NyaXB0b21lLWJsYXN0eCwgZW5naW5lPSdiYXNoJ30KL2hvbWUvc2hhcmVkL25jYmktYmxhc3QtMi4xMS4wKy9iaW4vYmxhc3R4IFwKLXF1ZXJ5IC4uLy4uL2RhdGEvUG9yaXRlc19ldmVybWFubmlfQ0RTLmZhc3RhIFwKLWRiIC4uLy4uL2JsYXN0ZGIvdW5pcHJvdF9zcHJvdF9yMjAyM18wNCBcCi1vdXQgLi4vb3V0cHV0LzAyLVBldmUtcmVmZXJlbmNlLWFubm90YXRpb24vUG9yaXRlc19ldmVybWFubmlfQ0RTLXVuaXByb3RfYmxhc3R4LnRhYiBcCi1ldmFsdWUgMUUtMjAgXAotbnVtX3RocmVhZHMgMjAgXAotbWF4X3RhcmdldF9zZXFzIDEgXAotb3V0Zm10IDYKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLWJsYXN0LWxvb2ssIGVuZ2luZT0nYmFzaCcsIGV2YWw9VFJVRX0KZWNobyAiRmlyc3QgZmV3IGxpbmVzOiIKaGVhZCAtMiAuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9Qb3JpdGVzX2V2ZXJtYW5uaV9DRFMtdW5pcHJvdF9ibGFzdHgudGFiCgplY2hvICJOdW1iZXIgb2YgbGluZXMgaW4gb3V0cHV0OiIKd2MgLWwgLi4vb3V0cHV0LzAyLVBldmUtcmVmZXJlbmNlLWFubm90YXRpb24vUG9yaXRlc19ldmVybWFubmlfQ0RTLXVuaXByb3RfYmxhc3R4LnRhYgpgYGAKCgojIyBKb2luaW5nIEJsYXN0IHRhYmxlIHdpdGggYW5ub2F0aW9ucy4KCiMjIyBQcmVwcGluZyBCbGFzdCB0YWJsZSBmb3IgZWFzeSBqb2luCgpgYGB7ciB0cmFuc2NyaXB0b21lLXNlcGFyYXRlLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CnRyICd8JyAnXHQnIDwgLi4vb3V0cHV0LzAyLVBldmUtcmVmZXJlbmNlLWFubm90YXRpb24vUG9yaXRlc19ldmVybWFubmlfQ0RTLXVuaXByb3RfYmxhc3R4LnRhYiBcCj4gLi4vb3V0cHV0LzAyLVBldmUtcmVmZXJlbmNlLWFubm90YXRpb24vUG9yaXRlc19ldmVybWFubmlfQ0RTLXVuaXByb3RfYmxhc3R4X3NlcC50YWIKCmhlYWQgLTEgLi4vb3V0cHV0LzAyLVBldmUtcmVmZXJlbmNlLWFubm90YXRpb24vUG9yaXRlc19ldmVybWFubmlfQ0RTLXVuaXByb3RfYmxhc3R4X3NlcC50YWIKCmBgYAoKIyMjIENvdWxkIGRvIHNvbWUg
Y29vbCBzdHVmZiBpbiBSIGhlcmUgcmVhZGluZyBpbiB0YWJsZQoKYGBge3IgdHJhbnNjcmlwdG9tZS1yZWFkLWRhdGEsIGV2YWw9VFJVRSwgY2FjaGU9VFJVRX0KYmx0YWJsIDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9Qb3JpdGVzX2V2ZXJtYW5uaV9DRFMtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IEZBTFNFKQoKc3BnbyA8LSByZWFkLmNzdigiaHR0cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9zZWFzaGVsbC9zbmFwcy91bmlwcm90X3RhYmxlX3IyMDIzXzAxLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IFRSVUUpCgpkYXRhdGFibGUoaGVhZChibHRhYmwpLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSwgc2Nyb2xsWSA9ICI0MDBweCIsIHNjcm9sbENvbGxhcHNlID0gVFJVRSwgcGFnaW5nID0gRkFMU0UpKQpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtc3Bnby10YWJsZSwgZXZhbD1UUlVFfQpkYXRhdGFibGUoaGVhZChzcGdvKSwgb3B0aW9ucyA9IGxpc3Qoc2Nyb2xsWCA9IFRSVUUsIHNjcm9sbFkgPSAiNDAwcHgiLCBzY3JvbGxDb2xsYXBzZSA9IFRSVUUsIHBhZ2luZyA9IEZBTFNFKSkKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLXNlZSwgZXZhbD1UUlVFfQpkYXRhdGFibGUoCiAgbGVmdF9qb2luKGJsdGFibCwgc3BnbywgIGJ5ID0gYygiVjMiID0gIkVudHJ5IikpICU+JQogIHNlbGVjdChWMSwgVjMsIFYxMywgUHJvdGVpbi5uYW1lcywgT3JnYW5pc20sIEdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy4sIEdlbmUuT250b2xvZ3kuSURzKQopCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1qb2luLCBldmFsPVRSVUV9CmFubm90X3RhYiA8LQogIGxlZnRfam9pbihibHRhYmwsIHNwZ28sICBieSA9IGMoIlYzIiA9ICJFbnRyeSIpKSAlPiUKICBzZWxlY3QoVjEsIFYzLCBWMTMsIFByb3RlaW4ubmFtZXMsIE9yZ2FuaXNtLCBHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuLCBHZW5lLk9udG9sb2d5LklEcykKCndyaXRlLnRhYmxlKGFubm90X3RhYiwgZmlsZSA9ICIuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9Qb3JpdGVzX2V2ZXJtYW5uaV9DRFMtSURtYXBwaW5nLTIwMjRfMDlfMDQudGFiIiwgc2VwID0gIlx0IiwKICAgICAgICAgICAgcm93Lm5hbWVzID0gVFJVRSwgY29sLm5hbWVzID0gTkEpCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS12aWV3LWhlYWRlcnMsIGVuZ2luZT0nYmFzaCd9CmhlYWQgLW4gMyAuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9Qb3JpdGVzX2V2ZXJtYW5uaV9DRFMtSURtYXBwaW5nLTIwMjRfMDlfMDQudGFiCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1zcGVjaWVzLWhpdHMsIGV2YWw9VFJVRX0KIyBSZWFkIGRhdGFzZXQKI2RhdGFzZXQgPC0gcmVhZC5jc3YoIi4uL291dHB1dC9ibGFzdF9hbm5vdF9nby50YWIiLCBzZXAg
PSAnXHQnKSAgIyBSZXBsYWNlIHdpdGggdGhlIHBhdGggdG8geW91ciBkYXRhc2V0CgojIFNlbGVjdCB0aGUgY29sdW1uIG9mIGludGVyZXN0CmNvbHVtbl9uYW1lIDwtICJPcmdhbmlzbSIgICMgUmVwbGFjZSB3aXRoIHRoZSBuYW1lIG9mIHRoZSBjb2x1bW4gb2YgaW50ZXJlc3QKY29sdW1uX2RhdGEgPC0gYW5ub3RfdGFiW1tjb2x1bW5fbmFtZV1dCgojIENvdW50IHRoZSBvY2N1cnJlbmNlcyBvZiB0aGUgc3RyaW5ncyBpbiB0aGUgY29sdW1uCnN0cmluZ19jb3VudHMgPC0gdGFibGUoY29sdW1uX2RhdGEpCgojIENvbnZlcnQgdG8gYSBkYXRhIGZyYW1lLCBzb3J0IGJ5IGNvdW50LCBhbmQgc2VsZWN0IHRoZSB0b3AgMTAKc3RyaW5nX2NvdW50c19kZiA8LSBhcy5kYXRhLmZyYW1lKHN0cmluZ19jb3VudHMpCmNvbG5hbWVzKHN0cmluZ19jb3VudHNfZGYpIDwtIGMoIlN0cmluZyIsICJDb3VudCIpCnN0cmluZ19jb3VudHNfZGYgPC0gc3RyaW5nX2NvdW50c19kZltvcmRlcihzdHJpbmdfY291bnRzX2RmJENvdW50LCBkZWNyZWFzaW5nID0gVFJVRSksIF0KdG9wXzEwX3N0cmluZ3MgPC0gaGVhZChzdHJpbmdfY291bnRzX2RmLCBuID0gMTApCgojIFBsb3QgdGhlIHRvcCAxMCBtb3N0IGNvbW1vbiBzdHJpbmdzIHVzaW5nIGdncGxvdDIKZ2dwbG90KHRvcF8xMF9zdHJpbmdzLCBhZXMoeCA9IHJlb3JkZXIoU3RyaW5nLCAtQ291bnQpLCB5ID0gQ291bnQsIGZpbGwgPSBTdHJpbmcpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIHBvc2l0aW9uID0gImRvZGdlIiwgY29sb3IgPSAiYmxhY2siKSArCiAgbGFicyh0aXRsZSA9ICJUb3AgMTAgU3BlY2llcyBoaXRzIiwKICAgICAgIHggPSBjb2x1bW5fbmFtZSwKICAgICAgIHkgPSAiQ291bnQiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpICsKICBjb29yZF9mbGlwKCkKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLXRvcC1nbywgZXZhbD1UUlVFfQoKI2RhdGEgPC0gcmVhZC5jc3YoIi4uL291dHB1dC9ibGFzdF9hbm5vdF9nby50YWIiLCBzZXAgPSAnXHQnKQoKIyBSZW5hbWUgdGhlIGBHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuYCBjb2x1bW4gdG8gYEJpb2xvZ2ljYWxfUHJvY2Vzc2AKY29sbmFtZXMoYW5ub3RfdGFiKVtjb2xuYW1lcyhhbm5vdF90YWIpID09ICJHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuIl0gPC0gIkJpb2xvZ2ljYWxfUHJvY2VzcyIKCiMgU2VwYXJhdGUgdGhlIGBCaW9sb2dpY2FsX1Byb2Nlc3NgIGNvbHVtbiBpbnRvIGluZGl2aWR1YWwgYmlvbG9naWNhbCBwcm9jZXNzZXMKZGF0YV9zZXBhcmF0ZWQgPC0gdW5saXN0KHN0cnNwbGl0KGFubm90X3RhYiRCaW9sb2dpY2FsX1Byb2Nlc3MsIHNwbGl0ID0gIjsiKSkKCiMgVHJpbSB3aGl0ZXNwYWNlIGZyb20gdGhlIGJpb2xvZ2ljYWwgcHJvY2Vzc2VzCmRhdGFfc2VwYXJhdGVkIDwtIGdzdWIoIl5cXHMrfFxccyskIiwgIiIsIGRh
dGFfc2VwYXJhdGVkKQoKIyBDb3VudCB0aGUgb2NjdXJyZW5jZXMgb2YgZWFjaCBiaW9sb2dpY2FsIHByb2Nlc3MKcHJvY2Vzc19jb3VudHMgPC0gdGFibGUoZGF0YV9zZXBhcmF0ZWQpCnByb2Nlc3NfY291bnRzIDwtIGRhdGEuZnJhbWUoQmlvbG9naWNhbF9Qcm9jZXNzID0gbmFtZXMocHJvY2Vzc19jb3VudHMpLCBDb3VudCA9IGFzLmludGVnZXIocHJvY2Vzc19jb3VudHMpKQpwcm9jZXNzX2NvdW50cyA8LSBwcm9jZXNzX2NvdW50c1tvcmRlcigtcHJvY2Vzc19jb3VudHMkQ291bnQpLCBdCgojIFNlbGVjdCB0aGUgMjAgbW9zdCBwcmVkb21pbmFudCBiaW9sb2dpY2FsIHByb2Nlc3Nlcwp0b3BfMjBfcHJvY2Vzc2VzIDwtIHByb2Nlc3NfY291bnRzWzE6MjAsIF0KCiMgQ3JlYXRlIGEgY29sb3IgcGFsZXR0ZSBmb3IgdGhlIGJhcnMKYmFyX2NvbG9ycyA8LSByYWluYm93KG5yb3codG9wXzIwX3Byb2Nlc3NlcykpCgojIENyZWF0ZSBhIHN0YWdnZXJlZCB2ZXJ0aWNhbCBiYXIgcGxvdCB3aXRoIGRpZmZlcmVudCBjb2xvcnMgZm9yIGVhY2ggYmFyCmJhcnBsb3QodG9wXzIwX3Byb2Nlc3NlcyRDb3VudCwgbmFtZXMuYXJnID0gcmVwKCIiLCBucm93KHRvcF8yMF9wcm9jZXNzZXMpKSwgY29sID0gYmFyX2NvbG9ycywKICAgICAgICB5bGltID0gYygwLCBtYXgodG9wXzIwX3Byb2Nlc3NlcyRDb3VudCkgKiAxLjI1KSwKICAgICAgICBtYWluID0gIk9jY3VycmVuY2VzIG9mIHRoZSAyMCBNb3N0IFByZWRvbWluYW50IEJpb2xvZ2ljYWwgUHJvY2Vzc2VzIiwgeGxhYiA9ICJCaW9sb2dpY2FsIFByb2Nlc3MiLCB5bGFiID0gIkNvdW50IikKCgojIENyZWF0ZSBhIHNlcGFyYXRlIHBsb3QgZm9yIHRoZSBsZWdlbmQKcG5nKCIuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9HT2xlZ2VuZC5wbmciLCB3aWR0aCA9IDgwMCwgaGVpZ2h0ID0gNjAwKQpwYXIobWFyID0gYygwLCAwLCAwLCAwKSkKcGxvdC5uZXcoKQpsZWdlbmQoImNlbnRlciIsIGxlZ2VuZCA9IHRvcF8yMF9wcm9jZXNzZXMkQmlvbG9naWNhbF9Qcm9jZXNzLCBmaWxsID0gYmFyX2NvbG9ycywgY2V4ID0gMSwgdGl0bGUgPSAiQmlvbG9naWNhbCBQcm9jZXNzZXMiKQpkZXYub2ZmKCkKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLWdvLWxlZ2VuZCwgZXZhbD1UUlVFLCBmaWcud2lkdGggPSAxMDAgLGZpZy5oZWlnaHQgPSAxMDB9CmtuaXRyOjppbmNsdWRlX2dyYXBoaWNzKCIuLi9vdXRwdXQvMDItUGV2ZS1yZWZlcmVuY2UtYW5ub3RhdGlvbi9HT2xlZ2VuZC5wbmciKQpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtcmVtb3ZlLWxlZ2VuZC1maWxlLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CnJtIC4uL291dHB1dC8wMi1QZXZlLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0dPbGVnZW5kLnBuZwpgYGA=