Code to annotate our A. pulchra reference files (the A. millepora transcriptome and genome) with GO information

1 Transcriptome

1.1 Retrieve transcriptome fasta file

We’ll be using the A. millepora NCBI rna.fna file, stored here and accessible on the deep-dive genomic resources page

# Download the transcript FASTA used as the reference transcriptome.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were FASTA, and --output (rather than a shell
# redirect) avoids leaving an empty file behind when the request fails.
# NOTE(review): -k turns off TLS certificate verification; it is kept because
# the original relied on it - confirm whether the host still requires it.
curl --fail \
-k \
--output ../../data/Apul_GCF_013753865.1_rna.fna \
https://gannet.fish.washington.edu/acropora/E5-deep-dive/Transcripts/Apul_GCF_013753865.1_rna.fna

Let’s check the file

# Sanity-check the download: print the first three lines, then count the
# lines containing ">" (used here as the number of sequences).
fasta=../../data/Apul_GCF_013753865.1_rna.fna

echo "First few lines:"
head -n 3 "${fasta}"

echo ""
echo "How many sequences are there?"
grep -c ">" "${fasta}"
## First few lines:
## >XM_029323402.2 PREDICTED: Acropora millepora lipase ZK262.3-like (LOC114977611), mRNA
## GAAAGACCCTGGGAACGAGGTTGCAGGTTTTCCTAATGTTAATCTCGGTAATTGAAAAGGTTGGACTTTTGGAAGCGAGA
## ATTCAACGAAAAATTCATAATAAAATTAAGTGGGGCGGATCGACCTTGATGATGTGGGGCGGAACGATTGTAATTCCGTC
## 
## How many sequences are there?
## 50570
# Load the transcriptome FASTA as a set of DNA sequences
fasta_file <- "../../data/Apul_GCF_013753865.1_rna.fna"
sequences <- readDNAStringSet(filepath = fasta_file)

# One length per sequence, wrapped in a single-column data frame for ggplot2
sequence_lengths <- width(sequences)
sequence_lengths_df <- data.frame(Length = sequence_lengths)

# Distribution of sequence lengths, using bins one base wide
ggplot(data = sequence_lengths_df, mapping = aes(x = Length)) +
  geom_histogram(
    binwidth = 1,
    color = "black",
    fill = "blue",
    alpha = 0.75
  ) +
  labs(
    title = "Histogram of Sequence Lengths",
    x = "Sequence Length",
    y = "Frequency"
  ) +
  theme_minimal()

# Min / quartiles / mean / max of the lengths
summary(sequence_lengths_df)
##      Length     
##  Min.   :   66  
##  1st Qu.: 1077  
##  Median : 1778  
##  Mean   : 2218  
##  3rd Qu.: 2811  
##  Max.   :65009
# Calculate base composition: one row per sequence, with one column per base
# (A, C, G, T) plus an "other" column for everything else
base_composition <- alphabetFrequency(sequences, baseOnly = TRUE)

# Per-sequence long-format table (kept so existing object names still exist)
base_composition_df <- as.data.frame(base_composition)
base_composition_df$ID <- rownames(base_composition_df)
base_composition_melted <- reshape2::melt(base_composition_df, id.vars = "ID", variable.name = "Base", value.name = "Count")

# Total each base across all sequences before plotting. Feeding the
# per-sequence rows straight to geom_bar() draws one bar per sequence at the
# same position (all rows for a base share one x value and one fill group, so
# "dodge" has nothing to separate), and the visible bar ends up showing the
# largest single-sequence count rather than the transcriptome-wide total.
base_totals <- colSums(base_composition)
base_totals_df <- data.frame(
  Base = factor(names(base_totals), levels = names(base_totals)),
  Count = as.numeric(base_totals)
)

# Plot total base composition; "other" gets an explicit colour because the
# composition matrix includes that column alongside the four bases
ggplot(base_totals_df, aes(x = Base, y = Count, fill = Base)) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Base Composition",
       x = "Base",
       y = "Count") +
  theme_minimal() +
  scale_fill_manual(values = c("A" = "green", "C" = "blue", "G" = "yellow", "T" = "red", "other" = "grey50"))

# Count occurrences of the dinucleotide "CG" in one sequence.
#
# sequence: a single sequence; it is coerced with as.character() before
#           matching, so a plain string works.
# Returns:  an integer count, 0 when "CG" does not occur.
count_cg_motifs <- function(sequence) {
  cg_motif <- "CG"
  hits <- gregexpr(cg_motif, as.character(sequence), fixed = TRUE)[[1]]
  # gregexpr() signals "no match" with a single -1, so length(hits) would
  # report 1 for a sequence with no CG at all; count only real positions.
  sum(hits > 0L)
}

# Apply the CG counter to every sequence in the set
cg_motifs_counts <- sapply(X = sequences, FUN = count_cg_motifs)

# Single-column data frame so ggplot2 can map the counts
cg_motifs_counts_df <- data.frame(CG_Count = cg_motifs_counts)

# Histogram of per-sequence CG counts, one bin per integer count
ggplot(data = cg_motifs_counts_df, mapping = aes(x = CG_Count)) +
  geom_histogram(
    binwidth = 1,
    color = "black",
    fill = "blue",
    alpha = 0.75
  ) +
  labs(
    title = "Distribution of CG Motifs",
    x = "Number of CG Motifs",
    y = "Frequency"
  ) +
  theme_minimal()

1.2 Database Creation

1.2.1 Obtain Fasta (UniProt/Swiss-Prot)

# Fetch the Swiss-Prot FASTA, give it a release-stamped name, and unpack it.
# set -e stops at the first failing step, so a missing data directory or a
# failed download cannot lead to mv/gunzip acting on the wrong or absent files.
set -e
cd ../../data
# --fail: exit non-zero on an HTTP error rather than saving an error page as .gz
curl --fail -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
# NOTE(review): the URL serves whatever "current_release" is at download time;
# the r2023_04 label is only accurate if that was the release then - confirm.
mv uniprot_sprot.fasta.gz uniprot_sprot_r2023_04.fasta.gz
# -k keeps the compressed file next to the decompressed FASTA
gunzip -k uniprot_sprot_r2023_04.fasta.gz
1.2.2 Making the database

# Build a protein BLAST database from the unpacked Swiss-Prot FASTA
blast_bin=/home/shared/ncbi-blast-2.11.0+/bin

"${blast_bin}/makeblastdb" \
-dbtype prot \
-in ../../data/uniprot_sprot_r2023_04.fasta \
-out ../../blastdb/uniprot_sprot_r2023_04

1.3 Running Blastx

# Translate-and-search the transcripts against the Swiss-Prot database,
# writing tabular (outfmt 6) hits with an e-value cutoff of 1E-20.
# NOTE(review): -max_target_seqs 1 limits the number of subjects per query,
# but a query may presumably still yield more than one row - confirm whether
# downstream steps expect exactly one row per transcript.
blast_bin=/home/shared/ncbi-blast-2.11.0+/bin

"${blast_bin}/blastx" \
-db ../../blastdb/uniprot_sprot_r2023_04 \
-query ../../data/Apul_GCF_013753865.1_rna.fna \
-evalue 1E-20 \
-max_target_seqs 1 \
-outfmt 6 \
-num_threads 20 \
-out ../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-uniprot_blastx.tab
# Quick look at the BLAST result: first two rows and the total row count
blast_tab=../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-uniprot_blastx.tab

echo "First few lines:"
head -n 2 "${blast_tab}"

echo "Number of lines in output:"
wc -l "${blast_tab}"
## First few lines:
## XM_029323402.2   sp|Q9XTR8|LIP1_CAEEL    31.321  265 157 5   578 1306    61  322 3.59e-25    111
## XM_029323410.2   sp|Q9NUQ6|SPS2L_HUMAN   32.447  376 201 8   280 1284    9   372 7.10e-39    157
## Number of lines in output:
## 31701 ../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-uniprot_blastx.tab

1.4 Joining Blast table with annotations.

1.4.1 Prepping Blast table for easy join

# The subject ID column is pipe-delimited (sp|ACCESSION|NAME); turn every "|"
# into a tab so the accession sits in its own column for the join.
out_dir=../output/02-Apul-reference-annotation
blast_tab="${out_dir}/Apul_GCF_013753865.1_rna-uniprot_blastx.tab"
sep_tab="${out_dir}/Apul_GCF_013753865.1_rna-uniprot_blastx_sep.tab"

tr '|' '\t' < "${blast_tab}" > "${sep_tab}"

head -n 1 "${sep_tab}"
## XM_029323402.2   sp  Q9XTR8  LIP1_CAEEL  31.321  265 157 5   578 1306    61  322 3.59e-25    111

1.4.2 Could do some cool stuff in R here reading in table

# BLAST hits with the subject ID already split into columns; the file has no
# header row, so columns are named V1, V2, ...
bltabl <- read.csv(
  file = "../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-uniprot_blastx_sep.tab",
  header = FALSE,
  sep = "\t"
)

# UniProt annotation table (with header row), read directly from its URL
spgo <- read.csv(
  file = "https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab",
  header = TRUE,
  sep = "\t"
)

# Preview the first rows of each input table in a scrollable, unpaged widget
bltabl %>%
  head() %>%
  datatable(options = list(scrollX = TRUE, scrollY = "400px", scrollCollapse = TRUE, paging = FALSE))
spgo %>%
  head() %>%
  datatable(options = list(scrollX = TRUE, scrollY = "400px", scrollCollapse = TRUE, paging = FALSE))

# Preview the joined table: BLAST accession (V3) matched to the UniProt Entry,
# keeping only the columns of interest
bltabl %>%
  left_join(spgo, by = c("V3" = "Entry")) %>%
  select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs) %>%
  datatable()
# Attach UniProt annotation to every BLAST row (rows without a matching Entry
# are kept with NA annotation), then keep query ID (V1), accession (V3),
# e-value (V13) and the annotation columns.
annot_tab <- bltabl %>%
  left_join(spgo, by = c("V3" = "Entry")) %>%
  select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs)

# Write as tab-separated text; row names are written as a first column and
# col.names = NA gives that column a blank header cell.
write.table(
  annot_tab,
  file = "../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-IDmapping-2024_08_21.tab",
  sep = "\t",
  row.names = TRUE,
  col.names = NA
)
# Show the first three lines (header plus two rows) of the written annotation table
head -n 3 ../output/02-Apul-reference-annotation/Apul_GCF_013753865.1_rna-IDmapping-2024_08_21.tab
# Tally how often each source organism appears among the annotated hits and
# plot the ten most frequent.
column_name <- "Organism"
column_data <- annot_tab[[column_name]]

# Frequency of each distinct value in the chosen column
string_counts <- table(column_data)

# Two-column data frame (value, count), most frequent first, cut to ten rows
string_counts_df <- as.data.frame(string_counts)
colnames(string_counts_df) <- c("String", "Count")
string_counts_df <- string_counts_df[
  order(string_counts_df$Count, decreasing = TRUE),
]
top_10_strings <- head(string_counts_df, n = 10)

# Horizontal bars ordered by count; the fill legend is hidden because the
# axis labels already name each organism
ggplot(
  data = top_10_strings,
  mapping = aes(x = reorder(String, -Count), y = Count, fill = String)
) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  labs(
    title = "Top 10 Species hits",
    x = column_name,
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

# Rename the `Gene.Ontology..biological.process.` column to `Biological_Process`.
# This changes annot_tab itself for everything that runs afterwards.
colnames(annot_tab)[colnames(annot_tab) == "Gene.Ontology..biological.process."] <- "Biological_Process"

# Each cell holds several terms separated by ";" - split them into one flat vector
data_separated <- unlist(strsplit(annot_tab$Biological_Process, split = ";"))

# Trim leading/trailing whitespace left over from the split
data_separated <- gsub("^\\s+|\\s+$", "", data_separated)

# Count the occurrences of each biological process, most frequent first
process_counts <- table(data_separated)
process_counts <- data.frame(Biological_Process = names(process_counts), Count = as.integer(process_counts))
process_counts <- process_counts[order(-process_counts$Count), ]

# Select up to the 20 most predominant biological processes.
# head() is used instead of [1:20, ] because indexing past the last row pads
# the result with all-NA rows, which would turn max() below into NA and break
# the ylim of the bar plot when fewer than 20 distinct terms exist.
top_20_processes <- head(process_counts, n = 20)

# One colour per bar; the same vector is reused for the separate legend
bar_colors <- rainbow(nrow(top_20_processes))

# Bar plot with blank bar labels (the term names go in a separate legend) and
# 25% headroom above the tallest bar
barplot(top_20_processes$Count, names.arg = rep("", nrow(top_20_processes)), col = bar_colors,
        ylim = c(0, max(top_20_processes$Count) * 1.25),
        main = "Occurrences of the 20 Most Predominant Biological Processes", xlab = "Biological Process", ylab = "Count")

# Draw the colour key for the bar plot on its own PNG canvas
png(
  filename = "../output/02-Apul-reference-annotation/GOlegend.png",
  width = 800,
  height = 600
)
# No margins: the legend is the only thing on this page
par(mar = rep(0, 4))
plot.new()
legend(
  "center",
  title = "Biological Processes",
  legend = top_20_processes$Biological_Process,
  fill = bar_colors,
  cex = 1
)
# Close the device so the file is written
dev.off()
## png 
##   2
# Insert the legend PNG written above into the rendered document
knitr::include_graphics("../output/02-Apul-reference-annotation/GOlegend.png")

# Delete the temporary legend image now that it has been shown.
# NOTE(review): this removes the file that include_graphics() referenced just
# above - confirm the image has already been embedded or copied by the time
# this runs, otherwise the rendered document may point at a missing file.
rm ../output/02-Apul-reference-annotation/GOlegend.png
LS0tCnRpdGxlOiAiMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbiIKYXV0aG9yOiAiS2F0aGxlZW4gRHVya2luIgpkYXRlOiAiMjAyNC0wOC0yMCIKYWx3YXlzX2FsbG93X2h0bWw6IHRydWUKb3V0cHV0OiAKICBib29rZG93bjo6aHRtbF9kb2N1bWVudDI6CiAgICB0aGVtZTogY29zbW8KICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGdpdGh1Yl9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2RlcHRoOiAzCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGh0bWxfcHJldmlldzogdHJ1ZSAKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShrbml0cikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoa2FibGVFeHRyYSkKbGlicmFyeShEVCkKbGlicmFyeShCaW9zdHJpbmdzKQpsaWJyYXJ5KHRtKQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgZWNobyA9IFRSVUUsICAgICAgICAgIyBEaXNwbGF5IGNvZGUgY2h1bmtzCiAgZXZhbCA9IEZBTFNFLCAgICAgICAgICMgRXZhbHVhdGUgY29kZSBjaHVua3MKICB3YXJuaW5nID0gRkFMU0UsICAgICAjIEhpZGUgd2FybmluZ3MKICBtZXNzYWdlID0gRkFMU0UsICAgICAjIEhpZGUgbWVzc2FnZXMKICBmaWcud2lkdGggPSA2LCAgICAgICAjIFNldCBwbG90IHdpZHRoIGluIGluY2hlcwogIGZpZy5oZWlnaHQgPSA0LCAgICAgICMgU2V0IHBsb3QgaGVpZ2h0IGluIGluY2hlcwogIGZpZy5hbGlnbiA9ICJjZW50ZXIiICMgQWxpZ24gcGxvdHMgdG8gdGhlIGNlbnRlcgopCmBgYAoKQ29kZSB0byBhbm5vdGF0ZSBvdXIgKkEuIHB1bGNocmEqIHJlZmVyZW5jZSBmaWxlcyAodGhlICpBLiBtaWxsaXBvcmEqIHRyYW5zY3JpcHRvbWUgYW5kIGdlbm9tZSkgd2l0aCBHTyBpbmZvcm1hdGlvbgoKIyBUcmFuc2NyaXB0b21lCiMjIFJldHJpZXZlIHRyYW5zY3JpcHRvbWUgZmFzdGEgZmlsZQoKV2UnbGwgYmUgdXNpbmcgdGhlICpBLiBtaWxsaXBvcmEqIFtOQ0JJXShodHRwczovL3d3dy5uY2JpLm5sbS5uaWguZ292L2RhdGFzZXRzL2dlbm9tZS9HQ0ZfMDEzNzUzODY1LjEvKSBybmEuZm5hIGZpbGUsIHN0b3JlZCBbaGVyZV0oaHR0cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9hY3JvcG9yYS9FNS1kZWVwLWRpdmUvVHJhbnNjcmlwdHMvQXB1bF9HQ0ZfMDEzNzUzODY1LjFfcm5hLmZuYSkgYW5kIGFjY2Vzc2libGUgb24gdGhlIGBkZWVwLWRpdmVgIFtnZW5vbWljIHJlc291cmNlcyBwYWdlXShodHRwczovL2dpdGh1Yi5jb20vdXJvbC1lNS9kZWVwLWRpdmUvd2lraS9TcGVjaWVzLUNoYXJhY3RlcmlzdGljcy1hbmQtR2Vub21pYy1SZXNvdXJjZXMjZ2Vub21pYy1yZXNvdXJjZXMpCgpgYGB7ciBkb3dubG9hZC10cmFuc2NyaXB0b21lLCBlbmdpbmU9J2Jhc2gnfQpjdXJsIGh0dHBzOi8vZ2FubmV0LmZpc2gud2FzaGlu
Z3Rvbi5lZHUvYWNyb3BvcmEvRTUtZGVlcC1kaXZlL1RyYW5zY3JpcHRzL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS5mbmEgXAotayBcCj4gLi4vLi4vZGF0YS9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEuZm5hCmBgYAoKTGV0J3MgY2hlY2sgdGhlIGZpbGUKCmBgYHtyIHRyYW5zY3JpcHRvbWUtdmlldy1xdWVyeSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQplY2hvICJGaXJzdCBmZXcgbGluZXM6IgpoZWFkIC0zIC4uLy4uL2RhdGEvQXB1bF9HQ0ZfMDEzNzUzODY1LjFfcm5hLmZuYQoKZWNobyAiIgplY2hvICJIb3cgbWFueSBzZXF1ZW5jZXMgYXJlIHRoZXJlPyIKZ3JlcCAtYyAiPiIgLi4vLi4vZGF0YS9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEuZm5hCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1zZXFsZW5ndGgtaGlzdG9ncmFtLCBldmFsPVRSVUV9CiMgUmVhZCBGQVNUQSBmaWxlCmZhc3RhX2ZpbGUgPC0gIi4uLy4uL2RhdGEvQXB1bF9HQ0ZfMDEzNzUzODY1LjFfcm5hLmZuYSIgICMgUmVwbGFjZSB3aXRoIHRoZSBuYW1lIG9mIHlvdXIgRkFTVEEgZmlsZQpzZXF1ZW5jZXMgPC0gcmVhZEROQVN0cmluZ1NldChmYXN0YV9maWxlKQoKIyBDYWxjdWxhdGUgc2VxdWVuY2UgbGVuZ3RocwpzZXF1ZW5jZV9sZW5ndGhzIDwtIHdpZHRoKHNlcXVlbmNlcykKCiMgQ3JlYXRlIGEgZGF0YSBmcmFtZQpzZXF1ZW5jZV9sZW5ndGhzX2RmIDwtIGRhdGEuZnJhbWUoTGVuZ3RoID0gc2VxdWVuY2VfbGVuZ3RocykKCiMgUGxvdCBoaXN0b2dyYW0gdXNpbmcgZ2dwbG90MgpnZ3Bsb3Qoc2VxdWVuY2VfbGVuZ3Roc19kZiwgYWVzKHggPSBMZW5ndGgpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxLCBjb2xvciA9ICJibGFjayIsIGZpbGwgPSAiYmx1ZSIsIGFscGhhID0gMC43NSkgKwogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mIFNlcXVlbmNlIExlbmd0aHMiLAogICAgICAgeCA9ICJTZXF1ZW5jZSBMZW5ndGgiLAogICAgICAgeSA9ICJGcmVxdWVuY3kiKSArCiAgdGhlbWVfbWluaW1hbCgpCgpzdW1tYXJ5KHNlcXVlbmNlX2xlbmd0aHNfZGYpCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1BQ0dULWNvbXBvc2l0aW9uLCBldmFsPVRSVUV9CgojIENhbGN1bGF0ZSBiYXNlIGNvbXBvc2l0aW9uCmJhc2VfY29tcG9zaXRpb24gPC0gYWxwaGFiZXRGcmVxdWVuY3koc2VxdWVuY2VzLCBiYXNlT25seSA9IFRSVUUpCgojIENvbnZlcnQgdG8gZGF0YSBmcmFtZSBhbmQgcmVzaGFwZSBmb3IgZ2dwbG90MgpiYXNlX2NvbXBvc2l0aW9uX2RmIDwtIGFzLmRhdGEuZnJhbWUoYmFzZV9jb21wb3NpdGlvbikKYmFzZV9jb21wb3NpdGlvbl9kZiRJRCA8LSByb3duYW1lcyhiYXNlX2NvbXBvc2l0aW9uX2RmKQpiYXNlX2NvbXBvc2l0aW9uX21lbHRlZCA8LSByZXNoYXBlMjo6bWVsdChiYXNlX2NvbXBvc2l0aW9uX2RmLCBpZC52YXJzID0gIklEIiwgdmFyaWFibGUubmFtZSA9ICJCYXNlIiwgdmFsdWUubmFtZSA9ICJDb3VudCIpCgojIFBsb3QgYmFz
ZSBjb21wb3NpdGlvbiBiYXIgY2hhcnQgdXNpbmcgZ2dwbG90MgpnZ3Bsb3QoYmFzZV9jb21wb3NpdGlvbl9tZWx0ZWQsIGFlcyh4ID0gQmFzZSwgeSA9IENvdW50LCBmaWxsID0gQmFzZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiLCBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHRpdGxlID0gIkJhc2UgQ29tcG9zaXRpb24iLAogICAgICAgeCA9ICJCYXNlIiwKICAgICAgIHkgPSAiQ291bnQiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCJBIiA9ICJncmVlbiIsICJDIiA9ICJibHVlIiwgIkciID0gInllbGxvdyIsICJUIiA9ICJyZWQiKSkKYGBgCgoKYGBge3IgdHJhbnNjcmlwdG9tZS1jZy1tb3RpZnMsIGV2YWw9VFJVRX0KCiMgQ291bnQgQ0cgbW90aWZzIGluIGVhY2ggc2VxdWVuY2UKY291bnRfY2dfbW90aWZzIDwtIGZ1bmN0aW9uKHNlcXVlbmNlKSB7CiAgY2dfbW90aWYgPC0gIkNHIgogIHJldHVybihsZW5ndGgoZ3JlZ2V4cHIoY2dfbW90aWYsIHNlcXVlbmNlLCBmaXhlZCA9IFRSVUUpW1sxXV0pKQp9CgpjZ19tb3RpZnNfY291bnRzIDwtIHNhcHBseShzZXF1ZW5jZXMsIGNvdW50X2NnX21vdGlmcykKCiMgQ3JlYXRlIGEgZGF0YSBmcmFtZQpjZ19tb3RpZnNfY291bnRzX2RmIDwtIGRhdGEuZnJhbWUoQ0dfQ291bnQgPSBjZ19tb3RpZnNfY291bnRzKQoKIyBQbG90IENHIG1vdGlmcyBkaXN0cmlidXRpb24gdXNpbmcgZ2dwbG90MgpnZ3Bsb3QoY2dfbW90aWZzX2NvdW50c19kZiwgYWVzKHggPSBDR19Db3VudCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEsIGNvbG9yID0gImJsYWNrIiwgZmlsbCA9ICJibHVlIiwgYWxwaGEgPSAwLjc1KSArCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgQ0cgTW90aWZzIiwKICAgICAgIHggPSAiTnVtYmVyIG9mIENHIE1vdGlmcyIsCiAgICAgICB5ID0gIkZyZXF1ZW5jeSIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgojIyBEYXRhYmFzZSBDcmVhdGlvbgoKIyMjIE9idGFpbiBGYXN0YSAoVW5pUHJvdC9Td2lzcy1Qcm90KQoKYGBge3IgZG93bmxvYWQtVW5pUFN3aXNzUC1kYXRhLCBlbmdpbmU9J2Jhc2gnfQpjZCAuLi8uLi9kYXRhCmN1cmwgLU8gaHR0cHM6Ly9mdHAudW5pcHJvdC5vcmcvcHViL2RhdGFiYXNlcy91bmlwcm90L2N1cnJlbnRfcmVsZWFzZS9rbm93bGVkZ2ViYXNlL2NvbXBsZXRlL3VuaXByb3Rfc3Byb3QuZmFzdGEuZ3oKbXYgdW5pcHJvdF9zcHJvdC5mYXN0YS5neiB1bmlwcm90X3Nwcm90X3IyMDIzXzA0LmZhc3RhLmd6Cmd1bnppcCAtayB1bmlwcm90X3Nwcm90X3IyMDIzXzA0LmZhc3RhLmd6CmBgYAoKIyMjIE1ha2luZyB0aGUgZGF0YWJhc2UKCmBgYHtyIG1ha2UtVW5pUFN3aXNzUC1ibGFzdGRiLCBlbmdpbmU9J2Jhc2gnfQovaG9tZS9zaGFyZWQvbmNiaS1ibGFzdC0yLjExLjArL2Jpbi9tYWtlYmxhc3RkYiBcCi1pbiAuLi8uLi9kYXRhL3Vu
aXByb3Rfc3Byb3RfcjIwMjNfMDQuZmFzdGEgXAotZGJ0eXBlIHByb3QgXAotb3V0IC4uLy4uL2JsYXN0ZGIvdW5pcHJvdF9zcHJvdF9yMjAyM18wNApgYGAKCgojIyBSdW5uaW5nIEJsYXN0eAoKYGBge3IgdHJhbnNjcmlwdG9tZS1ibGFzdHgsIGVuZ2luZT0nYmFzaCd9Ci9ob21lL3NoYXJlZC9uY2JpLWJsYXN0LTIuMTEuMCsvYmluL2JsYXN0eCBcCi1xdWVyeSAuLi8uLi9kYXRhL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS5mbmEgXAotZGIgLi4vLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzA0IFwKLW91dCAuLi9vdXRwdXQvMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbi9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEtdW5pcHJvdF9ibGFzdHgudGFiIFwKLWV2YWx1ZSAxRS0yMCBcCi1udW1fdGhyZWFkcyAyMCBcCi1tYXhfdGFyZ2V0X3NlcXMgMSBcCi1vdXRmbXQgNgpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtYmxhc3QtbG9vaywgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQplY2hvICJGaXJzdCBmZXcgbGluZXM6IgpoZWFkIC0yIC4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS11bmlwcm90X2JsYXN0eC50YWIKCmVjaG8gIk51bWJlciBvZiBsaW5lcyBpbiBvdXRwdXQ6Igp3YyAtbCAuLi9vdXRwdXQvMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbi9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEtdW5pcHJvdF9ibGFzdHgudGFiCmBgYAoKCiMjIEpvaW5pbmcgQmxhc3QgdGFibGUgd2l0aCBhbm5vYXRpb25zLgoKIyMjIFByZXBwaW5nIEJsYXN0IHRhYmxlIGZvciBlYXN5IGpvaW4KCmBgYHtyIHRyYW5zY3JpcHRvbWUtc2VwYXJhdGUsIGVuZ2luZT0nYmFzaCcsIGV2YWw9VFJVRX0KdHIgJ3wnICdcdCcgPCAuLi9vdXRwdXQvMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbi9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEtdW5pcHJvdF9ibGFzdHgudGFiIFwKPiAuLi9vdXRwdXQvMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbi9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYgoKaGVhZCAtMSAuLi9vdXRwdXQvMDItQXB1bC1yZWZlcmVuY2UtYW5ub3RhdGlvbi9BcHVsX0dDRl8wMTM3NTM4NjUuMV9ybmEtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYgoKYGBgCgojIyMgQ291bGQgZG8gc29tZSBjb29sIHN0dWZmIGluIFIgaGVyZSByZWFkaW5nIGluIHRhYmxlCgpgYGB7ciB0cmFuc2NyaXB0b21lLXJlYWQtZGF0YSwgZXZhbD1UUlVFLCBjYWNoZT1UUlVFfQpibHRhYmwgPC0gcmVhZC5jc3YoIi4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS11bmlwcm90X2JsYXN0eF9zZXAudGFiIiwgc2VwID0gJ1x0JywgaGVhZGVyID0gRkFMU0UpCgpzcGdvIDwtIHJlYWQuY3N2KCJodHRwczovL2dhbm5ldC5maXNoLndhc2hpbmd0b24uZWR1L3NlYXNoZWxsL3NuYXBzL3Vu
aXByb3RfdGFibGVfcjIwMjNfMDEudGFiIiwgc2VwID0gJ1x0JywgaGVhZGVyID0gVFJVRSkKCmRhdGF0YWJsZShoZWFkKGJsdGFibCksIG9wdGlvbnMgPSBsaXN0KHNjcm9sbFggPSBUUlVFLCBzY3JvbGxZID0gIjQwMHB4Iiwgc2Nyb2xsQ29sbGFwc2UgPSBUUlVFLCBwYWdpbmcgPSBGQUxTRSkpCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1zcGdvLXRhYmxlLCBldmFsPVRSVUV9CmRhdGF0YWJsZShoZWFkKHNwZ28pLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSwgc2Nyb2xsWSA9ICI0MDBweCIsIHNjcm9sbENvbGxhcHNlID0gVFJVRSwgcGFnaW5nID0gRkFMU0UpKQpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtc2VlLCBldmFsPVRSVUV9CmRhdGF0YWJsZSgKICBsZWZ0X2pvaW4oYmx0YWJsLCBzcGdvLCAgYnkgPSBjKCJWMyIgPSAiRW50cnkiKSkgJT4lCiAgc2VsZWN0KFYxLCBWMywgVjEzLCBQcm90ZWluLm5hbWVzLCBPcmdhbmlzbSwgR2VuZS5PbnRvbG9neS4uYmlvbG9naWNhbC5wcm9jZXNzLiwgR2VuZS5PbnRvbG9neS5JRHMpIAogIyAlPiUgbXV0YXRlKFYxID0gc3RyX3JlcGxhY2VfYWxsKFYxLHBhdHRlcm4gPSAic29saWQwMDc4XzIwMTEwNDEyX0ZSQUdfQkNfV0hJVEVfV0hJVEVfRjNfUVZfU0VfdHJpbW1lZCIsIHJlcGxhY2VtZW50ID0gIkFiIikpCikKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLWpvaW4sIGV2YWw9VFJVRX0KYW5ub3RfdGFiIDwtCiAgbGVmdF9qb2luKGJsdGFibCwgc3BnbywgIGJ5ID0gYygiVjMiID0gIkVudHJ5IikpICU+JQogIHNlbGVjdChWMSwgVjMsIFYxMywgUHJvdGVpbi5uYW1lcywgT3JnYW5pc20sIEdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy4sIEdlbmUuT250b2xvZ3kuSURzKQoKd3JpdGUudGFibGUoYW5ub3RfdGFiLCBmaWxlID0gIi4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS1JRG1hcHBpbmctMjAyNF8wOF8yMS50YWIiLCBzZXAgPSAiXHQiLAogICAgICAgICAgICByb3cubmFtZXMgPSBUUlVFLCBjb2wubmFtZXMgPSBOQSkKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLXZpZXctaGVhZGVycywgZW5naW5lPSdiYXNoJ30KaGVhZCAtbiAzIC4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0FwdWxfR0NGXzAxMzc1Mzg2NS4xX3JuYS1JRG1hcHBpbmctMjAyNF8wOF8yMS50YWIKYGBgCgpgYGB7ciB0cmFuc2NyaXB0b21lLXNwZWNpZXMtaGl0cywgZXZhbD1UUlVFfQojIFJlYWQgZGF0YXNldAojZGF0YXNldCA8LSByZWFkLmNzdigiLi4vb3V0cHV0L2JsYXN0X2Fubm90X2dvLnRhYiIsIHNlcCA9ICdcdCcpICAjIFJlcGxhY2Ugd2l0aCB0aGUgcGF0aCB0byB5b3VyIGRhdGFzZXQKCiMgU2VsZWN0IHRoZSBjb2x1bW4gb2YgaW50ZXJlc3QKY29sdW1uX25hbWUgPC0gIk9yZ2FuaXNtIiAgIyBSZXBsYWNlIHdpdGggdGhlIG5hbWUgb2YgdGhlIGNvbHVtbiBvZiBpbnRlcmVzdApjb2x1
bW5fZGF0YSA8LSBhbm5vdF90YWJbW2NvbHVtbl9uYW1lXV0KCiMgQ291bnQgdGhlIG9jY3VycmVuY2VzIG9mIHRoZSBzdHJpbmdzIGluIHRoZSBjb2x1bW4Kc3RyaW5nX2NvdW50cyA8LSB0YWJsZShjb2x1bW5fZGF0YSkKCiMgQ29udmVydCB0byBhIGRhdGEgZnJhbWUsIHNvcnQgYnkgY291bnQsIGFuZCBzZWxlY3QgdGhlIHRvcCAxMApzdHJpbmdfY291bnRzX2RmIDwtIGFzLmRhdGEuZnJhbWUoc3RyaW5nX2NvdW50cykKY29sbmFtZXMoc3RyaW5nX2NvdW50c19kZikgPC0gYygiU3RyaW5nIiwgIkNvdW50IikKc3RyaW5nX2NvdW50c19kZiA8LSBzdHJpbmdfY291bnRzX2RmW29yZGVyKHN0cmluZ19jb3VudHNfZGYkQ291bnQsIGRlY3JlYXNpbmcgPSBUUlVFKSwgXQp0b3BfMTBfc3RyaW5ncyA8LSBoZWFkKHN0cmluZ19jb3VudHNfZGYsIG4gPSAxMCkKCiMgUGxvdCB0aGUgdG9wIDEwIG1vc3QgY29tbW9uIHN0cmluZ3MgdXNpbmcgZ2dwbG90MgpnZ3Bsb3QodG9wXzEwX3N0cmluZ3MsIGFlcyh4ID0gcmVvcmRlcihTdHJpbmcsIC1Db3VudCksIHkgPSBDb3VudCwgZmlsbCA9IFN0cmluZykpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiLCBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHRpdGxlID0gIlRvcCAxMCBTcGVjaWVzIGhpdHMiLAogICAgICAgeCA9IGNvbHVtbl9uYW1lLAogICAgICAgeSA9ICJDb3VudCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtdG9wLWdvLCBldmFsPVRSVUV9CgojZGF0YSA8LSByZWFkLmNzdigiLi4vb3V0cHV0L2JsYXN0X2Fubm90X2dvLnRhYiIsIHNlcCA9ICdcdCcpCgojIFJlbmFtZSB0aGUgYEdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy5gIGNvbHVtbiB0byBgQmlvbG9naWNhbF9Qcm9jZXNzYApjb2xuYW1lcyhhbm5vdF90YWIpW2NvbG5hbWVzKGFubm90X3RhYikgPT0gIkdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy4iXSA8LSAiQmlvbG9naWNhbF9Qcm9jZXNzIgoKIyBTZXBhcmF0ZSB0aGUgYEJpb2xvZ2ljYWxfUHJvY2Vzc2AgY29sdW1uIGludG8gaW5kaXZpZHVhbCBiaW9sb2dpY2FsIHByb2Nlc3NlcwpkYXRhX3NlcGFyYXRlZCA8LSB1bmxpc3Qoc3Ryc3BsaXQoYW5ub3RfdGFiJEJpb2xvZ2ljYWxfUHJvY2Vzcywgc3BsaXQgPSAiOyIpKQoKIyBUcmltIHdoaXRlc3BhY2UgZnJvbSB0aGUgYmlvbG9naWNhbCBwcm9jZXNzZXMKZGF0YV9zZXBhcmF0ZWQgPC0gZ3N1YigiXlxccyt8XFxzKyQiLCAiIiwgZGF0YV9zZXBhcmF0ZWQpCgojIENvdW50IHRoZSBvY2N1cnJlbmNlcyBvZiBlYWNoIGJpb2xvZ2ljYWwgcHJvY2Vzcwpwcm9jZXNzX2NvdW50cyA8LSB0YWJsZShkYXRhX3NlcGFyYXRlZCkKcHJvY2Vzc19jb3VudHMgPC0gZGF0YS5mcmFtZShCaW9sb2dpY2FsX1Byb2Nlc3MgPSBuYW1lcyhw
cm9jZXNzX2NvdW50cyksIENvdW50ID0gYXMuaW50ZWdlcihwcm9jZXNzX2NvdW50cykpCnByb2Nlc3NfY291bnRzIDwtIHByb2Nlc3NfY291bnRzW29yZGVyKC1wcm9jZXNzX2NvdW50cyRDb3VudCksIF0KCiMgU2VsZWN0IHRoZSAyMCBtb3N0IHByZWRvbWluYW50IGJpb2xvZ2ljYWwgcHJvY2Vzc2VzCnRvcF8yMF9wcm9jZXNzZXMgPC0gcHJvY2Vzc19jb3VudHNbMToyMCwgXQoKIyBDcmVhdGUgYSBjb2xvciBwYWxldHRlIGZvciB0aGUgYmFycwpiYXJfY29sb3JzIDwtIHJhaW5ib3cobnJvdyh0b3BfMjBfcHJvY2Vzc2VzKSkKCiMgQ3JlYXRlIGEgc3RhZ2dlcmVkIHZlcnRpY2FsIGJhciBwbG90IHdpdGggZGlmZmVyZW50IGNvbG9ycyBmb3IgZWFjaCBiYXIKYmFycGxvdCh0b3BfMjBfcHJvY2Vzc2VzJENvdW50LCBuYW1lcy5hcmcgPSByZXAoIiIsIG5yb3codG9wXzIwX3Byb2Nlc3NlcykpLCBjb2wgPSBiYXJfY29sb3JzLAogICAgICAgIHlsaW0gPSBjKDAsIG1heCh0b3BfMjBfcHJvY2Vzc2VzJENvdW50KSAqIDEuMjUpLAogICAgICAgIG1haW4gPSAiT2NjdXJyZW5jZXMgb2YgdGhlIDIwIE1vc3QgUHJlZG9taW5hbnQgQmlvbG9naWNhbCBQcm9jZXNzZXMiLCB4bGFiID0gIkJpb2xvZ2ljYWwgUHJvY2VzcyIsIHlsYWIgPSAiQ291bnQiKQoKCiMgQ3JlYXRlIGEgc2VwYXJhdGUgcGxvdCBmb3IgdGhlIGxlZ2VuZApwbmcoIi4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0dPbGVnZW5kLnBuZyIsIHdpZHRoID0gODAwLCBoZWlnaHQgPSA2MDApCnBhcihtYXIgPSBjKDAsIDAsIDAsIDApKQpwbG90Lm5ldygpCmxlZ2VuZCgiY2VudGVyIiwgbGVnZW5kID0gdG9wXzIwX3Byb2Nlc3NlcyRCaW9sb2dpY2FsX1Byb2Nlc3MsIGZpbGwgPSBiYXJfY29sb3JzLCBjZXggPSAxLCB0aXRsZSA9ICJCaW9sb2dpY2FsIFByb2Nlc3NlcyIpCmRldi5vZmYoKQpgYGAKCmBgYHtyIHRyYW5zY3JpcHRvbWUtZ28tbGVnZW5kLCBldmFsPVRSVUUsIGZpZy53aWR0aCA9IDEwMCAsZmlnLmhlaWdodCA9IDEwMH0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoIi4uL291dHB1dC8wMi1BcHVsLXJlZmVyZW5jZS1hbm5vdGF0aW9uL0dPbGVnZW5kLnBuZyIpCmBgYAoKYGBge3IgdHJhbnNjcmlwdG9tZS1yZW1vdmUtbGVnZW5kLWZpbGUsIGVuZ2luZT0nYmFzaCcsIGV2YWw9VFJVRX0Kcm0gLi4vb3V0cHV0LzAyLUFwdWwtcmVmZXJlbmNlLWFubm90YXRpb24vR09sZWdlbmQucG5nCmBgYA==