1 Getting the query fasta file

# Download the query CDS FASTA into ../data.
# --fail makes curl exit non-zero on an HTTP error rather than writing the
# server's error page into the FASTA file.
# NOTE(review): --insecure (-k) disables TLS certificate verification; it is
# kept from the original command -- confirm it is still required for this host.
curl --fail \
--insecure \
--output ../data/Gadus_macrocephalus.coding.gene.V1.cds \
https://gannet.fish.washington.edu/seashell/snaps/Gadus_macrocephalus.coding.gene.V1.cds

Exploring the FASTA file

# Peek at the first three lines of the query FASTA
head -n 3 ../data/Gadus_macrocephalus.coding.gene.V1.cds
## >Gma_1G0000010.1 locus=chr1:81612:97483:+    len:2343
## ATGCCTGTGAACGCGCGGGACCGGACAGTGCTGGGGCGTTTCCCCGGGGTCACGCTGGAA
## CCGGTGGAGGAGGAGGTGGAGGAGGAGGAGGAGGTGGAAGAGGACCAGGTGGAGCGAGGC
# Count sequences: grep -c reports how many lines contain ">"
query_fasta=../data/Gadus_macrocephalus.coding.gene.V1.cds
printf '%s\n' "How many sequences are there?"
grep -c ">" "$query_fasta"
## How many sequences are there?
## 23843
# Load the sequences from the FASTA file
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"  # Replace with the name of your FASTA file
sequences <- readDNAStringSet(filepath = fasta_file)

# One row per sequence, holding its length (width)
sequence_lengths <- width(sequences)
sequence_lengths_df <- data.frame(Length = sequence_lengths)

# Histogram of sequence lengths with one bin per unit of length
length_histogram <- ggplot(data = sequence_lengths_df, mapping = aes(x = Length)) +
  geom_histogram(binwidth = 1, color = "grey", fill = "blue", alpha = 0.75) +
  ggtitle("Histogram of Sequence Lengths") +
  xlab("Sequence Length") +
  ylab("Frequency") +
  theme_minimal()

# Evaluating the plot object at top level renders it
length_histogram

# Read FASTA file
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"
sequences <- readDNAStringSet(fasta_file)

# Per-sequence base counts: one row per sequence, with columns A, C, G, T and
# "other" (letters that are not one of the four bases)
base_composition <- alphabetFrequency(sequences, baseOnly = TRUE)

# Convert to data frame and reshape to long format (one row per sequence/base)
base_composition_df <- as.data.frame(base_composition)
base_composition_df$ID <- rownames(base_composition_df)
base_composition_melted <- reshape2::melt(base_composition_df, id.vars = "ID", variable.name = "Base", value.name = "Count")

# Sum the counts over all sequences so there is exactly one bar per base.
# Plotting the per-sequence rows directly stacks thousands of bars on top of
# each other at every x position, which hides the overall composition.
base_totals <- aggregate(Count ~ Base, data = base_composition_melted, FUN = sum)

# Plot total base composition; "other" gets its own colour so that bar is not
# left without a fill value in the manual scale
ggplot(base_totals, aes(x = Base, y = Count, fill = Base)) +
  geom_col(color = "black") +
  labs(title = "Base Composition",
       x = "Base",
       y = "Count") +
  theme_minimal() +
  scale_fill_manual(values = c("A" = "green", "C" = "blue", "G" = "yellow", "T" = "red", "other" = "grey"))

# Load the sequences that the CG-motif analysis works on
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"
sequences <- readDNAStringSet(filepath = fasta_file)

# Count CG motifs in a single sequence.
#
# sequence: one sequence; it is handed to gregexpr(), which accepts a
#   character string or an object coercible with as.character().
# Returns the number of "CG" occurrences as an integer (0 when there is none).
count_cg_motifs <- function(sequence) {
  cg_motif <- "CG"
  match_starts <- gregexpr(cg_motif, sequence, fixed = TRUE)[[1]]
  # gregexpr() signals "no match" with a single -1, so taking length() would
  # report 1 for a sequence without any CG; count only real match positions.
  sum(match_starts > 0L)
}

# Apply the counter to every sequence. vapply() always yields an integer
# vector, whereas sapply() changes its return type with the input (an empty
# sequence set would give an empty list).
cg_motifs_counts <- vapply(sequences, count_cg_motifs, integer(1))

# Create a data frame with one row per sequence
cg_motifs_counts_df <- data.frame(CG_Count = cg_motifs_counts)

# Plot CG motifs distribution using ggplot2
ggplot(cg_motifs_counts_df, aes(x = CG_Count)) +
  geom_histogram(binwidth = 1, color = "black", fill = "blue", alpha = 0.75) +
  labs(title = "Distribution of CG Motifs",
       x = "Number of CG Motifs",
       y = "Frequency") +
  theme_minimal()

2 Database Creation

2.1 Obtain Fasta (UniProt/Swiss-Prot)

The reviewed (Swiss-Prot) sequences are downloaded from UniProt; the file is named after the identity (release) of the database.

# Stop at the first failing command, so a missing ../data directory or a
# failed download does not let the later steps run in the wrong place or on a
# bad file.
set -e
cd ../data
# --fail: exit non-zero on an HTTP error instead of saving the error page.
# The archive is written directly under its release-tagged name.
# NOTE(review): the URL points at UniProt's current_release, so the r2023_04
# tag is only accurate if that was the release at download time -- confirm.
curl --fail \
--output uniprot_sprot_r2023_04.fasta.gz \
https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
# -k keeps the .gz next to the decompressed FASTA
gunzip -k uniprot_sprot_r2023_04.fasta.gz

2.2 Making the database

# -p: do not report an error when ../blastdb already exists (e.g. on a re-run)
mkdir -p ../blastdb
# Build a protein BLAST database from the decompressed Swiss-Prot FASTA
/home/shared/ncbi-blast-2.11.0+/bin/makeblastdb \
-in ../data/uniprot_sprot_r2023_04.fasta \
-dbtype prot \
-out ../blastdb/uniprot_sprot_r2023_04

3 Running Blastx

# Run blastx with the CDS FASTA as query against the Swiss-Prot protein
# database, writing tabular output (-outfmt 6) with an e-value cutoff of 1E-20,
# 20 threads and -max_target_seqs 1.
blast_bin=/home/shared/ncbi-blast-2.11.0+/bin
query_fasta=../data/Gadus_macrocephalus.coding.gene.V1.cds
blast_db=../blastdb/uniprot_sprot_r2023_04
blast_out=../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab

"$blast_bin"/blastx \
-query "$query_fasta" \
-db "$blast_db" \
-out "$blast_out" \
-evalue 1E-20 \
-num_threads 20 \
-max_target_seqs 1 \
-outfmt 6
# Show the first two rows of the BLAST result table
head -n 2 ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
## Gma_1G0000010.1  sp|P22735|TGM1_HUMAN    50.659  683 318 7   328 2334    109 786 0.0 688
## Gma_1G0000020.1  sp|Q9JI35|HRH3_CAVPO    54.684  395 160 4   136 1266    50  443 1.98e-140   411
# Report how many lines the BLAST result table contains
blast_out=../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
printf '%s\n' "Number of lines in output"
wc -l "$blast_out"
## Number of lines in output
## 22481 ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab

4 Joining Blast table with annotations.

4.1 Prepping Blast table for easy join

# Replace every "|" with a tab so the parts of the subject ID
# (e.g. sp|P22735|TGM1_HUMAN) become separate columns for joining.
blast_tab=../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
blast_sep_tab=../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx_sep.tab

tr '|' '\t' < "$blast_tab" > "$blast_sep_tab"

# Show the first reformatted row
head -n 1 "$blast_sep_tab"
## Gma_1G0000010.1  sp  P22735  TGM1_HUMAN  50.659  683 318 7   328 2334    109 786 0.0 688

4.2 Could do some cool stuff in R here reading in table

# BLAST hits with the subject ID already split into columns; the file has no
# header row, so columns are named V1, V2, ...
bltabl <- read.delim(
  "../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx_sep.tab",
  header = FALSE
)

# UniProt annotation table (tab-separated, first row holds the column names)
# NOTE(review): this table is tagged r2023_01 while the BLAST database file is
# tagged r2023_04 -- confirm the release mismatch is acceptable for the join.
spgo <- read.delim(
  "https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab",
  header = TRUE
)
# Preview the first rows of both tables with the same display settings:
# scrollable in both directions (400px tall), no paging
preview_options <- list(scrollX = TRUE, scrollY = "400px", scrollCollapse = TRUE, paging = FALSE)
datatable(head(bltabl), options = preview_options)
datatable(head(spgo), options = preview_options)
# Attach the UniProt annotations to every BLAST row by matching column V3 of
# the BLAST table to `Entry`, then keep V1, V3, V13 and the annotation fields.
# NOTE(review): per the sample row above, V1 looks like the query ID, V3 the
# accession and V13 the e-value -- confirm against the blastx -outfmt 6 layout.
annot_tab <- bltabl %>%
  left_join(spgo, by = c("V3" = "Entry")) %>%
  select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs)
# Optional (disabled) step kept from the original:
# %>% mutate(V1 = str_replace_all(V1,pattern = "solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed", replacement = "Ab"))

# Show the joined table
datatable(annot_tab)
# Alternative input (disabled): read a previously saved annotation table
#dataset <- read.csv("../output/blast_annot_go.tab", sep = '\t')  # Replace with the path to your dataset

# Column whose values are tallied
column_name <- "Organism"  # Replace with the name of the column of interest
column_data <- annot_tab[[column_name]]

# Tally each distinct value and label the two resulting columns
string_counts <- table(column_data)
string_counts_df <- setNames(as.data.frame(string_counts), c("String", "Count"))

# Most frequent first, then keep the ten largest
by_count <- order(string_counts_df$Count, decreasing = TRUE)
string_counts_df <- string_counts_df[by_count, ]
top_10_strings <- head(string_counts_df, n = 10)

# Horizontal bar chart of the ten most common values, without a legend
ggplot(top_10_strings, aes(x = reorder(String, -Count), y = Count, fill = String)) +
  geom_col(position = "dodge", color = "black") +
  ggtitle("Top 10 Species hits") +
  xlab(column_name) +
  ylab("Count") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

#data <- read.csv("../output/blast_annot_go.tab", sep = '\t')

# Rename the `Gene.Ontology..biological.process.` column to `Biological_Process`
colnames(annot_tab)[colnames(annot_tab) == "Gene.Ontology..biological.process."] <- "Biological_Process"

# Split each ";"-separated annotation string into individual biological
# processes. as.character() keeps strsplit() working if the column was read in
# as a factor.
data_separated <- unlist(strsplit(as.character(annot_tab$Biological_Process), split = ";"))

# Trim leading/trailing whitespace left over from the split
data_separated <- trimws(data_separated)

# Count the occurrences of each biological process, most frequent first
process_counts <- table(data_separated)
process_counts <- data.frame(Biological_Process = names(process_counts), Count = as.integer(process_counts))
process_counts <- process_counts[order(-process_counts$Count), ]

# Select the 20 most predominant biological processes. head() returns at most
# 20 rows; indexing with 1:20 would pad the result with NA rows when fewer
# than 20 distinct processes exist.
top_20_processes <- head(process_counts, n = 20)

# Create a color palette with one color per bar
bar_colors <- rainbow(nrow(top_20_processes))

# Vertical bar plot with a different color for each bar; the bar labels are
# left blank here and the process names go into the separate legend image
barplot(top_20_processes$Count, names.arg = rep("", nrow(top_20_processes)), col = bar_colors,
        ylim = c(0, max(top_20_processes$Count) * 1.25),
        main = "Occurrences of the 20 Most Predominant Biological Processes", xlab = "Biological Process", ylab = "Count")

# Draw the legend by itself into a PNG file: open the device, remove the
# margins, start an empty plot, place the legend in its center, close the device
png(filename = "../output/GOlegend.png", width = 800, height = 600)
par(mar = rep(0, 4))
plot.new()
legend(
  x = "center",
  legend = top_20_processes$Biological_Process,
  fill = bar_colors,
  cex = 1,
  title = "Biological Processes"
)
dev.off()
## png 
##   2
# Embed the legend image written to ../output/GOlegend.png
knitr::include_graphics(path = "../output/GOlegend.png")

LS0tCnRpdGxlOiAiT2ggV2hhdCBhIEJsYXN0ISIKYXV0aG9yOiBTdGV2ZW4gUm9iZXJ0cwpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiICAKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICBoaWdobGlnaHQ6IHplbmJ1cm4KICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KGtuaXRyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShrYWJsZUV4dHJhKQpsaWJyYXJ5KERUKQpsaWJyYXJ5KEJpb3N0cmluZ3MpCmxpYnJhcnkodG0pCmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICBlY2hvID0gVFJVRSwgICAgICAgICAjIERpc3BsYXkgY29kZSBjaHVua3MKICBldmFsID0gRkFMU0UsICAgICAgICAgIyBFdmFsdWF0ZSBjb2RlIGNodW5rcwogIHdhcm5pbmcgPSBGQUxTRSwgICAgICMgSGlkZSB3YXJuaW5ncwogIG1lc3NhZ2UgPSBGQUxTRSwgICAgICMgSGlkZSBtZXNzYWdlcwogIGZpZy53aWR0aCA9IDYsICAgICAgICMgU2V0IHBsb3Qgd2lkdGggaW4gaW5jaGVzCiAgZmlnLmhlaWdodCA9IDQsICAgICAgIyBTZXQgcGxvdCBoZWlnaHQgaW4gaW5jaGVzCiAgZmlnLmFsaWduID0gImNlbnRlciIgIyBBbGlnbiBwbG90cyB0byB0aGUgY2VudGVyCikKYGBgCgoKCiMgR2V0dGluZyB0aGUgcXVlcnkgZmFzdGEgZmlsZQoKYGBge3IgZG93bmxvYWQtcXVlcnksIGVuZ2luZT0nYmFzaCd9CmN1cmwgaHR0cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9zZWFzaGVsbC9zbmFwcy9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcyBcCi1rIFwKPiAuLi9kYXRhL0dhZHVzX21hY3JvY2VwaGFsdXMuY29kaW5nLmdlbmUuVjEuY2RzCmBgYAoKRXhwbG9yaW5nIHdoYXQgZmFzdGEgZmlsZQoKYGBge3Igdmlldy1xdWVyeSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQpoZWFkIC0zIC4uL2RhdGEvR2FkdXNfbWFjcm9jZXBoYWx1cy5jb2RpbmcuZ2VuZS5WMS5jZHMKYGBgCgpgYGB7ciB2aWV3Mi1xdWVyeSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQplY2hvICJIb3cgbWFueSBzZXF1ZW5jZXMgYXJlIHRoZXJlPyIKZ3JlcCAtYyAiPiIgLi4vZGF0YS9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcwpgYGAKCmBgYHtyIGhpc3RvZ3JhbSwgZXZhbD1UUlVFfQojIFJlYWQgRkFTVEEgZmlsZQpmYXN0YV9maWxlIDwtICIuLi9kYXRhL0dhZHVzX21hY3JvY2VwaGFsdXMuY29kaW5nLmdlbmUuVjEuY2RzIiAgIyBSZXBsYWNlIHdpdGggdGhlIG5hbWUgb2YgeW91ciBGQVNUQSBmaWxlCnNlcXVlbmNlcyA8LSByZWFkRE5BU3RyaW5nU2V0KGZhc3RhX2ZpbGUpCgojIENhbGN1bGF0ZSBzZXF1ZW5jZSBsZW5ndGhzCnNlcXVlbmNlX2xlbmd0
aHMgPC0gd2lkdGgoc2VxdWVuY2VzKQoKIyBDcmVhdGUgYSBkYXRhIGZyYW1lCnNlcXVlbmNlX2xlbmd0aHNfZGYgPC0gZGF0YS5mcmFtZShMZW5ndGggPSBzZXF1ZW5jZV9sZW5ndGhzKQoKIyBQbG90IGhpc3RvZ3JhbSB1c2luZyBnZ3Bsb3QyCmdncGxvdChzZXF1ZW5jZV9sZW5ndGhzX2RmLCBhZXMoeCA9IExlbmd0aCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEsIGNvbG9yID0gImdyZXkiLCBmaWxsID0gImJsdWUiLCBhbHBoYSA9IDAuNzUpICsKICBsYWJzKHRpdGxlID0gIkhpc3RvZ3JhbSBvZiBTZXF1ZW5jZSBMZW5ndGhzIiwKICAgICAgIHggPSAiU2VxdWVuY2UgTGVuZ3RoIiwKICAgICAgIHkgPSAiRnJlcXVlbmN5IikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyIEFDR1QsIGV2YWw9VFJVRX0KCiMgUmVhZCBGQVNUQSBmaWxlCmZhc3RhX2ZpbGUgPC0gIi4uL2RhdGEvR2FkdXNfbWFjcm9jZXBoYWx1cy5jb2RpbmcuZ2VuZS5WMS5jZHMiCnNlcXVlbmNlcyA8LSByZWFkRE5BU3RyaW5nU2V0KGZhc3RhX2ZpbGUpCgojIENhbGN1bGF0ZSBiYXNlIGNvbXBvc2l0aW9uCmJhc2VfY29tcG9zaXRpb24gPC0gYWxwaGFiZXRGcmVxdWVuY3koc2VxdWVuY2VzLCBiYXNlT25seSA9IFRSVUUpCgojIENvbnZlcnQgdG8gZGF0YSBmcmFtZSBhbmQgcmVzaGFwZSBmb3IgZ2dwbG90MgpiYXNlX2NvbXBvc2l0aW9uX2RmIDwtIGFzLmRhdGEuZnJhbWUoYmFzZV9jb21wb3NpdGlvbikKYmFzZV9jb21wb3NpdGlvbl9kZiRJRCA8LSByb3duYW1lcyhiYXNlX2NvbXBvc2l0aW9uX2RmKQpiYXNlX2NvbXBvc2l0aW9uX21lbHRlZCA8LSByZXNoYXBlMjo6bWVsdChiYXNlX2NvbXBvc2l0aW9uX2RmLCBpZC52YXJzID0gIklEIiwgdmFyaWFibGUubmFtZSA9ICJCYXNlIiwgdmFsdWUubmFtZSA9ICJDb3VudCIpCgojIFBsb3QgYmFzZSBjb21wb3NpdGlvbiBiYXIgY2hhcnQgdXNpbmcgZ2dwbG90MgpnZ3Bsb3QoYmFzZV9jb21wb3NpdGlvbl9tZWx0ZWQsIGFlcyh4ID0gQmFzZSwgeSA9IENvdW50LCBmaWxsID0gQmFzZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiLCBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHRpdGxlID0gIkJhc2UgQ29tcG9zaXRpb24iLAogICAgICAgeCA9ICJCYXNlIiwKICAgICAgIHkgPSAiQ291bnQiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCJBIiA9ICJncmVlbiIsICJDIiA9ICJibHVlIiwgIkciID0gInllbGxvdyIsICJUIiA9ICJyZWQiKSkKYGBgCgoKYGBge3IgY2csIGV2YWw9VFJVRX0KIyBSZWFkIEZBU1RBIGZpbGUKZmFzdGFfZmlsZSA8LSAiLi4vZGF0YS9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcyIKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgQ291bnQgQ0cgbW90aWZzIGluIGVhY2ggc2VxdWVuY2UKY291bnRfY2dfbW90aWZzIDwt
IGZ1bmN0aW9uKHNlcXVlbmNlKSB7CiAgY2dfbW90aWYgPC0gIkNHIgogIHJldHVybihsZW5ndGgoZ3JlZ2V4cHIoY2dfbW90aWYsIHNlcXVlbmNlLCBmaXhlZCA9IFRSVUUpW1sxXV0pKQp9CgpjZ19tb3RpZnNfY291bnRzIDwtIHNhcHBseShzZXF1ZW5jZXMsIGNvdW50X2NnX21vdGlmcykKCiMgQ3JlYXRlIGEgZGF0YSBmcmFtZQpjZ19tb3RpZnNfY291bnRzX2RmIDwtIGRhdGEuZnJhbWUoQ0dfQ291bnQgPSBjZ19tb3RpZnNfY291bnRzKQoKIyBQbG90IENHIG1vdGlmcyBkaXN0cmlidXRpb24gdXNpbmcgZ2dwbG90MgpnZ3Bsb3QoY2dfbW90aWZzX2NvdW50c19kZiwgYWVzKHggPSBDR19Db3VudCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEsIGNvbG9yID0gImJsYWNrIiwgZmlsbCA9ICJibHVlIiwgYWxwaGEgPSAwLjc1KSArCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgQ0cgTW90aWZzIiwKICAgICAgIHggPSAiTnVtYmVyIG9mIENHIE1vdGlmcyIsCiAgICAgICB5ID0gIkZyZXF1ZW5jeSIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgoKCgoKIyBEYXRhYmFzZSBDcmVhdGlvbgoKIyMgT2J0YWluIEZhc3RhIChVbmlQcm90L1N3aXNzLVByb3QpCgpUaGlzIGlzIGZyb20gaGVyZSBwaWN1ciByZXZpZXdlIHNlcXVlbmNlcyBJIG5hbWVkIGJhc2VkIG9uIHRoZSBpZGVudGlmeSBvZiB0aGUgZGF0YWJhc2UgZ2l2ZW4KCgoKYGBge3IgZG93bmxvYWQtZGF0YSwgZW5naW5lPSdiYXNoJ30KY2QgLi4vZGF0YQpjdXJsIC1PIGh0dHBzOi8vZnRwLnVuaXByb3Qub3JnL3B1Yi9kYXRhYmFzZXMvdW5pcHJvdC9jdXJyZW50X3JlbGVhc2Uva25vd2xlZGdlYmFzZS9jb21wbGV0ZS91bmlwcm90X3Nwcm90LmZhc3RhLmd6Cm12IHVuaXByb3Rfc3Byb3QuZmFzdGEuZ3ogdW5pcHJvdF9zcHJvdF9yMjAyM18wNC5mYXN0YS5negpndW56aXAgLWsgdW5pcHJvdF9zcHJvdF9yMjAyM18wNC5mYXN0YS5negpgYGAKCiMjIE1ha2luZyB0aGUgZGF0YWJhc2UKCmBgYHtyIG1ha2UtYmxhc3RkYiwgZW5naW5lPSdiYXNoJ30KbWtkaXIgLi4vYmxhc3RkYgovaG9tZS9zaGFyZWQvbmNiaS1ibGFzdC0yLjExLjArL2Jpbi9tYWtlYmxhc3RkYiBcCi1pbiAuLi9kYXRhL3VuaXByb3Rfc3Byb3RfcjIwMjNfMDQuZmFzdGEgXAotZGJ0eXBlIHByb3QgXAotb3V0IC4uL2JsYXN0ZGIvdW5pcHJvdF9zcHJvdF9yMjAyM18wNApgYGAKCgoKCiMgUnVubmluZyBCbGFzdHgKCmBgYHtyIGJsYXN0eCwgZW5naW5lPSdiYXNoJ30KL2hvbWUvc2hhcmVkL25jYmktYmxhc3QtMi4xMS4wKy9iaW4vYmxhc3R4IFwKLXF1ZXJ5IC4uL2RhdGEvR2FkdXNfbWFjcm9jZXBoYWx1cy5jb2RpbmcuZ2VuZS5WMS5jZHMgXAotZGIgLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzA0IFwKLW91dCAuLi9vdXRwdXQvMDMtdHJhbnNjcmlwdG9tZS1hbm5vdGF0aW9uL0dtLmNkcy11bmlwcm90X2JsYXN0eC50YWIgXAotZXZhbHVlIDFFLTIwIFwKLW51bV90aHJlYWRzIDIw
IFwKLW1heF90YXJnZXRfc2VxcyAxIFwKLW91dGZtdCA2CmBgYAoKYGBge3IgYmxhc3QtbG9vaywgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQpoZWFkIC0yIC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR20uY2RzLXVuaXByb3RfYmxhc3R4LnRhYgpgYGAKCmBgYHtyIGJsYXN0LWxvb2syLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmVjaG8gIk51bWJlciBvZiBsaW5lcyBpbiBvdXRwdXQiCndjIC1sIC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR20uY2RzLXVuaXByb3RfYmxhc3R4LnRhYgpgYGAKCgoKCiMgSm9pbmluZyBCbGFzdCB0YWJsZSB3aXRoIGFubm9hdGlvbnMuCgojIyBQcmVwcGluZyBCbGFzdCB0YWJsZSBmb3IgZWFzeSBqb2luCgpgYGB7ciBzZXBhcmF0ZSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQp0ciAnfCcgJ1x0JyA8IC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR20uY2RzLXVuaXByb3RfYmxhc3R4LnRhYiBcCj4gLi4vb3V0cHV0LzAzLXRyYW5zY3JpcHRvbWUtYW5ub3RhdGlvbi9HbS5jZHMtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYgoKaGVhZCAtMSAuLi9vdXRwdXQvMDMtdHJhbnNjcmlwdG9tZS1hbm5vdGF0aW9uL0dtLmNkcy11bmlwcm90X2JsYXN0eF9zZXAudGFiCgpgYGAKCiMjIENvdWxkIGRvIHNvbWUgY29vbCBzdHVmZiBpbiBSIGhlcmUgcmVhZGluZyBpbiB0YWJsZQoKYGBge3IgcmVhZC1kYXRhLCBldmFsPVRSVUUsIGNhY2hlPVRSVUV9CmJsdGFibCA8LSByZWFkLmNzdigiLi4vb3V0cHV0LzAzLXRyYW5zY3JpcHRvbWUtYW5ub3RhdGlvbi9HbS5jZHMtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IEZBTFNFKQoKc3BnbyA8LSByZWFkLmNzdigiaHR0cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9zZWFzaGVsbC9zbmFwcy91bmlwcm90X3RhYmxlX3IyMDIzXzAxLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IFRSVUUpCmBgYAoKYGBge3IsIGV2YWw9VFJVRX0KZGF0YXRhYmxlKGhlYWQoYmx0YWJsKSwgb3B0aW9ucyA9IGxpc3Qoc2Nyb2xsWCA9IFRSVUUsIHNjcm9sbFkgPSAiNDAwcHgiLCBzY3JvbGxDb2xsYXBzZSA9IFRSVUUsIHBhZ2luZyA9IEZBTFNFKSkKYGBgCgpgYGB7ciBzcGdvLXRhYmxlLCBldmFsPVRSVUV9CmRhdGF0YWJsZShoZWFkKHNwZ28pLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSwgc2Nyb2xsWSA9ICI0MDBweCIsIHNjcm9sbENvbGxhcHNlID0gVFJVRSwgcGFnaW5nID0gRkFMU0UpKQpgYGAKCmBgYHtyIHNlZSwgZXZhbD1UUlVFfQpkYXRhdGFibGUoCiAgbGVmdF9qb2luKGJsdGFibCwgc3BnbywgIGJ5ID0gYygiVjMiID0gIkVudHJ5IikpICU+JQogIHNlbGVjdChWMSwgVjMsIFYxMywgUHJvdGVpbi5uYW1lcywgT3JnYW5pc20sIEdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy4sIEdlbmUuT250b2xvZ3kuSURzKSAKICMgJT4lIG11dGF0
ZShWMSA9IHN0cl9yZXBsYWNlX2FsbChWMSxwYXR0ZXJuID0gInNvbGlkMDA3OF8yMDExMDQxMl9GUkFHX0JDX1dISVRFX1dISVRFX0YzX1FWX1NFX3RyaW1tZWQiLCByZXBsYWNlbWVudCA9ICJBYiIpKQopCmBgYAoKYGBge3Igam9pbiwgZXZhbD1UUlVFfQphbm5vdF90YWIgPC0KICBsZWZ0X2pvaW4oYmx0YWJsLCBzcGdvLCAgYnkgPSBjKCJWMyIgPSAiRW50cnkiKSkgJT4lCiAgc2VsZWN0KFYxLCBWMywgVjEzLCBQcm90ZWluLm5hbWVzLCBPcmdhbmlzbSwgR2VuZS5PbnRvbG9neS4uYmlvbG9naWNhbC5wcm9jZXNzLiwgR2VuZS5PbnRvbG9neS5JRHMpCmBgYAoKCgoKCmBgYHtyLCBldmFsPVRSVUV9CiMgUmVhZCBkYXRhc2V0CiNkYXRhc2V0IDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvYmxhc3RfYW5ub3RfZ28udGFiIiwgc2VwID0gJ1x0JykgICMgUmVwbGFjZSB3aXRoIHRoZSBwYXRoIHRvIHlvdXIgZGF0YXNldAoKIyBTZWxlY3QgdGhlIGNvbHVtbiBvZiBpbnRlcmVzdApjb2x1bW5fbmFtZSA8LSAiT3JnYW5pc20iICAjIFJlcGxhY2Ugd2l0aCB0aGUgbmFtZSBvZiB0aGUgY29sdW1uIG9mIGludGVyZXN0CmNvbHVtbl9kYXRhIDwtIGFubm90X3RhYltbY29sdW1uX25hbWVdXQoKIyBDb3VudCB0aGUgb2NjdXJyZW5jZXMgb2YgdGhlIHN0cmluZ3MgaW4gdGhlIGNvbHVtbgpzdHJpbmdfY291bnRzIDwtIHRhYmxlKGNvbHVtbl9kYXRhKQoKIyBDb252ZXJ0IHRvIGEgZGF0YSBmcmFtZSwgc29ydCBieSBjb3VudCwgYW5kIHNlbGVjdCB0aGUgdG9wIDEwCnN0cmluZ19jb3VudHNfZGYgPC0gYXMuZGF0YS5mcmFtZShzdHJpbmdfY291bnRzKQpjb2xuYW1lcyhzdHJpbmdfY291bnRzX2RmKSA8LSBjKCJTdHJpbmciLCAiQ291bnQiKQpzdHJpbmdfY291bnRzX2RmIDwtIHN0cmluZ19jb3VudHNfZGZbb3JkZXIoc3RyaW5nX2NvdW50c19kZiRDb3VudCwgZGVjcmVhc2luZyA9IFRSVUUpLCBdCnRvcF8xMF9zdHJpbmdzIDwtIGhlYWQoc3RyaW5nX2NvdW50c19kZiwgbiA9IDEwKQoKIyBQbG90IHRoZSB0b3AgMTAgbW9zdCBjb21tb24gc3RyaW5ncyB1c2luZyBnZ3Bsb3QyCmdncGxvdCh0b3BfMTBfc3RyaW5ncywgYWVzKHggPSByZW9yZGVyKFN0cmluZywgLUNvdW50KSwgeSA9IENvdW50LCBmaWxsID0gU3RyaW5nKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiVG9wIDEwIFNwZWNpZXMgaGl0cyIsCiAgICAgICB4ID0gY29sdW1uX25hbWUsCiAgICAgICB5ID0gIkNvdW50IikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSArCiAgY29vcmRfZmxpcCgpCgoKYGBgCmBgYHtyIGdvLCBldmFsPVRSVUV9CgoKI2RhdGEgPC0gcmVhZC5jc3YoIi4uL291dHB1dC9ibGFzdF9hbm5vdF9nby50YWIiLCBzZXAgPSAnXHQnKQoKIyBSZW5hbWUgdGhlIGBHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2Fs
LnByb2Nlc3MuYCBjb2x1bW4gdG8gYEJpb2xvZ2ljYWxfUHJvY2Vzc2AKY29sbmFtZXMoYW5ub3RfdGFiKVtjb2xuYW1lcyhhbm5vdF90YWIpID09ICJHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuIl0gPC0gIkJpb2xvZ2ljYWxfUHJvY2VzcyIKCiMgU2VwYXJhdGUgdGhlIGBCaW9sb2dpY2FsX1Byb2Nlc3NgIGNvbHVtbiBpbnRvIGluZGl2aWR1YWwgYmlvbG9naWNhbCBwcm9jZXNzZXMKZGF0YV9zZXBhcmF0ZWQgPC0gdW5saXN0KHN0cnNwbGl0KGFubm90X3RhYiRCaW9sb2dpY2FsX1Byb2Nlc3MsIHNwbGl0ID0gIjsiKSkKCiMgVHJpbSB3aGl0ZXNwYWNlIGZyb20gdGhlIGJpb2xvZ2ljYWwgcHJvY2Vzc2VzCmRhdGFfc2VwYXJhdGVkIDwtIGdzdWIoIl5cXHMrfFxccyskIiwgIiIsIGRhdGFfc2VwYXJhdGVkKQoKIyBDb3VudCB0aGUgb2NjdXJyZW5jZXMgb2YgZWFjaCBiaW9sb2dpY2FsIHByb2Nlc3MKcHJvY2Vzc19jb3VudHMgPC0gdGFibGUoZGF0YV9zZXBhcmF0ZWQpCnByb2Nlc3NfY291bnRzIDwtIGRhdGEuZnJhbWUoQmlvbG9naWNhbF9Qcm9jZXNzID0gbmFtZXMocHJvY2Vzc19jb3VudHMpLCBDb3VudCA9IGFzLmludGVnZXIocHJvY2Vzc19jb3VudHMpKQpwcm9jZXNzX2NvdW50cyA8LSBwcm9jZXNzX2NvdW50c1tvcmRlcigtcHJvY2Vzc19jb3VudHMkQ291bnQpLCBdCgojIFNlbGVjdCB0aGUgMjAgbW9zdCBwcmVkb21pbmFudCBiaW9sb2dpY2FsIHByb2Nlc3Nlcwp0b3BfMjBfcHJvY2Vzc2VzIDwtIHByb2Nlc3NfY291bnRzWzE6MjAsIF0KCiMgQ3JlYXRlIGEgY29sb3IgcGFsZXR0ZSBmb3IgdGhlIGJhcnMKYmFyX2NvbG9ycyA8LSByYWluYm93KG5yb3codG9wXzIwX3Byb2Nlc3NlcykpCgojIENyZWF0ZSBhIHN0YWdnZXJlZCB2ZXJ0aWNhbCBiYXIgcGxvdCB3aXRoIGRpZmZlcmVudCBjb2xvcnMgZm9yIGVhY2ggYmFyCmJhcnBsb3QodG9wXzIwX3Byb2Nlc3NlcyRDb3VudCwgbmFtZXMuYXJnID0gcmVwKCIiLCBucm93KHRvcF8yMF9wcm9jZXNzZXMpKSwgY29sID0gYmFyX2NvbG9ycywKICAgICAgICB5bGltID0gYygwLCBtYXgodG9wXzIwX3Byb2Nlc3NlcyRDb3VudCkgKiAxLjI1KSwKICAgICAgICBtYWluID0gIk9jY3VycmVuY2VzIG9mIHRoZSAyMCBNb3N0IFByZWRvbWluYW50IEJpb2xvZ2ljYWwgUHJvY2Vzc2VzIiwgeGxhYiA9ICJCaW9sb2dpY2FsIFByb2Nlc3MiLCB5bGFiID0gIkNvdW50IikKCgojIENyZWF0ZSBhIHNlcGFyYXRlIHBsb3QgZm9yIHRoZSBsZWdlbmQKcG5nKCIuLi9vdXRwdXQvR09sZWdlbmQucG5nIiwgd2lkdGggPSA4MDAsIGhlaWdodCA9IDYwMCkKcGFyKG1hciA9IGMoMCwgMCwgMCwgMCkpCnBsb3QubmV3KCkKbGVnZW5kKCJjZW50ZXIiLCBsZWdlbmQgPSB0b3BfMjBfcHJvY2Vzc2VzJEJpb2xvZ2ljYWxfUHJvY2VzcywgZmlsbCA9IGJhcl9jb2xvcnMsIGNleCA9IDEsIHRpdGxlID0gIkJpb2xvZ2ljYWwgUHJvY2Vzc2VzIikKZGV2Lm9mZigpCmBgYAoKYGBge3IgbGVnZW5kLCBl
dmFsPVRSVUUsIGZpZy53aWR0aCA9IDEwMCAsZmlnLmhlaWdodCA9IDEwMH0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoIi4uL291dHB1dC9HT2xlZ2VuZC5wbmciKQpgYGAKCgoK