#!/usr/bin/perl

#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
#    Copyright (C) 2009-2013  A. Gordon (assafgordon@gmail.com)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as
#   published by the Free Software Foundation, either version 3 of the
#   License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

use strict;
use warnings;
use GD::Graph::bars;
use Data::Dumper;
use PerlIO::gzip;

##
## Reads a FASTA file (one header line followed by one sequence line per
## record), builds a histogram of sequence lengths and writes it as a PNG
## bar chart.
##
## Arguments:
##   $ARGV[0] - input FASTA file
##   $ARGV[1] - output PNG file
##
## If a header looks like ">ID-COUNT" (a collapsed FASTA file), COUNT is
## added to the histogram bucket; otherwise every sequence counts as 1.
##

# NOTE(review): the usage text below was reconstructed; the original wording
# was not recoverable from the reviewed copy of this file. Confirm against
# the upstream source.
my $usage = <<"END_USAGE";
Create a Linker Clipping Information Histogram

usage: $0 INPUT_FILE.FA OUTPUT_FILE.PNG

	INPUT_FILE.FA   = input file (in FASTA format, can be GZIPped)
	OUTPUT_FILE.PNG = histogram image

END_USAGE

if ( scalar @ARGV == 0 ) {
	print $usage;
	exit 0;
}

# The output file name is mandatory; without this check the failure shows up
# later as "uninitialized value" warnings and a misleading error message.
if ( scalar @ARGV < 2 ) {
	die "Error: missing output file name\n\n", $usage;
}

my ( $input_path, $output_path ) = @ARGV;

#
# Open the input and output files
#
# NOTE(review): the original input open() call was not recoverable from the
# reviewed copy. PerlIO::gzip is imported by this file, so the gzip layer is
# assumed here; "autopop" makes the layer remove itself when the input is
# not gzip-compressed. Confirm against the upstream source.
open( my $in, "<:gzip(autopop)", $input_path )
	or die "Cannot open input file $input_path\n";

# Three-argument open: the file name can never be interpreted as a mode.
open( my $out, '>', $output_path )
	or die "Cannot create output file $output_path\n";
binmode $out;

# Maps sequence length => number of sequences having that length.
my %histogram;

while ( my $name = <$in> ) {
	my $sequence = <$in>;

	# A header without a sequence line means the input ended mid-record
	# (or ends with a stray blank line). Do not count it.
	if ( not defined $sequence ) {
		warn "Warning: ignoring incomplete record at end of $input_path\n"
			if $name =~ /\S/;
		last;
	}

	# Strip the line terminator, including the CR of DOS-style files, so
	# neither the length nor the count match below is thrown off by it.
	$name     =~ s/\r?\n\z//;
	$sequence =~ s/\r?\n\z//;

	my $sequence_length = length($sequence);

	# Collapsed files carry the multiplicity in the sequence ID
	# (">ID-COUNT"). If the header does not have that form (including
	# headers without any dash), count the sequence as 1.
	my ($count) = $name =~ /^\>[^-]+\-(\d+)$/;
	$count = 1 if not defined $count;

	$histogram{$sequence_length} += $count;
}
close $in;

my @lengths = sort { $a <=> $b } keys %histogram;

# Debugging aid: set to 1 to print the histogram as text and skip the plot.
my $textual_output = 0;
if ($textual_output) {
	print "Length\tCount\n";
	foreach my $length_key (@lengths) {
		print $length_key, "\t", $histogram{$length_key}, "\n";
	}
	exit 0;
}

## Build the data as required by GD::Graph::bars.
## Data list has two items (each item is itself a list)
## 1. a list of x-axis labels (these are the keys from the histogram)
## 2. a list of values
my @data = (
	[@lengths],
	[ map { $histogram{$_} } @lengths ],
);

my $graph = GD::Graph::bars->new( 1000, 800 );

$graph->set(
	x_label       => 'Length',
	y_label       => 'Amount',
	title         => 'Sequences lengths Distribution (after clipping)',
	bar_spacing   => 10,
	transparent   => 0,
	t_margin      => 10,
	y_tick_number => 20,
	y_long_ticks  => 1,
) or die $graph->error;

$graph->plot( \@data ) or die $graph->error;

# Buffered write errors (e.g. disk full) may only surface at close time, so
# check both the print and the close.
print {$out} $graph->gd->png
	or die "Cannot write to output file $output_path\n";
close $out
	or die "Cannot write to output file $output_path\n";