#!/usr/bin/perl # Time-stamp: <2005-07-21 17:04:10 kasper> use warnings; use strict; use LFasta; use LFastaIO; use Getopt::Long; use constant USAGE =>< Specifies the prefix to to with the added prediction track. --labels Dump annotation. --nbest Dump all prediction tracks with integers as labels and print GTF lines. --labelkey Specifies which labels that go with which features. Default is: 'C:CDS E:exon I:intron x:default' --labelgroup Specifies which labels that together form features. Eg: 'CEI:transcript' EXAMPLES: cat input.lfa | dumpprediction.pl > ouput.gff dumpprediction.pl input.lfa ouput.gff AUTHOR: Kasper Munch COPYRIGHT: This program is free software. You may copy and redistribute it under the same terms as Perl itself. END my $labelkey = 'C:CDS E:exon I:intron x:default'; my $labelgroups = ''; my $help = 0; my $onlydefined = 0; my $prefix = undef; my $labels = 0; my $nbest = 0; my $onlygroups = 0; GetOptions( "help" => \$help, "prefix=s" => \$prefix, "labels" => \$labels, "nbest" => \$nbest, "labelkey=s" => \$labelkey, "labelgroups=s" => \$labelgroups, "onlygroups" => \$onlygroups, ) or die USAGE; $help and die USAGE; $onlydefined || defined $prefix || $labels || $nbest or die USAGE; # Input and output: @ARGV = ('-') unless @ARGV; my $input = shift @ARGV; open my $in, "$input" or die "$input: $!\n"; @ARGV = ('>&STDOUT') unless @ARGV; my $output = shift @ARGV; open my $out, ">$output" or die "$output: $!\n"; $in = LFastaIO->new(fh => $in); $labelkey = { split /[:\s]+/, $labelkey }; $labelgroups = { split /[:\s]+/, $labelgroups }; use Data::Dumper; while (my $lfa = <$in>) { $lfa->labelkey($labelkey); $lfa->labelgroups($labelgroups) if $labelgroups; my $preds = $lfa->preds_hash(); my $id = $lfa->id(); if ($labels) { my $gfflines = labels2gff({prefix => 'labels', pred => $lfa->labels}, $lfa->id, $labelkey); print $out $_ for (@$gfflines); } elsif ($nbest) { for my $pred (@$preds) { if ($$pred{prefix} =~ /\d+/) { my $gfflines = labels2gff($pred, $lfa->id, $labelkey); print $out $_ for (@$gfflines); } } } else { for my $pred (@$preds) { if ($$pred{prefix} eq $prefix) { my $gfflines = labels2gff($pred, $lfa->id, $labelkey); my $idx = 0; for my $l (@$gfflines) { $idx++; $l =~ s/status=prediction/gene_id "$id"; transcript_id "$id.$idx";/; print $out $l; } } } } } sub labels2gff { my $p = shift; my $id = shift; my $labelkey = shift; my @gfflines = (); # Make label regular expression my $label_regexp .= join("+|",keys(%{$labelkey})); $label_regexp .= "+"; my $i = 1; my $attribute = ''; if ($$p{prefix} eq 'labels') { $attribute = 'status=known'; } elsif ($$p{prefix} =~ /\d+/) { $attribute = 'status=prediction'; } # Look for labeled regions unless ($onlygroups) { while ( $$p{pred} =~ /$label_regexp/g ) { my $b = length($`); my $e = $b + length($&); my $key = substr($&,0,1); my $method = $0; $method =~ s/.*?([^\/]+)$/$1/; my $fase = $key =~ /\d/ ? $key : '.'; my @gff = ($id,$$p{prefix},$$labelkey{$key},$b+1,$e,".","+","$fase","$attribute"); my $gffstr = join("\t", @gff) . "\n"; push @gfflines, $gffstr; } } while (my ($labels, $groupname) = each %{$labelgroups}) { my $regex = "[$labels]+"; while ( $$p{pred} =~ /$regex/g ) { my $b = length($`); my $e = $b + length($&); my $key = substr($&,0,1); $key = '[CEIx012]+'; my @gff = ($id,$$p{prefix},$groupname,$b+1,$e,".","+",".","$attribute"); my $gffstr = join("\t", @gff) . "\n"; push @gfflines, $gffstr; } } return \@gfflines; } =head1 SYNOPSIS: dumpprediction.pl [OPTIONS] [infile [outfile]] =head1 DESCRIPTION: =head1 OPTIONS: =over 4 =item --help Prints this help. =item --prefix Specifies the prefix to to with the added prediction track. =item --labels Dump annotation. =item --nbest Dump all prediction tracks with integers as labels and print GTF lines. =item --labelkey Specifies which labels that go with which features. Default is: 'C:CDS E:exon I:intron x:default' =item --labelgroup Specifies which labels that together form features. Eg: 'CEI:transcript' =back =head1 EXAMPLES: cat input.lfa | dumpprediction.pl > ouput.gff dumpprediction.pl input.lfa ouput.gff =head1 AUTHOR: Kasper Munch =head1 COPYRIGHT: This program is free software. You may copy and redistribute it under the same terms as Perl itself. =cut