#!/usr/bin/perl # Time-stamp: <2008-01-07 14:41:37 kasper> use warnings; use strict; use Getopt::Long; use constant USAGE =>< untangled.lfa untangle.pl file.lfa > untangled.lfa AUTHOR: Kasper Munch COPYRIGHT: This program is free software. You may copy and redistribute it under the same terms as Perl itself. END my $help = 0; GetOptions( "help" => \$help, ) or die USAGE; $help and die USAGE; @ARGV = ('-') unless @ARGV; my $input = shift @ARGV; my $fh; open $fh, $input; $/ = ">"; while (<$fh>) { chomp; next unless $_; unless (/\n$/) { # Seems we only got part of an entry, so we probably encountered a # '>' in the description. So let's add another line: my $l = $_; $l = "$l>" . <$fh>; $_ = $l; chomp; } # Get the id line: s/(^[^\n]+\n)//s; my $id = $1; s/\n//sg; # Get the prediction prefixes if any: my %prefixes = (); my @prefixes = (); while (/(\?\S)/sg) { $prefixes{$1} = 1; push @prefixes, $1; } my $preds = (keys %prefixes); s/(\?\S)//sg; # Get the line length in the original lfasta: /^\s*([\w]+)#/gi or die "Could not get line length in the original LFasta"; my $length = length $1; s/[#\s]//sg; print ">$id"; my @list = split //, $_; my $totallength = scalar(@list); # Hvor mange hele gange gaar to gange laengden op i totalen: my $full_lines = (2+$preds) * int($totallength / ((2+$preds) * $length)); my $sublist = (); my @lol; while (@list && $full_lines) { my $entry = shift @list; push @$sublist, $entry; if (@$sublist == $length) { push @lol, $sublist; $sublist = (); $full_lines--; } } if (@list) { my $lastlength = @list/(2+$preds); my $prevpos = 0; for (my $i=1; $i<=2+$preds; $i++) { $sublist = (); my $pos = $i*$lastlength - 1; for (@list[$prevpos..$pos]) { push @$sublist, $_; } push @lol, $sublist; $prevpos = $pos + 1; } } if ($preds) { for (my $i=0; $i<@lol; $i++) { my $str = join("",@{$lol[$i]}); if ($i % (2+$preds) == 0) { print "\n" if $i; print " $str\n"; } elsif ($i % (2+$preds) == 1) { print "# $str\n"; } else { my $nr = $i / (2+$preds); print "$prefixes[$nr] $str\n"; } } print "\n"; } else { for (my $i=0; $i<@lol; $i++) { if ($i/2 == int($i/2)) { print " ", join("",@{$lol[$i]}), "\n"; } else { print "# ", join("",@{$lol[$i]}), "\n"; } } print "\n"; } } =head1 SYNOPSIS: untangle.pl [OPTIONS] [file] =head1 DESCRIPTION: This script untangles Labeled Fasta as it comes out if you treat it as ordinary Fasta in a Seq or SeqIO object. =head1 OPTIONS: =over 4 =item --help Prints this help. =back =head1 EXAMPLES: cat file.lfa | untangle.pl > untangled.lfa untangle.pl file.lfa > untangled.lfa =head1 AUTHOR: Kasper Munch =head1 COPYRIGHT: This program is free software. You may copy and redistribute it under the same terms as Perl itself. =cut