#!/usr/bin/perl # Time-stamp: <2005-03-03 15:21:04 kasper> use warnings; use strict; use LFasta; use LFastaIO; use Getopt::Long; use constant USAGE =>< Sample bases from regions with the same label in sequences from another file. --labels Specifies string og labels where sequence is to be scrambled. Default is 'C'. --mix Scramble sequence with all specifyed letters in one pool. EXAMPLES: cat file.lfa | lfascramble.pl > scrambled.lfa lfascramble.pl --labels I120 AUTHOR: Kasper Munch COPYRIGHT: This program is free software. You may copy, modify, and redistribute it under the same terms as Perl itself. END my $help = 0; my $sample = ''; my $labels = 'C'; my $mix = 0; GetOptions( "help" => \$help, "mix" => \$mix, "sample=s" => \$sample, "labels=s" => \$labels, ) or die USAGE; $help and die USAGE; @ARGV = ('-') unless @ARGV; my $input = shift @ARGV; @ARGV = ('>&STDOUT') unless @ARGV; my $output = shift @ARGV; my $in = LFastaIO->new(file => $input); my $out = LFastaIO->new(file => ">$output"); $sample ||= $input; my @labels = split //, $labels; if ($mix) { my $s = join "", @labels; @labels = ("[$s]"); } my $maxlength = 5e7; my $samplelength = 0; my $samplesource = (); my $fh = LFastaIO->new(file => $sample); while (my $lfa = <$fh>) { if ($samplelength > $maxlength) { warn "Limiting in-memory sample source to $samplelength characters\n"; last; } my $seq = $lfa->seq; my $lab = $lfa->labels; for my $l (@labels) { while ($lab =~ /$l+/g) { my $beg = $-[0]; my $len = $+[0] - $-[0]; my $str = substr($seq,$beg,$len); my @str = split //, $str; push @$samplesource, @str; $samplelength += $len; } } } #die $& if $sample_str =~ /[^abcdefgh]/i; while (my $lfa = <$in>) { my $seqstr = $lfa->seq; my $labelstr = $lfa->labels; for my $l (@labels) { # Get each coding region: while ($labelstr =~ /$l+/g) { my $begin = $-[0]; my $length = $+[0] - $-[0]; my $replacement; if ($sample) { my $len = $length; while ($len--) { my $rand = int(rand($samplelength)); $replacement .= $$samplesource[$rand]; } } else { die "Scrambling not implemented yet. Only --sample works.\n"; } substr($seqstr,$begin,$length,$replacement) or die "Couldn't replace"; } } $lfa->seq($seqstr); print $out $lfa; } =head1 SYNOPSIS: lfascramble.pl [OPTIONS] [infile [outfile]] =head1 DESCRIPTION: This script scrambles the bases in the sequence specified by labels. When specifying more than one label each type of sequence is scrampled seperately. =head1 OPTIONS: =over 4 =item --help Prints this help. =item --sample Sample bases from regions with the same label in sequences from another file. =item --labels Specifies string og labels where sequence is to be scrambled. Default is 'C'. =item --mix Scramble sequence with all specifyed letters in one pool. =back =head1 EXAMPLES: cat file.lfa | lfascramble.pl > scrambled.lfa lfascramble.pl --labels I120 =head1 AUTHOR: Kasper Munch =head1 COPYRIGHT: This program is free software. You may copy, modify, and redistribute it under the same terms as Perl itself. =cut