fasta_subset.py

Reads one or more FASTA files and prints a randomly selected fraction of the sequences
USAGE:
    python fasta_subset.py 0.2 input.fasta

Categories:

  • core/data/io/read_fasta_file

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import sys
from random import random, seed
sys.path.append('../../../../../bin/')
from pybioshell.core.data.io import read_fasta_file, create_fasta_string


# Script body: print a random subset of the sequences found in one or more
# FASTA files.
#
# Command line:
#   argv[1]   fraction of sequences to keep, parsed with float()
#   argv[2]   first FASTA file
#   argv[3:]  optional additional FASTA files
#
# Without both the fraction and at least one file, show the help text and stop.
if len(sys.argv) < 3:
    print("""

    Reads a multiple FASTA files and print a randomly selected fraction of sequences
USAGE:
    python fasta_subset.py 0.2 input.fasta 

    CATEGORIES: core/data/io/read_fasta_file
    KEYWORDS:   FASTA input; sequence
    GROUP:      File processing;Data filtering

  """)
    sys.exit()

# Parse the fraction before touching any file, so a malformed value raises
# ValueError immediately instead of after all the input has been loaded.
fraction = float(sys.argv[1])

# A fixed seed makes the selection reproducible: the same input and fraction
# always yield the same subset.
seed(0)

fasta = read_fasta_file(sys.argv[2])
# The remaining files are read with the existing container passed as the second
# argument -- presumably read_fasta_file() appends to it; confirm against the
# pybioshell API.
for fname in sys.argv[3:]:
    read_fasta_file(fname, fasta)

# Each sequence is kept independently with probability `fraction`, so the
# number of printed sequences only approximates fraction * total.
for seq in fasta:
    if random() < fraction:
        print(create_fasta_string(seq))
../_images/file_icon.png