This Python script is designed to give you a randomly generated 127-nucleotide segment with a unique barcode. The script is currently set to generate 100 sequences, but you can change it to generate as many sequences as you would like.
While testing it, I have found two things so far:
1.) The 2nd hairpin doesn’t always form on its own
2.) Eternafold only predicts pseudoknots in approximately 50% of the sequences that are generated.
There may be other things wrong with it. I am very new to programming, so feel free to edit or change anything as needed.
Here is the script:
import random
import string
def generate_rna_sequence(length):
    """Return a random RNA sequence of ``length`` nucleotides.

    Each position is drawn independently and uniformly from the four RNA
    bases A, C, G and U.

    Args:
        length: Number of nucleotides to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters taken from ``'ACGU'``.
    """
    nucleotides = 'ACGU'
    return ''.join(random.choice(nucleotides) for _ in range(length))
def generate_barcode(length=10, alphabet='ACGU'):
    """Return a random barcode built from RNA nucleotides.

    The barcode ends up embedded in an RNA sequence, so it is drawn from the
    nucleotide alphabet. Upper-case letters and digits (the previous
    alphabet) are not valid bases and make the combined string unusable as
    an RNA sequence.

    Args:
        length: Number of characters in the barcode. Defaults to 10.
        alphabet: Characters to draw from. Defaults to ``'ACGU'``.

    Returns:
        A string of ``length`` characters chosen uniformly from ``alphabet``.
        Uniqueness across calls is NOT guaranteed; callers that need distinct
        barcodes must check for repeats themselves.
    """
    return ''.join(random.choice(alphabet) for _ in range(length))
def generate_pseudoknot_rna_sequences(num_sequences, sequence_length):
    """Build ``num_sequences`` barcoded sequences, each with its own barcode.

    Every result is laid out as ``random_part + barcode + reversed
    random_part``, where ``random_part`` comes from
    ``generate_rna_sequence(sequence_length)`` and ``barcode`` from
    ``generate_barcode()``.

    Args:
        num_sequences: How many sequences to return. Zero or a negative
            value yields an empty list.
        sequence_length: Length passed to ``generate_rna_sequence`` for the
            random part placed on each side of the barcode.

    Returns:
        A list of strings in generation order. No two entries share a
        barcode, so no two entries are equal.

    Raises:
        RuntimeError: If no unused barcode turns up after many consecutive
            draws, which means the barcode space is (nearly) exhausted.
    """
    # Give up rather than spin forever once fresh barcodes stop appearing.
    max_consecutive_collisions = 10000

    rna_sequences = []
    used_barcodes = set()
    consecutive_collisions = 0
    while len(rna_sequences) < num_sequences:
        barcode = generate_barcode()
        if barcode in used_barcodes:
            # De-duplicating whole strings would still let a barcode repeat
            # next to a different random part, so reject the barcode itself.
            consecutive_collisions += 1
            if consecutive_collisions >= max_consecutive_collisions:
                raise RuntimeError(
                    "could not draw an unused barcode; "
                    "too many sequences requested for the barcode space"
                )
            continue
        consecutive_collisions = 0
        used_barcodes.add(barcode)

        sequence = generate_rna_sequence(sequence_length)
        # NOTE(review): sequence[::-1] is the plain reverse, not the reverse
        # complement. Mirrored positions therefore hold identical bases,
        # which presumably cannot pair into a hairpin stem -- confirm whether
        # a reverse complement was intended here.
        rna_sequences.append(sequence + barcode + sequence[::-1])
    return rna_sequences
# How many sequences to generate, and the length of the random part that is
# placed on each side of the barcode.
num_sequences = 100
# NOTE(review): as generate_pseudoknot_rna_sequences builds them, each output
# is 129 + 10 + 129 = 268 characters long, not the 127 nucleotides mentioned
# in the accompanying description -- confirm the intended length.
sequence_length = 129
rna_sequences = generate_pseudoknot_rna_sequences(num_sequences, sequence_length)

# Print the generated sequences, numbered from 1.
for i, sequence in enumerate(rna_sequences, start=1):
    print(f"Sequence {i}: {sequence}")
I’m not sure what you were trying to do with an alphanumeric barcode. However, if you take the random sequence generator and run it to generate 100 sequences of 100 nt each, you can paste those into the Pseudoknot Finder booster. I found 47 in one run.
#!/usr/bin/env python3
import random
import string
def generate_rna_sequence(length):
    """Return a random RNA sequence of ``length`` nucleotides.

    Each position is drawn independently and uniformly from the four RNA
    bases A, C, G and U.

    Args:
        length: Number of nucleotides to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters taken from ``'ACGU'``.
    """
    nucleotides = 'ACGU'
    return ''.join(random.choice(nucleotides) for _ in range(length))
def generate_pseudoknot_rna_sequences(num_sequences, sequence_length):
    """Return ``num_sequences`` distinct random RNA sequences.

    The sequences are plain random strings from ``generate_rna_sequence``.
    Nothing here checks for pseudoknots; the output is meant to be screened
    by a separate tool.

    Args:
        num_sequences: How many distinct sequences to return. Zero or a
            negative value yields an empty list.
        sequence_length: Length of every sequence.

    Returns:
        A list of distinct strings, in no particular order.

    Raises:
        ValueError: If more distinct sequences are requested than exist for
            the given length (``4 ** sequence_length``). Without this check
            the collection loop below would never terminate.
    """
    # Four bases per position; a non-positive length can only produce "".
    distinct_available = 4 ** max(sequence_length, 0)
    if num_sequences > distinct_available:
        raise ValueError(
            f"cannot generate {num_sequences} distinct sequences of length "
            f"{sequence_length}; only {distinct_available} exist"
        )

    # A set discards repeated draws, so keep drawing until enough are held.
    rna_sequences = set()
    while len(rna_sequences) < num_sequences:
        rna_sequences.add(generate_rna_sequence(sequence_length))
    return list(rna_sequences)
# Generate 100 random sequences of 100 nucleotides each.
num_sequences = 100
sequence_length = 100
rna_sequences = generate_pseudoknot_rna_sequences(num_sequences, sequence_length)

# Print one bare sequence per line so the output can be pasted into another
# tool unchanged.
for sequence in rna_sequences:
    print(sequence)
1 Like
Well, you need to replace your code with the code below.
import random
import string
def generate_rna_sequence(length):
    """Return a random RNA sequence of ``length`` nucleotides.

    Each position is drawn independently and uniformly from the four RNA
    bases A, C, G and U.

    Args:
        length: Number of nucleotides to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters taken from ``'ACGU'``.
    """
    nucleotides = 'ACGU'
    return ''.join(random.choice(nucleotides) for _ in range(length))
def generate_barcode(length=10, alphabet='ACGU'):
    """Return a random barcode built from RNA nucleotides.

    The barcode ends up embedded in an RNA sequence, so it is drawn from the
    nucleotide alphabet. Upper-case letters and digits (the previous
    alphabet) are not valid bases and make the combined string unusable as
    an RNA sequence.

    Args:
        length: Number of characters in the barcode. Defaults to 10.
        alphabet: Characters to draw from. Defaults to ``'ACGU'``.

    Returns:
        A string of ``length`` characters chosen uniformly from ``alphabet``.
        Uniqueness across calls is NOT guaranteed; callers that need distinct
        barcodes must check for repeats themselves.
    """
    return ''.join(random.choice(alphabet) for _ in range(length))
def generate_pseudoknot_rna_sequences(num_sequences, sequence_length):
    """Build ``num_sequences`` barcoded sequences, each with its own barcode.

    Every result is laid out as ``random_part + barcode + reversed
    random_part``, where ``random_part`` comes from
    ``generate_rna_sequence(sequence_length)`` and ``barcode`` from
    ``generate_barcode()``.

    Args:
        num_sequences: How many sequences to return. Zero or a negative
            value yields an empty list.
        sequence_length: Length passed to ``generate_rna_sequence`` for the
            random part placed on each side of the barcode.

    Returns:
        A list of strings in generation order. No two entries share a
        barcode, so no two entries are equal.

    Raises:
        RuntimeError: If no unused barcode turns up after many consecutive
            draws, which means the barcode space is (nearly) exhausted.
    """
    # Give up rather than spin forever once fresh barcodes stop appearing.
    max_consecutive_collisions = 10000

    rna_sequences = []
    used_barcodes = set()
    consecutive_collisions = 0
    while len(rna_sequences) < num_sequences:
        barcode = generate_barcode()
        if barcode in used_barcodes:
            # De-duplicating whole strings would still let a barcode repeat
            # next to a different random part, so reject the barcode itself.
            consecutive_collisions += 1
            if consecutive_collisions >= max_consecutive_collisions:
                raise RuntimeError(
                    "could not draw an unused barcode; "
                    "too many sequences requested for the barcode space"
                )
            continue
        consecutive_collisions = 0
        used_barcodes.add(barcode)

        sequence = generate_rna_sequence(sequence_length)
        # NOTE(review): sequence[::-1] is the plain reverse, not the reverse
        # complement. Mirrored positions therefore hold identical bases,
        # which presumably cannot pair into a hairpin stem -- confirm whether
        # a reverse complement was intended here.
        rna_sequences.append(sequence + barcode + sequence[::-1])
    return rna_sequences
# How many sequences to generate, and the length of the random part that is
# placed on each side of the barcode.
num_sequences = 100
# NOTE(review): as generate_pseudoknot_rna_sequences builds them, each output
# is 129 + 10 + 129 = 268 characters long, not the 127 nucleotides mentioned
# in the accompanying description -- confirm the intended length.
sequence_length = 129
rna_sequences = generate_pseudoknot_rna_sequences(num_sequences, sequence_length)

# Print the generated sequences, numbered from 1.
for i, sequence in enumerate(rna_sequences, start=1):
    print(f"Sequence {i}: {sequence}")
Thanks