13 Object Oriented Programming

13.1 Exercises

13.1.1 Exercise

Model a sequence with a few attributes and methods.

 1class Sequence(object):
 2
 3    def __init__(self, identifier, comment, seq):
 4        self.id = identifier
 5        self.comment = comment
 6        self.seq = self._clean(seq)
 7
 8
 9    def _clean(self, seq):
10        """
11        remove newline from the string representing the sequence
12        :param seq: the string to clean
13        :return: the string without '\n'
14        :rtype: string
15        """
16        return seq.replace('\n')
17
18
19    def gc_percent(self):
20        """
21        :return: the gc ratio
22        :rtype: float
23        """
24        seq = self.seq.upper()
25        return float(seq.count('G') + seq.count('C')) / len(seq)
26
27
28
29
# two example sequences, the arguments are named to make their role explicit
dna1 = Sequence(identifier='gi214',
                comment='the first sequence',
                seq='tcgcgcaacgtcgcctacatctcaagattca')
dna2 = Sequence(identifier='gi3421',
                comment='the second sequence',
                seq='gagcatgagcggaattctgcatagcgcaagaatgcggc')

sequence.py .

13.1.2 Exercise

Instantiate 2 sequences using your Sequence class, and draw a schema representing the namespaces.

sequence namespace
_images/spacer.png

13.1.3 Exercise

Can you explain this result (draw the namespaces to explain it)? How can the class variable class_attr be modified?

 1class MyClass(object):
 2
 3    class_attr = 'foo'
 4
 5    def __init__(self, val):
 6        self.inst_attr = val
 7
 8
 9
10
11a = MyClass(1)
12b = MyClass(2)
13
14print a.inst_attr
151
16print b.inst_attr
172
18
19print a.class_attr == b.class_attr
20True
21print a.class_attr is b.class_attr
22True
23
24b.class_attr = 4
25
26print a.class_attr
274
28del a.class_attr
29
30MyClass.class_attr = 4

class_attribute.py .

13.1.4 Exercise

Write the definition of a Point class. Objects of this class should have:

  • a method show to display the coordinates of the point

  • a method move to change these coordinates.

  • a method dist that computes the distance between 2 points.

Note

The distance between 2 points A(x0, y0) and B(x1, y1) can be computed with:

\[d(AB) = \sqrt{(x_1-x_0)^2 + (y_1-y_0)^2}\]

(http://www.mathwarehouse.com/algebra/distance_formula/index.php)

The following python code provides an example of the expected behaviour of objects belonging to this class:

>>> p1 = Point(2, 3)
>>> p2 = Point(3, 3)
>>> p1.show()
(2, 3)
>>> p2.show()
(3, 3)
>>> p1.move(10, -10)
>>> p1.show()
(12, -7)
>>> p2.show()
(3, 3)
>>> p1.dist(p2)
13.45362404707371
 1import math
 2
 3
 4class Point(object):
 5    """Class to handle point in a 2 dimensions space"""
 6
 7    def __init__(self, x, y):
 8        """
 9        :param x: the value on the X-axis
10        :type x: float
11        :param y: the value on the Y-axis
12        :type y: float
13        """
14        self.x = x
15        self.y = y
16
17
18    def show(self):
19        """
20        :return: the coordinate of this point
21        :rtype: a tuple of 2 elements (float, float)
22        """
23        return (self.x, self.y)
24
25
26    def move(self, x, y):
27        """
28        :param x: the value to move on the X-axis
29        :type x: float
30        :param y: the value to move on the Y-axis
31        :type y: float
32        """
33        self.x += x
34        self.y += y
35
36
37    def dist(self, pt):
38        """
39        :param pt: the point to compute the distance with
40        :type pt: :class:`Point` object
41        :return: the distance between this point ant pt
42        :rtype: int
43        """
44        dx = pt.x - self.x
45        dy = pt.y - self.y
46        return math.sqrt(dx ** 2 + dy ** 2)

point.py .

13.1.5 Exercise

Use biopython to read a fasta file (sv40.fasta) and display the attributes

  • id

  • name

  • description

  • seq

Use the SeqIO module of Biopython. A tutorial is available at https://biopython.org/wiki/SeqIO

from Bio import SeqIO

# read the record stored in the fasta file
sv40_rcd = SeqIO.read("sv40.fasta", "fasta")
# display each attribute with its label, in the order asked by the exercise
for label, value in (("id =", sv40_rcd.id),
                     ("name =", sv40_rcd.name),
                     ("description =", sv40_rcd.description),
                     ("sequence =", sv40_rcd.seq)):
    print(label, value)

Other example of usage of SeqIO: seq_io.py

13.1.6 Exercise

Translate the sequence in phase 1, 2, -2

# phase 1: the forward strand, read from its first nucleotide
sv40_seq_phase1 = sv40_rcd.seq
# phase 2: the forward strand, read from its second nucleotide
# (this one is a SeqRecord slice, its sequence is in the .seq attribute)
sv40_seq_phase2 = sv40_rcd[1:]
# phase -2: the reverse complement strand, read from its second nucleotide.
# The reverse complement must be computed BEFORE slicing: slicing the forward
# strand first would remove a nucleotide at the other end of the reverse strand
# and leave its reading frame unchanged.
sv40_seq_phase_2 = sv40_rcd.reverse_complement(id=True)[1:]

# the translations asked by the exercise
sv40_prot_phase1 = sv40_seq_phase1.translate()
sv40_prot_phase2 = sv40_seq_phase2.seq.translate()
sv40_prot_phase_2 = sv40_seq_phase_2.seq.translate()

13.1.7 Exercise

  • Create a sequence with the first 42 nucleotides

  • Translate this sequence

  • Mutate the nucleotide in position 18 ‘A’ -> ‘C’

  • and translate the mutated sequence

see tutorial http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc28

# keep the first 42 elements of sv40_seq_phase2 (defined in the previous exercise)
short_seq = sv40_seq_phase2[0:42]
# translate the unmodified fragment; the result is not stored, so it is only
# visible when this is run in an interactive session
short_seq.translate()
# work on a mutable version of the sequence so that one nucleotide can be replaced
# NOTE(review): tomutable()/toseq() may not exist in recent Biopython releases -- confirm
mutable_seq = short_seq.seq.tomutable()
# NOTE(review): this is the 0-based index 19 of short_seq, whereas the exercise
# says "position 18" -- confirm which nucleotide is meant
mutable_seq[19] = 'C'
# back to a regular sequence object before translating
mutate_seq = mutable_seq.toseq()
# translate the mutated fragment (result not stored either)
mutate_seq.translate()

13.1.8 Exercise

Open the file abcd.fasta (abcd.fasta) and convert it to GenBank format.

Hint: the alphabet attribute of the seq must be set to extended_protein (see the Bio.Alphabet.IUPAC module).

from Bio.Alphabet.IUPAC import extended_protein
# both files are managed by the same "with" statement
with open("abcd.fasta", "r") as fasta_handle, open('abcd.gb', 'w') as gb_handle:
    records = SeqIO.parse(fasta_handle, "fasta")
    for record in records:
        # the alphabet is set on each sequence before the genbank export (see the hint)
        record.seq.alphabet = extended_protein
        print(len(record.seq))
        # the records are appended one by one to the genbank file
        SeqIO.write(record, gb_handle, 'genbank')

13.1.9 Exercise

Open the file abcd.fasta (abcd.fasta) and filter out the sequences whose length is <= 700. Write the results in a fasta file.

# the handles are not named "input"/"output" to avoid hiding the builtin input()
with open("abcd.fasta", "r") as fasta_in, open("abcd_short.fasta", "w") as fasta_out:
    # lazily keep only the records whose sequence is longer than 700
    long_records = (record for record in SeqIO.parse(fasta_in, "fasta")
                    if len(record.seq) > 700)
    # SeqIO.write accepts an iterable of records: one call writes them all
    SeqIO.write(long_records, fasta_out, 'fasta')

13.1.10 Exercise

Use OOP to model restriction enzymes and sequences.

the sequence must implement the following methods

  • enzyme_filter which takes a list of enzymes as argument and returns a new list containing the enzymes which have a binding site in the sequence

the restriction enzyme must implement the following methods

  • binds which takes a sequence as argument and returns True if the sequence contains a binding site, False otherwise.

Solve exercise 7.1.4 using this new implementation.

 1
 2class Sequence(object):
 3
 4    def __init__(self, identifier, comment, seq):
 5        self.id = identifier
 6        self.comment = comment
 7        self.seq = self._clean(seq)
 8
 9
10    def _clean(self, seq):
11        """
12
13        :param seq:
14        :return:
15        """
16        return seq.replace('\n')
17
18    def enzyme_filter(self, enzymes):
19        """
20
21        :param enzymes:
22        :return:
23        """
24        enzymes_which_binds = []
25        for enz in enzymes:
26            if enz.binds(self.seq):
27                enzymes_which_binds.append(enz)
28        return
29
30
class RestrictionEnzyme(object):
    """A restriction enzyme, characterized by the site it binds to."""

    def __init__(self, name, binding, cut, end, comment=''):
        """
        :param name: the name of the enzyme
        :type name: string
        :param binding: the binding site of the enzyme
        :type binding: string
        :param cut: stored as is (presumably the cut position -- to confirm)
        :param end: stored as is (presumably the kind of end -- to confirm)
        :param comment: a free text about the enzyme
        :type comment: string
        """
        self._name = name
        self._binding = binding
        self._cut = cut
        self._end = end
        self._comment = comment

    # the attributes are exposed through read-only properties

    @property
    def name(self):
        return self._name

    @property
    def binding(self):
        return self._binding

    @property
    def cut(self):
        return self._cut

    @property
    def end(self):
        return self._end

    @property
    def comment(self):
        return self._comment

    def binds(self, seq):
        """
        :param seq: the sequence to test, either a string or an object
                    which holds the string in a seq attribute
        :type seq: string or sequence object
        :return: True if seq contains the binding site of this enzyme,
                 False otherwise
        :rtype: boolean
        """
        # accept a sequence object as well as the bare string
        nucleotides = getattr(seq, 'seq', seq)
        # the binding site is stored in _binding
        return self._binding in nucleotides

enzyme.py .

13.1.11 Exercise

Refactor your code of exercise 8.1.15 in OOP style. Implement only:

  • size: return the number of rows, and number of columns

  • get_cell: takes the row number and the column number as parameters, and returns the content of the corresponding cell

  • set_cell: takes the row number, the column number and a value as parameters, and sets the value in the cell specified by the row number and the column number

  • to_str: return a string representation of the matrix

  • mult: takes a scalar and returns a new matrix which is the product of the matrix by the scalar

you can change the name of the methods to be more pythonic

 1
 2
 3
 4
 5class Matrix(object):
 6
 7    def __init__(self, row, col, val=None):
 8        self._row = row
 9        self._col = col
10        self._matrix = []
11        for i in range(row):
12            c = [val] * col
13            self._matrix.append(c)
14
15    def size(self):
16        return self._row, self._col
17
18    def get_cell(self, row, col):
19        self._check_index(row, col)
20        return self._matrix[i][j]
21
22    def matrix_set(self, row, col, val):
23        self._check_index(row, col)
24        self._matrix[row][col] = val
25
26    def __str__(self):
27        s = ''
28        for i in range(self._row):
29            s += self._matrix[i]
30            s += '\n'
31        return s
32
33    def _check_index(self, row, col):
34        if not (0 < row <= self._row) or not (0 < col <= self._col):
35            raise IndexError("matrix index out of range")

matrix_obj.py .

13.1.12 Exercise

Use the code which reads a multiple sequences fasta file in procedural style and refactor it in OOP style. Use the file abcd.fasta to test your code.

What is the benefit of using OOP style instead of procedural style?

 1class Sequence(object):
 2
 3    def __init__(self, id_, sequence, comment=''):
 4        self.id = id_
 5        self.comment = comment
 6        self.sequence = sequence
 7
 8    def gc_percent(self):
 9        seq = self.sequence.upper()
10        return float(seq.count('G') + seq.count('C')) / float(len(seq))
11
12class FastaParser(object):
13
14
15    def __init__(self, fasta_path):
16        self.path = fasta_path
17        self._file = open(fasta_path)
18        self._current_id = ''
19        self._current_comment = ''
20        self._current_sequence = ''
21
22    def _parse_header(self, line):
23        """
24        parse the header line and  _current_id|comment|sequence attributes
25        :param line: the line of header in fasta format
26        :type line: string
27        """
28        header = line.split()
29        self._current_id = header[0][1:]
30        self._current_comment = ' '.join(header[1:])
31        self._current_sequence = ''
32
33    def __iter__(self):
34        return self
35
36    def next(self):
37        """
38        :return: at each call return a new :class:`Sequence` object
39        :raise: StopIteration
40        """
41        for line in self._file:
42            if line.startswith('>'):
43                # a new sequence begin
44                if self._current_id != '':
45                    new_seq = Sequence(self._current_id,
46                                       self._current_sequence,
47                                       comment=self._current_comment)
48                    self._parse_header(line)
49                    return new_seq
50                else:
51                    self._parse_header(line)
52            else:
53                self._current_sequence += line.strip()
54        if not self._current_id and not self._current_sequence:
55            self._file.close()
56            raise StopIteration()
57        else:
58            new_seq = Sequence(self._current_id,
59                               self._current_sequence,
60                               comment=self._current_comment)
61            self._current_id = ''
62            self._current_sequence = ''
63            return new_seq
64
65
if __name__ == '__main__':
    # script entry point: display the gc ratio of each sequence of a fasta file
    import sys
    import os.path

    # exactly one argument is expected: the path of the fasta file
    if len(sys.argv) != 2:
        sys.exit("usage fasta_object fasta_path")
    fasta_path = sys.argv[1]
    if not os.path.exists(fasta_path):
        sys.exit("No such file: {}".format(fasta_path))

    fasta_parser = FastaParser(fasta_path)
    for sequence in fasta_parser:
        # print is called as a function with a single argument,
        # this form gives the same output with Python 2 and Python 3
        print("----------------")
        print("{seqid} = {gc:.3%}".format(gc=sequence.gc_percent(),
                                          seqid=sequence.id))

fasta_object.py .