9 Modules and Packages¶
9.1 Exercises¶
9.1.1 Exercise¶
Implement a matrix and the functions to handle it. Choose the data structure you prefer. The API (Application Programming Interface) to implement is the following:
We propose 2 implementations. Both use a list of lists to model the matrix, but it is also possible to implement it with a single list, a dict of lists, …
The first implementation follows the API, uses explicit names for inner variables, and is well documented.
1"""
2Implementation of simple matrix
3"""
4
5
def create(row_num, col_num, val=None):
    """
    Build a new matrix stored as a list of columns.

    :param row_num: the number of rows
    :type row_num: int
    :param col_num: the number of columns
    :type col_num: int
    :param val: the value placed in every cell
    :type val: any (None by default)
    :return: a list of col_num lists, each one holding row_num times val
    :rtype: matrix
    """
    # a fresh inner list is built for each column, so columns never alias
    return [[val] * row_num for _ in range(col_num)]
22
23
def _check_index(matrix, row_no, col_no):
    """
    Verify that (row_no, col_no) designates an existing cell of matrix.

    :param matrix: the matrix whose size gives the bounds
    :type matrix: matrix
    :param row_no: the row index to check
    :type row_no: int
    :param col_no: the column index to check
    :type col_no: int
    :raise: IndexError if row_no or col_no is out of the matrix bounds
    """
    row_count, col_count = size(matrix)
    # negative indices are rejected too: no wrap-around from the end
    if not (0 <= row_no < row_count and 0 <= col_no < col_count):
        raise IndexError("matrix index out of range")
39
40
def size(matrix):
    """
    Give the dimensions of matrix.

    The matrix is a list of columns: the number of columns is the length of
    the outer list and the number of rows is the length of a column.

    :param matrix: the matrix to measure
    :type matrix: matrix
    :return: the size of matrix (number of rows, number of cols);
             (0, 0) when the matrix holds no column at all
    :rtype: tuple of 2 int
    """
    if not matrix:
        # no column exists, so there is no first column to measure
        return 0, 0
    # the first column is enough to know the number of rows
    return len(matrix[0]), len(matrix)
49
50
def get_cell(matrix, row_no, col_no):
    """
    Read one cell of matrix.

    :param matrix: the matrix
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :param col_no: the column number
    :type col_no: int
    :return: the content of the cell at row row_no and column col_no
    :rtype: any
    :raise: IndexError (from _check_index) if the indices are out of bounds
    """
    _check_index(matrix, row_no, col_no)
    # the outer list holds the columns, hence the column index comes first
    column = matrix[col_no]
    return column[row_no]
64
65
def set_cell(matrix, row_no, col_no, val):
    """
    Store val in the cell located at row row_no and column col_no.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number of the cell to set
    :type row_no: int
    :param col_no: the column number of the cell to set
    :type col_no: int
    :param val: the value to put in the cell
    :type val: any
    :raise: IndexError (from _check_index) if the indices are out of bounds
    """
    _check_index(matrix, row_no, col_no)
    # the outer list holds the columns, hence the column index comes first
    column = matrix[col_no]
    column[row_no] = val
81
82
def to_str(matrix):
    """
    Render matrix as text, one line per row.

    :param matrix: the matrix to display
    :type matrix: matrix
    :return: the cells of each row joined by tabs, every row ended by a
             newline
    :rtype: str
    """
    # zip(*matrix) turns the list of columns into a sequence of rows
    lines = ["\t".join(map(str, row)) + "\n" for row in zip(*matrix)]
    return "".join(lines)
96
97
def mult(matrix, val):
    """
    Multiply every cell of matrix by val without touching matrix.

    :param matrix: the matrix
    :type matrix: matrix
    :param val: the value to multiply the cells with
    :type val: int
    :return: a new matrix whose cells are the cells of matrix times val
    :rtype: matrix
    """
    return [[cell * val for cell in column] for column in matrix]
112
113
def mult_inplace(matrix, val):
    """
    Multiply every cell of matrix by val, overwriting the cells of matrix.

    Nothing is returned: the columns of matrix are updated in place.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param val: the value to multiply the cells with
    :type val: int
    """
    for column in matrix:
        for row_nb in range(len(column)):
            column[row_nb] = column[row_nb] * val
127
128
def get_row(matrix, row_no):
    """
    Collect the cells of one row.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :return: a new list with the cells of row row_no, one per column
             (the cells themselves are not copied)
    :rtype: list
    :raise: IndexError (from _check_index) if row_no is out of bounds
    """
    _check_index(matrix, row_no, 0)
    col_count = size(matrix)[1]
    # a row is spread over all the columns: pick one cell in each of them
    return [get_cell(matrix, row_no, col_n) for col_n in range(col_count)]
145
146
def set_row(matrix, row_no, val):
    """
    Put val in every cell of row row_no.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :param val: the value to put in the cells
    :type val: any
    :raise: IndexError (from _check_index) if row_no is out of bounds
    """
    _check_index(matrix, row_no, 0)
    col_count = size(matrix)[1]
    # a row is spread over all the columns: update one cell in each of them
    for col_n in range(col_count):
        set_cell(matrix, row_no, col_n, val)
162
163
def get_col(matrix, col_no):
    """
    Give the cells of one column.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :return: a shallow copy of column col_no of matrix
    :rtype: list
    :raise: IndexError (from _check_index) if col_no is out of bounds
    """
    _check_index(matrix, 0, col_no)
    # slicing copies the column so the caller cannot alter the matrix through it
    return matrix[col_no][:]
177
178
def set_col(matrix, col_no, val):
    """
    Put val in every cell of column col_no.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :param val: the value to put in the cells
    :type val: any
    :raise: IndexError (from _check_index) if col_no is out of bounds
    """
    _check_index(matrix, 0, col_no)
    # iterate over the row indices; range() needs the row count, not the
    # matrix itself
    row_max, _ = size(matrix)
    for row_n in range(row_max):
        set_cell(matrix, row_n, col_no, val)
193
194
def replace_col(matrix, col_no, col):
    """
    Replace column col_no with the values of col.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number to replace
    :type col_no: int
    :param col: the list of values to use as replacement of the column,
                it must hold one value per row of matrix
    :type col: list
    :raise: RuntimeError if the length of col differs from the number of rows
    :raise: IndexError (from _check_index) if col_no is out of bounds
    """
    row_max, col_max = size(matrix)
    # a column holds one cell per row, so its length is the row count
    if len(col) != row_max:
        raise RuntimeError("the size of col {0} does not fit to matrix size {1}x{2}".format(len(col),
                                                                                             row_max,
                                                                                             col_max))
    _check_index(matrix, 0, col_no)
    # store a copy so later changes to the caller's list do not leak into
    # the matrix
    matrix[col_no] = list(col)
213
214
def replace_row(matrix, row_no, row):
    """
    Replace row row_no with the values of row.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number to replace
    :type row_no: int
    :param row: the list of values to use as replacement of the row,
                it must hold one value per column of matrix
    :type row: list
    :raise: RuntimeError if the length of row differs from the number of
            columns
    :raise: IndexError (from _check_index) if row_no is out of bounds
    """
    row_max, col_max = size(matrix)
    # a row holds one cell per column, so its length is the column count
    if len(row) != col_max:
        raise RuntimeError("the size of row {0} does not fit to matrix size {1}x{2}".format(len(row),
                                                                                             row_max,
                                                                                             col_max))
    _check_index(matrix, row_no, 0)
    # the row is spread over the columns: write one cell in each of them
    for col_no, value in enumerate(row):
        set_cell(matrix, row_no, col_no, value)
234
235
if __name__ == '__main__':
    # small demonstration: a 5 rows x 3 columns matrix with its corners set
    m = create(5, 3)
    print(m)
    for row_no, col_no, value in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        set_cell(m, row_no, col_no, value)
    print(to_str(m))
    print("get row 0", get_row(m, 0))
    print("get col 0", get_col(m, 0))

    # in-place scalar product on a matrix filled with 4
    m2 = create(3, 2, 4)
    mult_inplace(m2, 2)
    print(to_str(m2))
But the problem with this implementation is that if we decide to change the inner model, to a dict of lists for instance, we must reimplement most of the functions.
In the following implementation, only 4 functions handle the lists directly. All other functions manipulate the matrix through these 4 functions. So if we change the inner model, we will have to modify only these functions. This implementation is more maintainable.
But this implementation uses one-letter names for inner variables and is poorly documented, which does not help to maintain it or to develop with it.
The best solution would be the second implementation, but with the variable names and documentation of the first implementation.
1# the matrix is implemented as a list of lists
def matrix_maker(ligne, col, val=None):
    """
    Build a new matrix stored as a list of lines (rows).

    :param ligne: the number of lines (rows)
    :type ligne: int
    :param col: the number of columns
    :type col: int
    :param val: the value placed in every cell
    :type val: any (None by default)
    :return: a list of ligne lists, each one holding col times val
    :rtype: list of lists
    """
    # a fresh inner list is built for each line, so lines never alias
    return [[val] * col for _ in range(ligne)]
8
9#---- functions that depend on the matrix structure
10
def matrix_size(m):
    """
    Give the dimensions of the matrix m.

    The matrix is a list of lines: the number of lines is the length of the
    outer list and the number of columns is the length of the first line.

    :param m: the matrix to measure
    :type m: list of lists
    :return: (number of lines, number of columns);
             (0, 0) when the matrix holds no line at all
    :rtype: tuple of 2 int
    """
    if not m:
        # no line exists, so there is no first line to measure
        return 0, 0
    return len(m), len(m[0])
13
def matrix_get(matrix, i, j):
    """
    Read the cell at line i and column j.

    :param matrix: the matrix to read
    :type matrix: list of lists
    :param i: the line number
    :type i: int
    :param j: the column number
    :type j: int
    :return: the content of the cell
    :rtype: any
    :raise: IndexError (from _check_matindex) if i or j is out of bounds
    """
    _check_matindex(matrix, i, j)
    line = matrix[i]
    return line[j]
17
def matrix_set(matrix, i, j, val):
    """
    Store val in the cell at line i and column j.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param i: the line number
    :type i: int
    :param j: the column number
    :type j: int
    :param val: the value to put in the cell
    :type val: any
    :raise: IndexError (from _check_matindex) if i or j is out of bounds
    """
    _check_matindex(matrix, i, j)
    line = matrix[i]
    line[j] = val
21
def to_str(m):
    """
    Render the matrix m as text, one text line per matrix line.

    :param m: the matrix to display
    :type m: list of lists
    :return: the cells of each line joined by tabs, the lines joined by
             newlines (no trailing newline)
    :rtype: str
    """
    return '\n'.join('\t'.join(str(cell) for cell in row) for row in m)
28
29
30
31#---- functions independent of the matrix structure
def _check_matindex(matrix, i, j):
    """
    Verify that (i, j) designates an existing cell of matrix.

    :param matrix: the matrix whose size gives the bounds
    :type matrix: list of lists
    :param i: the line index to check
    :type i: int
    :param j: the column index to check
    :type j: int
    :raise: IndexError if i or j is out of the matrix bounds
    """
    line_count, col_count = matrix_size(matrix)
    # negative indices are rejected too: no wrap-around from the end
    if not (0 <= i < line_count and 0 <= j < col_count):
        raise IndexError("matrix index out of range")
36
37
def matrix_get_line(matrix, i):
    """
    Collect the cells of line i.

    :param matrix: the matrix to read
    :type matrix: list of lists
    :param i: the line number
    :type i: int
    :return: a new list with the cells of line i
    :rtype: list
    :raise: IndexError (from _check_matindex) if i is out of bounds
    """
    _check_matindex(matrix, i, 0)
    col_count = matrix_size(matrix)[1]
    # go through matrix_get so this function ignores how cells are stored
    return [matrix_get(matrix, i, n) for n in range(col_count)]
45
46
def matrix_set_line(matrix, i, val):
    """
    Put val in every cell of line i.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param i: the line number
    :type i: int
    :param val: the value to put in the cells
    :type val: any
    :raise: IndexError (from _check_matindex) if i is out of bounds
    """
    _check_matindex(matrix, i, 0)
    col_count = matrix_size(matrix)[1]
    # go through matrix_set so this function ignores how cells are stored
    for col_idx in range(col_count):
        matrix_set(matrix, i, col_idx, val)
52
53
def matrix_get_col(matrix, j):
    """
    Collect the cells of column j.

    :param matrix: the matrix to read
    :type matrix: list of lists
    :param j: the column number
    :type j: int
    :return: a new list with the cells of column j, one per line
    :rtype: list
    :raise: IndexError (from _check_matindex) if j is out of bounds
    """
    _check_matindex(matrix, 0, j)
    line_count = matrix_size(matrix)[0]
    # go through matrix_get so this function ignores how cells are stored
    return [matrix_get(matrix, n, j) for n in range(line_count)]
61
62
def matrix_set_col(matrix, j, val):
    """
    Put val in every cell of column j.

    :param matrix: the matrix to modify
    :type matrix: list of lists
    :param j: the column number
    :type j: int
    :param val: the value to put in the cells
    :type val: any
    :raise: IndexError (from _check_matindex) if j is out of bounds
    """
    _check_matindex(matrix, 0, j)
    line_count = matrix_size(matrix)[0]
    # go through matrix_set so this function ignores how cells are stored
    for line_idx in range(line_count):
        matrix_set(matrix, line_idx, j, val)
68
69
if __name__ == '__main__':
    # small demonstration: a 5 lines x 3 columns matrix with its corners set
    m = matrix_maker(5, 3)
    for i, j, value in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        matrix_set(m, i, j, value)
    print(to_str(m))
    print("get line 0", matrix_get_line(m, 0))
    print("get col 0", matrix_get_col(m, 0))
9.1.2 Exercise¶
Write a program that calculates the similarity of 2 RNA sequences.
To compute the similarity you need to parse a file containing the similarity matrix. Hint: use the module containing the functions that handle a matrix from the previous chapter. Put this matrix.py file in a directory named “my_python_lib” in your home or Desktop directory and import it in your current program (the similarity script must be placed elsewhere). The similarity of the 2 sequences is the sum of the base similarities: compare the first base of the two sequences and use the matrix to get their similarity from the similarity table, and so on for all the bases, then sum these similarities.
1import sys
2import os.path
3
# make the personal library directory importable before importing matrix;
# expanduser lives in os.path and is not a bare name in this script
sys.path.insert(0, os.path.join(os.path.expanduser('~'), "my_python_lib"))
5
6import matrix
7
8
def parse_similarity_file(path):
    """
    Parse a file containing an RNA similarity matrix and return a matrix.

    The first line of the file is skipped. On every following non-blank
    line the first field is ignored and the remaining fields are converted
    to float; they become one row of the result, in file order.

    :param path: the path of the similarity file
    :type path: string
    :return: the matrix created with matrix.create(4, 4) and filled row by row
    :rtype: matrix
    """
    sim_matrix = matrix.create(4, 4)
    with open(path, 'r') as sim_file:
        # skip first line
        next(sim_file)
        row_no = 0
        for line in sim_file:
            fields = line.split()
            if not fields:
                # a blank line carries no values and must not consume a row
                continue
            values = [float(val) for val in fields[1:]]
            matrix.replace_row(sim_matrix, row_no, values)
            row_no += 1
    return sim_matrix
23
24
def get_similarity(b1, b2, sim_matrix):
    """
    Look up the similarity of two bases (the case of the letters is ignored).

    :param b1: the first base, must be in ('A', 'G', 'C', 'U')
    :type b1: string
    :param b2: the second base, must be in ('A', 'G', 'C', 'U')
    :type b2: string
    :param sim_matrix: a similarity matrix
    :type sim_matrix: matrix
    :return: the cell of sim_matrix at the row of b1 and the column of b2
    :rtype: float
    :raise: KeyError if b1 or b2 is not a known base
    """
    # position of each base in the rows and columns of the similarity matrix
    base_index = {'A': 0, 'G': 1, 'C': 2, 'U': 3}
    b1 = b1.upper()
    b2 = b2.upper()
    if b1 not in base_index:
        raise KeyError("unknown base b1: " + str(b1))
    if b2 not in base_index:
        raise KeyError("unknown base b2: " + str(b2))
    row_no = base_index[b1]
    col_no = base_index[b2]
    return matrix.get_cell(sim_matrix, row_no, col_no)
44
45
def compute_similarity(seq1, seq2, sim_matrix):
    """
    Compute a similarity score between 2 RNA sequences of the same length.

    The bases are compared position by position; zip stops at the end of
    the shorter sequence, so extra bases of a longer one are ignored.

    :param seq1: first sequence to compare
    :type seq1: string
    :param seq2: second sequence to compare
    :type seq2: string
    :param sim_matrix: the similarity matrix used to score each pair of bases
    :type sim_matrix: matrix
    :return: the sum of the similarities of the paired bases
    :rtype: float
    """
    return sum(get_similarity(base1, base2, sim_matrix)
               for base1, base2 in zip(seq1, seq2))
63
64
if __name__ == '__main__':
    seq1 = 'AGCAUCUA'
    seq2 = 'ACCGUUCU'
    # the similarity file is looked up in the current working directory
    sim_matrix = parse_similarity_file("similarity_matrix")
    # print() with a single argument gives the same output on Python 2 and 3
    print(matrix.to_str(sim_matrix))
    similarity = compute_similarity(seq1, seq2, sim_matrix)
    print(similarity)
72