9 Modules and Packages

9.1 Exercises

9.1.1 Exercise

Implement a matrix and functions to handle it. Choose the data structure of your choice. The API (Application Programming Interface) to implement is the following:

We propose 2 implementations. Both implementations model the matrix as a list of lists, but it is also possible to implement it with a single list, a dict of lists, …

The first implementation follows the API and uses explicit names for inner variables and good documentation.

  1"""
  2Implementation of simple matrix 
  3"""
  4
  5
  6def create(row_num, col_num, val=None):
  7	"""
  8	:param row_num: the number of rows
  9	:type row_num: int
 10	:param col_num: the number of columns
 11	:type col_num: int
 12	:param val: the default value to fill the matrix
 13	:type val: any (None by default)
 14	:return: matrix of rows_num x col_num
 15	:rtype: matrix
 16	"""
 17	matrix = []
 18	for i in range(col_num):
 19		col = [val] * row_num
 20		matrix.append(col)
 21	return matrix
 22
 23
 24def _check_index(matrix, row_no, col_no):
 25	"""
 26	check if row_no and col_no are in matrix bound
 27	
 28	:param matrix: the matrix to compute the size
 29	:type matrix: matrix
 30	:param rows_no: the index of row to check
 31	:type rows_no: int
 32	:param col_no: the index of column to check
 33	:type col_no: int
 34	:raise: IndexError if row_no or col_no are out of matrix bounds
 35	""" 
 36	row_max, col_max = size(matrix)
 37	if (row_no < 0 or row_no >= row_max) or (col_no < 0 or col_no >= col_max):
 38		raise IndexError("matrix index out of range")
 39	
 40	
 41def size(matrix):
 42	"""
 43	:param matrix: the matrix to compute the size
 44	:type matrix: matrix
 45	:return: the size of matrix (number of rows, number of cols)
 46	:rtype: typle of 2 int
 47	"""
 48	return len(matrix[0]), len(matrix)
 49
 50
 51def get_cell(matrix, row_no, col_no):
 52	"""
 53	:param matrix: the matrix 
 54	:type matrix: matrix
 55	:param rows_no: the row number
 56	:type rows_no: int
 57	:param col_no: the column number
 58	:type col_no: int
 59	:retrun: the content of cell corresponding to row_no x col_no
 60	:rtype: any
 61	"""
 62	_check_index(matrix, row_no, col_no)
 63	return matrix[col_no][row_no]
 64
 65
 66def set_cell(matrix, row_no, col_no, val):
 67	"""
 68	set the value val in cell specified by row_no x col_no
 69	
 70	:param matrix: the matrix to modify
 71	:type matrix: matrix
 72	:param row_no: the row number of cell to set
 73	:type rows_no: int
 74	:param col_no: the column number of cell to set
 75	:type col_no: int
 76	:param val: the value to set in cell 
 77	:type val: int
 78	"""
 79	_check_index(matrix, row_no, col_no)
 80	matrix[col_no][row_no] = val
 81
 82
 83def to_str(matrix):
 84	"""
 85	:param matrix: the matrix to compute the size
 86	:type matrix: matrix
 87	:return: a string representation of the matrix
 88	:rtype: str
 89	"""
 90	s = ""
 91	# by design all matrix cols have same size
 92	for row in zip(*matrix):
 93		cells = [str(cell) for cell in row]
 94		s += "\t".join(cells) + "\n"
 95	return s
 96
 97
 98def mult(matrix, val):
 99	"""
100	:param matrix: the matrix
101	:type matrix: matrix
102	:param val: the value to mult the matrix with
103	:type val: int
104	:return: a new matrix corresponding the scalar product of matrix * val
105	:rtype: matrix
106	"""
107	new_matrix = []
108	for col in matrix:
109		new_col = [cell * val for cell in col]
110		new_matrix.append(new_col)
111	return new_matrix
112
113
def mult_inplace(matrix, val):
    """
    Multiply every cell of the matrix by val.

    The cells of the given matrix are overwritten; nothing is returned.

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param val: the value to multiply the matrix with
    :type val: int
    """
    for column in matrix:
        # each cell is overwritten at its own position, one after the other
        for position, current in enumerate(column):
            column[position] = current * val
127
128
def get_row(matrix, row_no):
    """
    Collect the cells of one row.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param row_no: row number
    :type row_no: int
    :return: the row of matrix corresponding to row_no,
             as a new list (shallow copy of the row)
    :rtype: list
    """
    _check_index(matrix, row_no, 0)
    _, col_count = size(matrix)
    # a row is spread over all the columns: pick the cell row_no in each one
    return [get_cell(matrix, row_no, col_n) for col_n in range(col_count)]
145	
146	
def set_row(matrix, row_no, val):
    """
    Put val in every cell of the row row_no (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number
    :type row_no: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_index(matrix, row_no, 0)
    _, col_count = size(matrix)
    # a row is spread over all the columns: visit the cell row_no of each one
    for column_index in range(col_count):
        set_cell(matrix, row_no, column_index, val)
162
163
def get_col(matrix, col_no):
    """
    Give a copy of one column.

    :param matrix: the matrix to read
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :return: the column corresponding to col_no of matrix,
             as a shallow copy of the col
    :rtype: list
    """
    _check_index(matrix, 0, col_no)
    # the slice copies the column, so the caller cannot alter the matrix
    return matrix[col_no][:]
177	
178	
def set_col(matrix, col_no, val):
    """
    Set all cells of column col_no with val (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number
    :type col_no: int
    :param val: the value to put in cells
    :type val: any
    :raise: IndexError if col_no is out of matrix bounds
    """
    _check_index(matrix, 0, col_no)
    # iterate over the row indexes: range() needs the number of rows,
    # not the matrix itself (range(matrix) raises TypeError)
    row_max, _ = size(matrix)
    for row_n in range(row_max):
        set_cell(matrix, row_n, col_no, val)
193
194
def replace_col(matrix, col_no, col):
    """
    Replace column col_no with col (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param col_no: the column number to replace
    :type col_no: int
    :param col: the list of values to use as replacement of column;
                it must hold one value per row of the matrix
    :type col: list
    :raise: RuntimeError if the length of col differs from the number of rows
    :raise: IndexError if col_no is out of matrix bounds
    """
    row_max, col_max = size(matrix)
    # a column holds one cell per row, so its length is the number of rows
    if len(col) != row_max:
        raise RuntimeError(
            "the size of col {0} does not fit to matrix size {1}x{2}".format(
                len(col), row_max, col_max))
    _check_index(matrix, 0, col_no)
    # store a copy so that later changes to the caller's list do not
    # silently alter the matrix
    matrix[col_no] = list(col)
213
214
def replace_row(matrix, row_no, row):
    """
    Replace row row_no with row (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: matrix
    :param row_no: the row number to replace
    :type row_no: int
    :param row: the list of values to use as replacement of row;
                it must hold one value per column of the matrix
    :type row: list
    :raise: RuntimeError if the length of row differs from the number of columns
    :raise: IndexError if row_no is out of matrix bounds
    """
    row_max, col_max = size(matrix)
    # a row holds one cell per column, so its length is the number of columns
    if len(row) != col_max:
        raise RuntimeError(
            "the size of row {0} does not fit to matrix size {1}x{2}".format(
                len(row), row_max, col_max))
    _check_index(matrix, row_no, 0)
    for col_no, value in enumerate(row):
        set_cell(matrix, row_no, col_no, value)
234
235
if __name__ == '__main__':
    # small demonstration: a 5 rows x 3 columns matrix with its corners set
    m = create(5, 3)
    print(m)
    for row_no, col_no, value in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        set_cell(m, row_no, col_no, value)
    print(to_str(m))
    print("get row 0", get_row(m, 0))
    print("get col 0", get_col(m, 0))

    # scalar product done in place on a 3 rows x 2 columns matrix filled with 4
    m2 = create(3, 2, 4)
    mult_inplace(m2, 2)
    print(to_str(m2))

matrix.py .

But the problem with this implementation is that if we decide to change the inner model, for a dict of lists for instance, we must reimplement most of the functions.

In the following implementation we have only 4 functions that directly handle the lists. All other functions manipulate the matrix through these 4 functions. So if we change the inner model we will have to modify only these functions. This implementation will be more maintainable.

But this implementation uses one-letter names for inner variables and is poorly documented, which does not help to maintain it or to develop with it.

The best solution would be the second implementation, but with the variable names and documentation of the first implementation.

 1# matrix is implemented by list of list
 2def matrix_maker(ligne, col, val=None):
 3	m = []
 4	for i in range(ligne):
 5		c = [val] * col
 6		m.append(c)
 7	return m
 8
 9#---- functions that depend on the matrix structure
10
def matrix_size(m):
    """
    Compute the dimensions of a matrix stored as a list of rows.

    :param m: the matrix to compute the size
    :type m: list of list
    :return: the size of the matrix (number of rows, number of cols);
             (0, 0) when the matrix holds no row
    :rtype: tuple of 2 int
    """
    if not m:
        # no row at all: m[0] would raise an unrelated IndexError
        return 0, 0
    # the outer list gives the number of rows, the first row gives the
    # number of columns
    return len(m), len(m[0])
13
def matrix_get(matrix, i, j):
    """
    Read one cell of the matrix.

    :param matrix: the matrix to read
    :type matrix: list of list
    :param i: the row number
    :type i: int
    :param j: the column number
    :type j: int
    :return: the content of the cell at i x j
    :rtype: any
    """
    _check_matindex(matrix, i, j)
    # the outer list holds the rows, so the row is selected first
    row = matrix[i]
    return row[j]
17
def matrix_set(matrix, i, j, val):
    """
    Store val in the cell at i x j (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: list of list
    :param i: the row number
    :type i: int
    :param j: the column number
    :type j: int
    :param val: the value to set in the cell
    :type val: any
    """
    _check_matindex(matrix, i, j)
    # the outer list holds the rows, so the row is selected first
    row = matrix[i]
    row[j] = val
21
def to_str(m):
    """
    Render the matrix as text, one line per row.

    :param m: the matrix to render
    :type m: list of list
    :return: a string representation of the matrix: cells of a row are
             separated by tabs, rows are separated by newlines
             (no newline after the last row)
    :rtype: str
    """
    return '\n'.join('\t'.join(str(cell) for cell in row) for row in m)
28
29
30
31#---- functions independent of the matrix structure
def _check_matindex(matrix, i, j):
    """
    Verify that (i, j) designates a cell of matrix.

    :param matrix: the matrix whose bounds are used
    :type matrix: list of list
    :param i: the row index to check
    :type i: int
    :param j: the column index to check
    :type j: int
    :raise: IndexError if i or j is out of matrix bounds
    """
    row_count, col_count = matrix_size(matrix)
    # the row index is tested first, then the column index
    if i < 0 or i >= row_count:
        raise IndexError("matrix index out of range")
    if j < 0 or j >= col_count:
        raise IndexError("matrix index out of range")
36
37
def matrix_get_line(matrix, i):
    """
    Collect the cells of one row through the matrix accessors.

    :param matrix: the matrix to read
    :type matrix: list of list
    :param i: the row number
    :type i: int
    :return: the cells of row i, as a new list
    :rtype: list
    """
    _check_matindex(matrix, i, 0)
    _, col_count = matrix_size(matrix)
    return [matrix_get(matrix, i, col_index) for col_index in range(col_count)]
45
46
def matrix_set_line(matrix, i, val):
    """
    Put val in every cell of row i (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: list of list
    :param i: the row number
    :type i: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_matindex(matrix, i, 0)
    _, col_count = matrix_size(matrix)
    for col_index in range(col_count):
        matrix_set(matrix, i, col_index, val)
52
53
def matrix_get_col(matrix, j):
    """
    Collect the cells of one column through the matrix accessors.

    :param matrix: the matrix to read
    :type matrix: list of list
    :param j: the column number
    :type j: int
    :return: the cells of column j, as a new list
    :rtype: list
    """
    _check_matindex(matrix, 0, j)
    row_count, _ = matrix_size(matrix)
    return [matrix_get(matrix, row_index, j) for row_index in range(row_count)]
61
62
def matrix_set_col(matrix, j, val):
    """
    Put val in every cell of column j (the matrix is modified in place).

    :param matrix: the matrix to modify
    :type matrix: list of list
    :param j: the column number
    :type j: int
    :param val: the value to put in cells
    :type val: any
    """
    _check_matindex(matrix, 0, j)
    row_count, _ = matrix_size(matrix)
    for row_index in range(row_count):
        matrix_set(matrix, row_index, j, val)
68	
69
if __name__ == '__main__':
    # small demonstration: a 5 rows x 3 columns matrix with its corners set
    m = matrix_maker(5, 3)
    for i, j, value in ((0, 0, 1), (0, 2, 2), (4, 0, 12), (4, 2, 15)):
        matrix_set(m, i, j, value)
    print(to_str(m))
    print("get line 0", matrix_get_line(m, 0))
    print("get col 0", matrix_get_col(m, 0))

matrix2.py .

9.1.2 Exercise

Write a program that calculates the similarity of 2 RNA sequences.

  • To compute the similarity you need to parse a file containing the similarity matrix. Hint: use the module containing the functions that handle a matrix from the previous exercise. Put this matrix.py file in a directory named “my_python_lib” in your home or Desktop and import it in your current program (the similarity script must be placed elsewhere).

  • The similarity of the 2 sequences is the sum of the base similarities. So you have to compare the first base of the two sequences and use the matrix to get their similarity from the similarity table, and so on for all bases, then sum these similarities.

 1import sys
 2import os.path
 3
 4sys.path.insert(0, os.path.join(expanduser('~'), "my_python_lib"))
 5
 6import matrix
 7
 8
 9def parse_similarity_file(path):
10    """
11    parse file containing RNA similarity matrix and return a matrix
12    """
13    sim_matrix = matrix.create(4, 4)
14    with open(path, 'r') as sim_file:
15        #skip first line
16        next(sim_file)
17        for row_no, line in enumerate(sim_file):
18            line = line.strip()
19            fields = line.split()
20            values = [float(val) for val in fields[1:]]
21            matrix.replace_row(sim_matrix, row_no, values)
22    return sim_matrix
23
24
def get_similarity(b1, b2, sim_matrix):
    """
    Look up the similarity of two bases in the similarity matrix.

    The bases are compared case-insensitively (they are upper-cased first).

    :param b1: the first base, must be in ('A', 'G', 'C', 'U')
    :type b1: string
    :param b2: the second base, must be in ('A', 'G', 'C', 'U')
    :type b2: string
    :param sim_matrix: a similarity matrix
    :type sim_matrix: matrix
    :return: the similarity between b1 and b2
    :rtype: float
    :raise: KeyError if b1 or b2 is not a known base
    """
    # position of each base in the rows and columns of the matrix
    base_index = {'A': 0, 'G': 1, 'C': 2, 'U': 3}
    first, second = b1.upper(), b2.upper()
    if first not in base_index:
        raise KeyError("unknown base b1: " + str(first))
    if second not in base_index:
        raise KeyError("unknown base b2: " + str(second))
    return matrix.get_cell(sim_matrix, base_index[first], base_index[second])
44                      
45                           
def compute_similarity(seq1, seq2, sim_matrix):
    """
    Compute a similarity score between 2 RNA sequences of same length.

    The bases are compared position by position; the pairing stops at the
    end of the shorter sequence.

    :param seq1: first sequence to compare
    :type seq1: string
    :param seq2: second sequence to compare
    :type seq2: string
    :param sim_matrix: the similarity matrix used to score each pair of bases
    :type sim_matrix: matrix
    :return: the similarity score between seq1 and seq2
             (sum of the similarities of the paired bases)
    :rtype: float
    """
    return sum(get_similarity(b1, b2, sim_matrix) for b1, b2 in zip(seq1, seq2))
63       
64            
if __name__ == '__main__':
    # demonstration: score two sequences with the matrix read from the
    # file "similarity_matrix" of the current directory
    seq1 = 'AGCAUCUA'
    seq2 = 'ACCGUUCU'
    sim_matrix = parse_similarity_file("similarity_matrix")
    # print is called as a function, like in the matrix module, so that the
    # script also runs with Python 3
    print(matrix.to_str(sim_matrix))
    similarity = compute_similarity(seq1, seq2, sim_matrix)
    print(similarity)
72            

similarity.py .