メモ

講習会中に使ったメモや問題の解答など。

In [1]:
# Add two integers and show the total.
a, b = 1, 2
x = sum((a, b))
print(x)
3
In [7]:
# Report the 0-based index at which the last 'ATG' in the sequence begins
# (str.rfind gives -1 when the substring does not occur).
motif = 'ATG'
s = 'ATGCGATCAGGCTATCGTCGTAGCTAGCTATCTAGCTATG'
result = s.rfind(motif)
print(result)
37
In [10]:
import numpy as np
# Demonstrate slicing and integer-list indexing on a 1-D array:
# first three items, positions 2/4/6, positions 1..6, positions 8/4/2.
a = np.array([1, 3, 5, 7, 9, 2, 4, 6, 8, 0])
for picker in (slice(None, 3), [2, 4, 6], slice(1, 7), [8, 4, 2]):
    print(a[picker])
[1 3 5]
[5 9 4]
[3 5 7 9 2 4]
[8 9 5]
In [11]:
# Build an array of one-character strings; the cell displays its repr and dtype.
np.array(list('abc'))
Out[11]:
array(['a', 'b', 'c'], dtype='<U1')
In [14]:
# Choose d from a and b (3 when a is not positive, otherwise 1 or 2
# depending on whether b reaches 1), then add the offset c.
a, b, c = 1, 1, 0
if a <= 0:
    d = 3
elif b >= 1:
    d = 1
else:
    d = 2
e = c + d
e
Out[14]:
1
In [4]:
# Translate a weekday number (1 = 'Mon' ... 7 = 'Sun') into its
# three-letter name; any other number leaves d as the empty string.
a = 4
day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
d = dict(enumerate(day_names, start=1)).get(a, '')


print(d)
Thu
In [6]:
# Element-wise sum of two lists, collected into z.
a = [1, 1, 2, 3, 4, 5]
b = [1, 3, 5, 7, 9, 10]

# Walk every position of `a`; `b` is indexed with the same position.
z = [a[pos] + b[pos] for pos in range(len(a))]

print(z)
[2, 4, 7, 10, 13, 15]
In [12]:
a = [1, 7, 4, 8, 3]


def calc_odd_sum(x):
    """Return the sum of the odd numbers in ``x``.

    Parameters
    ----------
    x : iterable of int
        Values to scan.

    Returns
    -------
    int
        Sum of every element ``w`` with ``w % 2 == 1``; 0 if there is none.
    """
    # Iterate over the argument itself, not the module-level list `a`,
    # so the function works for any input it is given.
    return sum(w for w in x if w % 2 == 1)


y = calc_odd_sum(a)
print(y)
11
In [9]:
# Total of the odd entries in the list.
a = [1, 7, 4, 8, 3]

s = sum(value for value in a if value % 2 == 1)

print(s)
11
In [15]:
# Count the entries whose first character is '>'.
fh = ['>1ALK:A', 'TPEMPVL', 'TGQYTHA', '>1ALK:B', 'TPEMPVL', 'TGQYTHA']
n = sum(1 for entry in fh if entry[0] == '>')
print(n)
2
In [18]:
# Count the lines of the file whose first character is '>'.
f = '1alk.fa'
with open(f, 'r') as fh:
    n = sum(1 for line in fh if line[0] == '>')

print(n)
2
In [36]:
# Print each header line of the file followed by the total number of
# sequence characters that belong to it.
f = '1alk.fa'
name = ''    # protein name (header line of the record being read)
n_chars = 0  # protein length (number of amino-acid characters so far)
with open(f, 'r') as fh:
    for line in fh:
        line = line.rstrip('\n')
        if not line:
            # A blank line has no first character to test and adds no
            # residues, so skip it instead of failing on line[0].
            continue
        if line.startswith('>'):
            # A new header closes the previous record: report it first.
            if name != '':
                print(name)
                print(n_chars)
            name = line
            n_chars = 0
        else:
            n_chars = n_chars + len(line)
# The final record is not followed by another header; report it here.
print(name)
print(n_chars)
>1ALK:A|PDBID|CHAIN|SEQUENCE
449
>1ALK:B|PDBID|CHAIN|SEQUENCE
449
In [62]:
f = 'diversity_galapagos.txt'

# For every data row print (tab-separated column 1 as int) divided by
# (column 3 as float), i.e. species count per unit area. Lines starting
# with '#' and the header line starting with 'Island' are skipped.
with open(f, 'r') as fh:
    for line in fh:
        if line.startswith('#') or line.startswith('Island'):
            continue
        fields = line.split('\t')
        print(int(fields[1]) / float(fields[3]))
2.311677959346353
25.0
14.285714285714286
250.0
40.0
52.94117647058823
300.0
4.291845493562231
266.6666666666667
11.11111111111111
1.6646644928779817
0.14657441409636085
101.75438596491229
6.41025641025641
2.3054755043227666
0.07431488953423625
0.39385280716657656
200.0
1.746138347884486
6.016713091922006
52.17391304347826
14.314928425357873
0.5075958087088938
0.4140967623573812
0.4912482573963842
2.5747508305647844
1.6674467587175288
23.913043478260867
12.903225806451614
7.368421052631579
In [69]:
import pandas as pd
# Load the tab-separated table ('#' starts a comment, the first parsed
# line is the header) and show its last five rows.
file = 'sleep_in_mammals.txt'
d = pd.read_csv(
    file,
    sep='\t',
    header=0,
    comment='#',
)
print(d.tail(5))
                 Species  BodyWt  BrainWt  NonDreaming  Dreaming  TotalSleep  \
57             Treehyrax   2.000     12.3          4.9       0.5         5.4   
58             Treeshrew   0.104      2.5         13.2       2.6        15.8   
59                Vervet   4.190     58.0          9.7       0.6        10.3   
60          Wateropossum   3.500      3.9         12.8       6.6        19.4   
61  Yellow-belliedmarmot   4.050     17.0          NaN       NaN         NaN   

    LifeSpan  Gestation  Predation  Exposure  Danger  
57       7.5      200.0          3         1       3  
58       2.3       46.0          3         2       2  
59      24.0      210.0          4         3       4  
60       3.0       14.0          2         1       1  
61      13.0       38.0          3         1       1  
In [83]:
import pandas as pd
# Show the row(s) with the largest 'Area' value, once as a Series and
# once as the matching index labels.
file = 'diversity_galapagos.txt'
d = pd.read_csv(file, comment='#', header=0, sep='\t', index_col=0)
x = d.loc[:, 'Area']
y = d.index.values
# Series.max() is vectorised and skips NaN; the builtin max() compares
# element by element and gives an order-dependent result when NaN is present.
keep = (x == x.max())
print(x[keep])
print(y[keep])
Island
Isabela    4669.32
Name: Area, dtype: float64
['Isabela']
In [89]:
import pandas as pd
# 'Species' divided by 'Area' for every row, labelled by the first column.
file = 'diversity_galapagos.txt'
d = pd.read_csv(file, sep='\t', header=0, comment='#', index_col=0)
s, a = d['Species'], d['Area']
r = s / a
print(r)
Island
Baltra            2.311678
Bartolome        25.000000
Caldwell         14.285714
Champion        250.000000
Coamano          40.000000
Daphne.Major     52.941176
Daphne.Minor    300.000000
Darwin            4.291845
Eden            266.666667
Enderby          11.111111
Espanola          1.664664
Fernandina        0.146574
Gardner1        101.754386
Gardner2          6.410256
Genovesa          2.305476
Isabela           0.074315
Marchena          0.393853
Onslow          200.000000
Pinta             1.746138
Pinzon            6.016713
Las.Plazas       52.173913
Rabida           14.314928
SanCristobal      0.507596
SanSalvador       0.414097
SantaCruz         0.491248
SantaFe           2.574751
SantaMaria        1.667447
Seymour          23.913043
Tortuga          12.903226
Wolf              7.368421
dtype: float64
In [119]:
import pandas as pd
# Print the 'ID' of the row(s) with the largest difference Hgt97 - Hgt90.
file = 'pines.txt'
d = pd.read_csv(file, sep='\t', header=0)
treeid, hgt90, hgt97 = d['ID'], d['Hgt90'], d['Hgt97']
diff = hgt97 - hgt90
keep = diff.eq(diff.max())
print(treeid[keep])
723    724
Name: ID, dtype: int64
In [140]:
# Exercise: find the mean length/width ratio of the petals and the mean
# length/width ratio of the sepals.
# This cell computes the sepal ratio only, averaged separately per species.
import pandas as pd
file = 'iris.txt'
d = pd.read_csv(file, sep='\t', header=0)

# Length-to-width ratio of the sepal for every row.
sepal_length, sepal_width = d['Sepal.Length'], d['Sepal.Width']
sepal_ratio = sepal_length / sepal_width

# One boolean row filter per species.
species = d['Species']
setosa_f = species.eq('setosa')
versicolor_f = species.eq('versicolor')
virginica_f = species.eq('virginica')

# Mean ratio within each species, printed in the order listed above.
for species_filter in (setosa_f, versicolor_f, virginica_f):
    print(sepal_ratio[species_filter].mean())
1.4701876810227483
2.160402191687831
2.230452738894224
In [3]:
# Install command kept for reference but disabled: the output recorded for
# this cell shows conda reporting PackagesNotFoundError for
# 'pandas_profiling' on the configured channels.
#%conda install pandas_profiling
import pandas_profiling as pdp
WARNING: The conda.compat module is deprecated and will be removed in a future release.
Collecting package metadata: done
Solving environment: failed

PackagesNotFoundError: The following packages are not available from current channels:

  - pandas_profiling

Current channels:

  - https://repo.anaconda.com/pkgs/main/osx-64
  - https://repo.anaconda.com/pkgs/main/noarch
  - https://repo.anaconda.com/pkgs/free/osx-64
  - https://repo.anaconda.com/pkgs/free/noarch
  - https://repo.anaconda.com/pkgs/r/osx-64
  - https://repo.anaconda.com/pkgs/r/noarch

To search for alternate channels that may provide the conda package you're
looking for, navigate to

    https://anaconda.org

and use the search bar at the top of the page.



Note: you may need to restart the kernel to use updated packages.
In [5]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Prepare the data: the two end points of the line y = x.
x = np.array([-100, 100])
y = np.array([-100, 100])
# Draw the line y = x and restrict both axes to the range 0 to 10.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)
# plt.show() renders on every backend; fig.show() needs a GUI backend and
# only emits a warning under the notebook's inline backend.
plt.show()
In [21]:
import seaborn as sns
# Apply seaborn's default theme first, then select the 'whitegrid' style
# and the 'Set1' colour palette on top of it.
sns.set()
sns.set_style('whitegrid')
sns.set_palette('Set1')
In [14]:
import matplotlib.pyplot as plt
import numpy as np
# Scatter plot of 10 uniform x values in [0, 100) against y = x plus
# normally distributed noise (mean 0, standard deviation 5).
np.random.seed(1)  # fixed seed so the same points are drawn on every run
x = np.random.uniform(0, 100, 10)
y = x + np.random.normal(0, 5, 10)
fig, ax = plt.subplots()
ax.scatter(x, y)
# plt.show() renders on every backend; fig.show() needs a GUI backend and
# only emits a warning in a notebook.
plt.show()
In [16]:
import matplotlib.pyplot as plt
import numpy as np
# Bar chart with one bar per label.
# No random numbers are drawn in this cell; the seed call only resets
# NumPy's global generator and is kept so that global state is unchanged.
np.random.seed(1)
x = np.array(['ele', 'mouse'])
y = np.array([3.3, 13.2])
fig, ax = plt.subplots()
ax.bar(x, y)
# plt.show() renders on every backend; fig.show() needs a GUI backend and
# only emits a warning in a notebook.
plt.show()
In [22]:
import matplotlib.pyplot as plt
import numpy as np
# Histogram (50 bins) of 1000 draws from a normal distribution with
# mean 50 and standard deviation 10.
np.random.seed(2018)  # fixed seed so the same sample is drawn on every run
x = np.random.normal(50, 10, 1000)
fig, ax = plt.subplots()
ax.hist(x, bins=50)
# plt.show() renders on every backend; fig.show() needs a GUI backend and
# only emits a warning in a notebook.
plt.show()
In [36]:
# Sort the list in place by repeatedly sweeping through it and swapping
# adjacent items that are out of order, until a sweep makes no swap.
a = [19, 342, 14, 38, 39, 20, 1]
swapped = True  # forces the first sweep
while swapped:
    swapped = False
    for k in range(1, len(a)):
        if a[k - 1] > a[k]:
            # Exchange the neighbouring pair.
            a[k - 1], a[k] = a[k], a[k - 1]
            swapped = True

print(a)
[1, 14, 19, 20, 38, 39, 342]