from keras.models import load_model
import numpy as np
import argparse
import os
"""
Part 0: define the amino acid index map; the order must be consistent with the PSSM column ordering generated by psiblast.
Ambiguous residue codes are folded onto standard residues: X -> A, B -> N and Z -> Q.
"""
# Residue code -> column index. The 20 standard residues are numbered in the
# order they appear in this string (stated above to follow the psiblast PSSM
# column order).
aaMap = {residue: slot for slot, residue in enumerate('ARNDCQEGHILKMFPSTWYV')}
# Ambiguous codes reuse the index of a standard residue: X -> A, B -> N, Z -> Q.
aaMap.update({'X': 0, 'B': 2, 'Z': 5})
# Extra index after the 20 residues (presumably marks padding / "no residue"
# positions -- confirm against the code that consumes this map).
aaMap['NoSeq'] = 20
"""
Part 1, Load sequence from fasta
"""
def printWarmingMessage(seq):
    """Report ambiguous residue codes in *seq* and reject over-long queries.

    One notice is printed for each of the codes B, Z and X that occurs in
    *seq*, in that order. If *seq* holds more than 700 elements, an error
    line is printed and the interpreter is terminated with exit status 1.
    """
    notices = (
        ('B', 'Your query sequence contains protein B, which will be treated as protein N'),
        ('Z', 'Your query sequence contains protein Z, which will be treated as protein Q'),
        ('X', 'Your query sequence contains protein X, which will be treated as protein A'),
    )
    for code, notice in notices:
        if code in seq:
            print(notice)

    # Queries that fit into the fixed 700-position window need nothing more.
    if len(seq) <= 700:
        return
    print('too long, please provide a shorter one')
    exit(1)
def getPhys(fastaFile):
    """Encode the query sequence as a padded physicochemical feature table.

    The first line of *fastaFile* is treated as the header and discarded;
    the second line (trailing whitespace removed) is taken as the whole
    sequence. A record wrapped over several lines is therefore read only up
    to the end of its first sequence line.

    Args:
        fastaFile: path of the FASTA file to read.

    Returns:
        A ``(700, 8)`` float array. Row ``i`` of a real residue holds its
        seven table values in columns 0-6 and 0 in column 7; every row past
        the end of the sequence is all zeros except for a 1 in column 7.

    Raises:
        KeyError: the sequence contains a character missing from the table.
        IndexError: the sequence is longer than 700 residues.
    """
    phys_dic = {'A': [-0.350, -0.680, -0.677, -0.171, -0.170, 0.900, -0.476],
                'C': [-0.140, -0.329, -0.359, 0.508, -0.114, -0.652, 0.476],
                'D': [-0.213, -0.417, -0.281, -0.767, -0.900, -0.155, -0.635],
                'E': [-0.230, -0.241, -0.058, -0.696, -0.868, 0.900, -0.582],
                'F': [0.363, 0.373, 0.412, 0.646, -0.272, 0.155, 0.318],
                'G': [-0.900, -0.900, -0.900, -0.342, -0.179, -0.900, -0.900],
                'H': [0.384, 0.110, 0.138, -0.271, 0.195, -0.031, -0.106],
                'I': [0.900, -0.066, -0.009, 0.652, -0.186, 0.155, 0.688],
                'K': [-0.088, 0.066, 0.163, -0.889, 0.727, 0.279, -0.265],
                'L': [0.213, -0.066, -0.009, 0.596, -0.186, 0.714, -0.053],
                'M': [0.110, 0.066, 0.087, 0.337, -0.262, 0.652, -0.001],
                'N': [-0.213, -0.329, -0.243, -0.674, -0.075, -0.403, -0.529],
                'P': [0.247, -0.900, -0.294, 0.055, -0.010, -0.900, 0.106],
                'Q': [-0.230, -0.110, -0.020, -0.464, -0.276, 0.528, -0.371],
                'R': [0.105, 0.373, 0.466, -0.900, 0.900, 0.528, -0.371],
                'S': [-0.337, -0.637, -0.544, -0.364, -0.265, -0.466, -0.212],
                'T': [0.402, -0.417, -0.321, -0.199, -0.288, -0.403, 0.212],
                'V': [0.677, -0.285, -0.232, 0.331, -0.191, -0.031, 0.900],
                'W': [0.479, 0.900, 0.900, 0.900, -0.209, 0.279, 0.529],
                'Y': [0.363, 0.417, 0.541, 0.188, -0.274, -0.155, 0.476],
                'X': [0.0771, -0.1536, -0.0620, -0.0762, -0.1451, 0.0497, -0.0398],
                'Z': [0.0771, -0.1536, -0.0620, -0.0762, -0.1451, 0.0497, -0.0398]}
    # 'B' is accepted by the rest of this tool ("treated as N") but had no
    # entry here, so any query containing it died with a KeyError. Reuse N's
    # values to match that stated convention.
    # NOTE(review): confirm this matches how B was encoded when the model
    # was trained.
    phys_dic['B'] = phys_dic['N']

    # Context manager guarantees the handle is released even if a read fails.
    with open(fastaFile, 'r') as f:
        f.readline()  # header line, not needed
        sequence = f.readline().rstrip()

    seqArr = np.zeros([700, 8])
    for i, residue in enumerate(sequence):
        seqArr[i, 0:7] = phys_dic[residue]
    # Flag every position beyond the sequence end in the last column.
    seqArr[len(sequence):, -1] = 1
    return seqArr
"""
Part 2, Load profile generated by psiblast
"""
def logistic(t):
    """Return the logistic sigmoid ``1 / (1 + e**-t)`` of *t*.

    The exponential is evaluated with ``np.exp``, so *t* may be a plain
    number or a NumPy array (handled element-wise).
    """
    decay = np.exp(-t)
    return 1.0 / (1 + decay)
def processProfileFile(PSSMFileName):
    """Read a PSSM text file into a padded ``(700, 21)`` score table.

    The first two lines of the file are skipped. Every later line that
    splits into exactly 22 or 44 whitespace-separated fields is taken as a
    residue row: fields 2..21 are parsed as integers and passed through
    ``logistic`` to fill columns 0-19 of the next free row. Rows that stay
    unused are marked with a 1 in the last column.

    If any line after the first two contains the text ``-I``, a message is
    printed and the interpreter exits with status -1.
    """
    profile = np.zeros([700, 21])
    row = 0
    with open(PSSMFileName) as handle:
        # Discard the two leading header lines.
        next(handle)
        next(handle)
        for raw in handle:
            if '-I' in raw:
                print('bad profile file, it contains -I in the profile!')
                exit(-1)
            fields = raw.split()
            if len(fields) not in (22, 44):
                continue  # not a residue row
            profile[row, 0:20] = [logistic(int(score)) for score in fields[2:22]]
            row += 1
    # Everything below the last residue row is padding.
    profile[row:700, -1] = 1
    return profile
"""
Part 3, Load hhm generated by hhblits
"""
def read_hmm(hhm_file):
    """Parse an HHblits ``.hhm`` file into a residue list and a feature table.

    Layout the parser relies on: everything up to the first line starting
    with ``#`` is skipped, as are the four lines after it. From there each
    residue occupies three lines -- a line whose field 0 is the residue
    letter and whose fields 2..21 are 20 scores, a line whose first 10
    fields are further scores, and a blank separator -- until a line
    starting with ``//``.

    Every score token is converted with ``2 ** (-value / 1000)``; the
    token ``*`` becomes 0.

    Args:
        hhm_file: path of the ``.hhm`` file.

    Returns:
        ``(seq, table)`` where ``seq`` is the list of residue letters and
        ``table`` has one row per residue and 30 columns (the 20 scores of
        the first line followed by the 10 of the second). ``table`` is an
        ``np.matrix`` when at least one residue was read and an empty
        ``(0, 30)`` array otherwise.

    Raises:
        ValueError: a residue record is not followed by a blank line.
        IndexError: the file ends early or a record line has too few fields.
    """
    def _to_prob(token):
        # '*' stands for "no value" in the file and maps to probability 0.
        return 0. if token == '*' else 2 ** (-float(token) / 1000)

    seq = []
    emission_rows = []
    transition_rows = []
    # Context manager: the handle used to stay open for the life of the process.
    with open(hhm_file) as f:
        line = f.readline()
        while line[0] != '#':
            line = f.readline()
        # Skip the four lines between the '#' marker and the first residue.
        for _ in range(4):
            f.readline()
        line = f.readline()
        while line[0:2] != '//':
            lineinfo = line.split()
            seq.append(lineinfo[0])
            emission_rows.append([_to_prob(lineinfo[i]) for i in range(2, 22)])
            lineinfo = f.readline().split()
            transition_rows.append([_to_prob(lineinfo[i]) for i in range(0, 10)])
            line = f.readline()
            # Explicit check instead of `assert`, which vanishes under -O.
            if line.strip():
                raise ValueError(
                    'malformed hhm file: expected a blank line after residue '
                    'record %d' % len(seq))
            line = f.readline()

    prob = np.zeros([0, 20])
    extras = np.zeros([0, 10])
    if seq:
        # Stack all rows in one step rather than re-concatenating per residue
        # (which copies the whole table each time). np.matrix is kept so the
        # returned table has the same type callers already index as 2-D rows.
        prob = np.concatenate((prob, np.matrix(emission_rows)), axis=0)
        extras = np.concatenate((extras, np.matrix(transition_rows)), axis=0)
    return (seq, np.concatenate((prob, extras), axis=1))
def convertPredictQ3Result2HumanReadable(predictedSS):
    """Turn per-position 3-state class scores into a secondary-structure string.

    For every row of *predictedSS* the index of the largest score (argmax
    over the last axis) is translated to a letter: 0 -> 'H' (helix),
    1 -> 'E' (strand), 2 -> 'C' (coil). Index 3 (no residue) contributes
    nothing, so padding rows drop out of the result.

    Args:
        predictedSS: 2-D array-like of scores, one row per position. Any
            number of rows is accepted; the row count is no longer fixed
            at 700.

    Returns:
        The concatenated letters as a ``str``.

    Raises:
        KeyError: a row's argmax is not one of the indices 0-3.
    """
    # convert back map meaning; 0 for Helix, 1 for strand, 2 for coil, 3 for noSeq and protein 'X', if any
    ssConvertMap = {0: 'H', 1: 'E', 2: 'C', 3: ''}
    predSS = np.argmax(predictedSS, axis=-1)
    # Iterate over the rows actually present instead of a hard-coded 700,
    # which raised IndexError for any shorter input.
    return ''.join(ssConvertMap[int(label)] for label in predSS)
def convertPredictQ8Result2HumanReadable(predictedSS):
    """Turn per-position 8-state class scores into a secondary-structure string.

    For every row of *predictedSS* the index of the largest score (argmax
    over the last axis) is translated through the table
    0 'C', 1 'B', 2 'E', 3 'G', 4 'I', 5 'H', 6 'S', 7 'T'. Index 8
    contributes nothing, so rows classified as index 8 drop out of the
    result.

    Args:
        predictedSS: 2-D array-like of scores, one row per position. Any
            number of rows is accepted; the row count is no longer fixed
            at 700.

    Returns:
        The concatenated letters as a ``str``.

    Raises:
        KeyError: a row's argmax is not one of the indices 0-8.
    """
    ssConvertMap = {0: 'C', 1: 'B', 2: 'E', 3: 'G', 4: 'I', 5: 'H', 6: 'S', 7: 'T', 8: ''}
    predSS = np.argmax(predictedSS, axis=-1)
    # Iterate over the rows actually present instead of a hard-coded 700,
    # which raised IndexError for any shorter input.
    return ''.join(ssConvertMap[int(label)] for label in predSS)
"""
Part 4, Load trained weights for proposed deep Inception Networks and run the prediction
"""
def main():
    """Command-line entry point: predict Q3 and Q8 secondary structure.

    Four options are read and all of them are required:

    * ``-s/--sequence``: FASTA file, encoded by ``getPhys``
    * ``-p/--profile``: PSSM file, encoded by ``processProfileFile``
    * ``-m/--hhm``: HHM file, parsed by ``read_hmm``
    * ``-w/--weights``: saved model, loaded with ``load_model``

    Each encoded input is reshaped to a batch of one, ``(1, 700, 8)``,
    ``(1, 700, 21)`` and ``(1, 700, 31)``, and handed to ``model.predict``.
    The two outputs are reshaped to ``(700, 4)`` and ``(700, 9)``, converted
    to letter strings and printed.
    """
    parser = argparse.ArgumentParser()
    # Options are marked required so a missing one is reported by argparse
    # instead of surfacing later as a failure inside open(None).
    parser.add_argument('-s', '--sequence', required=True, help='protein sequence')
    parser.add_argument('-p', '--profile', required=True, help='protein profile')
    parser.add_argument('-m', '--hhm', required=True, help='protein hhm')
    parser.add_argument('-w', '--weights', required=True,
                        help='deep neural network pre-trained weights')
    args = parser.parse_args()

    sequence_vector = np.reshape(getPhys(args.sequence), (1, 700, 8))
    profile_vector = np.reshape(processProfileFile(args.profile), (1, 700, 21))

    seqTemp, hhmArr = read_hmm(args.hhm)
    residue_count = len(seqTemp)
    hhm_vector = np.zeros([700, 31])
    # Parsed rows fill the first 30 columns; the remaining rows are padding
    # and are flagged in the last column.
    hhm_vector[:residue_count, 0:30] = hhmArr
    hhm_vector[residue_count:, -1] = 1
    hhm_vector = np.reshape(hhm_vector, (1, 700, 31))

    model = load_model(args.weights)
    s3_hat, s8_hat = model.predict([sequence_vector, profile_vector, hhm_vector])

    predictedS3 = np.reshape(s3_hat, (700, 4))
    print('Q3 results:')
    print(convertPredictQ3Result2HumanReadable(predictedS3))

    predictedS8 = np.reshape(s8_hat, (700, 9))
    print('Q8 results:')
    print(convertPredictQ8Result2HumanReadable(predictedS8))


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
没有合适的资源?快使用搜索试试~ 我知道了~
生物信息学-python-蛋白质二级结构预测-开源-参考文献(PMID:29492997)
需积分: 27 6 下载量 146 浏览量
2022-12-09
11:13:27
上传
评论
收藏 93.44MB GZ 举报
温馨提示
参考文献:MUFOLD-SS: New deep inception-inside-inception networks for protein secondary structure prediction 目的:蛋白质二级结构预测。 上传原因:由于文献中的链接失效,因此将之前下载的开源源码上传。仅限学术研究使用。
资源推荐
资源详情
资源评论





























收起资源包目录
















共 11 条
- 1
资源评论


@ZyuanZhang
- 粉丝: 98
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- 青岛版信息技术初中第三册第一单元第-课算法的描述课件.ppt
- Excel模板:销售管理系统(带销售提成-销售订单).xlsx
- 服装鞋包库存盘点表(自动统计)excel模板精选.xlsx
- 网络妈妈观后感.docx
- 项目管理个人小结.doc
- 项目管理检查考核评分表(单位).doc
- 《JAVA设计模式》期末考试复习.doc
- 综合布线方案毕业论文.docx
- 《人工智能》读书笔记及心得感悟2000字.docx
- 物联网技术综述103p.ppt
- 2021年关于网络的调查报告-.doc
- 《人工智能+智慧城市》课件.ppt
- 技改项目管理办法(定稿)【模板范本】.doc
- EXCEl模板:员工个人档案清单管理工具(完整清单-自动统计).xlsx
- 寒假电子商务实习周记10篇.doc
- 微软平衡计分卡架构PPT.ppt
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



安全验证
文档复制为VIP权益,开通VIP直接复制
