生物信息学-python-蛋白质二级结构预测-开源-参考文献(PMID:29492997)资源-CSDN下载

python

需积分: 27 146 浏览量 2022-12-09 11:13:27 上传评论收藏 93.44MB GZ 举报

共11个文件

ss：2个

py：2个

reference：1个

资源推荐

资源详情

资源评论

收起资源包目录

StandAlone_withProfiles.tar.gz （11个子文件）

StandAlone_withProfiles

bin

executable_withHHBlits.py 7KB

example

.ss 0B

T0644.profile 26KB

T0644.hhm 21KB

T0644_out.ss 308B

T0644.fasta 149B

data

model.h5 102.78MB

README 1KB

VERSION 4B

runStandAlone.py 440B

REFERENCE 698B

"""Stand-alone protein secondary-structure predictor (Q3 and Q8).

Three fixed-length (700-row) feature matrices are built for one protein --
physico-chemical properties from the FASTA sequence, a PSSM profile
(psiblast) and an HHM profile (hhblits) -- and fed to a pre-trained Keras
model.  The predicted Q3 and Q8 strings are printed to stdout.
"""
import argparse
import os
import sys

import numpy as np

# NOTE: keras is imported lazily inside main() so that the parsing helpers in
# this module can be imported (and the --help text shown) without loading the
# deep-learning stack.

# Every feature matrix is padded to this many rows; longer proteins are
# rejected because the network input shape is fixed.
MAX_SEQ_LEN = 700

"""
Part 0, define amino acid map, must be consistent with PSSM ordering
generated by psiblast
treat protein X->A, protein B->N and protein Z->Q
"""
aaMap = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7,
         'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13, 'P': 14,
         'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19,
         'X': 0, 'B': 2, 'Z': 5, 'NoSeq': 20}

# Vector shared by the ambiguity codes in the property table below.
_NEUTRAL_PHYS = [0.0771, -0.1536, -0.0620, -0.0762, -0.1451, 0.0497, -0.0398]

# Seven physico-chemical properties per residue.
# NOTE(review): 'B' was absent from the original table although aaMap accepts
# it, so any sequence containing B crashed with a KeyError.  It is given the
# same vector as the other ambiguity codes X and Z -- confirm this matches
# how the model was trained.
_PHYS_DIC = {
    'A': [-0.350, -0.680, -0.677, -0.171, -0.170, 0.900, -0.476],
    'C': [-0.140, -0.329, -0.359, 0.508, -0.114, -0.652, 0.476],
    'D': [-0.213, -0.417, -0.281, -0.767, -0.900, -0.155, -0.635],
    'E': [-0.230, -0.241, -0.058, -0.696, -0.868, 0.900, -0.582],
    'F': [0.363, 0.373, 0.412, 0.646, -0.272, 0.155, 0.318],
    'G': [-0.900, -0.900, -0.900, -0.342, -0.179, -0.900, -0.900],
    'H': [0.384, 0.110, 0.138, -0.271, 0.195, -0.031, -0.106],
    'I': [0.900, -0.066, -0.009, 0.652, -0.186, 0.155, 0.688],
    'K': [-0.088, 0.066, 0.163, -0.889, 0.727, 0.279, -0.265],
    'L': [0.213, -0.066, -0.009, 0.596, -0.186, 0.714, -0.053],
    'M': [0.110, 0.066, 0.087, 0.337, -0.262, 0.652, -0.001],
    'N': [-0.213, -0.329, -0.243, -0.674, -0.075, -0.403, -0.529],
    'P': [0.247, -0.900, -0.294, 0.055, -0.010, -0.900, 0.106],
    'Q': [-0.230, -0.110, -0.020, -0.464, -0.276, 0.528, -0.371],
    'R': [0.105, 0.373, 0.466, -0.900, 0.900, 0.528, -0.371],
    'S': [-0.337, -0.637, -0.544, -0.364, -0.265, -0.466, -0.212],
    'T': [0.402, -0.417, -0.321, -0.199, -0.288, -0.403, 0.212],
    'V': [0.677, -0.285, -0.232, 0.331, -0.191, -0.031, 0.900],
    'W': [0.479, 0.900, 0.900, 0.900, -0.209, 0.279, 0.529],
    'Y': [0.363, 0.417, 0.541, 0.188, -0.274, -0.155, 0.476],
    'X': _NEUTRAL_PHYS,
    'Z': _NEUTRAL_PHYS,
    'B': _NEUTRAL_PHYS,
}


"""
Part 1, Load sequence from fasta
"""


def printWarmingMessage(seq):
    """Print notes about ambiguous residues and abort on over-long input.

    A message is printed for each of the codes B, Z and X found in *seq*.
    If *seq* has more than 700 residues a message is printed and the
    process exits with status 1.
    """
    if 'B' in seq:
        print('Your query sequence contains protein B, which will be treated as protein N')
    if 'Z' in seq:
        print('Your query sequence contains protein Z, which will be treated as protein Q')
    if 'X' in seq:
        print('Your query sequence contains protein X, which will be treated as protein A')
    if len(seq) > MAX_SEQ_LEN:
        print('too long, please provide a shorter one')
        # sys.exit instead of the builtin exit, which only exists when the
        # site module has been loaded.
        sys.exit(1)


def _readFastaSequence(fastaFile):
    """Return the residues of the first record in *fastaFile* as one string.

    The first line is skipped as the header.  All following lines up to the
    next '>' header (or end of file) are joined, so sequences wrapped over
    several lines are read completely.
    """
    pieces = []
    with open(fastaFile, 'r') as handle:
        handle.readline()  # header line, content is not used
        for line in handle:
            text = line.strip()
            if text.startswith('>'):
                break
            pieces.append(text)
    return ''.join(pieces)


def getPhys(fastaFile):
    """Build the (700, 8) physico-chemical feature matrix for a FASTA file.

    Columns 0-6 hold the seven property values of each residue; column 7 is
    set to 1 on the padding rows after the end of the sequence.

    Raises:
        ValueError: the sequence has more than 700 residues.
        KeyError: the sequence contains a character without property values.
    """
    sequence = _readFastaSequence(fastaFile)
    if len(sequence) > MAX_SEQ_LEN:
        raise ValueError('sequence in {0} has {1} residues, at most {2} are supported'
                         .format(fastaFile, len(sequence), MAX_SEQ_LEN))
    seqArr = np.zeros([MAX_SEQ_LEN, 8])
    for i, residue in enumerate(sequence):
        if residue not in _PHYS_DIC:
            raise KeyError('unsupported residue {0!r} at position {1} in {2}'
                           .format(residue, i + 1, fastaFile))
        seqArr[i, 0:7] = _PHYS_DIC[residue]
    seqArr[len(sequence):, -1] = 1
    return seqArr


"""
Part 2, Load profile generated by psiblast
"""


def logistic(t):
    """Return the logistic function 1 / (1 + exp(-t))."""
    return 1.0 / (1 + np.exp(-t))


def processProfileFile(PSSMFileName):
    """Build the (700, 21) profile feature matrix from a PSSM text file.

    The first two lines of the file are skipped.  Every later line that
    splits into exactly 22 or 44 fields is one residue: fields 2..21 are
    parsed as integers and squashed with the logistic function into columns
    0-19.  Column 20 is set to 1 on the padding rows.

    If any line contains '-I' a message is printed and the process exits
    with status -1.

    Raises:
        ValueError: the file holds more than 700 residue rows.
    """
    npArr = np.zeros([MAX_SEQ_LEN, 21])
    index = 0
    with open(PSSMFileName) as fileIn:
        next(fileIn)
        next(fileIn)
        for line in fileIn:
            if '-I' in line:
                print('bad profile file, it contains -I in the profile!')
                sys.exit(-1)
            elements = line.split()
            if len(elements) in (22, 44):
                if index >= MAX_SEQ_LEN:
                    raise ValueError('profile {0} has more than {1} rows'
                                     .format(PSSMFileName, MAX_SEQ_LEN))
                npArr[index, 0:20] = [logistic(int(x)) for x in elements[2:22]]
                index += 1
    npArr[index:, -1] = 1
    return npArr


"""
Part 3, Load hhm generated by hhblits
"""


def _hhmScoreToProb(token):
    """Convert one HHM field to a probability: '*' -> 0, else 2**(-x/1000)."""
    if token == '*':
        return 0.
    return 2 ** (-float(token) / 1000)


def read_hmm(hhm_file):
    """Parse the per-residue section of an HHM file.

    Reading starts at the first line beginning with '#'; the four lines
    after it are skipped.  Each residue then occupies three lines: one with
    the residue letter and 20 values (fields 2..21), one with 10 values, and
    one blank line.  Parsing stops at the line starting with '//'.

    Returns:
        (seq, features): the list of residue letters and a float ndarray of
        shape (len(seq), 30) holding the 20 + 10 converted values per
        residue.  Earlier revisions returned an np.matrix for non-empty
        input; a plain ndarray is now returned in every case.

    Raises:
        ValueError: the file does not follow the layout described above.
    """
    seq = []
    rows = []
    with open(hhm_file) as handle:
        line = handle.readline()
        while line and line[0] != '#':
            line = handle.readline()
        if not line:
            raise ValueError("no '#' marker line found in {0}".format(hhm_file))
        for _ in range(4):
            handle.readline()

        line = handle.readline()
        while line[0:2] != '//':
            if not line:
                raise ValueError("unexpected end of {0} before '//'".format(hhm_file))
            lineinfo = line.split()
            if len(lineinfo) < 22:
                raise ValueError('malformed residue line in {0}: {1!r}'
                                 .format(hhm_file, line))
            seq.append(lineinfo[0])
            probs_ = [_hhmScoreToProb(tok) for tok in lineinfo[2:22]]

            lineinfo = handle.readline().split()
            if len(lineinfo) < 10:
                raise ValueError('malformed second line for residue {0} in {1}'
                                 .format(len(seq), hhm_file))
            extras_ = [_hhmScoreToProb(tok) for tok in lineinfo[0:10]]
            rows.append(probs_ + extras_)

            # Explicit check instead of assert, which is stripped under -O.
            if handle.readline().strip():
                raise ValueError('expected a blank line after residue {0} in {1}'
                                 .format(len(seq), hhm_file))
            line = handle.readline()
    # One conversion at the end instead of re-allocating the array per residue.
    features = np.array(rows, dtype=float).reshape(len(rows), 30)
    return (seq, features)


def _decodeLabels(predictedSS, ssConvertMap):
    """Map the arg-max class of every row to its letter and join them."""
    predSS = np.argmax(predictedSS, axis=-1)
    return ''.join(ssConvertMap[int(label)] for label in predSS)


def convertPredictQ3Result2HumanReadable(predictedSS):
    """Turn per-residue Q3 class scores into a string.

    Class meaning: 0 for Helix (H), 1 for strand (E), 2 for coil (C),
    3 for noSeq, which contributes nothing to the string.  Every row of
    *predictedSS* is decoded (the network output has 700 rows).
    """
    return _decodeLabels(predictedSS, {0: 'H', 1: 'E', 2: 'C', 3: ''})


def convertPredictQ8Result2HumanReadable(predictedSS):
    """Turn per-residue Q8 class scores into a string.

    Classes 0-7 map to C, B, E, G, I, H, S, T; class 8 (noSeq) contributes
    nothing to the string.  Every row of *predictedSS* is decoded.
    """
    return _decodeLabels(
        predictedSS,
        {0: 'C', 1: 'B', 2: 'E', 3: 'G', 4: 'I', 5: 'H', 6: 'S', 7: 'T', 8: ''})


"""
Part 4, Load trained weights for proposed deep Inception Networks
"""


def main(argv=None):
    """Command-line entry point: build the features, predict, print Q3/Q8.

    Args:
        argv: argument list to parse; None (the default) means sys.argv[1:].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--sequence', required=True, help='protein sequence')
    parser.add_argument('-p', '--profile', required=True, help='protein profile')
    parser.add_argument('-m', '--hhm', required=True, help='protein hhm')
    parser.add_argument('-w', '--weights', required=True,
                        help='deep neural network pre-trained weights')
    args = parser.parse_args(argv)

    # Warn about ambiguous residues and stop early on over-long sequences.
    printWarmingMessage(_readFastaSequence(args.sequence))

    sequence_vector = np.reshape(getPhys(args.sequence), (1, MAX_SEQ_LEN, 8))
    profile_vector = np.reshape(processProfileFile(args.profile), (1, MAX_SEQ_LEN, 21))

    seqTemp, hhmArr = read_hmm(args.hhm)
    hhmLen = len(seqTemp)
    if hhmLen > MAX_SEQ_LEN:
        raise ValueError('hhm file {0} has {1} residues, at most {2} are supported'
                         .format(args.hhm, hhmLen, MAX_SEQ_LEN))
    hhm_vector = np.zeros([MAX_SEQ_LEN, 31])
    hhm_vector[:hhmLen, 0:30] = hhmArr
    hhm_vector[hhmLen:, -1] = 1  # last column flags padding rows
    hhm_vector = np.reshape(hhm_vector, (1, MAX_SEQ_LEN, 31))

    # Imported here so input errors surface before the heavy import.
    from keras.models import load_model

    model = load_model(args.weights)
    [s3_hat, s8_hat] = model.predict([sequence_vector, profile_vector, hhm_vector])

    predictedS3 = np.reshape(s3_hat, (MAX_SEQ_LEN, 4))
    finalResultQ3 = convertPredictQ3Result2HumanReadable(predictedS3)
    print('Q3 results:')
    print(finalResultQ3)

    predictedS8 = np.reshape(s8_hat, (MAX_SEQ_LEN, 9))
    finalResultQ8 = convertPredictQ8Result2HumanReadable(predictedS8)
    print('Q8 results:')
    print(finalResultQ8)


if __name__ == "__main__":
    main()

评论收藏

内容反馈