Source code for pdb2sql.pdb2sql_base

import os


class pdb2sql_base(object):
    """Base class for the definition of an SQL database built from PDB data.

    This class stores the configuration (column names/types and the
    fixed-column layout of PDB ATOM records) and implements the conversion
    of query results back to PDB-formatted text. The database access
    methods (``_create_sql``, ``get``, ``update``, ``update_column``,
    ``add_column``) raise ``NotImplementedError`` here and must be supplied
    by a subclass, which is also expected to provide ``self.conn`` and
    ``self._commit``.
    """

    def __init__(
            self,
            pdbfile,
            sqlfile=None,
            fix_chainID=False,
            verbose=False):
        """Store the configuration of the database.

        Args:
            pdbfile (str, list(str/bytes), ndarray): name of pdbfile or
                list or ndarray containing the pdb data
            sqlfile (str, optional): name of the sqlfile.
                By default it is created in memory only.
            fix_chainID (bool, optional): check if the name of the chains
                are A,B,C, .... and fix it if not.
            verbose (bool): verbosity flag; it is only stored here, its
                use is left to subclasses.
        """
        self.pdbfile = pdbfile
        self.sqlfile = sqlfile
        self.fix_chainID = fix_chainID
        self.is_valid = True
        self.verbose = verbose

        self.backbone_atoms = ['CA', 'C', 'N', 'O']

        # hard limit for the number of SQL variables
        self.SQLITE_LIMIT_VARIABLE_NUMBER = 999
        # NOTE(review): presumably a working limit kept below the hard
        # limit above -- confirm against the subclass that uses it.
        self.max_sql_values = 950

        # column names and types
        self.col = {'serial': 'INT',
                    'name': 'TEXT',
                    'altLoc': 'TEXT',
                    'resName': 'TEXT',
                    'chainID': 'TEXT',
                    'resSeq': 'INT',
                    'iCode': 'TEXT',
                    'x': 'REAL',
                    'y': 'REAL',
                    'z': 'REAL',
                    'occ': 'REAL',
                    'temp': 'REAL',
                    'element': 'TEXT',
                    'model': 'INT'}

        # delimiter of the column format, as 0-based [start, end) slices
        # taken from
        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
        self.delimiter = {
            'serial': [6, 11],
            'name': [12, 16],
            'altLoc': [16, 17],
            'resName': [17, 20],
            'chainID': [21, 22],
            'resSeq': [22, 26],
            'iCode': [26, 27],
            'x': [30, 38],
            'y': [38, 46],
            'z': [46, 54],
            'occ': [54, 60],
            'temp': [60, 66],
            'element': [76, 78]}

    ##########################################################################
    #
    #   CREATION AND PRINTING
    #
    ##########################################################################

    def _create_sql(self):
        """Main function to create the SQL data base.

        Raises:
            NotImplementedError: always; must be implemented by a subclass.
        """
        raise NotImplementedError()

    def _get_table_names(self):
        """Return the names of all tables listed in ``sqlite_master``.

        Returns:
            list: first column of every row returned by the query.
        """
        names = self.conn.execute(
            "SELECT name from sqlite_master WHERE type='table';")
        return [n[0] for n in names]

    # get the properties
    def get(self, atnames, **kwargs):
        """Get data from the database.

        Raises:
            NotImplementedError: always; must be implemented by a subclass.
        """
        raise NotImplementedError()

    def get_xyz(self, tablename='atom', **kwargs):
        """Shortcut to get the xyz coordinates."""
        return self.get('x,y,z', tablename=tablename, **kwargs)

    def get_residues(self, tablename='atom', **kwargs):
        """Get the residue sequence.

        Returns:
            list: unique ``(chainID, resName, resSeq)`` tuples, in order of
                first appearance in the query result.

        Examples:
            >>> db.get_residues()
        """
        res = (tuple(x) for x in self.get(
            'chainID,resName,resSeq', tablename=tablename, **kwargs))
        # dict keys keep insertion order, so this removes duplicates while
        # preserving the first-seen order in linear time.
        return list(dict.fromkeys(res))

    def get_chains(self, tablename='atom', **kwargs):
        """Get the chain IDs.

        Returns:
            list: chain IDs in alphabetical order.

        Examples:
            >>> db.get_chains()
        """
        chains = self.get('chainID', tablename=tablename, **kwargs)
        return sorted(set(chains))

    def update(self, attribute, values, **kwargs):
        """Update data in the database.

        Raises:
            NotImplementedError: always; must be implemented by a subclass.
        """
        raise NotImplementedError()

    def update_xyz(self, xyz, tablename='atom', **kwargs):
        """Update the xyz coordinates.

        Args:
            xyz: new coordinates, passed unchanged to ``update``.
            tablename (str): table to update; forwarded to ``update``, in
                the same way ``get_xyz`` forwards it to ``get``.
            kwargs: forwarded to ``update``.
        """
        self.update('x,y,z', xyz, tablename=tablename, **kwargs)

    def update_column(self, colname, values, index=None):
        """Update a single column.

        Raises:
            NotImplementedError: always; must be implemented by a subclass.
        """
        raise NotImplementedError()

    def add_column(self, colname, coltype='FLOAT', default=0):
        """Add a new column to the ATOM table.

        Raises:
            NotImplementedError: always; must be implemented by a subclass.
        """
        raise NotImplementedError()

    def exportpdb(self, fname, append=False, tablename='atom', **kwargs):
        """Export a PDB file.

        Args:
            fname (str): output filename
            append (bool): append exported data to file or not
            tablename (str): table to read from; forwarded to ``sql2pdb``.
            kwargs: argument to select atoms, dict value must be list,
                e.g.:
                    - name = ['CA', 'O']
                    - no_name = ['CA', 'C']
                    - chainID = ['A']
                    - no_chainID = ['A']
        """
        # Build the lines before opening the file so that a failing query
        # does not truncate an existing file.
        lines = self.sql2pdb(tablename=tablename, **kwargs)
        mode = 'a' if append else 'w'
        # The context manager closes the file even if a write fails.
        with open(fname, mode) as f:
            f.writelines(line + '\n' for line in lines)

    def sql2pdb(self, tablename='atom', **kwargs):
        """Convert SQL data to PDB formatted lines.

        Args:
            tablename (str): table to read from; forwarded to ``get``.
            kwargs: argument to select atoms, dict value must be list,
                e.g.:
                    - name = ['CA', 'O']
                    - no_name = ['CA', 'C']
                    - chainID = ['A']
                    - no_chainID = ['A']

        Returns:
            list: pdb-format lines
        """
        # Columns are requested in the order of self.col; data2pdb relies
        # on that order when indexing each row.
        cols = ','.join(self.col)
        data = self.get(cols, tablename=tablename, **kwargs)
        return self.data2pdb(data)

    def data2pdb(self, data):
        """Convert data from a get method to PDB ATOM lines.

        Args:
            data (list): data from a get statement; each row is indexed in
                the order of ``self.col`` (serial, name, altLoc, resName,
                chainID, resSeq, iCode, x, y, z, occ, temp, element).

        Returns:
            list: the formatted pdb data, one 80-character string per row.

        Raises:
            ValueError: if a coordinate does not fit in its 8-column field
                (see ``_format_xyz``).
        """
        pdb = []
        # the PDB format is pretty strict
        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
        # The blank fillers below are sized so that every field lands on
        # the slice given for it in self.delimiter.
        for d in data:
            fields = (
                'ATOM  ',                            # record name, [0, 6)
                '{:>5}'.format(d[0]),                # serial
                ' ',                                 # blank, [11, 12)
                self._format_atomname(d),            # name
                '{:>1}'.format(d[2]),                # altLoc
                '{:>3}'.format(d[3]),                # resName
                ' ',                                 # blank, [20, 21)
                '{:>1}'.format(d[4]),                # chainID
                '{:>4}'.format(d[5]),                # resSeq
                '{:>1}'.format(d[6]),                # iCode
                ' ' * 3,                             # blank, [27, 30)
                pdb2sql_base._format_xyz(d[7]),      # x
                pdb2sql_base._format_xyz(d[8]),      # y
                pdb2sql_base._format_xyz(d[9]),      # z
                '{:>6.2f}'.format(d[10]),            # occ
                '{:>6.2f}'.format(d[11]),            # temp
                ' ' * 10,                            # blank, [66, 76)
                '{:>2}'.format(d[12]),               # element
                ' ' * 2,                             # charge, keep it blank
            )
            pdb.append(''.join(fields))

        return pdb

    def _format_atomname(self, data):
        """Format atom name to align with PDB requirements.

        - alignment of one-letter atom name starts at column 14,
        - while two-letter atom name such as FE starts at column 13.

        Args:
            data (list): sql output for one pdb line; the atom name is
                read from index 1 and the element from index 12.

        Returns:
            str: formatted atom name
        """
        name = data[1]
        lname = len(name)
        if lname in (1, 4):
            name = '{:^4}'.format(name)
        elif lname == 2:
            if name == data[12]:  # name == element
                name = '{:<4}'.format(name)
            else:
                name = '{:^4}'.format(name)
        else:
            # names starting with a digit are left-aligned, all other
            # names reaching this branch are right-aligned
            if name[0] in '0123456789':
                name = '{:<4}'.format(name)
            else:
                name = '{:>4}'.format(name)
        return name

    @staticmethod
    def _format_xyz(i):
        """Format PDB coordinates x, y or z value.

        Note:
            PDB has a fixed 8-column space for x, y or z value.
            Thus the value should be in the range of (-1e7, 1e8).
            The number of decimals is reduced from 3 down to 0 as the
            magnitude grows so that the result still fits in 8 columns.

        Args:
            i (float): PDB coordinate x, y or z.

        Raises:
            ValueError: Exceed the range of (-1e7, 1e8)

        Returns:
            str: formatted x, y or z value.
        """
        if i >= 1e8 - 0.5 or i <= -1e7 + 0.5:
            raise ValueError(
                f'PDB coordination {i} exceeds the range of (-1e7, 1e8) '
                f'after rounding.')
        elif i >= 1e6 - 0.5 or i <= -1e5 + 0.5:
            i = '{:>8.0f}'.format(i)
        elif i >= 1e5 - 0.5 or i <= -1e4 + 0.5:
            i = '{:>8.1f}'.format(i)
        elif i >= 1e4 - 0.5 or i <= -1e3 + 0.5:
            i = '{:>8.2f}'.format(i)
        else:
            i = '{:>8.3f}'.format(i)

        return i

    def _close(self, rmdb=True):
        """Close the database connection.

        Args:
            rmdb (bool): only relevant when ``self.sqlfile`` is set. If
                True the database file is deleted after closing the
                connection; if False ``self._commit()`` is called before
                closing and the file is kept.
        """
        on_disk = self.sqlfile is not None

        if on_disk and not rmdb:
            self._commit()

        self.conn.close()

        if on_disk and rmdb:
            # os.remove avoids passing the file name through a shell, so
            # it works for names containing spaces or shell metacharacters
            # and on platforms without an ``rm`` command.
            try:
                os.remove(self.sqlfile)
            except FileNotFoundError:
                # Nothing to delete.
                pass