# bioseq.schema
# ldap schema for bio-sequences
# version 0.3, jul02, d.gilbert
# Note: as much as possible, match attribute name to SRS databank field names
# to simplify srs2ldap: need mapping anyway, but LDAP enforces use of
# primary schema-defined names in query filters (substituting alternate for primary)

## ldap Syntax Object Identifiers
#binary: 1.3.6.1.4.1.1466.115.121.1.5
#integer:1.3.6.1.4.1.1466.115.121.1.27
#directory string: 1.3.6.1.4.1.1466.115.121.1.15
#printable string: 1.3.6.1.4.1.1466.115.121.1.44
#numeric string: 1.3.6.1.4.1.1466.115.121.1.36
#octet string: 1.3.6.1.4.1.1466.115.121.1.40

## to some extent this schema should be auto-generated from [srs] databanks/backend
## need to add srs fields or equivalents to handle
## common ones for bioseqs: acc, des, gen(geneName), div
## key, (dat/crd/crlu), cc
## refs: aut, tit, jnl, vol, fp, yr, mid, pmd, rc.
# dbn = DbName == lib??
# dbxref == dr for swissprot

# Conventions used below:
#   - the first NAME listed is the primary name; the others are aliases
#   - "= srs field" marks an attribute whose primary name is an SRS field name
#   - the *-oid tokens are symbolic OIDs that are not defined in this file;
#     presumably they are declared elsewhere -- TODO confirm

# = srs field
attributetype ( keywords-oid
    NAME ( 'key' 'Keywords' )
    DESC 'Keywords'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
attributetype ( genename-oid
    NAME ( 'gen' 'GeneName' )
    DESC 'Gene name'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
attributetype ( comment-oid
    NAME ( 'cc' 'Comment' )
    DESC 'Comment'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs library key
attributetype ( library-oid
    NAME ( 'lib' 'Library' )
    DESC 'Databank library'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# ? use this or use generic 'category' attribute
attributetype ( libgroup-oid
    NAME ( 'libgroup' 'LibraryGroup' )
    DESC 'Databank library category'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( DatabankField-attr-oid
    NAME ( 'field' 'DatabankField' )
    DESC 'Databank Field'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( DatabankLink-attr-oid
    NAME ( 'link' 'DatabankLink' )
    DESC 'Linked databank'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
# 'Division' is the correctly spelled long name; the historical misspelling
# 'Divison' is kept as an extra alias so existing filters and data that use
# it continue to resolve.
attributetype ( division-oid
    NAME ( 'div' 'Division' 'Divison' )
    DESC 'Databank division'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Integer-valued attributes: 'count' and 'start' (no SUBSTR rule).
attributetype ( Count-oid
    NAME ( 'count' )
    DESC 'count of items'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Startitem-oid
    NAME ( 'start' 'Startitem' )
    DESC 'start of items'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Querystring-oid
    NAME ( 'query' 'filter' )
    DESC 'Query string'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Subtype of 'ref': matching rules and syntax are inherited from the
# supertype, which is not defined in this file.
attributetype ( Data-ref-oid
    NAME 'data-ref'
    DESC 'referral to ldap://site-info containing data'
    SUP ref )

## add from gnomap2.schema for gnomap ldif data
attributetype ( Chromosome-attr-oid
    NAME ( 'chr' 'Chromosome' )
    DESC 'Chromosome'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( Feature-attr-oid
    NAME ( 'ftk' 'Feature' )
    DESC 'Chromosome feature'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( Loc-start-oid
    NAME ( 'bpb' 'bstart' 'Base-start' )
    DESC 'start base number of feature'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Loc-stop-oid
    NAME ( 'bpe' 'bstop' 'Base-stop' )
    DESC 'last base number of feature'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

## in biomirror-catalog-spec.schema
#attributetype ( id-oid
#    NAME 'id'
#    DESC 'object id'
#    EQUALITY caseExactMatch
#    SUBSTR caseIgnoreSubstringsMatch
#    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15
#    SINGLE-VALUE )

## in biomirror-catalog-spec.schema
#attributetype ( format-oid
#    NAME ( 'fmt' 'format' )
#    DESC 'data format'
#    EQUALITY caseIgnoreMatch
#    SUBSTR caseIgnoreSubstringsMatch
#    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
attributetype ( acc-oid
    NAME 'acc'
    DESC 'accession'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# same as gnomap.schema
# org is srs field, make it primary name ??
attributetype ( Species-attr-oid
    NAME ( 'org' 'spp' 'Species' )
    DESC 'Species'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## need to fix this to parsable date - number like 20020130
## crlu == swissprot/trembl/embl date updated
## do we need a different attribute for each date subtype ?
attributetype ( Date-oid
    NAME ( 'dat' 'crlu' 'crd' 'Date' )
    DESC 'Date'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in biomirror-catalog-spec.schema
## replace with core labeledURI attr
#attributetype ( url-oid NAME 'url'
#    DESC 'universal resource locator'
#    SUP labeledURI
#    EQUALITY caseIgnoreMatch
#    SUBSTR caseIgnoreSubstringsMatch )

# ? use standard Definition attribute
# = srs field
attributetype ( Description-attr-oid
    NAME ( 'des' )
    DESC 'Description'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in gnomap schema
# srs field names vary (dr, ??)
attributetype ( Dbxref-oid
    NAME ( 'dr' 'dbxref' )
    DESC 'Database cross-reference'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in gnomap schema
attributetype ( content-oid
    NAME ( 'con' 'content' )
    DESC 'data content types'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## should be numeric value
# = srs field
# (declared here with the integer syntax and integerMatch)
attributetype ( Seqlength-oid
    NAME ( 'sl' 'Seqlength' )
    DESC 'length of sequence'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

# this should be an enum:
# = srs field
attributetype ( molecule-oid
    NAME 'mol'
    DESC 'Molecule type'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# ---------------------------------------------------------------------
# Sequence data attributes
# ---------------------------------------------------------------------
# na,bna,aa,baa are in gnomap.schema
# ? use mol type and only one seq attr for na/aa ?
#
# Each text attribute (directory string, equality only, no SUBSTR rule) has
# a binary-coded subtype declared with SUP.
# NOTE(review): the binary subtypes declare the binary syntax (...1.5) while
# their supertypes use directory string (...1.15); confirm the target server
# accepts a subtype whose SYNTAX differs from its SUP.

# Text nucleic-acid sequence; primary name 'seq'.
attributetype ( NA-sequence-attr-oid
    NAME ( 'seq' 'na' 'nucleic-acid' )
    DESC 'sequence of nucleic acid data'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Binary-coded nucleic-acid sequence; primary name 'bseq'.
attributetype ( Binary-NA-sequence-attr-oid
    NAME ( 'bseq' 'bna' 'binary-nucleic-acid' )
    DESC 'binary coded sequence of nucleic acid data'
    SUP nucleic-acid
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.5 )

# Text amino-acid sequence; primary name 'aa'.
attributetype ( AA-sequence-attr-oid
    NAME ( 'aa' 'amino-acid' )
    DESC 'sequence of amino acid data'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Binary-coded amino-acid sequence; primary name 'baa'.
attributetype ( Binary-AA-sequence-attr-oid
    NAME ( 'baa' 'binary-amino-acid' )
    DESC 'binary coded sequence of amino acid data'
    SUP amino-acid
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.5 )

# objects ------------

# Directory-level entry: requires 'id'; all other attributes are optional.
# 'id', 'name', 'description', 'url' and 'web' are not defined in this file.
objectclass ( BioseqDirectory-oid
    NAME 'BioseqDirectory'
    DESC 'Biosequence Data directory'
    SUP top
    MUST ( id )
    MAY ( name $ lib $ description $ url $ web $ con ) )

# this is object for directory lib attribute (id == directory.lib ?)
# Databank entry: requires 'id'; same optional attributes as BioseqDirectory
# plus 'fmt'.
objectclass ( BioseqDatabank-oid
    NAME 'BioseqDatabank'
    DESC 'Biosequence Databank'
    SUP top
    MUST ( id )
    MAY ( name $ lib $ description $ url $ web $ con $ fmt ) )

# add some set range info: start,length ; also flavors for Entry/Record/BinRecord ?
# Extends BioseqDatabank (so 'id' remains required) with optional referral
# and descriptive attributes.
objectclass ( BioseqSet-oid
    NAME 'BioseqSet'
    DESC 'Set of Biosequence Entries'
    SUP BioseqDatabank
    MAY ( data-ref $ ref $ fmt $ cc $ key $ des $ dat $ spp ) )

# general data object - nonsequence - SUP for Bioseq?
# should also be extensibleObject
# SUP top is stated explicitly so this class is rooted the same way as
# BioseqDirectory and BioseqDatabank.
objectclass ( Biodata-oid
    NAME 'Biodata'
    DESC 'Biodata Entry'
    SUP top
    MUST ( id )
    MAY ( data-ref $ ref $ fmt $ name $ cc $ key $ lib $ des $ dat $ spp ) )

# Base class for sequence entries: requires both 'id' and 'acc'.
# SUP top is stated explicitly so this class is rooted the same way as
# BioseqDirectory and BioseqDatabank.
objectclass ( BioseqEntry-oid
    NAME 'BioseqEntry'
    DESC 'Biosequence Entry'
    SUP top
    MUST ( id $ acc )
    MAY ( fmt $ cc $ div $ gen $ key $ lib $ des $ dat $ mol $ sl $ spp ) )

# BioseqEntry plus an optional 'seq' attribute.
objectclass ( BioseqRecord-oid
    NAME 'BioseqRecord'
    DESC 'Biosequence Record'
    SUP BioseqEntry
    MAY ( seq ) )

# BioseqEntry plus an optional 'bseq' attribute.
objectclass ( BinaryBioseqRecord-oid
    NAME 'BinaryBioseqRecord'
    DESC 'Binary encoded Biosequence Record'
    SUP BioseqEntry
    MAY ( bseq ) )

## not same as gnomap.schema --- are these objects as well as attributes?
## optional ascii or binary attribute ?
## can we do MUST ( na | bna ) ??
# Both forms are listed under MAY, so neither is required as written.
objectclass ( NA-Sequence-oid
    NAME 'NA-sequence'
    DESC 'Nucleic acid sequence record'
    SUP BioseqEntry
    MAY ( na $ bna ) )

objectclass ( AA-Sequence-oid
    NAME 'AA-sequence'
    DESC 'Amino acid sequence record'
    SUP BioseqEntry
    MAY ( aa $ baa ) )