# bioseq4.schema
# ldap schema for bio-sequence databanks in Bio-Mirror project
# minimal info common to many, or needed for some
# version 0.4, dec02, d.gilbert

## ?? make this schema self-contained? (no refs to core.schema, other ldap schema)

## grep -- '-oid' bioseq4.schema |\
## perl -e 'while(<>){if (/([\w\-]+-oid)/) \
## {$o= $1;unless($h{$o}++){ $n++;print "objectidentifier $o 1953.$n\n";}}}' \
## > ! bioseq4.oids

# Note: as much as possible, match attribute name to SRS databank field names
# to simplify srs2ldap: need mapping anyway, but LDAP enforces use of
# primary schema-defined names in query filters (substituting alternate for primary)

## ldap Syntax Object Identifiers
#binary: 1.3.6.1.4.1.1466.115.121.1.5
#integer:1.3.6.1.4.1.1466.115.121.1.27
#directory string: 1.3.6.1.4.1.1466.115.121.1.15
#printable string: 1.3.6.1.4.1.1466.115.121.1.44
#numeric string: 1.3.6.1.4.1.1466.115.121.1.36
#octet string: 1.3.6.1.4.1.1466.115.121.1.40

## to some extent this schema should be auto-generated from [srs] databanks/backend
## need to add srs fields or equivalents to handle
## common ones for bioseqs: acc, des, gen(geneName), div
## key, (dat/crd/crlu), cc
## refs: aut, tit, jnl, vol, fp, yr, mid, pmd, rc.

# dbn = DbName == lib??
# dbxref == dr for swissprot

# = srs field
# Keywords: case-insensitive equality and substring matching on a
# directory string.
attributetype ( keywords-oid
    NAME ( 'kw' 'key' 'Keywords' )
    DESC 'Keywords'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
# Gene name: equality is case-sensitive (caseExactMatch) while substring
# search ignores case.
attributetype ( genename-oid
    NAME ( 'gen' 'GeneName' )
    DESC 'Gene name'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
# Free-text comment, matched without regard to case.
attributetype ( comment-oid
    NAME ( 'cc' 'Comment' )
    DESC 'Comment'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# CRC value kept as a directory string; equality only, no substring rule.
attributetype ( crc-oid
    NAME ( 'cr' 'crc' )
    DESC 'CRC value'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Databank file: equality is case-sensitive, substring search is not.
attributetype ( file-oid
    NAME ( 'fl' 'file' )
    DESC 'Databank file'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs library key
attributetype ( library-oid
    NAME ( 'lib' 'Library' )
    DESC 'Databank library'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# ?
# use this or use generic 'category' attribute
attributetype ( libgroup-oid
    NAME ( 'libgroup' 'LibraryGroup' )
    DESC 'Databank library category'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Name of a databank field, matched without regard to case.
attributetype ( DatabankField-attr-oid
    NAME ( 'field' 'DatabankField' )
    DESC 'Databank Field'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Name of a linked databank, matched without regard to case.
attributetype ( DatabankLink-attr-oid
    NAME ( 'link' 'DatabankLink' )
    DESC 'Linked databank'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
# 'Divison' is a historical misspelling; it is kept as an alias so existing
# filters and LDIF that use it keep working. 'Division' is the correctly
# spelled long name. 'div' remains the primary name.
attributetype ( division-oid
    NAME ( 'div' 'Divison' 'Division' )
    DESC 'Databank division'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Integer-valued attributes (INTEGER syntax, integerMatch equality).
attributetype ( Count-oid
    NAME ( 'count' )
    DESC 'count of items'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Index-oid
    NAME ( 'in' 'index' )
    DESC 'index of items'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Startitem-oid
    NAME ( 'start' 'Startitem' )
    DESC 'start of items'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

# Query string; 'filter' is an alias for the same attribute.
attributetype ( Querystring-oid
    NAME ( 'query' 'filter' )
    DESC 'Query string'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Referral held as a plain directory string (no SUP is declared).
attributetype ( Data-ref-oid
    NAME 'data-ref'
    DESC 'referral to ldap://site-info containing data'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )
#was ref, ldap v2.1.9 won't allow !??
# SUP ref

## add from gnomap2.schema for gnomap ldif data
attributetype ( Chromosome-attr-oid
    NAME ( 'chr' 'Chromosome' )
    DESC 'Chromosome'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Sequence feature, matched without regard to case.
attributetype ( Feature-attr-oid
    NAME ( 'ft' 'ftk' 'Feature' )
    DESC 'Sequence feature'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# Feature location: first and last base numbers, both INTEGER syntax.
attributetype ( Loc-start-oid
    NAME ( 'bpb' 'bstart' 'Base-start' )
    DESC 'start base number of feature'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

attributetype ( Loc-stop-oid
    NAME ( 'bpe' 'bstop' 'Base-stop' )
    DESC 'last base number of feature'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

## in biomirror-catalog-spec.schema
# Object id: single-valued; equality is case-sensitive, substring search
# ignores case.
attributetype ( id-oid
    NAME 'id'
    DESC 'object id'
    EQUALITY caseExactMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15
    SINGLE-VALUE )

## in biomirror-catalog-spec.schema
attributetype ( format-oid
    NAME ( 'fmt' 'format' )
    DESC 'data format'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# = srs field
attributetype ( acc-oid
    NAME ( 'ac' 'acc' )
    DESC 'accession'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# same as gnomap.schema
# org is srs field, make it primary name ??
attributetype ( Species-attr-oid
    NAME ( 'os' 'org' 'spp' 'Species' )
    DESC 'Species'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## need to fix this to parsable date - number like 20020130
## crlu == swissprot/trembl/embl date updated
## do we need a different attribute for each date subtype ?
# All date flavours (dt, dat, lsu, lau, crlu, crd, Date) are names of one
# attribute, stored as a case-insensitive directory string.
attributetype ( Date-oid
    NAME ( 'dt' 'dat' 'lsu' 'lau' 'crlu' 'crd' 'Date' )
    DESC 'Date'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in biomirror-catalog-spec.schema
## replace with core labeledURI attr
# Subtype of labeledURI that declares its own case-insensitive matching rules.
attributetype ( url-oid
    NAME 'url'
    DESC 'universal resource locator'
    SUP labeledURI
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch )

# Subtype of labeledURI with nothing overridden.
attributetype ( web-oid
    NAME 'web'
    DESC 'web page'
    SUP labeledURI )

# Taxonomic classification, matched without regard to case.
attributetype ( taxonomy-oid
    NAME ( 'taxon' 'taxonomy' )
    DESC 'taxonomic or phylogenetic classification'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# ? use standard Definition attribute
# = srs field
attributetype ( Description-attr-oid
    NAME ( 'de' 'des' )
    DESC 'Description'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in gnomap schema
# srs field names vary (dr, ??)
attributetype ( Dbxref-oid
    NAME ( 'dr' 'dbxref' )
    DESC 'Database cross-reference'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## in gnomap schema
attributetype ( content-oid
    NAME ( 'con' 'content' )
    DESC 'data content types'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

## should be numeric value
# = srs field
# Declared here with INTEGER syntax and integerMatch equality.
attributetype ( Seqlength-oid
    NAME ( 'sl' 'Seqlength' )
    DESC 'length of sequence'
    EQUALITY integerMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

# this should be an enum:
# = srs field
# Currently an unconstrained, case-insensitive directory string.
attributetype ( molecule-oid
    NAME 'mol'
    DESC 'Molecule type'
    EQUALITY caseIgnoreMatch
    SUBSTR caseIgnoreSubstringsMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

# na,bna,aa,baa are in gnomap.schema
# ? use mol type and only one seq attr for na/aa ?
# Sequence and record payload attributes.
#
# Text forms use the directory string syntax with caseIgnoreMatch equality
# and no substring rule. Binary forms use the Binary syntax (...1.5) and
# declare no matching rules.
#
# NOTE(review): some LDAP servers validate Binary-syntax (...1.5) values as
# BER and require the ";binary" transfer option. Confirm the target server
# accepts arbitrary payloads (e.g. gzip data in 'brec'); if it does not,
# Octet String (...1.40) may be the syntax that was intended.

attributetype ( NA-sequence-attr-oid
    NAME ( 'sq' 'seq' 'na' 'nucleic-acid' )
    DESC 'sequence of nucleic acid data'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( Binary-NA-sequence-attr-oid
    NAME ( 'bseq' 'bna' 'binary-nucleic-acid' )
    DESC 'binary coded sequence of nucleic acid data'
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.5 )
# SUP nucleic-acid

attributetype ( AA-sequence-attr-oid
    NAME ( 'aa' 'amino-acid' )
    DESC 'sequence of amino acid data'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( Binary-AA-sequence-attr-oid
    NAME ( 'baa' 'binary-amino-acid' )
    DESC 'binary coded sequence of amino acid data'
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.5 )
# SUP amino-acid

attributetype ( Data-record-attr-oid
    NAME ( 'drec' 'data-record' )
    DESC 'biology data record (native format)'
    EQUALITY caseIgnoreMatch
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 )

attributetype ( Compressed-data-attr-oid
    NAME ( 'brec' 'compressed-data-record' )
    DESC 'gzip compressed data record'
    SYNTAX 1.3.6.1.4.1.1466.115.121.1.5 )
# SUP data-record

# objects ------------

# Root class for a data directory; only 'id' is mandatory.
objectclass ( BioseqDirectory-oid
    NAME 'BioseqDirectory'
    DESC 'Biosequence Data directory'
    SUP top
    MUST ( id )
    MAY ( cn $ name $ lib $ des $ url $ web $ con ) )

# this is object for directory lib attribute (id == directory.lib ?)
# Same attribute set as BioseqDirectory plus the optional 'fmt'.
objectclass ( BioseqDatabank-oid
    NAME 'BioseqDatabank'
    DESC 'Biosequence Databank'
    SUP top
    MUST ( id )
    MAY ( cn $ name $ lib $ des $ url $ web $ con $ fmt ) )

# add some set range info: start,length ; also flavors for Entry/Record/BinRecord ?
# The superior class is written as a bare descriptor (no quotes), the form
# RFC 4512 defines for SUP and the form BiodataFile already uses.
objectclass ( BioseqSet-oid
    NAME 'BioseqSet'
    DESC 'Set of Biosequence Entries'
    SUP BioseqDatabank
    MAY ( data-ref $ ref $ fmt $ cc $ key $ des $ dat $ spp ) )

# Note the OID macro is BiodatabankFile-oid while the class name is
# 'BiodataFile'; both are kept as they are.
objectclass ( BiodatabankFile-oid
    NAME 'BiodataFile'
    DESC 'File of Biodatabank Records'
    SUP BioseqDatabank
    MAY ( data-ref $ ref $ cc $ key $ dat $ spp ) )

# general data object - nonsequence - SUP for Bioseq?
# should also be extensibleObject
#objectclass ( Biodata-oid NAME 'Biodata'
#    DESC 'Biodata Entry'
#    MUST ( id )
#    MAY ( data-ref $ ref $ fmt $ name $ cc $ key $ lib $ des $ dat $ spp )
#    )

# Generic data record; only 'id' is mandatory.
# SUP top is stated explicitly, as BioseqDirectory and BioseqDatabank do,
# rather than relying on the server to imply a superior for a class that
# declares none.
objectclass ( Biorecord-oid
    NAME 'BiodataRecord'
    DESC 'Biodata Record'
    SUP top
    MUST ( id )
    MAY ( name $ acc $ cc $ key $ lib $ des $ format $ Date $ Species $
        file $ crc $ index $ drec $ brec $ data-ref $ ref ) )

# The definitions below are disabled (commented out) and kept for reference.

#objectclass ( BioseqEntry-oid NAME 'BioseqEntry'
#    DESC 'Biosequence Entry'
#    MUST ( id $ acc )
#    MAY ( fmt $ cc $ div $ gen $ key $ lib $ des $ dat $ mol $ sl $ spp )
#    )
#
#objectclass ( BioseqRecord-oid NAME 'BioseqRecord'
#    DESC 'Biosequence Record'
#    SUP BioseqEntry
#    MAY ( seq )
#    )
#
#objectclass ( BinaryBioseqRecord-oid NAME 'BinaryBioseqRecord'
#    DESC 'Binary encoded Biosequence Record'
#    SUP BioseqEntry
#    MAY ( bseq )
#    )
#
### not same as gnomap.schema --- are these objects as well as attributes?
### optional ascii or binary attribute ?
### can we do MUST ( na | bna ) ??
#objectclass ( NA-Sequence-oid NAME 'NA-sequence'
#    DESC 'Nucleic acid sequence record'
#    SUP BioseqEntry
#    MAY ( na $ bna )
#    )
#
#objectclass ( AA-Sequence-oid NAME 'AA-sequence'
#    DESC 'Amino acid sequence record'
#    SUP BioseqEntry
#    MAY ( aa $ baa )
#    )