/* srs6ldap.cc SRS backend for LDAP directory service of biodata objects. d.gilbert, july 2002 revised sep 2002 */ // note -- srs6 is C++, dont use C compiler /************* compiling -- use with c-perl regex substitution package: pcre-3.9.tar.gz by Philip Hazel ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz pcrs-0.0.1-src.tar.gz by Andreas S. Oesterhelt , LGPL - Written and Copyright (C) 2000, 2001 set sx=/bio/mb/srs61 -- compile as backend (back-srs) for openldap slapd -------- see also back-srs/config.c, external.h,init.c, and esp. search.c as revised from back-passwd/ # must use -DSRS6LDAP_NOMAIN to keep out main() CC -w -g -I$sx/src -I./ -L$sx/bin/solaris -lsrs -lz -DSRS6LDAP_NOMAIN=1 -c srs6ldap2.cc # this is a muddle - as slapd is C code and has a complex configure/make system # need to add above SRS, pcre/pcrs libs to it for building slapd/back-srs Makefile parts: SRCS = search.c config.c init.c srs6ldap.cc OBJS = search.lo config.lo init.lo srs6ldap.o Monitor.o XLIBS = libsrs.a libpcrs.a libpcre.a libctreestd.a libgd.a AC_DEFS += -I$(SRS6)/src/ AC_LIBS += -lsrs -lpcre -lpcrs -lz XINCPATH += -I$(SRS6)/src slapd/ Makefile parts, to link w/ CC libs for srs6 : -lCstd -lCrun -lz AC_LIBS += -lCstd -lCrun -lz -- compile as stand-alone main (slapd/back-shell usable) -------- CC -w -g -I$sx/src -I./ -L$sx/bin/solaris -lsrs -lz \ -o srs6ldap2 srs6ldap2.cc \ $sx/bin/solaris/libsrs.a $sx/demoo/libpcrs.a $sx/demoo/libpcre.a \ $sx/bin/solaris/libctreestd.a $sx/bin/solaris/libgd.a $sx/bin/solaris/Monitor.o CC -w -g -I$sx/src -I./ -L. 
-lsrs -o srs6ldap2 srs6ldap2.cc \ libsrs.a libpcrs.a libpcre.a libctreestd.a libgd.a Monitor.o -- for USEZIP, add -lz and need zlib.h and libz available slapd/backend.c: #ifdef SLAPD_SRS #include "back-srs/external.h" #endif #if defined(SLAPD_SRS) && !defined(SLAPD_SRS_DYNAMIC) {"srs", srs_back_initialize}, #endif ../../include/portable.h #define SLAPD_SRS 1 #-------------- srs5 - wont work w/o major code changes - srs6 has PROGv , other mods slapd shell call for slapd.conf -- dont need, just use below command line in .conf file #!/bin/sh echo "srs6ldif call" >> log.srsslapd date >> log.srsslapd exec ./srs6ldif -s -l log.srsslapd -b 'srv=srs' #-------------- *************/ #include #include /* C++ ? */ #include /* C ?? */ #include "srs.h" #include "srsenv.h" /* for getopt() */ #include extern char *optarg; extern int optind; #if !defined(SRS6LDAP_NOPCRS) || SRS6LDAP_NOPCRS==0 /* perl regex match and substitute */ #define LDAPFILTER 1 #include "pcre.h" #include "pcrs.h" #endif #if !defined(SRS6LDAP_NOGZIP) || SRS6LDAP_NOGZIP==0 #define USEZIP 1 #include #endif #if defined(SRS6LDAP_PTHREADS) #include static pthread_mutex_t srs_mutex; #define THREAD_INIT() pthread_mutex_init( &srs_mutex, NULL); #define THREAD_LOCK() pthread_mutex_lock( &srs_mutex); #define THREAD_UNLOCK() pthread_mutex_unlock( &srs_mutex); #else #define THREAD_INIT() #define THREAD_LOCK() #define THREAD_UNLOCK() #endif #define FULLDN 1 #undef SRSSESSION #define _SRS6LDAP_INFO_ typedef struct srs6ldapInfo; typedef srs6ldapInfo * srsSessionInfo; // public headers of these methods #include "srs6ldap.h" const char * defaultConfig = "srs6ldap.conf"; //? use same as slapd - Not yet //?? "BioData" "BiodataSet" "BioseqSet" "NA-sequence" "BinaryBioseq" "BioseqEntry" // ! 
need to make these configurable - read from .config file // > BioseqEntry - entry w/o sequence // > BioseqRecord - entry w/ sequence // > BinaryBioseqRecord - binary encoded const char * defquery= "[swissprot-des:kinase]"; //const char * defbasedn= "srv=srs,o=bions"; const char * defObjectClass= "BioseqEntry"; const char * BioseqEntryClass= "BioseqEntry"; const char * BioseqRecordClass= "BioseqRecord"; const char * BinaryBioseqRecordClass= "BinaryBioseqRecord"; // want option to output format xml here ? const char * XMLRecordTag = BioseqEntryClass; // should be one of above ?? // can be globals static int debug= 0; // TEST for slapd - fixme static int doslapd= 0; // main only static FILE * outf= stdout; static FILE * logf= stderr; //static FILE * configf= NULL; // not used yet static int didConfig = 0; static int didSrsInit = 0; // this is a global? static int didOpenLog = 0; // this is a global? static int globalQueryId= 0; static const int maxDsize = 5024000; // FIXME - debug test; static const int kvbufMax= 1024 - 1; static const int kMaxAttr= 30; // FIXME - need expandable arrays here static const int kMaxObjClass = 9; #define LDAP_SCOPE_BASE 0 #define LDAP_SCOPE_ONELEVEL 1 #define LDAP_SCOPE_SUBTREE 2 // objectclass ( BioseqDirectory-oid NAME 'BioseqDirectory' // MUST ( name $ id ) // MAY ( lib $ description $ url $ web $ con ) //static struct DirInfo { char * name, * desc, * url, * id , * web, * con; DirInfo() { id = "iubio-srs6"; name = "IUBio Archive - SRS data directory"; desc = url= web= con= NULL; } }; static DirInfo dirInfo; static PROGv srsProg; // global SRS program initialize variable // ! keep this private to srs6ldap.cc but pass to/from // slapd backend as opaque object typedef struct srs6ldapInfo { // ldap search options char * basedn; char * suffix; int scope; int deref; long sizelimit; // long timelimit; // not used here? 
int allattr; int attrsonly; char * filter; char * attrlist; long msgid; int queryId; int setStart, setCount; char * contentType; int dozip; int doxml; int errorcode; char * srslib; char ** objClasses; char * query; char * qname; long atEntry, entryN, maxN; int kvbufat; char** kvbuf; // [kvbufMax+1]; -- FIXME - expandable array //void (*keyvalHandler)(char* lkey, char* val); // mapping b/n ldap, srs field names -? use struct char** srsfield; char** ldapattr; int* nattr; /*? drop this limit on # values / fields returned */ SETv srsResultSet; PROGv srsProg; #ifdef SRSSESSION SESSIONv sess; // ?? need this for multiple simultaneous sessions ? #endif }; // srs6ldapInfo; //static srsSessionInfo thisSi; // change to srs6ldapInfo ? // drop these srsStruc2Glob/ temp-dev calls ... static srs6ldapInfo* srsStruc2Glob(srsSessionInfo si) { if (si==NULL) return NULL; //srs6ldapInfo * sp= si; #ifdef SRSSESSION //srsProg= si->srsProg; //if (si->srsProg && si->sess) ProgSetSession (si->srsProg, si->sess); #endif return si; } static void srsGlob2Struc(srsSessionInfo ssi) { if (ssi==NULL) return ; srs6ldapInfo * si = (srs6ldapInfo *)ssi; } static srs6ldapInfo* srsStrucNew() { srs6ldapInfo * si; si = (srs6ldapInfo *) calloc( 1, sizeof(srs6ldapInfo) ); si->srsProg= srsProg; si->scope= -1; si->allattr= 1; si->kvbuf= (char**) calloc( kvbufMax+1, sizeof(char*) ); si->srsfield= (char**)calloc( kMaxAttr+1, sizeof(char*) ); si->ldapattr= (char**)calloc( kMaxAttr+1, sizeof(char*) ); si->nattr= (int*)calloc( kMaxAttr+1, sizeof(int) ); si->objClasses= (char**)calloc( kMaxObjClass+1, sizeof(char*) ); return si; } static void srsFreeList(char** list) { for (int i= 0; (list[i]); i++) { free(list[i]); list[i]= 0; } } static void srsStrucFree(srsSessionInfo si) { if (si) { //srs6ldapInfo * sp= (srs6ldapInfo *) si; free(si->kvbuf); // did free? 
srsFreeList(si->srsfield); free(si->srsfield);// need to free each in list srsFreeList(si->ldapattr); free(si->ldapattr);// need to free each in list free(si->nattr); srsFreeList(si->objClasses); free(si->objClasses); // need to free each in list if (si->query) free(si->query); if (si->qname) free(si->qname); free(si); } } static srsSessionInfo globalSi= srsStrucNew(); //--------- backend functions ----------------- static int readConfig(srsSessionInfo si, char* config); static void splitAttrs(srsSessionInfo si, char* attrlist); // need public ? static char** breakAttr(char* atr); static char* zipString( char* inbuf, int insize); static void readSlapdShellCommands(srsSessionInfo si); static void parseLdapUrl(srsSessionInfo si, char* lurl); static int pcrsCount; //? can be global static int pcrsDidMalloc; static const int kDoFree = 1; static const int kDontFree = 0; //#ifdef USEPCRS || LDAPFILTER static char* pcrsSubst(char* subpatt, char* line, int dofree); static char* ldap2SrsQuery(srsSessionInfo si, char* filter); //static char* xmlEscape( char* val); //#endif static void xmlEscapeOut(FILE* out, char* val); static int hasValue(char* val) { return (val && strlen (val)); } // ... openldap backend does this // srsSetDnList(si, dnkeys); // srsSetBaseDN(si, be->be_suffix[0]); void srsSetBaseDN(srsSessionInfo si,char* base,char* suffix) { si->basedn= hasValue(base) ? strdup(base) : NULL; si->suffix= hasValue(suffix) ? strdup(suffix) : NULL; if (debug) fprintf(logf,"# srsSetBaseDN base=%s,suffix=%s\n", (si->basedn)?si->basedn:"NULL",(si->suffix)?si->suffix:"NULL" ); } void srsSetDnList(srsSessionInfo si, char** dnkeys) { // array of key=value from caller "dn: k=v,x=y,.." excluding baseDN // parse for lib, id, other fields? // ? add to srsquery/ldapfilter ? 
} void srsSetSearchScope(srsSessionInfo si,int scope) { si->scope= scope; } void srsSetSizelimit(srsSessionInfo si, long slimit) { si->sizelimit= slimit; } void srsSetLibrary(srsSessionInfo si, char* libname) { si->srslib= hasValue(libname) ? strdup(libname) : NULL; } /* use these slapd srs_back_initialize links to init and close this "db" bi->bi_db_init = 0; // same as srsOpen ? bi->bi_db_config = srs_back_db_config; bi->bi_db_open = 0; srs_back_db_open; == srsOpen bi->bi_db_close = 0; srs_back_db_close; == srsClose bi->bi_db_destroy = 0; */ static STRv EntryToken2Str (ENTRYv entry, TOKv tok, FIELDv field, PROGv prog); /* keyvalHandler's */ static void putKeyVal(srsSessionInfo si,char* lkey, char* val); static void printKeyVal(srsSessionInfo si,char* lkey, char* val); static int printKeyValList(srsSessionInfo si, char** kvl); // need printKeyVal version to print to file // use also for xml formats to send to soap server // FILE *tmpfile __P((void)); // char *tmpnam __P((char *)); static void printAttribs1(srsSessionInfo si, ENTRYv entry, LIBv lib, PROGv prog, void (*keyvalHandler)(srsSessionInfo si,char* lkey, char* val) ); static void printAttribsAll(srsSessionInfo si, ENTRYv entry, LIBv lib, PROGv prog, void (*keyvalHandler)(srsSessionInfo si,char* lkey, char* val) ); // dang mem assumption here is that key is static string, val is calloc() string void srsFreeResult(srsSessionInfo si,char** kvlist) { if (kvlist==NULL) return; for (int i= 0; (kvlist[i]); i++) { if ( (i%2) == 1) free(kvlist[i]); } kvlist[0]= NULL; } // dang mem assumption here is that key is static string, val is calloc() string static void addKeyVal( srsSessionInfo si, char* key, char* val) { if (key==NULL) si->kvbuf[si->kvbufat]= NULL; // else if (si->keyvalHandler!=NULL) // si->keyvalHandler(si, key, val); else if (si->kvbufat+1 < kvbufMax) { si->kvbuf[si->kvbufat++]= key; si->kvbuf[si->kvbufat++]= val; si->kvbuf[si->kvbufat]= NULL; } } static void putKeyVal(srsSessionInfo si,char* lkey, char* 
val) { if (lkey==NULL) { // && val==NULL addKeyVal(si,NULL,NULL); // end of kvbuf } // what if lkey!=null && val == null ? else if (lkey && hasValue(val) && si->kvbufat < kvbufMax) { int doz= (si->dozip && strcmp(lkey,"seq")==0); if (doz) { char* bval= zipString(val,0); if (bval) { addKeyVal(si, "bseq", bval); return; } } // need to check for newlines in val and space one for ldif val= strdup(val); char * e= strchr(val,'\0'); for ( ; e > val && e[-1] <= ' '; --e) ; *e= '\0'; addKeyVal(si, lkey, val); } } static int didPutRec= 0; static void printKeyVal(srsSessionInfo si,char* lkey, char* val) { if (lkey && hasValue(val)) { if (strncmp(lkey,"dn: ",4)==0) { if (si->doxml) { if (didPutRec>0) fprintf( outf, "\n",XMLRecordTag); //char* xval= xmlEscape(val); //fprintf( outf,"<%s %s=\"%s",XMLRecordTag,lkey+4, xval); //if (pcrsDidMalloc) free(xval); fprintf( outf,"<%s %s=\"",XMLRecordTag,lkey+4); xmlEscapeOut(outf, val); //#ifndef FULLDN // if (hasValue(si->basedn)) fprintf( outf,",%s",si->basedn); //#endif fprintf( outf,"\">\n"); } else { if (didPutRec>0) fprintf( outf,"\n"); fprintf( outf,"%s=%s", lkey, val); //#ifndef FULLDN // if (hasValue(si->basedn)) fprintf( outf,",%s",si->basedn); //#endif fprintf( outf,"\n"); } didPutRec++; return; } char* bval= NULL; int doz= (si->dozip && strcmp(lkey,"seq")==0); if (strcmp(lkey,"bseq")==0) lkey= "bseq:"; // ldif binary: key char * e= strchr(val,'\0'); for ( ; e > val && e[-1] <= ' '; --e) ; *e= '\0'; // need to check for newlines in val and space one for ldif if (doz) { e= 0; bval= zipString(val,0); } else { e= strchr(val,'\n'); if (e) *e++= '\0'; } if (si->doxml) { //char* xval= xmlEscape(val); //fprintf( outf,"<%s>%s",lkey,xval); //if (pcrsDidMalloc) free(xval); fprintf( outf,"<%s>",lkey); xmlEscapeOut(outf, val); if (e) fprintf( outf,"\n"); } else if (doz && bval) { fprintf( outf,"%s: %s\n","bseq:",bval); free(bval); } else fprintf( outf,"%s: %s\n",lkey,val); while (e) { val= e; e= strchr(val,'\n'); if (e) *e++= '\0'; if 
(si->doxml) { //char* xval= xmlEscape(val); //fprintf( outf," %s\n",xval); //if (pcrsDidMalloc) free(xval); xmlEscapeOut(outf,val); fputc('\n',outf); } else { fprintf( outf," %s\n",val); } } if (si->doxml) fprintf( outf,"\n",lkey); } didPutRec= 0; } static void srsFinishQuery(srsSessionInfo si) { if (!si) return; if (si->srsResultSet!=NULL) SetDel(si->srsResultSet, si->srsProg); if (si->query) free(si->query); si->query= NULL; if (si->qname) free(si->qname); si->qname= NULL; si->srsResultSet= NULL; si->atEntry= 0; si->maxN= 0; si->entryN= 0; srsGlob2Struc(si); //! urk: cant do here, need to do before ldap2SrsQuery: setStart = setCount = 0; } static void srsOpen() { if (!didSrsInit) { didSrsInit= 1; if (!didConfig) readConfig( globalSi, (char*)defaultConfig); SrsEnv (); srsProg = SrsLibInit (); LibOpen (srsProg); // dgg -- need SessionNew for srs6 queries, // even so, cant use session query cache w/o something else //#ifndef SRSSESSION SESSIONv tsess= SessionNew(StrCpyS("dgg"), StrCpyS("dgg.sess"),srsProg); ProgSetSession (srsProg, tsess); //^ ? do this for each new ldap client, but not for repeat client searches //#endif THREAD_INIT() } } void srsClose() /* for bi->bi_db_close = srs_back_db_close; */ { srsFinishQuery(globalSi); } srsSessionInfo srsSessionOpen(long messageid) { // should this be new for each search? or each client? srs6ldapInfo *si= srsStrucNew(); srsGlob2Struc(si); //? // want this? messageid= ++globalQueryId; srsOpen(); si->srsProg= srsProg; #ifdef SRSSESSION { si->srsProg = SrsLibInit (); LibOpen (si->srsProg); //srsProg= si->srsProg; char ubuf[128]; sprintf(ubuf,"ldap%d",messageid); si->sess= SessionNew( StrCpyS("ldap"), StrCpyS(ubuf), si->srsProg); ProgSetSession (si->srsProg, si->sess); } #endif srsStruc2Glob(si); // set some defaults from globalSi == config file ??? 
//if (hasValue(contype)) si->contentType= contype; //if (hasValue(globalSi->basedn)) si->basedn= globalSi->basedn; if (hasValue(globalSi->suffix)) si->suffix= globalSi->suffix; //if (hasValue(obclasslist)) srsSetObjectClasses(si,obclasslist); srsSetSizelimit(si, globalSi->sizelimit); // for later fix of global sizelimit if (hasValue(globalSi->attrlist))si->attrlist= globalSi->attrlist; //? strdup() //si->filter= globalSi->filter; //si->dozip= globalSi->dozip; //si->doxml= globalSi->doxml; si->msgid= messageid; si->setStart = si->setCount = 0; si->contentType= NULL; if (debug) fprintf(logf,"# srsSessionOpen %d\n",si->msgid); return si; } void srsSessionClose(srsSessionInfo si) { srsFinishQuery(si); srsStrucFree(si); } static long srsStartQuery(srsSessionInfo si, char* query) { srsFinishQuery(si); if (debug) fprintf(logf,"# srsStartQuery = %s\n", (query==NULL)?"NULL":query); if (query==NULL || strlen(query)<2) return 0; // check for some other non-srs queries here to keep from bombing... if (strchr(query,'[') == NULL || strchr(query,']') == NULL) return 0; // is 'lib=' required? globalQueryId++; si->queryId= globalQueryId; char qname[128]; sprintf(qname,"Qi%d",si->queryId); //msgid; << not unique if (si->query) free(si->query); si->query= strdup(query); if (si->qname) free(si->qname); si->qname= strdup(qname); si->srsResultSet= Query (si->query, si->qname, si->srsProg); // change "Q" to Q# counter ? // this/srs crashes here? if query lib is bad ... if (si->srsResultSet) { si->maxN= si->entryN= SetGetSize(si->srsResultSet); if (si->sizelimit > 0 && si->maxN > si->sizelimit) si->maxN= si->sizelimit; if (debug) {fprintf(logf,"# result set size=%d, start=%d, end=%d, output limit=%d\n", si->entryN, si->setStart, si->setStart+si->setCount, si->maxN); fflush(logf);} // patch for problem w/ SetGetID() on library change !! // SetGetLibName() (or SetGetLibN() ?) 
must initialize something that is needed at lib rollover int nlibs= SetGetLibN (si->srsResultSet, si->srsProg); for (int ilib= 1; ilib<=nlibs; ilib++) { char* slib= SetGetLibName (si->srsResultSet, ilib, si->srsProg); if (slib==NULL) slib= "NULL"; if (debug) { fprintf(logf,"# Set lib[%d]=%s\n", ilib, slib); fflush(logf); } } //? handle setStart, setCount here? - modify atEntry, maxN if (si->setStart>0) si->atEntry= si->setStart; if (si->setCount>0 && (si->atEntry + si->setCount < si->entryN)) si->maxN= si->atEntry+si->setCount; // do some other settings here // -- resultset object class, is-sequence?, seq-formats: from lib.field(seq) } return si->maxN; } long srsLdapQuery(srsSessionInfo si,char* ldapfilter) { srsStruc2Glob(si); si->filter= ldapfilter; // save in si for result info //si->setStart = si->setCount = 0; char* srsquery= ldap2SrsQuery(si,ldapfilter); long res= srsStartQuery(si, srsquery); srsGlob2Struc(si); return res; } long srsGetzQuery(srsSessionInfo si,char* srsquery) { srsStruc2Glob(si); //si->setStart = si->setCount = 0; long res= srsStartQuery(si, srsquery); srsGlob2Struc(si); return res; } // library info functions: getz -libs and getz -info lib equivalents // if (ParGetNum ("printLibs")) LibPrintLibs (); // if (ParGetNum ("printLibInfo") && argc == 2) LibPrintInfo (argv[1]); // for (c=0; (field = LibNextField (EntryGetLib (entry), &c)); ) { // if (FieldIs (field, "index") && FieldIs (field, "active")) // if (*ParGetStr ("fieldList")) EntryPrintFields (entry, NULL); /* add query result set info - maybe use as primary return from query rather than all the items: -- num results -- list of libs: LIBv SetGetNextLib (SETv set, Int4 *n, PROGv prog); -- urls to result data in various forms: as now, individual items, with or w/o raw data all-in-one batch file of data option to split, select subsets by "page" - start, length of result subset optional transport by ftp, grid-ftp ? optional various output formats ? 
(at least fasta for sequences) -- what does this one do - return list of entries in subrange? OBJv SetGetEntries(SETv set, Int4 fromIndex, Int4 toIndex, PROGv prog); */ static char* makeRef(char* fmt, char* basedn, char* filter) { int n= 32 + strlen(fmt)+strlen(basedn)+strlen(filter); char* buf= (char*) calloc( n, sizeof(char)); sprintf(buf, fmt, basedn, filter); return buf; } /* -- this isn't working well # use alias or ref objects to get to SRS search bases? dn: spp=fly,srv=srsgnomap objectClass: referral ref: ldap://eugenes.org:3891/spp=fly,srv=srsgnomap ---- browsers are not doing anything useful w/ these - need ref ? dn: cn=alias1,ou=bio.indiana.edu,o=Grid objectClass: alias objectClass: extensibleObject description: don's alias mail: software@bio.indiana.edu aliasedObjectName: cn=Don Gilbert, ou=bio.indiana.edu,o=Grid */ char** srsGetBioseqSet(srsSessionInfo si) // was srsGetResultSet { srsStruc2Glob(si); if (si->srsResultSet == NULL || si->atEntry >= si->entryN) { srsFinishQuery(si); return NULL; } //if (atEntry >= 1 || srsResultSet == NULL) { srsFinishQuery(si); return NULL; } //atEntry++; si->atEntry= si->entryN; //? STRv setname= SetGetName (si->srsResultSet); char* qid= _Str(setname); si->kvbufat= 0; //#ifdef FULLDN char idbuf[512]; if (hasValue(si->basedn)) sprintf( idbuf, "%s,%s", qid, si->basedn); else sprintf( idbuf, "%s", qid); addKeyVal( si, "dn: resultset", strdup(idbuf)); //#else // addKeyVal( si, "dn: resultset", strdup(qid)); //#endif if (debug>1) fprintf(logf,"srsGetBioseqSet id=%s\n",qid); addKeyVal( si, "objectClass", strdup("BioseqSet")); // bioseq.schema FIXME ! 
addKeyVal( si, "objectClass", strdup("extensibleObject")); addKeyVal( si, "id", strdup(qid)); char ebuf[128]; sprintf(ebuf,"%d",si->entryN); addKeyVal( si, "count", strdup(ebuf)); if (hasValue(si->filter)) { addKeyVal( si, "filter", strdup(si->filter)); } int isseq= 1; // get from lib has 'seq' field if (isseq) { addKeyVal( si, "fmt", strdup("native")); // get default format from srs ? see below addKeyVal( si, "fmt", strdup("fasta")); } if (isseq) { // need field for this... addKeyVal( si, "cc", strdup( "Available objectClasses: BioseqEntry BioseqRecord BinaryBioseqRecord")); } int nlibs= SetGetLibN (si->srsResultSet, si->srsProg); for (int ilib= 1; ilib<=nlibs; ilib++) { char* slib= SetGetLibName (si->srsResultSet, ilib, si->srsProg); if (slib) addKeyVal( si, "lib", strdup(slib)); } // alternate urls for each lib? // lurl= "ldap://hostname/lib=xxx,basedn???(&objectClass=BioseqRecord)(query)"; // // // add these as url/dn attributes ?? // if (si->basedn && hasValue(si->filter)) { // //! note, these only show with '+' attribute request or // // if client handles referrals // char * obfilt; //= strdup(filter); // obfilt= pcrsSubst("s/\\(objectclass=([^\\)]+)\\)/(objectClass=BioseqEntry)/i", // si->filter, kDontFree); // addKeyVal( si, "url", makeRef("%s??sub?%s",si->basedn,obfilt)); // if (pcrsDidMalloc) free(obfilt); // // obfilt= pcrsSubst("s/\\(objectclass=([^\\)]+)\\)/(objectClass=BioseqRecord)/i", // si->filter, kDontFree); // addKeyVal( si, "url", makeRef("%s??sub?%s",si->basedn,obfilt)); // if (pcrsDidMalloc) free(obfilt); // } StrDel(setname); srsGlob2Struc(si); return si->kvbuf; } char** srsGetDirectoryInfo(srsSessionInfo si) //srsGetLibList() { LIBvGroup group; LIBv lib; Index * libInx; //? pass ldap search filter for libname ? - parse and check for libname matches // or add base level, scope to srsLdapQuery(char* ldapfilter) si->maxN= si->entryN= 1; //? shouldnt need here si->atEntry= si->entryN; //? 
srsStruc2Glob(si); //srsOpen(); // avoid this if (debug>1) fprintf(logf,"srsGetDirectoryInfo \n"); // there is no idval for this - only basedn which caller sets ! // but add two blank values for caller ? si->kvbufat= 0; //#ifdef FULLDN //!? need "dn: id", dir.id,basedn here ??? char idbuf[512]; if (hasValue(si->basedn)) sprintf( idbuf, "%s,%s", dirInfo.id, si->basedn); else sprintf( idbuf, "%s", dirInfo.id); addKeyVal( si, "dn: id", strdup(idbuf)); //#else // addKeyVal( si, "dn: id", strdup(dirInfo.id)); //#endif addKeyVal( si, "objectClass", strdup("BioseqDirectory")); // bioseq.schema FIXME ! addKeyVal( si, "objectClass", strdup("extensibleObject")); addKeyVal( si, "id", strdup(dirInfo.id)); // change this to put all of dirInfo key/val list if (hasValue(dirInfo.name)) addKeyVal( si, "name", strdup(dirInfo.name)); if (hasValue(dirInfo.desc)) addKeyVal( si, "desc", strdup(dirInfo.desc)); if (hasValue(dirInfo.url)) addKeyVal( si, "url", strdup(dirInfo.url)); if (hasValue(dirInfo.web)) addKeyVal( si, "web", strdup(dirInfo.web)); if (hasValue(dirInfo.con)) addKeyVal( si, "con", strdup(dirInfo.con)); for (int c1= 0; (group= LibNextLibGroup (&c1, si->srsProg)); ) { char* groupName = LibGetGroupName (group, "full"); int addgroup= 1; //addKeyVal( si, "content", strdup(groupName)); //? "libgroup"; "category"? for (int c2= 0; (lib= LibNextLib (group, &c2, si->srsProg)); ) { if (si->kvbufat >= kvbufMax || LibIs (lib, "subentries", si->srsProg)) continue; if (addgroup) { addKeyVal( si, "content", strdup(groupName)); //? "libgroup"; "category"? addgroup= 0; } char* libName = LibName (lib); addKeyVal( si, "lib", strdup(libName)); // strdup or not ? } } srsGlob2Struc(si); return si->kvbuf; } static char** getOneLib(srsSessionInfo si, LIBv lib) { char* tmp; if (!lib) return NULL; char* libname= LibName ( lib); char* libdesc= LibGetName ( lib, "full"); //? 
or this: STRv LibGetPrintName (LIBv library); if (debug>1) fprintf(logf,"getOneLib lib=%s\n",libname); LIBvGroup group= LibGetGroup ( lib, si->srsProg); char* libgroup = LibGetGroupName (group, "full"); // dn: lib=libname si->kvbufat= 0; // can only do one here //#ifdef FULLDN char idbuf[512]; if (hasValue(si->suffix)) sprintf( idbuf, "%s,%s", libname, si->suffix); else sprintf( idbuf, "%s", libname); addKeyVal( si, "dn: lib", strdup(idbuf)); //#else // addKeyVal( si, "dn: lib", strdup(libname)); //#endif //objectclass ( BioseqDatabank-oid NAME 'BioseqDatabank' // MUST ( name $ id ) // MAY ( lib $ description $ url $ web $ con $ fmt ) addKeyVal( si, "objectClass", strdup("BioseqDatabank")); // bioseq.schema FIXME ! addKeyVal( si, "objectClass", strdup("extensibleObject")); addKeyVal( si, "id", strdup(libname)); addKeyVal( si, "name", strdup(libdesc)); addKeyVal( si, "libgroup", strdup(libgroup)); Index * libInx = LibIndexOpen(lib, LibGetIdField (lib, si->srsProg), 1, si->srsProg); if (libInx) { tmp = IdxGetReleaseName (libInx->getIdx()); if (hasValue(tmp)) { addKeyVal( si, "cc", strdup(tmp)); // "rel"; } } // UINT4 itime= libInx->getIdsTimeCreated (); tmp= TimeToString (LibGetIndexCreTime(lib), "date"); if (hasValue(tmp)) { addKeyVal( si, "dat", strdup(tmp)); } FIELDv field; for (int c=0; (field = LibNextField (lib, &c, si->srsProg)); ) { char* fname= FieldGetName(field, si->srsProg); if (fname) { addKeyVal( si, "field", strdup(fname)); } tmp= FieldGetShortName (field, si->srsProg);//?? 
or not if (tmp && (strcmp(tmp,fname)!=0) ) { addKeyVal( si, "field", strdup(tmp)); } } LINKv link; for (int c=0; (link = LinkNext (&c, si->srsProg)); ) { char* lname= NULL; if ( lib == LinkGetFromLib (link, si->srsProg) ) lname= LinkGetToName (link); else if (lib == LinkGetToLib (link, si->srsProg)) lname= LinkGetFromName (link); else continue; if (lname) { addKeyVal( si, "link", strdup(lname)); } } return si->kvbuf; } static char** getLibraryInfo(srsSessionInfo si, char* libname, int iter) { //? pass ldap search filter for libname ? - parse and check for libname matches LIBv lib; LIBvGroup group; char** kvlist= NULL; int nlibs= 0, ilib= 0; int iswild= 0; int doall= 0; srsStruc2Glob(si); if (si->sizelimit > 0 && iter >= si->sizelimit) return NULL; if (!hasValue(libname)) libname= "*"; libname= strdup(libname); //? char * w= strchr(libname,'*'); if (w) { iswild= 1; *w= '\0'; } doall= (strlen(libname)==0); si->kvbufat= 0; // can only do one here //if (debug) fprintf(logf,"srsGetLibraryInfo lib=%s\n",libname); //! this only should return one dn: lib=XXX, not a whole list ! if (doall || iswild) for (int c1= 0; ilib<=iter && (group= LibNextLibGroup (&c1, si->srsProg)); ) { for (int c2= 0; ilib<=iter && (lib= LibNextLib (group, &c2, si->srsProg)); ) { if (si->kvbufat >= kvbufMax || LibIs (lib, "subentries", si->srsProg)) continue; char* atLib = LibName (lib); if (doall || strncasecmp(atLib, libname, strlen(libname))==0) { //nlibs += getOneLib( lib); if (iter == ilib) kvlist= getOneLib(si, lib); ilib++; } } } else if (iter==0) { LIBv lib= LibGetNamed (libname, si->srsProg) ; if (lib) kvlist= getOneLib(si, lib); } free(libname); if (kvlist==NULL) { // some error report si->maxN= si->entryN= ilib; // ilib ? 
} srsGlob2Struc(si); return kvlist; } char** srsGetNamedLibraryInfo(srsSessionInfo si, char* libname) { return getLibraryInfo(si, libname, 0); } char** srsGetLibraryInfo(srsSessionInfo si, int index) { return getLibraryInfo(si, si->srslib, index); } //static char* getFastaSeq(char* seqid, ENTRYv entry, LIBv lib, PROGv prog); static char* getSeqField(char* fldname, char* format, char* seqid, ENTRYv entry, LIBv lib, PROGv prog, int dozip); #define USEIDSRS 1 #ifdef USEIDSRS char* getRawdataById(IDv id, char* idname, ENTRYv entry, LIBv lib, PROGv prog, int dozip); #endif /* USEIDSRS*/ /* try for speed w/o icarus parsing as per srs5 asksrs */ // was srsGetNextRawdata char** srsGetNextBioseqRecord(srsSessionInfo si, int asbinary) { if (si->atEntry >= si->maxN) { srsFinishQuery(si); return NULL; } si->atEntry++; si->dozip= asbinary; THREAD_LOCK() // test where need thread locks for srs calls IDv idv= IdNew (); int gid= SetGetID (si->srsResultSet, si->atEntry, idv); ENTRYv entry = EntryOpen (si->srsProg, idv); //LIBv libv = (LIBv) LibObjById("library", IdGetLibId(idv), srsProg); LIBv libv = EntryGetLib(entry, 0); char* lib= LibName(libv); if ( lib==NULL) lib="NULL"; //? char* idval= EntryGetFullName(entry); // this is quicker than field processing, for ID if (idval) { char * c= strchr(idval,':'); if (c) { idval= c+1; } } else idval= "null"; char idbuf[512]; // FIXME //#ifdef FULLDN if (hasValue(si->suffix)) sprintf( idbuf, "%s,lib=%s,%s", idval, lib, si->suffix); else //#endif sprintf( idbuf, "%s,lib=%s", idval, lib); if (debug>1) fprintf(logf,"getraw dn: id=%s,lib=%s\n", idval, lib); char* data= NULL; char* fmt= "native"; char* dkey= "seq"; // not for locuslink, others ? 
if (si->contentType!=NULL && strstr(si->contentType,"fasta")!=NULL) { fmt= "fasta"; data= getSeqField( "seq", fmt, idval, entry, libv, si->srsProg, 0); //(si->dozip)); } else if (si->contentType!=NULL && strstr(si->contentType,"raw")!=NULL) { fmt= "raw"; data= getSeqField( "seq", fmt, idval, entry, libv, si->srsProg, 0); //(si->dozip)); } else { fmt= "native"; //FIXME data= getRawdataById(idv, idval, entry, libv, si->srsProg, 0); //(si->dozip)); } EntryClose (&entry); THREAD_UNLOCK() // test where need thread locks for srs calls si->kvbufat= 0; addKeyVal( si, "dn: id", strdup(idbuf)); addKeyVal( si, "objectClass", strdup(BioseqRecordClass)); addKeyVal( si, "objectClass", strdup("extensibleObject")); //? add data size field ('sl') ? - confusing sequence length and record length if (data) { // should do gzip instream as data read for efficiency .. if (si->dozip) { char* zdata= zipString(data,strlen(data)); if (zdata) { free(data); data= zdata; dkey= "bseq"; } } addKeyVal( si, "fmt", strdup(fmt)); addKeyVal( si, dkey, data); // data is malloc()ed } return si->kvbuf; } // was srsGetNextEntryResult char** srsGetNextBioseqEntry(srsSessionInfo si, int asbinary) { if (si->atEntry >= si->maxN) { srsFinishQuery(si); return NULL; } si->atEntry++; si->dozip= asbinary; // thisSi= si; // !! >>SetGetLibName<< solves SetGetID failure -- do this in query start THREAD_LOCK() // test where need thread locks for srs calls IDv idv= IdNew (); int gid= SetGetID (si->srsResultSet, si->atEntry, idv); // ^^ failure is here/before this on change of libs in resultset !! // this doesnt fail when running stand-alone main() call ! 
if (debug>3) { fprintf(logf,"srsGetNextBioseqEntry: SetGetID, gid=%d\n", gid); fflush(logf);} ENTRYv entry = EntryOpen (si->srsProg, idv); if (debug>3) { fprintf(logf,"srsGetNextBioseqEntry: EntryOpen\n"); fflush(logf);} LIBv libv = EntryGetLib(entry, 0); char* lib= LibName(libv); if (lib==NULL) lib= "null"; if (debug>3) { fprintf(logf,"srsGetNextBioseqEntry: lib=%s\n", lib); fflush(logf);} char* idval= EntryGetFullName(entry); // ^ this prepends libname to id ! if (idval) { char * c= strchr(idval,':'); if (c) { idval= c+1; } } else idval= "null"; if (debug>3) { fprintf(logf,"dn: id=%s,lib=%s\n", idval, lib); fflush(logf); } char idbuf[512]; // FIXME //#ifdef FULLDN if (hasValue(si->suffix)) sprintf( idbuf, "%s,lib=%s,%s", idval, lib, si->suffix); else //#endif sprintf( idbuf, "%s,lib=%s", idval, lib); si->kvbufat= 0; addKeyVal( si, "dn: id", strdup(idbuf)); addKeyVal( si, "objectClass", strdup(BioseqEntryClass)); addKeyVal( si, "objectClass", strdup("extensibleObject")); // thisSi= si; // hack for putKeyVal if (si->allattr) printAttribsAll(si, entry, libv, si->srsProg, putKeyVal ); else if (si->srsfield[0] != NULL) // could want just dn printAttribs1(si, entry, libv, si->srsProg, putKeyVal ); EntryClose (&entry); THREAD_UNLOCK() // test where need thread locks for srs calls return si->kvbuf; } int srsOpenLogfile(srsSessionInfo si,char* logfile, int debuglevel) { //? should we open/close for each query rather than leave open forever? if (didOpenLog) return 0; if (!hasValue(logfile)) { debug=0; return 0; } debug= debuglevel; if (!debug) debug= 1; if ((logf = fopen(logfile, "a")) == NULL) { (void)fprintf(stderr, "error opening: %s: %s\n", logfile); logf= stderr; return -1; } didOpenLog= 1; return 0; } char** srsLookupEntry(srsSessionInfo si, char* lib, char* field, char* val) { if (lib==NULL || field==NULL || val==NULL) return NULL; srsStruc2Glob(si); //! need objClasses[ic] from caller ! 
//objClasses[0]= "BioseqRecord"; objClasses[1]= 0; char qbuf[1024]; // FIXME sprintf(qbuf,"[%s-%s:%s]",lib,field,val); si->setStart = si->setCount = 0; int nres= srsStartQuery(si, qbuf); char** res= (nres==0) ? NULL : srsGetNextBioseqRecord(si, si->dozip); srsGlob2Struc(si); return res; } void srsSetResultPage(srsSessionInfo si, int startEntry, int maxEntry) { srsStruc2Glob(si); si->setStart= startEntry; si->setCount= maxEntry; //? if (si->setStart < 0 || si->setStart > si->entryN) si->setStart= si->atEntry; //? si->atEntry= si->setStart; if (si->setCount > 0 && (si->atEntry + si->setCount < si->entryN)) si->maxN= si->atEntry + si->setCount; //? else maxN= entryN //? add contentType to this call ? //si->contentType= contentType; // null ok srsGlob2Struc(si); } void srsSetObjectClasses(srsSessionInfo si, char* classlist) { if (debug) fprintf(logf,"# setObjectClasses: "); int nobj= 0; srsFreeList(si->objClasses); if (!hasValue(classlist)) { si->objClasses[nobj++]= strdup("*"); } else { char* cp= strtok(classlist," ;,"); for ( ; nobjobjClasses[nobj++]= strdup(cp); if (debug) fprintf(logf," %s",cp); cp= strtok(NULL," ;,"); } } si->objClasses[nobj]= NULL; if (debug) fprintf(logf,"\n"); } int srsAvailableResults(srsSessionInfo si) { srsStruc2Glob(si); if (si->atEntry > si->maxN) return 0; else return si->maxN - si->atEntry; } /* do some tricks here based on objectClass request (objClasses[]) -- for BioseqSet, or * or none, return BioseqSet (one result, summary of matches) -- for Biodata, return full list w/ attributes, but no seq (unless asked for) -- for BioseqEntry, "" -- for BioseqRecord, return srsGetNextBioseqRecord -- for BinaryBioseqRecord, return gzip(srsGetNextBioseqRecord) -- for BioseqDatabank, return databank(s) (not here ?) -- for BioseqDirectory, return directory (not here ?) 
*/ char** srsGetNextResult(srsSessionInfo si) { // add pagesize option to get nresults in a chunk srsStruc2Glob(si); char** val= NULL; int did= 0; // set flag for result kind at query time, also other resultset info for (int ic= 0; (si->objClasses[ic]); ic++) { char* oc= si->objClasses[ic]; //? do this only if maxN > xxx ? if (strcasecmp(oc,"*")==0) { did++; val= srsGetBioseqSet(si); break; } if (strcasecmp(oc,"BioseqSet")==0) { did++; val= srsGetBioseqSet(si); break; } if (strcasecmp(oc,"BioseqEntry")==0) { did++; val= srsGetNextBioseqEntry(si, 0); break; } if (strcasecmp(oc,"BinaryBioseqEntry")==0) { did++; val= srsGetNextBioseqEntry(si, 1); break; } if (strcasecmp(oc,"Biodata")==0) { did++; val= srsGetNextBioseqEntry(si, 0); break; } if (strcasecmp(oc,"BioseqRecord")==0) { did++; val= srsGetNextBioseqRecord(si, 0); break; } if (strcasecmp(oc,"BinaryBioseqRecord")==0) { did++; val= srsGetNextBioseqRecord(si, 1); break; } if (strcasecmp(oc,"BioseqDirectory")==0) { did++; val= srsGetDirectoryInfo(si); break; } if (strcasecmp(oc,"BioseqDatabank")==0) { did++; if (hasValue(si->srslib)) { si->maxN= si->entryN= 1; val= getLibraryInfo(si, si->srslib, si->atEntry++); } else val= getLibraryInfo(si, NULL, si->atEntry++); break; } } if (did==0) val= srsGetBioseqSet(si); // default srsGlob2Struc(si); return val; } int srsGetResultsToFile(srsSessionInfo si, char* fname, char* contentType) { // do all of atEntry .. 
maxN to file // also need to handle results of lookupEntry getNamedLibraryInfo getLibraryInfo getDirectoryInfo int nout= 0, dorawdata= 0; srsStruc2Glob(si); if (fname==NULL || (strcmp(fname,"stdout")==0)) outf= stdout; else if ((outf = fopen(fname, "a")) == NULL) { (void)fprintf(stderr, "error opening: %s: %s\n", fname); return -1; } si->doxml= 0; if (contentType!=NULL && strstr(contentType,"xml")!=NULL) { si->doxml= 1; } if (si->srsResultSet) { while ( si->atEntry < si->maxN ) { char** kvl; srsGlob2Struc(si); kvl= srsGetNextResult(si); if (kvl) { nout += printKeyValList(si,kvl); srsFreeResult( si, kvl); } else break; } } if (outf != stdout) fclose(outf); outf= stdout; //? srsGlob2Struc(si); return nout; } static long srsStartDirLibQuery(srsSessionInfo si, char* objectClass) { srsFinishQuery(si); //queryId++; //char qname[128]; //sprintf(qname,"Qi%d",queryId); //msgid; << not unique srsFreeList(si->objClasses); si->objClasses[0]= strdup(objectClass); si->objClasses[1]= NULL; si->atEntry= 0; si->maxN= si->entryN= 1; //? no? or count libs ? if (strcmp(objectClass,"BioseqDatabank")==0) (void) getLibraryInfo(si, NULL, 9999); // count, set maxN return si->maxN; } // cut from main() // do various style queries using basedn, scope, filter // but return only # results, leave to caller to use srsNextResult() int srsAnySearch(srsSessionInfo si, char* qfilter) // other opts?, char* returnAttributes { char * srsquery = NULL, * lurl = NULL, * srsid = NULL; int wantDirInfo, wantLibInfo, doSearch, doIdLookup; int levels= 0, result= 0; // parse filter for styles? // ldap://.../basedn?attr?scope?filt?extn // getz style // others ? if (debug>0) fprintf(logf,"# srsAnySearch=%s\n", (qfilter==NULL)?"null":qfilter); srsStruc2Glob(si); if (hasValue(qfilter)) { if (strchr(qfilter,'?')) { parseLdapUrl(si, qfilter); srsGlob2Struc(si); //? } else si->filter= strdup(qfilter); } else si->filter= NULL; if (si->basedn) { // NOTE! 
this includes full dn sent by client, including server-base + subpath // need to split out server-base from subpath (if it exists) // really need for server-caller to tell me server-base, or use config file char * key, * val ; char * rdn= strdup(si->basedn); if (hasValue(si->suffix)) { char * e= strstr(rdn,si->suffix); if (e) { if (e[-1]= ',') e--; *e= '\0'; } } char * cp = strtok(rdn,","); // ! remove si->suffix from this ... at least from levels levels= 0; // always get something from strtok if basedn while(cp!=NULL) { char** atr = breakAttr(cp); levels++; key = (char*)atr[0]; val = (char*)atr[1]; if (strcasecmp("lib",key)==0) si->srslib= val; //breakAttr dups it - strdup(val); if (strcasecmp("id",key)==0) srsid= val; //strdup(val); // ?? save key=val list for later - xml output ? cp= strtok(NULL,","); } free(rdn); } else { si->basedn= strdup(""); //? or '/' levels= 0; } if (debug>0) fprintf(logf,"# basedn=%s, levels=%d\n", si->basedn, levels); if (hasValue(si->filter)) { if (debug>0) {fprintf(logf,"# <<< filter=%s\n",si->filter);fflush(logf);} if (strstr(si->filter,"[lib")!=NULL) srsquery= si->filter; else srsquery= ldap2SrsQuery( si, si->filter); srsGlob2Struc(si); if (debug>0) {fprintf(logf,"# >>> query=%s\n",(srsquery==NULL)?"null":srsquery);fflush(logf);} } if (si->scope < 0) { if (srsquery) si->scope= LDAP_SCOPE_SUBTREE; //hasValue(si->filter) || else if (levels>0) si->scope= LDAP_SCOPE_ONELEVEL; else si->scope= LDAP_SCOPE_BASE; } // need to know whether filter is empty or has more than objectclass=* wantDirInfo= ( ( levels == 0 && si->scope == LDAP_SCOPE_BASE ) // ^^ if basedn has suffix and that is missed, level is off || ( levels == 1 && si->scope == LDAP_SCOPE_BASE && !hasValue(si->srslib) ) ); wantLibInfo= ( (levels == 0 && si->scope == LDAP_SCOPE_ONELEVEL ) || (levels == 1 && si->scope != LDAP_SCOPE_SUBTREE ) ); doIdLookup= ( levels > 1 && hasValue(si->basedn) && hasValue(si->srslib)); // || scope != LDAP_SCOPE_SUBTREE ); doSearch= (si->scope == 
LDAP_SCOPE_SUBTREE ); //&& levels < 2 ); //do these elsewhere ? // srsSetSizelimit(si, sizelimit); // for later fix of global sizelimit // srsSetAttrList(si, attrlist); // srsSetLibrary(si, srslib); // srsSetDnList(si, dnkeys); // srsSetBaseDN(si, basedn); // splitAttrs( returnAttributes); //? // srsGlob2Struc(si); if (wantDirInfo) { if (debug>0) {fprintf(logf,"# srsGetDirectoryInfo\n" );fflush(logf);} result= srsStartDirLibQuery( si, "BioseqDirectory"); } // else if (wantLibInfo && levels == 1 && si->srslib == NULL ) { // wantLibInfo= 0; // } else if (wantLibInfo) { if (debug>0) {fprintf(logf, "# srsGetLibraryInfo\n" );fflush(logf);} result= srsStartDirLibQuery( si, "BioseqDatabank"); //^^ need to count libs available (all or specified?) and return that count } else if (doIdLookup) { char** atr; char * rdn, * key, * val; rdn= strdup(si->basedn); key= strchr(rdn,','); if (key) *key= 0; atr = breakAttr(rdn); key = (char*)atr[0]; val = (char*)atr[1]; if (si->srslib!=NULL && key!=NULL && val!=NULL) { if (debug>0) {fprintf(logf,"# srsLookupEntry=%s,%s,%s\n", si->srslib, key, val);fflush(logf);} //result= srsStartDirLibQuery( si, "BioseqRecord"); //!FIXME char qbuf[1024]; sprintf(qbuf,"[%s-%s:%s]",si->srslib,key,val); si->setStart = si->setCount = 0; result= srsStartQuery(si, qbuf); } free( rdn); } else if (doSearch) { //if (debug>0) {fprintf(logf,"# doSearch \n");fflush(logf);} result= srsStartQuery(si, srsquery); } srsGlob2Struc(si); return result; } //--------- end back functions ----------------- static void usage(srsSessionInfo si) { //? match some ldapsearch params ? fprintf( outf,"srs6ldap - srs6 search backend for LDAP directory services\n"); fprintf( outf,"options: \n"); fprintf( outf," -f '(&(objectclass=XXX)(|(lib=aaa)(lib=bbb))(fld=*xyz*)' == ldap filter query \n"); fprintf( outf," -g '[lib-fld:query]...' 
== srs/getz format query \n"); fprintf( outf," -a 'id,acc,org,des' == attributes/fields to return \n"); fprintf( outf," -a 'id:1,acc:99,org:1,des:5' == attributes/fields with maximum lines/entry \n"); fprintf( outf," -a 'id:1,objectclass=cla:99,acc:99,spp=org:1,info=des:5'\n"); fprintf( outf," == ldap-attr=srs-field mapping, with maximum lines \n"); fprintf( outf," -b basedn == append to dn: id=XXX,lib=aaa\n"); // ldapsearch fprintf( outf," -c configfile == read configurations from file\n"); fprintf( outf," -d #level == debug (1=minimal call opts; 2+parsing ; 3+result ids)\n"); fprintf( outf," -l logfile == log progress to file\n"); fprintf( outf," -m limit == limit output to #limit \n"); fprintf( outf," -o objectClasses == search/return these data classes; [ ,] delimited \n"); fprintf( outf," -r == get raw entry data only\n"); fprintf( outf," -s == operate as slapd backend, reading stdin for options, writing stdout\n"); fprintf( outf," -t == test query run\n"); fprintf( outf," -p format == sequence format (fasta, other), or filter(fmt=fasta)\n"); fprintf( outf," -u ldapUrl == ldap://host/lib=xxx,basedn?attrs?scope?(filter)?opts \n"); fprintf( outf," -x == output XMLized version of LDIF \n"); #ifdef USEZIP fprintf( outf," -z == output gzip compressed data fields\n"); #endif // add dirinfo, libinfo, idlookup, dosearch flags - how? // add lib, id opts - in basedn ? // ?? 
want to handle full url- ldap://host/dn?attrs?scope?filter?opts // lurl= "ldap://hostname/lib=xxx,basedn?attrs?scope?(&objectClass=BioseqRecord)(query)?sizelimit=x,yyy=z"; fprintf( outf,"\n"); exit(0); } #if !defined(SRS6LDAP_NOMAIN) int main ( int argc, char** argv ) { return srs6ldapMain ( argc, argv ); } #endif /*!defined(SRS6LDAP_NOMAIN)*/ int srs6ldapMain ( int argc, char** argv ) { char ch; char * srsquery = NULL, * lurl = NULL, * srsid = NULL, * obclasslist = NULL, * contype= NULL, * based = NULL; int n, nout, levels= 0, dorawdata= 0, dotest= 0; int wantDirInfo, wantLibInfo, doSearch, doIdLookup; debug= 10; //if (debug>0) fprintf(logf,"# srs6ldapMain: srsSessionOpen\n" ); //srsSessionInfo si= srsSessionOpen(-1); // need for ldap2SrsQuery ? //x scope= -1; // globalSi= srsStrucNew(); if (argc < 2) usage(globalSi); // jni ok here //if (debug>0) fprintf(logf,"# getopt\n" ); // jni fails here ?? while ((ch = getopt(argc, argv, "a:b:c:d:f:g:hl:m:o:p:rstu:xz")) != -1) switch(ch) { case 'a': globalSi->attrlist= strdup(optarg); break; case 'b': globalSi->basedn= strdup(optarg); break; case 'c': readConfig( globalSi,optarg); break; case 'd': debug= atoi(optarg); break; case 'f': globalSi->filter= strdup(optarg); break; case 'g': srsquery= strdup(optarg); break; case 'l': srsOpenLogfile(globalSi, optarg, debug); break; case 'm': globalSi->sizelimit= atol(optarg); if (globalSi->sizelimit<1) globalSi->sizelimit= 10; break; case 'o': obclasslist= strdup(optarg); break; case 'p': contype= strdup(optarg); break; case 'r': dorawdata= 1; break; case 's': doslapd= 1; break; case 't': dotest= 1; break; case 'u': lurl= strdup(optarg);break; case 'x': globalSi->doxml= 1; break; //instead of LDIF output; fixme to allow contentTypes like bioseq/fasta;xml #ifdef USEZIP case 'z': globalSi->dozip= 1; break; // also/instead set by objectClass=Binary... #endif case '?': case 'h': default: usage(globalSi); } srsSessionInfo si= srsSessionOpen(-1); // need for ldap2SrsQuery ? 
// srsSessionOpen does SOME of these globalSi sets, not all if (globalSi->doxml) globalSi->dozip= 0; if (hasValue(contype)) si->contentType= contype; if (hasValue(globalSi->basedn)) si->basedn= globalSi->basedn; if (hasValue(globalSi->suffix)) si->suffix= globalSi->suffix; if (hasValue(obclasslist)) srsSetObjectClasses(si,obclasslist); srsSetSizelimit(si, globalSi->sizelimit); // for later fix of global sizelimit si->attrlist= globalSi->attrlist; //? strdup() si->filter= globalSi->filter; si->dozip= globalSi->dozip; si->doxml= globalSi->doxml; //srsOpen(); >> srsSessionOpen does if (dotest) { srsquery= strdup(defquery); si->scope= LDAP_SCOPE_SUBTREE; } if (doslapd) { readSlapdShellCommands(si); } if (lurl) { parseLdapUrl(si, lurl); srsGlob2Struc(si); //? } if (si->basedn) { char * key, * val ; char * rdn= strdup(si->basedn); char * cp = strtok(rdn,","); levels= 0; // always get something from strtok if basedn while(cp!=NULL) { char** atr = breakAttr(cp); levels++; key = (char*)atr[0]; val = (char*)atr[1]; if (strcasecmp("lib",key)==0) si->srslib= val; //strdup(val); if (strcasecmp("id",key)==0) srsid= val; //strdup(val); // ?? save key=val list for later - xml output ? cp= strtok(NULL,","); } } else { si->basedn= strdup(""); //? or '/' levels= 0; } if (si->scope < 0) { if (si->filter || srsquery) si->scope= LDAP_SCOPE_SUBTREE; else if (levels>0) si->scope= LDAP_SCOPE_ONELEVEL; else si->scope= LDAP_SCOPE_BASE; } // need to know whether filter is empty or has more than objectclass=* wantDirInfo= ( levels == 0 && si->scope == LDAP_SCOPE_BASE ); wantLibInfo= ( (levels == 0 && si->scope == LDAP_SCOPE_ONELEVEL ) || (levels == 1 && si->scope != LDAP_SCOPE_SUBTREE ) ); doIdLookup= ( levels > 1 ); // || si->scope != LDAP_SCOPE_SUBTREE ); /* && srsid ?? 
*/ doSearch= (si->scope == LDAP_SCOPE_SUBTREE && levels < 2 ); if (si->filter) { if (debug>2) fprintf(logf,"# <<< filter=%s\n",si->filter); srsquery= ldap2SrsQuery( si, si->filter); srsGlob2Struc(si); if (debug>2) fprintf(logf,"# >>> query=%s\n",(srsquery==NULL)?"null":srsquery); } splitAttrs(si, si->attrlist); srsGlob2Struc(si); //more from slapd.srs_back_search() // srsSetAttrList(si, attrlist); /* null attrs sets to "all" */ // srsSetLibrary(si, srslib); /* NULL ok */ // srsSetDnList(si, dnkeys); // srsSetBaseDN(si, basedn); if (debug>0) fprintf(logf,"# srsldap main: levels= %d, scope=%d.\n", levels, si->scope, 0 ); if (wantDirInfo) { char** keyvallist; if (debug>0) fprintf(logf,"# srsGetDirectoryInfo\n" ); keyvallist = srsGetDirectoryInfo(si); printKeyValList(si, keyvallist); srsFreeResult(si,keyvallist); // if (err == LDAP_SUCCESS) sent++; else goto done; } if (wantLibInfo && levels == 1 && si->srslib == NULL ) { wantLibInfo= 0; } if (wantLibInfo) { char** keyvallist; int li; if (debug>0) fprintf(logf, "# srsGetLibraryInfo\n", 0,0, 0 ); for ( li= 0; ((keyvallist = srsGetLibraryInfo(si,li)) != NULL); li++) { printKeyValList(si, keyvallist); srsFreeResult(si,keyvallist); //if (err == LDAP_SUCCESS) sent++; else goto done; } } if (doIdLookup) { char** keyvallist; char** atr; char * rdn, * key, * val; rdn= strdup(si->basedn); key= strchr(rdn,','); if (key) *key= 0; atr = breakAttr(rdn); key = (char*)atr[0]; val = (char*)atr[1]; if (debug>0) fprintf(logf,"# >>> srsLookupEntry=%s,%s,%s\n", (si->srslib==NULL)?"null":si->srslib, key, val); keyvallist= srsLookupEntry(si, si->srslib, key, val); printKeyValList( si, keyvallist); srsFreeResult(si, keyvallist); free( rdn); //free( key ); free( val); // if (err == LDAP_SUCCESS) sent++; else goto done; } if (doSearch) { if (srsquery == NULL) { fprintf(logf,"# Need a search filter \n"); usage(globalSi); // unless have lib or wantDirInfo } long res= srsStartQuery(si, srsquery); srsGlob2Struc(si); if (si->srsResultSet) { nout= 
0; //for (n=1; n <= maxN; n++) while ( si->atEntry < si->maxN ) { char** kvl; //atEntry= n - 1; // NO - srsStartQuery() sets atEntry, and srsGetNext incs it srsGlob2Struc(si); if (dorawdata) { kvl= srsGetNextBioseqRecord(si, si->dozip); } else { kvl= srsGetNextResult(si); } if (kvl) { nout += printKeyValList( si, kvl); srsFreeResult( si, kvl); } } } } // slapd result if (doslapd) { fprintf( outf,"\n"); fprintf( outf,"RESULT:\n"); fprintf( outf,"code: %d\n",si->errorcode); } // close(logf); //? return 0; } // for contentType=fasta, do another way - use icarus parsing to get 'seq' field //static char* getFastaSeq(char* seqid, ENTRYv entry, LIBv lib, int dozip) //{ // return getSeqField("seq", "fasta",seqid, entry, lib, dozip); //} static char* getSeqField(char* fldname, char* format, char* seqid, ENTRYv entry, LIBv lib, PROGv prog, int dozip) { char * buf= NULL; FIELDv field= LibHasFieldNamed( lib, fldname, prog); if (field==NULL) return NULL; int isfasta= (strstr(format,"fasta")!=NULL); int c=0; TOKv tok = EntryNextFieldToken (entry, field, &c); if (tok) { STRv toks = EntryToken2Str (entry, tok, field, prog); char* val= _Str(toks); int sl= strlen(val); if (isfasta) { int nb= sl + int(sl/50) + strlen(seqid) + 128; buf= (char*) calloc( nb, sizeof(char)); strcpy(buf, ">"); strcat(buf, seqid); strcat(buf, "\n"); int n= 50; for (int i= 0; isl) n= sl-i; } } else { // raw seq if (dozip) { buf= zipString( val, sl); } else { int nb= sl + 1; buf= (char*) calloc( nb, sizeof(char)); strncpy(buf,val,nb); } } StrDel (toks); } return buf; } #ifdef USEIDSRS //typedef long long FIPTYPE; //typedef FIP64 FIPTYPE; // !! must be UINT4 not FIP64/long long or CC gets very silly !!!? 
typedef UINT4 FIPTYPE;
// missing from srs6 entry.h but in libsrs.a
ENTRYo* Entry_New();
void EntryIdRecordUnpack(ENTRYo*,char*);

/*
 * Read the text of one entry straight from the library's flat file.
 * The start offset comes from EntryGetFip(entry, 0); the length is the
 * distance to the next index record's offset (index position
 * IdGetFip(id)+1), or to the end of the file when there is no next record.
 * Returns a calloc()ed buffer of at most maxDsize bytes (plus terminator),
 * or NULL if the index or flat file cannot be opened.  NULL is also
 * returned when `dozip` is set: that branch is empty.
 *
 * NOTE(review): line breaks were lost in this copy of the file; the split
 * between comments and live statements below is a best reading -- confirm
 * against a pristine copy.
 * NOTE(review): `entry1` is an alias of `entry`, so EntryIdRecordUnpack
 * appears to overwrite the caller's entry with the next record -- confirm
 * that callers do not rely on `entry` afterwards.
 */
//static
char* getRawdataById(IDv id, char* idname, ENTRYv entry, LIBv lib, PROGv prog, int dozip)
{
  Int4 fileX, fileXe, len, idsize;
  FIPTYPE textFip, dataFip, dataSize;
  FIPTYPE topFip1, top2Fip1;
  char* idxbuff;
  int useBigFip= 1; // THIS IS PROBLEM!!? SOME of IdxRecordGet() returns are as BigFip
  // havent yet found srs6 API call which distinguishes big and little fips
  char* entryName= "?";
  Index * libInx;
  libInx = LibIndexOpen(lib, LibGetIdField (lib, prog), 0, prog);
  if (libInx == NULL) return NULL;
  fileXe= EntryGetFileX(entry); //! this one works best
  len= LibGetMaxNameLength (lib);
  idsize= IdGetSize ( id); // 5 = useBigFip, 4 = useFip
  useBigFip= (idsize>4); // ?? THIS IS IT ? - do we have any indices w/ idsize==4?
  // this works now ! !! ONLY IF VARS ARE NOT long-long/FIP64 (dang-dang SunSun CC++)
  textFip = EntryGetFip( entry, 0);
  dataFip = EntryGetFip( entry, 1);
  int c= 0;
  Int4 errCode= 0;
  LIBvFileType fileType= LibNextFileType ( lib, &c, prog);
  FIDELv* fidel= LibGetIdxFidel (lib, prog); //! this works !
  LIBvFlatFile lff= LibOpenFlatFile ( lib, fileXe, fileType, fidel, &errCode, 0, prog);
  FILEv file= LibFlatFileGetFile( lff);
  if (!file) return NULL;
  if (debug>1) {
    // FileSeekEnd ( file, dataFip); //??? get end of file? bogus data !!!
    // FIPTYPE dataEnd= FileTell(file);
    FIPTYPE dataEnd= FileGetSize (file);
    fprintf(logf,"getraw FileSeekEnd: tx=%d, dt=%d end=%d nm=%s \n", textFip, dataFip, dataEnd, entryName);
    fflush(logf);
  }
  // this works, if start w/ current entry, but cant make Entry_New !
  // ! need to handle case where current id is last in library !
  // idxbuf == NULL ?
  idxbuff= IdxRecordGet (libInx->getIdx(), IdGetFip(id)+1);
  if (idxbuff==NULL) {
    // no following index record: size runs to the end of the file
    FIPTYPE dataEnd= FileGetSize (file);
    dataSize = dataEnd - textFip;
  }
  else {
    ENTRYv entry1 = entry;
    // int en= EntryNext(entry1); // not useful
    // entry1 = Entry_New(); entry1->lib = lib; //entry1->id = id; // cant do
    EntryIdRecordUnpack (entry1, idxbuff);
    FIPTYPE dataEnd = EntryGetFip( entry1, 0);
    dataSize= dataEnd - textFip;
  }
  if (debug>1) {
    fprintf(logf,"getraw: at=%d, sz=%d fileXe=%d nm=%s \n", textFip, dataSize, fileXe, entryName);
    fflush(logf);
  }
  FileSeek64 ( file, textFip);
  FIPTYPE bufsize= dataSize;
  if (dataSize < 1) bufsize= 255; // error or last record, read to end of file ?
  else if (dataSize > maxDsize) {
    if (debug>0) fprintf(logf,"getraw: truncate sz=%ld to %d \n", dataSize, maxDsize);
    bufsize= maxDsize;
  }
  char* dbuf= NULL;
  if (dozip) {
  }
  else {
    dbuf= (char*) calloc(1,bufsize+1);
    //fseek( dataf, textFip, 0);
    //dataSize= fread( dbuf, dataSize, 1, dataf);
    //FileOpen( file);
    FileRead ( file, dbuf, bufsize); // doesnt return n read
    //FileClose( file); // leave open if same as last...
  }
  return dbuf;
}
#endif /* USEIDSRS*/

//// fixme for compression while reading in data !
//static void* zipStart(char* output, long outsize) //{ //#ifdef USEZIP // int zerr= Z_OK; // z_streamp zp= (z_streamp) calloc(1, sizeof(z_stream)); // zp->zalloc= Z_NULL; // zp->zfree = Z_NULL; // zp->opaque= Z_NULL; // zerr= deflateInit( zp, Z_DEFAULT_COMPRESSION); // if (zerr != Z_OK) { // free(zp); return NULL; // } // zp->next_out = (Bytef*) output; // zp->avail_out = outsize; // return zp; //#else // return NULL; //#endif //} // //static void* zipAdd(void* zstream, char* inbuf, int insize) //{ //#ifdef USEZIP // static const int bufsize = 10240; // int zerr= Z_OK; // int zflush= Z_NO_FLUSH; // if (zstream == NULL) return NULL; // z_streamp zp= (z_streamp)(zstream); // // if (insize==0) insize= strlen(inbuf); // zp->next_in = (Bytef*) inbuf; // zp->avail_in= insize; // if (debug>0) fprintf(logf,"zipAdd in=%d \n",insize); // // for (;;) { // if (zp->avail_out == 0) { // output= (char*) realloc(output, outsize + bufsize) ; // if (output==NULL) { zerr= Z_MEM_ERROR; break; } // zp->next_out = (Bytef*) (output + outsize) ; // zp->avail_out = bufsize ; // outsize += bufsize ; // } // // zerr = deflate( zp, zflush); // if (debug>2) fprintf(logf,"zipAdd out=%d err=%d\n",outsize - zp->avail_out,zerr); // // if (zflush == Z_FINISH && zerr == Z_STREAM_END) { zerr= Z_OK; break; } // if (zerr != Z_OK) break; // if (zflush == Z_FINISH && zp->avail_out != 0) break; // if (zp->avail_in == 0) zflush= Z_FINISH; // } // return XXX; // need output, outsize, zerr? zflush //#else // return NULL; //#endif //} // //static char* zipEnd(void* zstream) //{ //#ifdef USEZIP // int zerr= Z_OK; // int zflush= Z_FINISH; // if (zstream == NULL) return NULL; // z_streamp zp= (z_streamp)(zstream); // // outsize= outsize - zp->avail_out; // if (debug>0) fprintf(logf,"zipString out=%d err=%d\n",outsize,zerr); // deflateEnd( zp); // if (zerr != Z_OK) return NULL; // // free(zp); // output[outsize]= '\0'; //?? 
// return output; //#else // return NULL; //#endif //} static char* zipString( char* inbuf, int insize) { #ifdef USEZIP static const int zsize = 10240; int zerr; char* output; long bufsize, outsize; z_stream z; int zflush= Z_NO_FLUSH; z.zalloc= Z_NULL; z.zfree= Z_NULL; z.opaque= Z_NULL; zerr= deflateInit( &z, Z_DEFAULT_COMPRESSION); if (insize==0) insize= strlen(inbuf); z.next_in = (Bytef*)inbuf; z.avail_in= insize; if (debug>0) fprintf(logf,"zipString in=%d err=%d\n",insize,zerr); bufsize = zsize; //? or insize relative ? outsize = bufsize; output= (char*) calloc(1,bufsize+1); z.next_out = (Bytef*) output; z.avail_out = bufsize; //while (z.avail_in != 0) for (;;) { if (z.avail_out == 0) { output= (char*) realloc(output, outsize + bufsize) ; if (output==NULL) { zerr= Z_MEM_ERROR; break; } z.next_out = (Bytef*) (output + outsize) ; z.avail_out = bufsize ; outsize += bufsize ; } zerr = deflate( &z, zflush); if (debug>2) fprintf(logf,"zipString out=%d err=%d\n",outsize - z.avail_out,zerr); if (zflush == Z_FINISH && zerr == Z_STREAM_END) { zerr= Z_OK; break; } if (zerr != Z_OK) break; if (zflush == Z_FINISH && z.avail_out != 0) break; if (z.avail_in == 0) zflush= Z_FINISH; } outsize= outsize - z.avail_out; if (debug>0) fprintf(logf,"zipString out=%d err=%d\n",outsize,zerr); deflateEnd( &z); if (zerr != Z_OK) return NULL; output[outsize]= '\0'; //?? 
return output; #else return NULL; #endif } static STRv EntryToken2Str (ENTRYv entry, TOKv tok, FIELDv field, PROGv prog) { JOBv job; STRv s=NULL; FIELDvFormat format= NULL; //if (debug) printf("# tokcode=%d fldtype=%s\n",TokGetCode(tok), // FieldTypeGetName(FieldGetType(field,prog))); if (field && FieldIs (field, "formatted", prog)) { if ((format = FieldGetActiveFormat (field, prog))) { char * fname= FieldGetName(field,prog); if (debug>2) fprintf(logf,"# fld=%s format=%s\n",fname,FieldFormatGetName(format)); // Sequence,swiss if (strcmp(fname,"Sequence")==0) { SEQv seq= (SEQv)TokGetObj(tok); if (debug>2) { char * sn= SeqGetName (seq); fprintf(logf,"# seq name=%s len=%d\n",sn, SeqLen(seq)); } s = SeqGetStr(seq); } else s = TokGetStrv (tok); // seq: ** token is an object ** } } else s = TokGetStrv (tok); return s; } static int printKeyValList(srsSessionInfo si, char** kvl) { int iout= 0; if (kvl==NULL) return iout; if (debug>1) fprintf(logf,"dn: %s=%s\n", kvl[0],kvl[1] ); while (*kvl) { char *k= *kvl++; char *v= *kvl++; didPutRec= iout; printKeyVal(si, k, v); if (didPutRec) iout++; } if (si->doxml && iout>0) fprintf( outf,"\n",XMLRecordTag); fprintf( outf,"\n"); return iout; } static void printAttribsAll(srsSessionInfo si, ENTRYv entry, LIBv lib, PROGv prog, void (*keyvalHandler)(srsSessionInfo si,char* lkey, char* val) ) { TOKv tok; FIELDv field; int nat, c; char * lkey; nat= 9999; //? check table for limit? if (debug>2) fprintf(logf,"# printAttribsAll: "); for ( c=0 ; (field= LibNextField(lib, &c, prog) ) ; ) { //at++ << NO, LibNextField incs &at lkey= FieldGetShortName(field, prog); if (debug>2) fprintf(logf,"%s, ", lkey); if (strcmp(lkey,"all")==0) continue; //fails w/ 'all' //locuslink.i -- fails for $Field:[$DF_HeaderField name:'Locus Information'] field= LibHasFieldNamed( lib, lkey, prog); if (!field) continue; //? fails here for locuslink ^^ fix - but loose fields ! 
// NOTE(review): this stretch of the file is damaged.  Wherever the code had a
// '<' followed later by a '>', everything in between is missing, so several
// loop headers and bodies below are incomplete and the header of one function
// (the code after "callattr= 0;" calls itself "srsSetAttrs:" in its log text
// and reads `attrlist`; presumably splitAttrs -- confirm) is gone entirely.
// The surviving tokens are kept exactly as found; restore the missing text
// from a pristine copy before compiling.

// (tail of printAttribsAll; inner token loop damaged)
for (int c=0; c2) fprintf(logf,"\n"); }

// Walks si->srsfield[]; skips "dn" and fields the library does not have
// (logged when debug>1); picks up the per-attribute limit si->nattr[at] and
// the LDAP name si->ldapattr[at].  The inner token loop is damaged.
void printAttribs1(srsSessionInfo si, ENTRYv entry, LIBv lib, PROGv prog, void (*keyvalHandler)(srsSessionInfo si,char* lkey, char* val) ) {
  TOKv tok; FIELDv field; char * sfld; int nat, at; char * lkey;
  for ( at=0 ; (sfld= si->srsfield[at]) != 0; at++) {
    if (strcmp(sfld,"dn")==0) continue;
    field= LibHasFieldNamed( lib, sfld, prog);
    if (!field) { if (debug>1) fprintf(logf,"# missing fld=%s\n",sfld); continue; }
    nat= si->nattr[at];
    lkey= si->ldapattr[at];
    // (damaged from here: rest of printAttribs1 and the start of the next function are missing)
    for (int c=0; callattr= 0;
  srsFreeList(si->ldapattr);
  srsFreeList(si->srsfield);
  //si->ldapattr[0]= si->srsfield[0]= 0;
  if (debug) fprintf(logf,"# srsSetAttrs: ");
  if (!hasValue(attrlist)) { // attrlist==NULL
    if (debug) fprintf(logf,"all\n");
    si->allattr= 1;
    return;
  }
  int i= 0;
  char* cp= strtok(attrlist,";,");
  // (damaged loop header and first statements)
  for ( ; iallattr= 1; continue; }
    if (strcasecmp("all",atr[0])==0) { si->allattr= 1; continue; }
    /* //! need to malloc [] arrays here ! FIXME*/
    si->ldapattr[i] = (char*)atr[0];
    si->srsfield[i] = (char*)atr[1];
    si->nattr[i]= (int) atr[2];
    cp= strtok(NULL,";,");
  }
  si->ldapattr[i]= si->srsfield[i]= 0;
  if (debug) fprintf(logf,"\n");
}

// Public wrapper: forwards to splitAttrs.
void srsSetAttrs(srsSessionInfo si,char* attrlist) { splitAttrs(si, attrlist); }

// Same job as above for an already split list: clears the current attribute
// tables, selects "all attributes" when `attrlist` is NULL, otherwise fills
// ldapattr/srsfield/nattr from each item and NULL-terminates the tables.
void srsSetAttrList(srsSessionInfo si,char** attrlist) {
  si->allattr= 0;
  srsFreeList(si->ldapattr);
  srsFreeList(si->srsfield);
  if (debug) fprintf(logf,"# srsSetAttrList: ");
  if (attrlist==NULL) {
    if (debug) fprintf(logf,"all\n");
    si->allattr= 1;
    return;
  }
  int i= 0;
  // (damaged loop header and first statements)
  for ( ; iallattr= 1; continue; } // this is LDAP_ALL_USER_ATTRIBUTES
    if (strcasecmp("all",atr[0])==0) { si->allattr= 1; continue; } // need some other special attributes
    // content or format == sequence format? or use objectClass= - not here, do in query
    si->ldapattr[i] = (char*)atr[0];
    si->srsfield[i] = (char*)atr[1];
    si->nattr[i]= (int) atr[2];
  }
  si->ldapattr[i]= si->srsfield[i]= 0;
  if (debug) fprintf(logf,"\n");
}

// userattrs = ( attrs == NULL ) ? 
1 // : charray_inlist( attrs, LDAP_ALL_USER_ATTRIBUTES ); static int isspace(char c) { return (c == ' ' || c == '\t' || c < ' '); } static char * nextwhite( char * s) { if (!s) return (char*)0; while (*s && !isspace(*s)) s++; return s; } static char * overwhite( char * s) { if (!s) return (char*)0; while (*s && isspace(*s)) s++; return s; } static char * getValue( char * q, char delim) { char * p; if ((p = strchr( q, delim)) == (char*)0) return (char*)0; q = p + 1; if ((q = overwhite( q)) == (char*)0) return (char*)0; *p = '\0'; return q; } static int isendofline( char * s) { return (*s == '\r' || *s == '\n'); } static char * endofline( char * s) { if (!s) return (char*)0; for (; *s && !isendofline(s); s++); return s; } static char * overnullreplacement( char * s) { if (*s == '\r') *s++ = '\0'; if (*s == '\n') *s++ = '\0'; return s; } static void parseLdapUrl(srsSessionInfo si, char* lurl) { // lurl= "ldap://hostname/lib=xxx,basedn?attrs?scope?(&objectClass=BioseqRecord)(query)?sizelimit=x,yyy=z"; // parse for attrlist?scope?filter?opts if (lurl) { char* cattrs= getValue(lurl, '?'); char* cscope= getValue(cattrs, '?'); char* cfilt= getValue(cscope,'?'); char* copts= getValue(cfilt,'?'); char* cdn= strrchr( lurl,'/'); if (cdn) { *cdn++= '\0'; if (*cdn) si->basedn= strdup(cdn); } if (cattrs) si->attrlist= strdup(cattrs); if (cfilt) si->filter= strdup(cfilt); if (cscope) { if (strcasecmp(cscope,"base")==0) si->scope= LDAP_SCOPE_BASE; else if (strcasecmp(cscope,"one")==0) si->scope= LDAP_SCOPE_ONELEVEL; else if (strcasecmp(cscope,"sub")==0) si->scope= LDAP_SCOPE_SUBTREE; } if (copts) { // split on , key=val list } } } /* -- read openldap slapd shell backend stdin SEARCH ; msgid: long ; base: str ; scope: int ; deref: int ; sizelimit: int timelimit: int ; filter: string ; attrsonly : 0/1 ; attrs: str (all) */ static void handleConfigVal(srsSessionInfo si,char* key, char* val) { if (debug) fprintf(logf,"%s: %s ; ",key,val); if (strcasecmp(key,"base")==0) si->basedn= 
// ===========================================================================
// NOTE(review): from here to the closing "#endif" for LDAPFILTER the text
// looks as if its original line breaks were lost: each '//' remark now runs
// to the end of a very long physical line and hides the code after it.
// Several spans beginning with '<' also look truncated, for example
// "k<=kMaxLibs && ksrslib=" and "for (int i=0; i2)". Presumably an upstream
// copy has the missing text -- TODO confirm and restore before building.
// The code below is left untouched; these notes describe what is visible.
//
// Key dispatch (tail of a definition that starts earlier): key is compared
//   case-insensitively with strcasecmp; string settings are copied with
//   strdup, numeric ones are converted with atoi or atol, and "logfile" is
//   passed to srsOpenLogfile(si,val,debug). The "timelimit" branch carries
//   a "//x" marker and looks disabled.
//
// readSlapdShellCommands(si): reads stdin line by line with fgets into a
//   2048-byte buffer, skips lines shorter than 3 characters or for which
//   getValue(buf, ':') returns NULL, passes the position returned by
//   endofline(val) to overnullreplacement, then hands key and value to
//   handleConfigVal. Writes "# slapd input: " and a newline to logf when
//   debug is set.
//
// readConfig(si, config): returns -1 when hasValue(config) is false or when
//   fopen fails (the failure is reported on logf). Otherwise reads lines
//   into a 10240-byte buffer, takes the value from getValue(buf, '='),
//   ends the key at the position returned by nextwhite(key), and calls
//   handleConfigVal for each pair; then closes the file and sets didConfig.
//   NOTE(review): declared int, but no return statement is visible on the
//   success path.
//
// xmlEscapeOut(outf, val): writes val to outf one character at a time,
//   with separate cases for '>', '<' and '&'.
//   NOTE(review): as written, each of those cases outputs the same
//   character again; presumably the strings were XML entities once --
//   confirm. The same applies to the replacement halves of the three
//   substitution commands in xmlEscape.
//
// Without LDAPFILTER: pcrsSubst returns line unchanged; ldap2SrsQuery logs
//   a warning and returns filter only when it already contains "[lib".
//
// pcreMatch(patt, line, options): compiles patt with pcre_compile, runs
//   pcre_exec over line with a 30-entry ovector, and stores the result in
//   the file-level pcrsCount. When the count is positive the substrings
//   are fetched with pcre_get_substring_list and the list is returned.
//   NOTE(review): in the visible text listptr has no initializer and is
//   assigned only in the positive-count branch, the pcre_compile result is
//   not checked, and nothing releases re. Callers in this span test
//   pcrsCount before reading the list.
//
// pcrsSubList(sublist, line, dofree): compiles every entry of the
//   NULL-terminated sublist with pcrs_compile_command, chains the jobs
//   through their next field, and runs them with pcrs_execute_list. When
//   the returned count is positive, line is freed if dofree is set, the
//   new string is returned and pcrsDidMalloc is set to 1; otherwise the
//   input pointer is returned. The job list is freed in both cases.
//
// pcrsSubst(subpatt, line, dofree): same contract for one command, using
//   pcrs_execute and pcrs_free_job; a command that fails to compile leaves
//   line unchanged.
//
// xmlEscape(query): applies three substitution commands to query through
//   pcrsSubList with kDontFree.
//
// ldap2SrsQuery(si, query), LDAPFILTER build: works on a strdup copy of
//   query. It extracts (lib=..), (objectclass=..), (start=..), (count=..)
//   and (fmt|format=..) terms into libList, si->objClasses, si->setStart,
//   si->setCount and si->contentType, rewrites the >=, =>, <= and =<
//   comparison forms, turns remaining (key=value) terms into
//   "[lib-keyvalue]", runs the "mask term", "inbool" and "fixup"
//   substitution steps, and finally replaces "[lib-" with "[lib=" plus
//   either a brace-enclosed, space-separated list built from libList or
//   si->srslib. Returns the rewritten string, or NULL when no library
//   name is available.
//   NOTE(review): libList comes from new[] and no delete[] is visible;
//   spat[200] in the inbool loop is filled with strcpy/strcat without a
//   length check; the early "return NULL" does not free q.
//
// The remark block after ldap2SrsQuery holds, per its own heading, the
// Perl code for the function above.
// ===========================================================================
strdup(val); else if (strcasecmp(key,"suffix")==0) si->suffix= strdup(val); //? "suffix" == basedn ? -- need to distinguish server-base and caller-dn else if (strcasecmp(key,"scope")==0) si->scope= atoi(val); else if (strcasecmp(key,"deref")==0) si->deref= atoi(val); else if (strcasecmp(key,"sizelimit")==0) si->sizelimit= atol(val); //? got sizelimit == 0 and val == 0 even when called w/ value ? //x else if (strcasecmp(key,"timelimit")==0) si->timelimit= atoi(val); else if (strcasecmp(key,"attrsonly")==0) si->attrsonly= atoi(val); else if (strcasecmp(key,"msgid")==0) si->msgid= atol(val); else if (strcasecmp(key,"filter")==0) si->filter= strdup(val); else if (strcasecmp(key,"attrs")==0) si->attrlist= strdup(val); // whitespc delimited else if (strcasecmp(key,"dir.id")==0) { dirInfo.id= strdup(val); } else if (strcasecmp(key,"dir.name")==0) { dirInfo.name= strdup(val); } else if (strcasecmp(key,"dir.desc")==0) { dirInfo.desc= strdup(val); } else if (strcasecmp(key,"dir.url")==0) { dirInfo.url= strdup(val); } else if (strcasecmp(key,"dir.web")==0) { dirInfo.web= strdup(val); } else if (strcasecmp(key,"dir.con")==0) { dirInfo.con= strdup(val); } else if (strcasecmp(key,"debug")==0) { debug= atoi(val); } else if (strcasecmp(key,"logfile")==0) { srsOpenLogfile(si,val,debug); } else if (strcasecmp(key,"xml")==0) { si->doxml= atoi(val); } else if (strcasecmp(key,"zip")==0) { si->dozip= atoi(val); } } static void readSlapdShellCommands(srsSessionInfo si) { if (debug) fprintf(logf,"# slapd input: "); char buf[2048]; while ( fgets(buf, sizeof(buf), stdin) != 0 ) { if (strlen(buf)<3) continue; char * key= buf; char * val= getValue(buf, ':'); if (val==NULL) continue; char * e= endofline(val); if (e) overnullreplacement(e); handleConfigVal(si,key,val); } if (debug) fprintf(logf,"\n"); } static int readConfig(srsSessionInfo si, char* config) { char buf[10240]; FILE * configf= NULL; if (!hasValue(config)) return -1; if (debug) fprintf(logf,"# read config: %s = ", config); configf 
= fopen(config, "r"); if (configf == NULL) { (void)fprintf(logf, "error opening config: %s\n", config); return -1; } while ( fgets(buf, sizeof(buf), configf) != 0 ) { char * e; if (strlen(buf)<3) continue; char * key= buf; char * val= getValue(buf, '='); //getValueToks(buf, "=:\t "); if (val==NULL) continue; e= nextwhite(key); if (e) *e= '\0'; e= endofline(val); if (e) overnullreplacement(e); handleConfigVal(si,key,val); } fclose(configf); didConfig= 1; if (debug) fprintf(logf,"\n"); if (debug) fprintf(logf,"# dirInfo .id=%s, .name=%s\n", dirInfo.id, dirInfo.name); } static void xmlEscapeOut(FILE* outf, char* val) { for (char * c= val; (*c); c++) switch (*c) { case '>': fputs(">",outf); break; case '<': fputs("<",outf); break; case '&': fputs("&",outf); break; default : fputc( *c, outf); break; } } #ifndef LDAPFILTER //static char* xmlEscape( char* val) { return val; } // need replacement xmlEscape ! static char* pcrsSubst( char* subpatt, char* line, int dofree) { return line; } static char* ldap2SrsQuery(srsSessionInfo si, char* filter) { fprintf(logf,"WARNING: LDAPFILTER for ldap2SrsQuery is not defined in this compile.\n"); if (strstr(filter,"[lib")!=NULL) return filter; return NULL; // or fail? 
NULL; } #else /* LDAPFILTER */ // use pcre, pcrs C perl regex libraries // for optimum use - use pcrs job lists on one string static const char** pcreMatch(char* patt, char* line, int options) { const char *errptr; int erroffset; int ovector[30]; const char **listptr; int ovint= 30; pcrsCount= 0; //if (debug>2) fprintf(logf,"in='%s' ",line); pcre * re= pcre_compile(patt, options, &errptr, &erroffset, NULL); pcrsCount= pcre_exec(re, NULL, line, strlen(line), 0, 0, ovector, ovint); if (pcrsCount>0){ int err= pcre_get_substring_list(line, ovector, pcrsCount, &listptr); //if (debug>2) { // fprintf(logf," m=%d,mv=",pcrsCount); // for (int i=0; i2) fprintf(logf," mp:%s \n",patt); return listptr; } static char* pcrsSubList(char** sublist, char* line, int dofree) { char** lp= sublist; pcrs_job * joblist= NULL; pcrs_job * jobat; char *result; size_t length= 0; pcrsCount= 0; pcrsDidMalloc= 0; int i=0; //if (debug>2) fprintf(logf,"in='%s' ",line); while ( *lp!=NULL ) { pcrs_job * job = pcrs_compile_command( *lp, &pcrsCount); if (job!=NULL) { if (joblist==NULL) { joblist= job; } else { jobat->next= job; } jobat= job; } lp++; } pcrsCount = pcrs_execute_list(joblist, line, strlen(line)+1, &result, &length); if (pcrsCount>0) { //if (debug>2) fprintf(logf,"out='%s' ",result); if (dofree) free(line); line= result; pcrsDidMalloc= 1; } pcrs_free_joblist(joblist); //if (debug>2) fprintf(logf," slst:%s \n",*sublist); return line; } static char* pcrsSubst(char* subpatt, char* line, int dofree) { pcrs_job *job; char *result; size_t length= 0; pcrsCount= 0; pcrsDidMalloc= 0; //if (debug>2) fprintf(logf,"in='%s' ",line); job = pcrs_compile_command( subpatt, &pcrsCount); if (job == NULL) { //if (debug>2) fprintf(logf," error"); } else { pcrsCount = pcrs_execute(job, line, strlen(line)+1, &result, &length); if (pcrsCount>0) { //if (debug>2) fprintf(logf,"out='%s' ",result); if (dofree) free(line); line= result; pcrsDidMalloc= 1; } pcrs_free_job(job); } //if (debug>2) fprintf(logf," sb:%s 
\n",subpatt); return line; } static char* xmlEscape( char* query) { char* sublist[]= { "s,\\&,&,g", "s,\\>,>,g", "s,\\<,<,g", NULL}; char* q= pcrsSubList(sublist,query,kDontFree); return q; } // ldap2SrsQuery - perl tested parser (too bad C/C++/Java dont do such by default) //? read list of perl regex for filter2srsquery from config file? // note we are throwing away memory w/o freeing all these strings ! // need to be able to use multiple lib= values ! // ? require () enclosed in paren. block ? // (|(&(lib=swissprot)(des=kinase))(&(lib=trembl)(des=kinase))) // or // (&(|(lib=swissprot)(lib=trembl))(des=kinase)) // do simple case: use all libs as [lib={lib1 lib2}-fld:xxxx]... // this works: // ldap://iubio.bio.indiana.edu:3895/srv=srs?id,acc?sub?(&(objectClass=*)\ // (lib={swissprot trembl})(des=kinesin))?sizelimit=500 // also need to parse contentType/format (e.g. fasta) here? static const int kMaxLibs = 20; static char* ldap2SrsQuery(srsSessionInfo si, char* query) { char * q= strdup(query); // so we can free(q) safely // pcrsSubst() and pcrsSubList() will free input q if new one made char ** libList = new char*[kMaxLibs+1]; char * libMatch= "\\(lib=([^\\)]+)\\)"; int nlib= 0; for (int more= 1; more; ) { const char** libl= pcreMatch( libMatch, q, PCRE_CASELESS); more = (pcrsCount>0 && libl != NULL); if (more) { for (int k= 1; k<=kMaxLibs && ksrslib= strdup( lib); libList[nlib++]= strdup(lib); if (debug>1) fprintf(logf," lib:%s \n",lib); } libList[nlib]= 0; //cut below// q= pcrsSubst("s/\\(lib=([^\\)]+)\\)//i",q, kDoFree); } } // pull out all objectclass=(\w+), save to use in output type determination // assume for now >1 are all ORed choices char* ocp= "\\(objectclass=([^\\)]+)\\)"; int nobj= 0; for (int more= 1; more; ) { const char** list= pcreMatch( ocp, q, PCRE_CASELESS); more= (pcrsCount>0 && list != NULL); if (more) { srsFreeList(si->objClasses); for (int k= 1; k<=kMaxObjClass && kobjClasses[nobj++]= strdup(objcl); if (debug>1) fprintf(logf," objclass:%s 
\n",objcl); } si->objClasses[nobj]= NULL; q= pcrsSubst("s/\\(objectclass=([^\\)]+)\\)//i",q, kDoFree); } } // also pull out 'start', 'count' -- change to 'set-start', 'set-size' ? //(start=%d)(count=%d) char* startMatch= "\\(start=([^\\)]+)\\)"; for (int more= 1; more; ) { const char** list= pcreMatch( startMatch, q, PCRE_CASELESS); more= (pcrsCount>0 && list != NULL); if (more) { //for (int k= 1; ksetStart= atoi(list[1]); if (debug>0) fprintf(logf," setStart:%d \n",si->setStart); // } q= pcrsSubst("s/\\(start=([^\\)]+)\\)//ig",q, kDoFree); } } char* countMatch= "\\(count=([^\\)]+)\\)"; for (int more= 1; more; ) { const char** list= pcreMatch( countMatch, q, PCRE_CASELESS); more= (pcrsCount>0 && list != NULL); if (more) { //for (int k= 1; ksetCount= atoi(list[1]); if (debug>0) fprintf(logf," setCount:%d \n",si->setCount); // } q= pcrsSubst("s/\\(count=([^\\)]+)\\)//ig",q, kDoFree); } } char* formatMatch= "\\((fmt|format)=([^\\)]+)\\)"; for (int more= 1; more; ) { const char** list= pcreMatch( formatMatch, q, PCRE_CASELESS); more= (pcrsCount>2 && list != NULL); if (more) { //for (int k= 1; kcontentType= strdup(list[2]); if (debug>0) fprintf(logf," contentType:%s \n",si->contentType); // } q= pcrsSubst("s/\\((fmt|format)=([^\\)]+)\\)//ig",q, kDoFree); } } { char* sublist[]= { //"s/\\(lib=([^\\)]+)\\)//gi", // got these above //"s/\\(objectclass=\\*\\)//ig", //"s/\\(objectclass=[^\\)]*\\)//ig", // need to do something with specific objectclass=XXX requests // copy/move into another list for handling "s/\\(undefined\\)//ig", "s/\\(badfilter\\)//ig", // ldap decided it was no good ! should i log/return error? "s/>=(\\w+)/=#$1:/g", "s/=>(\\w+)/=#$1:/g", "s/<=(\\w+)/=#:$1/g", "s/=<(\\w+)/=#:$1/g", "s/\\((\\w+)=([^#])/($1=:$2/g", NULL}; q= pcrsSubList(sublist,q, kDoFree); } // should allow srsfield rename.. 
q= pcrsSubst("s/\\((\\w+)=([^\\)]+)\\)/[lib-$1$2]/g",q, kDoFree); // mask term { char* sublist[]= { "s,\\]\\[,\\]_\\[,g", "s,\\]\\(,\\]_\\(,g", NULL}; q= pcrsSubList(sublist,q, kDoFree); } // inbool char* ibp= "\\(([\\&\\|])([^\\&\\|\\(\\)]+)\\)"; for (int more= 1; more; ) { const char** list= pcreMatch( ibp, q, 0); more= (pcrsCount>0 && list != NULL); if (more) { const char * b = list[1]; // list[0] is all of pat? char * c = strdup(list[2]); char * cpat; if (strcmp(b,"|")==0) cpat= "s/([\\]\\}\\)])_/$1^_/g"; else cpat= "s/([\\]\\}\\)])_/$1*_/g"; char * c1 = pcrsSubst(cpat, c, kDoFree); char spat[200]; strcpy(spat,"s/"); strcat(spat,ibp); strcat(spat,"/\\{"); strcat(spat,c1); strcat(spat,"\\}_/"); free(c1); q= pcrsSubst(spat, q, kDoFree); } } // fixup { char* sublist[]= { "s,\\]\\(,\\]_\\(,g", "s,\\*_,\\&,g", "s,\\^_,\\|,g", "s,\\{,\\(,g", "s,\\},\\),g", "s,^[_\\s]*\\(,,", NULL}; q= pcrsSubList(sublist,q, kDoFree); } //q= pcrsSubst("s/\\)[_\\s]*$//",q); // this one fails - why?? //q= pcrsSubst("s/\\)([_\\s]*)$//",q); q= pcrsSubst("s/\\)[_\\s]*//",q, kDoFree); // ok w/o $ // $f =~ s/\[lib-/\[lib=$SrsLib-/; char* libpat = NULL; if (libList && libList[0]) { int n= 20; for (int c= 0; (libList[c]); c++) n += 1+strlen(libList[c]); libpat= (char*) calloc( n, sizeof(char)); strncpy(libpat,"{",n); for (int c= 0; (libList[c]); c++) { if (c>0) strncat(libpat," ",n); strncat(libpat,libList[c],n); } strncat(libpat,"}",n); } else if (si->srslib) { libpat= strdup(si->srslib); } if (libpat) { int n= strlen("s/\\[lib-/\\[lib=XXXXXXXXX-/") + strlen(libpat) + 256; char* spat= (char*) calloc( n, sizeof(char)); strncpy(spat,"s/\\[lib-/\\[lib=",n); strncat(spat,libpat,n); strncat(spat,"-/",n); q= pcrsSubst(spat,q, kDoFree); free(spat); free(libpat); } else return NULL; //? yes? return q; } /**** # the perl code for above --------------- # $scmd = ldapfilt2Srs($lfilt); # $scmd .= '&[lib-'.$spar[0].':'.$spar[1].']'; << for dn/id ? 
sub ldapfilt2Srs { local ($_)= @_; s/\(objectclass=*\)//ig; s/>=(\w+)/=#$1:/g; s/=>(\w+)/=#$1:/g; s/<=(\w+)/=#:$1/g; s/=<(\w+)/=#:$1/g; s/\((\w+)=([^#])/($1=:$2/g; # text flds while (s/\((\w+)=([^\)]+)\)/XXZ/) { my $k= $1; my $v= $2; $k= $SrsFields{$k} || $k; s/XXZ/[lib-$k$v]/; } my $f= $_; $f= maskterm($f); $f= inbool($f); $f= fixup($f); $f =~ s/\[lib-/\[lib=$SrsLib-/; return $f; } sub maskterm { $_ = shift; s,\]\[,\]_\[,g; s,\]\(,\]_\(,g; return $_; } sub inbool { local $_ = shift; my $nop= '[^\&\|\(\)]'; my $p= '\(([\&\|])('.$nop.'+)\)'; while (s,$p,ZZZ,) { my $b= $1; my $c= $2; if ($b eq '|') { $b= '^_'; } else { $b= '*_'; } $c =~ s,([\]\}\)])_,$1$b,g; s,ZZZ,\{$c\}_,; } return $_; } sub fixup { local $_ = shift; s,\*_,\&,g; s,\^_,\|,g; s,\{,\(,g; s,\},\),g; s,^[_\s]*\(,,; s,\)[_\s]*$,,; return $_; } sub xmlEscape { local $_ = shift; s,\&,&,g; s,\>,>,g; s,\<,<,g; s,\",",g; return $_; } *******/ #endif /* LDAPFILTER */