// Biogridlet.java // should be // package iubio.grid; import java.io.*; import java.util.*; import java.net.*; import javax.naming.*; import javax.naming.directory.*; /**

Biogridlet - basic biogrid toolkit component

A basic directory-access component for bioinformatics grids.

Simple test of a "Gridlet" for bio data directory access. For each compute node on your test grid, do this:

  1. Install/test/locate NCBI BLAST software (yet to do as a gridlet), and set bl to the directory holding blastall and formatdb (the commands below run $bl/formatdb and $bl/blastall)
  2. Download Biogridlet .class and .prop files, edit Biogridlet.prop properties to taste, especially QUERY selection. Make sure Java 1.3+ runtime is available
  3. Find a query biosequence in fasta format to test. A sample query set can be retrieved with:
         java Biogridlet count=100 ldap://bio-mirror.net:3895/srv=srs out=query \
           'query=(lib=genbank)(org=Anopheles gambiae)' 
  4. Use Biogridlet to copy a databank subset to each node and run blast:
    1. node1:
          java Biogridlet start=0 count=1000 | $bl/formatdb -i stdin -p F -o T -n databank1 
      		$bl/blastall -p blastn -d databank1 -i query -m 8 -o databank1.out
      		
    2. node2:
          java Biogridlet start=1000 count=1000 | $bl/formatdb -i stdin -p F -o T -n databank2  
      		$bl/blastall -p blastn -d databank2 -i query -m 8 -o databank2.out
      		
    3. node3 .. nodeN: as above, advancing start by count for each further node
  5. Copy the blast results from each node and assemble them into the full result (yet to do; see NBLAST)
The runtime cost for this grid example, from a few quick tests, is approximately the time it takes to run on one computer with a full databank, divided by the number of nodes and subset databanks you use.

Gridlet defined

From Jan K. Labanowski: Computational Portals for Chemistry Gridlets and XMLets
I found a new word gridlet in papers by Rajkumar Buyya and Manzur Murshed from Monash University: http://www.csse.monash.edu.au/~rajkumar/. By gridlet they understand the tiny GridApp that contains all information related to jobs and job execution management details such as jobs processing requirements.

Note: this should become part of the existing package iubio.grid used by BioGridRunner -- maybe as subpackage iubio.grid.gridlet?

Design



Notes

For now security & authentication will wait, as other globus-type components of grid. Design for restricted list of applications that can be run. @author d.gilbert, nov 2002, gilbertd@bio.indiana.edu */ public class Biogridlet { // properties keys /** properties key: data directory url; must have */ public static final String URL = "Biogridlet.URL"; // == Context.PROVIDER_URL ? /** url component */ public static final String PROTOCOL = "Biogridlet.PROTOCOL"; /** url component */ public static final String HOST = "Biogridlet.HOST"; /** url component */ public static final String PORT = "Biogridlet.PORT"; /** url component */ public static final String DN = "Biogridlet.DN"; /** url component */ public static final String PATH = "Biogridlet.PATH"; /** url component */ public static final String FILE = "Biogridlet.FILE"; /** url component */ public static final String REF = "Biogridlet.REF"; /** properties key: search scope, option, sub is default */ public static final String SCOPE = "Biogridlet.SCOPE"; /** properties key: query for databank, data field, etc. to search, ldap query syntax for now */ public static final String QUERY = "Biogridlet.QUERY"; /** properties key: start object number to retrieve from query result */ public static final String START = "Biogridlet.START"; /** properties key: number of objects to retrieve */ public static final String COUNT = "Biogridlet.COUNT"; /** properties key: objectClass to search/retrieve default * gets query summary, objectClasses should be defined in http://iubio.bio.indiana.edu/biogrid/directories/schema/bioseq.schema */ public static final String OBJECT = "Biogridlet.OBJECT"; /** properties key: result biosequence format limited choices now: fasta, native (e.g. 
genbank, embl, swissprot, other biosequence formats) */ public static final String FORMAT = "Biogridlet.FORMAT"; /** properties key: which object fields to return * = all, others are defined in http://iubio.bio.indiana.edu/biogrid/directories/schema/bioseq.schema */ public static final String ATTRIBUTES = "Biogridlet.ATTRIBUTES"; /** properties key: ldap extension controls, sizelimit=10, timelimit=1000 being useful */ public static final String EXTENSIONS = "Biogridlet.EXTENSIONS"; /** visible title for url */ public static final String TITLE = "Biogridlet.TITLE"; /** properties key: output control; listdn=false for no name */ public static final String LISTDN = "Biogridlet.LISTDN"; /** properties key: output control; listkey=false for no field key */ public static final String LISTKEY = "Biogridlet.LISTKEY"; /** properties key: output control; listkey=false for no field value */ public static final String LISTVAL = "Biogridlet.LISTVAL"; public static final String DEBUG = "Biogridlet.DEBUG"; /** properties key: output file, standard output is default */ public static final String OUTPUT = "Biogridlet.OUTPUT"; /** default properties file */ public static final String PROPERTIES= "Biogridlet.prop"; private DirContext dir; private NamingEnumeration dirresults; private Properties direnv; //String basedn; // get from env... private int nrecs= 999999999; //Integer.MAXINT; //? 
private boolean showdn= true, showkey= true, showval= true; private boolean debug= false; private boolean helping; private PrintStream out= System.out; private PrintStream err= System.err; /** run with Biogridlet.props properties and/or command-line options */ public static void main(String[] args) { String url= null, output= null, properties= Biogridlet.PROPERTIES; Properties env= new Properties(); try { env.load(new FileInputStream(properties)); } catch (Exception ex1) {} for (int i= 0; i0) { key= arg.substring(m,e); val= arg.substring(e+1); } else if (m>0) { key= arg.substring(m,arg.length()); } if (key==null) continue; if (key.equalsIgnoreCase("h")||key.equalsIgnoreCase("help")) new Biogridlet(env).usage(); if (key!=null && val!=null) env.put("Biogridlet."+key.toUpperCase(),val); if ( key.startsWith("ldap://") ) url= key; else if ( key.startsWith("http://") ) url= key; else if (key.equals("p")||key.startsWith("pro")) { //properties if (val==null) val= args[++i]; properties= val; //env.load(new FileInputStream(val)); try { env.load(new FileInputStream(properties)); } catch (Exception ex) {} } else if (key.equals("u")||key.equals("url")) { //url if (val==null) val= args[++i]; url= val; } else if (key.equals("o")||key.startsWith("out")) { //output if (val==null) val= args[++i]; output= val; env.setProperty(OUTPUT,output); } else { //? 
//if (key!=null && val!=null) env.put("Biogridlet."+key.toUpperCase(),val); } } Biogridlet bn= new Biogridlet(env); boolean ok= bn.search(url); if (ok) bn.retrieve(); else System.err.println("No results"); } public Biogridlet() { this(new Properties()); } public Biogridlet(Properties env) { setProperties(env); } public void usage() { out.println(" java "+getClass().getName()+" [ help | url | key=value | -p | -u ]"); out.println(" A basic gridlet client for Bio-data directory search and retrieval"); out.println(" -u url-to-search"); out.println(" -p proprties-file"); out.println(" key=value new property"); out.println(" Reads default properties from "+PROPERTIES); direnv.list(err); out.println(); //helping= true; System.exit(0); } public void setProperties(Properties env) { if (env==null) env= new Properties(); direnv= env; debug = boolOf( direnv.getProperty(DEBUG,"false") ); } public boolean search(String url) { Properties env= new Properties(direnv); if (url==null || url.length()==0) url= env.getProperty(URL); if (url!=null) env= parseUrl(url, env); if (env.getProperty(URL)==null) usage(); if (debug) { err.println("Search env:"); env.list(err); } if ("ldap".equals(env.getProperty(PROTOCOL))) return ldapsearch(env); else if ("http".equals(env.getProperty(PROTOCOL))) return false; //websearch(env); else return false; //? } public void retrieve() { String output= direnv.getProperty(OUTPUT); if (output!=null && output.length()>1) try { out= new PrintStream( new FileOutputStream( output)); } catch (Exception ex) {} // ldapsearch -LLL equivalents... 
showdn = boolOf( direnv.getProperty(LISTDN,"true") ); showkey= boolOf( direnv.getProperty(LISTKEY,"true") ); showval= boolOf( direnv.getProperty(LISTVAL,"true") ); int nitems= 0; if (dirresults!=null) try { for (int ir= 0; ir < nrecs && dirresults.hasMore() ; ir++) { SearchResult nc = (SearchResult) dirresults.next(); String name= nc.getName(); //v.addElement("dn: "); //v.addElement(name); //if (namelist!=null) namelist.add(name); //if (saveattr!=null) saveattr.put("dn",name); if (showdn) { out.print("dn: "); out.println(name); } Attributes attrs= nc.getAttributes(); for (NamingEnumeration es= attrs.getAll(); es.hasMore() ; ) { Attribute at= (Attribute) es.next(); String na= at.getID(); //if (saveattr!=null && saveattr.containsKey(na)) // saveattr.put(na,at.get()); if (showkey && !showval) out.println(na); else for (NamingEnumeration ea= at.getAll(); ea.hasMore() ; ) { Object va= ea.next(); //v.addElement(na); v.addElement(va.toString()); if (showval) { if (showkey) out.print(na+": " ); // if binary .. handle out.println(va); } } } if (showdn||showkey) out.println(); //? 
always nitems++; nc.setAttributes(null); nc.setObject(null); nc= null; } } catch (Exception e) { if (debug) err.println(getClass().getName()+".retrieve.ERROR: "+e.getMessage()); } if (debug) { err.println("retrieved: "+nitems); } out.close(); } boolean ldapsearch(Properties env) { //String query= env.getProperty(QUERY); //int e= query.lastIndexOf(")"); if (e<0) e= query.length(); StringBuffer qb= new StringBuffer(); if (env.getProperty(OBJECT)!=null) qb.append("(objectClass="+env.getProperty(OBJECT)+")"); if (env.getProperty(START)!=null) qb.append("(start="+env.getProperty(START)+")"); if (env.getProperty(COUNT)!=null) qb.append("(count="+env.getProperty(COUNT)+")"); if (env.getProperty(FORMAT)!=null) qb.append("(format="+env.getProperty(FORMAT)+")"); boolean doand= (qb.length()>0); qb.append(env.getProperty(QUERY)); if (doand) { qb.insert(0,"(&"); qb.append(")"); } if (debug) { System.err.println("ldap query: "+qb); } return ldapsearch(env, env.getProperty(URL), env.getProperty(SCOPE), qb.toString(), splitString( env.getProperty(ATTRIBUTES)," ,;"), splitString( env.getProperty(EXTENSIONS),"=,;")); } boolean ldapsearch( Properties env, String ldapurl, String scope, String filter, String[] attr, String[] extn) { if (attr==null) attr= new String[0]; if (extn==null) extn= new String[0]; String sizelimit= null, timelimit= null, deref= null; for (int i=0; i0) sc.setReturningAttributes(attr); //? 
if (filter==null) filter="(objectClass=*)"; dir = (DirContext) new InitialDirContext(env); dirresults = dir.search( basedn, filter, sc); return (dirresults.hasMore()); } catch (Exception e) { if (debug) System.err.println(getClass().getName()+".ldapsearch.ERROR: "+e.getMessage()); // report error return false; } } private static final void hput(Hashtable h,String k,String v) { if (v!=null) h.put(k,v); //else h.put(k,""); } public Properties parseUrl(String url, Properties h) { if (h==null) h= new Properties(); if (url.startsWith("ldap")) try { // ldap://host:port/dn?attributes?scope?filter?extensions com.sun.jndi.ldap.LdapURL lu= new com.sun.jndi.ldap.LdapURL(url); //hput(h,"url",lu); //? hput(h, URL,lu.toString()); hput(h, PROTOCOL,lu.getScheme()); hput(h, HOST,lu.getHost()); //int port= lu.getPort(); if (port==0) port= 389; hput(h, PORT,String.valueOf(lu.getPort())); hput(h, DN,lu.getDN()); hput(h, ATTRIBUTES,lu.getAttributes()); hput(h, SCOPE,lu.getScope()); hput(h, QUERY,lu.getFilter()); hput(h, EXTENSIONS,lu.getExtensions()); String t= lu.getScheme()+ ":/"+ lu.getHost() + "/"+ lu.getDN(); hput(h, TITLE,t); return h; } catch (Exception ex) { } try { URL lu= new URL(url); // java.net.MalformedURLException: unknown protocol: ldap hput(h,URL,lu.toString()); hput(h,PROTOCOL,lu.getProtocol()); hput(h,HOST,lu.getHost()); hput(h,PORT,String.valueOf(lu.getPort())); hput(h,PATH,lu.getPath()); hput(h,FILE,lu.getFile()); hput(h,REF,lu.getRef()); hput(h,QUERY,lu.getQuery()); String t; if ( lu.getHost()==null || "localhost".equals(lu.getHost()) ) t= lu.getFile(); else t= lu.getProtocol()+ ":/"+ lu.getHost() + lu.getFile(); hput(h,TITLE,t); } catch (Exception e) { hput(h,URL,url); int c= url.indexOf(":"); if (c>0) hput(h,PROTOCOL,url.substring(0,c)); else hput(h,PROTOCOL, "unknown"); hput(h,TITLE,url); } return h; } private static final boolean boolOf(Object val) { String b= String.valueOf(val); return "true".equalsIgnoreCase(b) || "1".equals(b) || "on".equalsIgnoreCase(b) 
|| "yes".equalsIgnoreCase(b); } private static String[] splitString(String s, String del) { if (s==null) return new String[0]; StringTokenizer st= new StringTokenizer(s, del); int n= st.countTokens(); String[] ss= new String[n]; for (int i=0; i