emboss EMBOSSDIR=/bio/mb/emboss/ EMBOSS_ACDROOT=/bio/mb/emboss/share/EMBOSS/acd PLPLOT_LIB=/bio/mb/emboss/share/EMBOSS empath=/bio/mb/emboss/bin/ PATH=/bio/mb/emboss/bin/:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/usr/local/sbin packinfo About/EMBOSS TITLE EMBOSS INFO European Molecular Biology Open Source Suite (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/ 2.2.0 Tue Feb 5 00:55:05 EST 2002 dghome abiview Display/abiview TITLE abiview (EMBOSS) INFO Reads ABI file and display the trace (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/abiview.html fname -fname=$value outseq output output biosequence/genbank abiview.out.gb -outseq=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value separate false -separate yticks false -yticks sequence true -nosequence window 40 -window=$value bases GATC -bases=$value goutfile output output image/pict abiview.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}abiview -auto $fname $outseq $graph $separate $yticks $sequence $window $bases $goutfile ajfeatest Test/ajfeatest TITLE ajfeatest (EMBOSS) INFO Reads and writes (returns) a sequence and its features (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ajfeatest.html sequence input input biosequence/genbank ajfeatest.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank ajfeatest.out.gb -outseq=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ajfeatest -auto $sequence $outseq ajtest Test/ajtest TITLE ajtest (EMBOSS) INFO Test file for ACD parsing (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ajtest.html sequence input input biosequence/genbank ajtest.in.gb -sequence=$value -sformat=genbank seqset input input biosequence/genbank ajtest.in.gb -seqset=$value -sformat=genbank STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr 
${empath}ajtest -auto $sequence $seqset alignwrap alignment/global/alignwrap TITLE alignwrap (EMBOSS) INFO Aligns a set of sequences to a seed alignment (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/alignwrap.html inpath ./ -inpath=$value 0 Directory containing the seed alignments (input) extn .align -extn=$value 1 File extension of seed alignment files (input) scopfamilies -scopfamilies=$value 2 scop families file containing the set of sequences in EMBL-like format outpath ./tmp/ -outpath=$value 3 Directory for extended alignments (output) outextn .extalign -outextn=$value 4 File extension of extended alignment files (output) STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}alignwrap -auto $inpath $extn $scopfamilies $outpath $outextn antigenic Protein/Motifs/antigenic TITLE antigenic (EMBOSS) INFO Finds antigenic sites in proteins (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/antigenic.html sequence input input biosequence/genbank antigenic.in.gb -sequence=$value -sformat=genbank seqtype PureProtein minlen 6 -minlen=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}antigenic -auto $sequence $minlen backtranseq Nucleic/Translation/backtranseq Protein/Composition/backtranseq TITLE backtranseq (EMBOSS) INFO Back translate a protein sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/backtranseq.html sequence input input biosequence/genbank backtranseq.in.gb -sequence=$value -sformat=genbank seqtype PureProtein cfile Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut 
|Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -cfile=$value outfile output output biosequence/genbank backtranseq.out.gb -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}backtranseq -auto $sequence $cfile $outfile banana Nucleic/Composition/banana TITLE banana (EMBOSS) INFO Bending and curvature plot in B-DNA (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/banana.html sequence input input biosequence/genbank banana.in.gb -sequence=$value -sformat=genbank seqtype puredna anglesfile input input text/plain banana.in -anglesfile=$value data false -data graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value residuesperline 50 -residuesperline=$value outfile output output text/plain banana.out -outfile=$value goutfile output output image/pict banana.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}banana 
-auto $sequence $anglesfile $data $graph $residuesperline $outfile $goutfile biosed Sequence Edit/biosed TITLE biosed (EMBOSS) INFO Replace or delete sequence sections (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/biosed.html sequence input input biosequence/genbank biosed.in.gb -sequence=$value -sformat=genbank delete false -delete target N -target=$value replace A -replace=$value outseq output output biosequence/genbank biosed.out.gb -outseq=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}biosed -auto $sequence $delete $target $replace $outseq btwisted Nucleic/Composition/btwisted TITLE btwisted (EMBOSS) INFO Calculates the twisting in a B-DNA sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/btwisted.html sequence input input biosequence/genbank btwisted.in.gb -sequence=$value -sformat=genbank seqtype PureDNA angledata Eangles.dat -angledata=$value energydata Eenergy.dat -energydata=$value outfile output output text/plain btwisted.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}btwisted -auto $sequence $angledata $energydata $outfile cai Nucleic/Codon usage/cai TITLE cai (EMBOSS) INFO CAI codon adaptation index (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cai.html seqall input input biosequence/genbank cai.in.gb -seqall=$value -sformat=genbank seqtype DNA cfile Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut 
|Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -cfile=$value outfile output output text/plain cai.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cai -auto $seqall $cfile $outfile chaos Nucleic/Composition/chaos TITLE chaos (EMBOSS) INFO Create a chaos game representation plot for a sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/chaos.html sequence input input biosequence/genbank chaos.in.gb -sequence=$value -sformat=genbank seqtype dna data false -data graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain chaos.out -outfile=$value goutfile output output image/pict chaos.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}chaos -auto $sequence $data $graph $outfile $goutfile charge Protein/Composition/charge TITLE charge (EMBOSS) INFO Protein charge plot (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/charge.html seqall input input biosequence/genbank charge.in.gb 
-seqall=$value -sformat=genbank seqtype protein plot false -plot window 5 -window=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain charge.out -outfile=$value aadata Eamino.dat -aadata=$value goutfile output output image/pict charge.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}charge -auto $seqall $plot $window $graph $outfile $aadata $goutfile checktrans Protein/Composition/checktrans TITLE checktrans (EMBOSS) INFO Reports STOP codons and ORF statistics of a protein sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/checktrans.html sequence input input biosequence/genbank checktrans.in.gb -sequence=$value -sformat=genbank seqtype stopprotein orfml 100 -orfml=$value report output output text/plain checktrans.out -report=$value outseq output output biosequence/genbank checktrans.out.gb -outseq=$value featout output output text/plain checktrans.out -featout=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}checktrans -auto $sequence $orfml $report $outseq $featout chips Nucleic/Codon usage/chips TITLE chips (EMBOSS) INFO Codon usage statistics (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/chips.html seqall input input biosequence/genbank chips.in.gb -seqall=$value -sformat=genbank seqtype DNA cfile Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut 
|Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -cfile=$value window 30 -window=$value outfile output output text/plain chips.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}chips -auto $seqall $cfile $window $outfile cirdna Display/cirdna TITLE cirdna (EMBOSS) INFO Draws circular maps of DNA constructs (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cirdna.html graphout png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graphout=$value inputfile input input text/plain cirdna.in -inputfile=$value originangle 90 -originangle=$value posticks Out -posticks=$value posblocks In -posblocks=$value intersymbol Y -intersymbol=$value intercolor 1 -intercolor=$value interticks N -interticks=$value gapsize 500 -gapsize=$value ticklines N -ticklines=$value tickheight 1 -tickheight=$value blockheight 1 -blockheight=$value rangeheight 1 -rangeheight=$value gapgroup 1 -gapgroup=$value postext 1 -postext=$value goutfile output output image/pict cirdna.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cirdna 
-auto $graphout $inputfile $originangle $posticks $posblocks $intersymbol $intercolor $interticks $gapsize $ticklines $tickheight $blockheight $rangeheight $gapgroup $postext $goutfile codcmp Nucleic/Codon usage/codcmp TITLE codcmp (EMBOSS) INFO Codon usage table comparison (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/codcmp.html first Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -first=$value second Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut 
|Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -second=$value outfile output output text/plain codcmp.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}codcmp -auto $first $second $outfile coderet Feature tables/coderet Nucleic/Translation/coderet TITLE coderet (EMBOSS) INFO Extract CDS, mRNA and translations from feature tables (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/coderet.html seqall input input biosequence/genbank coderet.in.gb -seqall=$value -sformat=genbank seqtype DNA cds true -nocds mrna true -nomrna translation true -notranslation seqout output output biosequence/genbank coderet.out.gb -seqout=$value STDOUT output text/plain stdout $stdout 
STDERR output text/plain stderr $stderr ${empath}coderet -auto $seqall $cds $mrna $translation $seqout complex Nucleic/Composition/complex TITLE complex (EMBOSS) INFO Find the linguistic complexity in nucleotide sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/complex.html sequence input input biosequence/genbank complex.in.gb -sequence=$value -sformat=genbank seqtype dna omnia false -omnia 0 calculate over a set of sequences outseq output output biosequence/genbank complex.out.gb -outseq=$value lwin 100 -lwin=$value step 5 -step=$value 1 the displacement of the window over the sequence sim 0 -sim=$value 2 calculate the linguistic complexity by comparison with a number of simulations having a uniform distribution of bases jmin 4 -jmin=$value 3 " jmax 6 -jmax=$value 4 " freq false -freq 5 execute the simulation of a sequence based on the base frequency of the original sequence print false -print 6 generate a file named UjTable containing the values of Uj for each word j in the real sequence(s) and in any simulated sequences outfile output output text/plain complex.out -outfile=$value ujtable output output text/plain complex.out -ujtable=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}complex -auto $sequence $omnia $outseq $lwin $step $sim $jmin $jmax $freq $print $outfile $ujtable compseq Nucleic/Composition/compseq Protein/Composition/compseq TITLE compseq (EMBOSS) INFO Counts the composition of dimer/trimer/etc words in a sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/compseq.html sequence input input biosequence/genbank compseq.in.gb -sequence=$value -sformat=genbank word 2 -word=$value 0 This is the size of word (n-mer) to count. Thus if you want to count codon frequencies, you should enter 3 here. 
outfile output output text/plain compseq.out -outfile=$value infile input input text/plain compseq.in -infile=$value zerocount true -nozerocount 1 You can make the output results file much smaller if you do not display the words with a zero count. frame 0 -frame=$value 2 The normal behaviour of 'compseq' is to count the frequencies of all words that occur by moving a window of length 'word' up by one each time. This option allows you to move the window up by the length of the word each time, skipping over the intervening words. You can count only those words that occur in a single frame of the word by setting this value to a number other than zero. If you set it to 1 it will only count the words in frame 1, 2 will only count the words in frame 2 and so on. ignorebz true -noignorebz 3 The amino acid code B represents Asparagine or Aspartic acid and the code Z represents Glutamine or Glutamic acid. These are not commonly used codes and you may wish not to count words containing them, just noting them in the count of 'Other' words. reverse false -reverse 4 Set this to be true if you also wish to count words in the reverse complement of a nucleic sequence. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}compseq -auto $sequence $word $outfile $infile $zerocount $frame $ignorebz $reverse cons Alignment/Consensus/cons TITLE cons (EMBOSS) INFO Creates a consensus from multiple alignments (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cons.html msf input input biosequence/genbank cons.in.gb -msf=$value -sformat=genbank seqtype gapany datafile EDNAMAT -datafile=$value plurality -plurality=$value 0 Set a cut-off for the number of positive matches below which there is no consensus. The default plurality is taken as half the total weight of all the sequences in the alignment. 
setcase 0 -setcase=$value 1 Sets the threshold for the positive matches above which the consensus is upper-case and below which the consensus is in lower-case. identity 0 -identity=$value 2 Provides the facility of setting the required number of identities at a site for it to give a consensus at that position. Therefore, if this is set to the number of sequences in the alignment only columns of identities contribute to the consensus. outseq output output biosequence/genbank cons.out.gb -outseq=$value name -name=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cons -auto $msf $datafile $plurality $setcase $identity $outseq $name contacts Protein/3D Structure/contacts TITLE contacts (EMBOSS) INFO Reads coordinate files and writes contact files (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/contacts.html cpdb ./ -cpdb=$value cpdbextn .pxyz -cpdbextn=$value con ./ -con=$value conextn .con -conextn=$value thresh 1.0 -thresh=$value ignore 20.0 -ignore=$value 0 If any two atoms from two different residues are at least this distance apart then no further inter-atomic contacts will be checked for that residue pair. This speeds the calculation up considerably. 
vdwf Evdw.dat -vdwf=$value conerrf output output text/plain contacts.out -conerrf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}contacts -auto $cpdb $cpdbextn $con $conextn $thresh $ignore $vdwf $conerrf corbatest Test/corbatest TITLE corbatest (EMBOSS) INFO Test of EMBL corba retrieval (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/corbatest.html entry hsfau -entry=$value outfile output output text/plain corbatest.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}corbatest -auto $entry $outfile cpgplot Nucleic/CpG Islands/cpgplot TITLE cpgplot (EMBOSS) INFO Plot CpG rich areas (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cpgplot.html sequence input input biosequence/genbank cpgplot.in.gb -sequence=$value -sformat=genbank seqtype DNA window 100 -window=$value 0 The percentage CG content and the Observed frequency of CG is calculated within a window whose size is set by this parameter. The window is moved down the sequence and these statistics are calculated at each position that the window is moved to. shift 1 -shift=$value 1 This determines the number of bases that the window is moved each time after values of the percentage CG content and the Observed frequency of CG are calculated within the window. minlen 200 -minlen=$value 2 This sets the minimum length that a CpG island has to be before it is reported. minoe 0.6 -minoe=$value 3 This sets the minimum average observed to expected ratio of C plus G to CpG in a set of 10 windows that are required before a CpG island is reported. minpc 50. -minpc=$value 4 This sets the minimum average percentage of G plus C in a set of 10 windows that are required before a CpG island is reported. 
outfile output output text/plain cpgplot.out -outfile=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value obsexp true -noobsexp 5 If this is set to true then the graph of the observed to expected ratio of C plus G to CpG within a window is displayed. cg true -nocg 6 If this is set to true then the graph of the regions which have been determined to be CpG islands is displayed. pc true -nopc 7 If this is set to true then the graph of the percentage C plus G within a window is displayed. featout output output text/plain cpgplot.out -featout=$value goutfile output output image/pict cpgplot.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cpgplot -auto $sequence $window $shift $minlen $minoe $minpc $outfile $graph $obsexp $cg $pc $featout $goutfile cpgreport Nucleic/CpG Islands/cpgreport TITLE cpgreport (EMBOSS) INFO Reports all CpG rich regions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cpgreport.html sequence input input biosequence/genbank cpgreport.in.gb -sequence=$value -sformat=genbank seqtype DNA score 17 -score=$value 0 This sets the score for each CG sequence found. A value of 17 is more sensitive, but 28 has also been used with some success. 
outfile output output text/plain cpgreport.out -outfile=$value featout output output text/plain cpgreport.out -featout=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cpgreport -auto $sequence $score $outfile $featout cusp Nucleic/Codon usage/cusp TITLE cusp (EMBOSS) INFO Create a codon usage table (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cusp.html sequence input input biosequence/genbank cusp.in.gb -sequence=$value -sformat=genbank seqtype DNA cfile Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut |Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -cfile=$value outfile output 
output text/plain cusp.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cusp -auto $sequence $cfile $outfile cutgextract Utilities/Database creation/cutgextract TITLE cutgextract (EMBOSS) INFO Extract data from CUTG (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cutgextract.html wildspec *.codon -wildspec=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cutgextract -auto $wildspec cutseq Sequence Edit/cutseq TITLE cutseq (EMBOSS) INFO Removes a specified section from a sequence. (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/cutseq.html sequence input input biosequence/genbank cutseq.in.gb -sequence=$value -sformat=genbank seqtype gapany outseq output output biosequence/genbank cutseq.out.gb -outseq=$value from -from=$value 0 This is the start position (inclusive) of the section of the sequence that you wish to remove. to -to=$value 1 This is the end position (inclusive) of the section of the sequence that you wish to remove. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}cutseq -auto $sequence $outseq $from $to dan Nucleic/Composition/dan TITLE dan (EMBOSS) INFO Calculates DNA RNA/DNA melting temperature (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dan.html sequence input input biosequence/genbank dan.in.gb -sequence=$value -sformat=genbank seqtype DNA windowsize 20 -windowsize=$value 0 The values of melting point and other thermodynamic properties of the sequence are determined by taking a short length of sequence known as a window and determining the properties of the sequence in that window. The window is incrementally moved along the sequence with the properties being calculated at each new position. 
shiftincrement 1 -shiftincrement=$value 1 This is the amount by which the window is moved at each increment in order to find the melting point and other properties along the sequence. dnaconc 50. -dnaconc=$value saltconc 50. -saltconc=$value plot -plot 2 If this is not specified then the file of output data is produced, else a plot of the melting point along the sequence is produced. mintemp 55. -mintemp=$value 3 Enter a minimum value for the temperature scale (y-axis) of the plot. graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value rna -rna 4 This specifies that the sequence is an RNA sequence and not a DNA sequence. product -product 5 This prompts for percent formamide, percent of mismatches allowed and product length. formamide 0. -formamide=$value 6 This specifies the percent formamide to be used in calculations (it is ignored unless -product is used). mismatch 0. -mismatch=$value 7 This specifies the percent mismatch to be used in calculations (it is ignored unless -product is used). prodlen -prodlen=$value 8 This specifies the product length to be used in calculations (it is ignored unless -product is used). thermo -thermo 9 Output the DeltaG, DeltaH and DeltaS values of the sequence windows to the output data file. temperature 25. -temperature=$value 10 If -thermo has been specified then this specifies the temperature at which to calculate the DeltaG, DeltaH and DeltaS values. goutfile output output image/pict dan.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dan -auto $sequence $windowsize $shiftincrement $dnaconc $saltconc $plot $mintemp $graph $rna $product $formamide $mismatch $prodlen $thermo $temperature $goutfile dbiblast Utilities/Database indexing/dbiblast TITLE dbiblast (EMBOSS) INFO Index a BLAST database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dbiblast.html staden -staden dbname -dbname=$value directory . 
-directory=$value filename -filename=$value indexdirectory . -indexdirectory=$value sortoptions -T.-k1,1 -sortoptions=$value 0 Sort options, typically '-T .' to use current directory for work files and '-k 1,1' to force GNU sort to use the first field release 0.0 -release=$value date 00/00/00 -date=$value 1 Allowed values: Date string dd/mm/yy systemsort true -nosystemsort cleanup true -nocleanup seqtype unknown -seqtype=$value blastversion unknown -blastversion=$value sourcefile -sourcefile STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dbiblast -auto $staden $dbname $directory $filename $indexdirectory $sortoptions $release $date $systemsort $cleanup $seqtype $blastversion $sourcefile dbifasta Utilities/Database indexing/dbifasta TITLE dbifasta (EMBOSS) INFO Index a fasta database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dbifasta.html staden -staden idformat idacc -idformat=$value dbname -dbname=$value directory . -directory=$value filenames *.dat -filenames=$value exclude -exclude=$value indexdirectory . -indexdirectory=$value sortoptions -T.-k1,1 -sortoptions=$value 0 Sort options, typically '-T .' to use current directory for work files and '-k 1,1' to force GNU sort to use the first field release 0.0 -release=$value date 00/00/00 -date=$value 1 Allowed values: Date string dd/mm/yy systemsort true -nosystemsort cleanup true -nocleanup STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dbifasta -auto $staden $idformat $dbname $directory $filenames $exclude $indexdirectory $sortoptions $release $date $systemsort $cleanup dbiflat Utilities/Database indexing/dbiflat TITLE dbiflat (EMBOSS) INFO Index a flat file database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dbiflat.html staden -staden idformat SWISS -idformat=$value dbname -dbname=$value directory . -directory=$value filenames *.dat -filenames=$value exclude -exclude=$value indexdirectory . 
-indexdirectory=$value sortoptions -T.-k1,1 -sortoptions=$value 0 Sort options, typically '-T .' to use current directory for work files and '-k 1,1' to force GNU sort to use the first field release 0.0 -release=$value date 00/00/00 -date=$value 1 Allowed values: Date string dd/mm/yy systemsort true -nosystemsort cleanup true -nocleanup STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dbiflat -auto $staden $idformat $dbname $directory $filenames $exclude $indexdirectory $sortoptions $release $date $systemsort $cleanup dbigcg Utilities/Database indexing/dbigcg TITLE dbigcg (EMBOSS) INFO Index a GCG formatted database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dbigcg.html staden -staden idformat EMBL -idformat=$value dbname -dbname=$value directory . -directory=$value filename *.seq -filename=$value indexdirectory . -indexdirectory=$value sortoptions -T.-k1,1 -sortoptions=$value 0 Sort options, typically '-T .' to use current directory for work files and '-k 1,1' to force GNU sort to use the first field release 0.0 -release=$value date 00/00/00 -date=$value 1 Allowed values: Date string dd/mm/yy systemsort true -nosystemsort cleanup true -nocleanup STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dbigcg -auto $staden $idformat $dbname $directory $filename $indexdirectory $sortoptions $release $date $systemsort $cleanup degapseq Sequence Edit/degapseq TITLE degapseq (EMBOSS) INFO Removes gap characters from sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/degapseq.html sequence input input biosequence/genbank degapseq.in.gb -sequence=$value -sformat=genbank seqtype gapany outseq output output biosequence/genbank degapseq.out.gb -outseq=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}degapseq -auto $sequence $outseq demoalign Test/demoalign TITLE demoalign (EMBOSS) INFO Reads a sequence set, writes an 
alignment file (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demoalign.html sequence input input biosequence/genbank demoalign.in.gb -sequence=$value -sformat=genbank outfile -outfile=$value floatmatrix EDNAMAT -floatmatrix=$value 0 Matrix file intmatrix EDNAMAT -intmatrix=$value 1 Matrix file dofloat -dofloat STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demoalign -auto $sequence $outfile $floatmatrix $intmatrix $dofloat demofeatures demo/demofeatures TITLE demofeatures (EMBOSS) INFO demonstration of the feature functions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demofeatures.html featout output output text/plain demofeatures.out -featout=$value typesort false -typesort 0 Sort output features by their type startsort false -startsort 1 Sort output features by their start position dictionary true -nodictionary 2 No will mean no checking of values tracedict true -notracedict 3 Useful if you specify nodictionary as newly created one will be produced STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demofeatures -auto $featout $typesort $startsort $dictionary $tracedict demolist Test/demolist TITLE demolist (EMBOSS) INFO demonstration of the list functions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demolist.html gff input input text/plain demolist.in -gff=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demolist -auto $gff demoreport Test/demoreport TITLE demoreport (EMBOSS) INFO Reads a sequence and feature table, writes a report (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demoreport.html sequence input input biosequence/genbank demoreport.in.gb -sequence=$value -sformat=genbank STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demoreport -auto $sequence demosequence Test/demosequence TITLE demosequence (EMBOSS) INFO demonstration of the 
sequence functions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demosequence.html STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demosequence -auto $ demostring demo/demostring TITLE demostring (EMBOSS) INFO demonstration of the string functions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demostring.html instring -instring=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demostring -auto $instring demotable demo/demotable TITLE demotable (EMBOSS) INFO demonstration of the table functions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/demotable.html gff input input text/plain demotable.in -gff=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}demotable -auto $gff descseq Sequence Edit/descseq TITLE descseq (EMBOSS) INFO Alter the name or description of a sequence. (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/descseq.html sequence input input biosequence/genbank descseq.in.gb -sequence=$value -sformat=genbank seqtype gapany outseq output output biosequence/genbank descseq.out.gb -outseq=$value name -name=$value description -description=$value append -append 0 This allows you to append the name or description you have given on to the end of the existing name or description of the sequence. 
STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}descseq -auto $sequence $outseq $name $description $append dichet Protein/3D Structure/dichet TITLE dichet (EMBOSS) INFO Parse dictionary of heterogen groups (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dichet.html inf input input text/plain dichet.in -inf=$value outf output output text/plain dichet.out -outf=$value dogrep false -dogrep path ./ -path=$value extn .ent -extn=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dichet -auto $inf $outf $dogrep $path $extn diffseq Alignment/Differences/diffseq TITLE diffseq (EMBOSS) INFO Find differences (SNPs) between nearly identical sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/diffseq.html asequence input input biosequence/genbank diffseq.in.gb -asequence=$value -sformat=genbank seqtype any bsequence input input biosequence/genbank diffseq.in.gb -bsequence=$value -sformat=genbank seqtype @($(asequence.protein) ? protein : nucleotide) wordsize 10 -wordsize=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}diffseq -auto $asequence $bsequence $wordsize digest Protein/Motifs/digest TITLE digest (EMBOSS) INFO Protein proteolytic enzyme or reagent cleavage digest (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/digest.html sequencea input input biosequence/genbank digest.in.gb -sequencea=$value -sformat=genbank seqtype Protein menu 1 -menu=$value unfavoured -unfavoured 0 Trypsin will not normally cut after a K if it is followed by (e.g.) another K or a P. Specifying this shows those cuts as well as the favoured ones. overlap -overlap 1 Used for partial digestion. Shows all cuts from favoured cut sites plus 1..3, 2..4, 3..5 etc but not (e.g.) 2..5. Overlaps are therefore fragments with exactly one potential cut site within them. 
aadata Eamino.dat -aadata=$value 2 Molecular weight data for amino acids allpartials -allpartials 3 As for overlap but fragments containing more than one potential cut site are included. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}digest -auto $sequencea $menu $unfavoured $overlap $aadata $allpartials distmat Phylogeny/distmat TITLE distmat (EMBOSS) INFO Creates a distance matrix from multiple alignments (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/distmat.html msf input input biosequence/genbank distmat.in.gb -msf=$value -sformat=genbank seqtype gapany nucmethod 0 -nucmethod=$value 0 Multiple substitution correction methods for nucleotides. protmethod 0 -protmethod=$value 1 Multiple substitution correction methods for proteins. outf output output text/plain distmat.out -outf=$value ambiguous false -ambiguous 2 Option to use the ambiguous codes in the calculation of the Jukes-Cantor method or if the sequences are proteins. gapweight 0. -gapweight=$value 3 Option to weight gaps in the uncorrected (nucleotide) and Jukes-Cantor distance methods. position 123 -position=$value 4 Choose base positions to analyse in each codon i.e. 123 (all bases), 12 (the first two bases), 1, 2, or 3 individual bases. calculatea false -calculatea 5 This will force the calculation of the a-parameter in the Jin-Nei Gamma distance calculation, otherwise the default is 1.0 (see -parametera option). parametera 1.0 -parametera=$value 6 User defined a parameter to be used in the Jin-Nei Gamma distance calculation. The suggested value to be used is 1.0 [Jin et al.] and this is the default. 
STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}distmat -auto $msf $nucmethod $protmethod $outf $ambiguous $gapweight $position $calculatea $parametera domainer Utilities/Database creation/domainer TITLE domainer (EMBOSS) INFO Build domain coordinate files (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/domainer.html scop input input text/plain domainer.in -scop=$value cpdb ./ -cpdb=$value cpdbscop ./ -cpdbscop=$value cpdbextn .pxyz -cpdbextn=$value pdbscop ./ -pdbscop=$value pdbextn .ent -pdbextn=$value cpdberrf output output text/plain domainer.out -cpdberrf=$value pdberrf output output text/plain domainer.out -pdberrf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}domainer -auto $scop $cpdb $cpdbscop $cpdbextn $pdbscop $pdbextn $cpdberrf $pdberrf dotmatcher Alignment/Dot plots/dotmatcher TITLE dotmatcher (EMBOSS) INFO Displays a thresholded dotplot of two sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dotmatcher.html sequencea input input biosequence/genbank dotmatcher.in.gb -sequencea=$value -sformat=genbank seqtype any sequenceb input input biosequence/genbank dotmatcher.in.gb -sequenceb=$value -sformat=genbank seqtype @($(sequencea.protein) ? 
protein : nucleotide) windowsize 10 -windowsize=$value threshold 17 -threshold=$value matrixfile EDNAMAT -matrixfile=$value data false -data 0 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain dotmatcher.out -outfile=$value goutfile output output image/pict dotmatcher.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dotmatcher -auto $sequencea $sequenceb $windowsize $threshold $matrixfile $data $graph $outfile $goutfile dotpath Alignment/Dot plots/dotpath TITLE dotpath (EMBOSS) INFO Displays a non-overlapping wordmatch dotplot of two sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dotpath.html sequencea input input biosequence/genbank dotpath.in.gb -sequencea=$value -sformat=genbank seqtype any sequenceb input input biosequence/genbank dotpath.in.gb -sequenceb=$value -sformat=genbank seqtype @($(sequencea.protein) ? 
protein : nucleotide) wordsize 4 -wordsize=$value overlaps false -overlaps 0 Displays the overlapping matches (in red) as well as the minimal set of non-overlapping matches data false -data 1 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value boxit true -noboxit outfile output output text/plain dotpath.out -outfile=$value goutfile output output image/pict dotpath.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dotpath -auto $sequencea $sequenceb $wordsize $overlaps $data $graph $boxit $outfile $goutfile dottup Alignment/Dot plots/dottup TITLE dottup (EMBOSS) INFO Displays a wordmatch dotplot of two sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dottup.html sequencea input input biosequence/genbank dottup.in.gb -sequencea=$value -sformat=genbank seqtype any sequenceb input input biosequence/genbank dottup.in.gb -sequenceb=$value -sformat=genbank seqtype @($(sequencea.protein) ? 
protein : nucleotide) wordsize 4 -wordsize=$value data false -data 0 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value boxit true -noboxit outfile output output text/plain dottup.out -outfile=$value goutfile output output image/pict dottup.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dottup -auto $sequencea $sequenceb $wordsize $data $graph $boxit $outfile $goutfile dreg Nucleic/Motifs/dreg TITLE dreg (EMBOSS) INFO regular expression search of a nucleotide sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/dreg.html sequence input input biosequence/genbank dreg.in.gb -sequence=$value -sformat=genbank seqtype dna outfile output output text/plain dreg.out -outfile=$value pattern -pattern=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}dreg -auto $sequence $outfile $pattern ealistat HMM/ealistat TITLE ealistat (EMBOSS) INFO Statistics for multiple alignment files (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ealistat.html infile input input text/plain ealistat.in -infile=$value additional false -additional fast false -fast outfile output output text/plain ealistat.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ealistat -auto $infile $additional $fast $outfile eclique phylip/eclique TITLE eclique (EMBOSS) INFO Largest clique program (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/eclique.html infile input input text/plain eclique.in -infile=$value outfile output output text/plain eclique.out -outfile=$value trout true -notrout drawtree true -nodrawtree treefile output output text/plain eclique.out -treefile=$value ancestral false -ancestral minclique false -minclique cliqminnum 1 -cliqminnum=$value og false -og outgnum 1 -outgnum=$value printdata false 
-printdata progress false -progress matrixout false -matrixout STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}eclique -auto $infile $outfile $trout $drawtree $treefile $ancestral $minclique $cliqminnum $og $outgnum $printdata $progress $matrixout econsense phylip/econsense TITLE econsense (EMBOSS) INFO Majority-rule and strict consensus tree (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/econsense.html infile input input text/plain econsense.in -infile=$value outfile output output text/plain econsense.out -outfile=$value trout true -notrout drawtree true -nodrawtree treefile output output text/plain econsense.out -treefile=$value root false -root og false -og outgnum 1 -outgnum=$value progress false -progress printsets true -noprintsets STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}econsense -auto $infile $outfile $trout $drawtree $treefile $root $og $outgnum $progress $printsets econtml phylip/econtml TITLE econtml (EMBOSS) INFO Continuous character Maximum Likelihood method (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/econtml.html infile input input text/plain econtml.in -infile=$value outfile output output text/plain econtml.out -outfile=$value besttree true -nobesttree lengths false -lengths global false -global random false -random randseed 3 -randseed=$value continuous false -continuous all false -all og false -og outgnum 1 -outgnum=$value multsets false -multsets datasets 1 -datasets=$value printdata false -printdata progress false -progress drawtree true -nodrawtree trout true -notrout treefile output output text/plain econtml.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}econtml -auto $infile $outfile $besttree $lengths $global $random $randseed $continuous $all $og $outgnum $multsets $datasets $printdata $progress $drawtree $trout $treefile econtrast phylip/econtrast TITLE 
econtrast (EMBOSS) INFO Continuous character Contrasts (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/econtrast.html infile input input text/plain econtrast.in -infile=$value treefile input input text/plain econtrast.in -treefile=$value outfile output output text/plain econtrast.out -outfile=$value corplusreg true -nocorplusreg multsets false -multsets datasets 1 -datasets=$value printdata false -printdata progress false -progress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}econtrast -auto $infile $treefile $outfile $corplusreg $multsets $datasets $printdata $progress ednacomp phylip/ednacomp TITLE ednacomp (EMBOSS) INFO DNA compatibility algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednacomp.html msf input input biosequence/genbank ednacomp.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednacomp.out -outfile=$value trout true -notrout drawtree true -nodrawtree treefile output output text/plain ednacomp.out -treefile=$value og false -og outgnum 1 -outgnum=$value printdata false -printdata progress false -progress stepoutput false -stepoutput allnodes false -allnodes random false -random randseed -randseed=$value randtimes -randtimes=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednacomp -auto $msf $outfile $trout $drawtree $treefile $og $outgnum $printdata $progress $stepoutput $allnodes $random $randseed $randtimes ednadist phylip/ednadist TITLE ednadist (EMBOSS) INFO Nucleic acid sequence Distance Matrix program (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednadist.html msf input input biosequence/genbank ednadist.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednadist.out -outfile=$value method Kimura -method=$value ttratio 2.0 -ttratio=$value categories 1 -categories=$value basefrequency true -nobasefrequency printinitial false -printinitial 
freqa 0.25 -freqa=$value freqc 0.25 -freqc=$value freqg 0.25 -freqg=$value freqt 0.25 -freqt=$value matrix S -matrix=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednadist -auto $msf $outfile $method $ttratio $categories $basefrequency $printinitial $freqa $freqc $freqg $freqt $matrix ednainvar phylip/ednainvar TITLE ednainvar (EMBOSS) INFO Nucleic acid sequence Invariants method (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednainvar.html msf input input biosequence/genbank ednainvar.in.gb -msf=$value -sformat=genbank outfile output output text/plain ednainvar.out -outfile=$value printdata false -printdata progress false -progress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednainvar -auto $msf $outfile $printdata $progress ednaml phylip/ednaml TITLE ednaml (EMBOSS) INFO Estimates phylogenies from nucleic acid sequence Maximum Likelihood (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednaml.html msf input input biosequence/genbank ednaml.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednaml.out -outfile=$value besttree true -nobesttree lengths false -lengths global false -global random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value ttratio 2.0 -ttratio=$value basefrequency true -nobasefrequency freqa 0.25 -freqa=$value freqc 0.25 -freqc=$value freqg 0.25 -freqg=$value freqt 0.25 -freqt=$value categories false -categories catnum 2 -catnum=$value catvals -catvals=$value catprob -catprob=$value autog false -autog lambda 1.0 -lambda=$value og false -og outgnum 1 -outgnum=$value printdata false -printdata progress false -progress drawtree true -nodrawtree trout true -notrout treefile output output text/plain ednaml.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednaml -auto $msf $outfile $besttree $lengths $global $random 
$randseed $randtimes $ttratio $basefrequency $freqa $freqc $freqg $freqt $categories $catnum $catvals $catprob $autog $lambda $og $outgnum $printdata $progress $drawtree $trout $treefile ednamlk phylip/ednamlk TITLE ednamlk (EMBOSS) INFO Estimates phylogenies from nucleic acid sequence Maximum Likelihood with molecular clock (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednamlk.html msf input input biosequence/genbank ednamlk.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednamlk.out -outfile=$value besttree true -nobesttree lengths false -lengths global false -global random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value ttratio 2.0 -ttratio=$value basefrequency true -nobasefrequency freqa 0.25 -freqa=$value freqc 0.25 -freqc=$value freqg 0.25 -freqg=$value freqt 0.25 -freqt=$value categories false -categories catnum 2 -catnum=$value catvals -catvals=$value catprob -catprob=$value autog false -autog lambda 1.0 -lambda=$value printdata false -printdata progress false -progress drawtree true -nodrawtree trout true -notrout treefile output output text/plain ednamlk.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednamlk -auto $msf $outfile $besttree $lengths $global $random $randseed $randtimes $ttratio $basefrequency $freqa $freqc $freqg $freqt $categories $catnum $catvals $catprob $autog $lambda $printdata $progress $drawtree $trout $treefile ednapars phylip/ednapars TITLE ednapars (EMBOSS) INFO DNA parsimony algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednapars.html msf input input biosequence/genbank ednapars.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednapars.out -outfile=$value besttree true -nobesttree random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value og false -og outgnum 1 -outgnum=$value thresh false -thresh valthresh 1.0 
-valthresh=$value printdata false -printdata progress false -progress steps false -steps seqatnodes false -seqatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain ednapars.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednapars -auto $msf $outfile $besttree $random $randseed $randtimes $og $outgnum $thresh $valthresh $printdata $progress $steps $seqatnodes $drawtree $trout $treefile ednapenny phylip/ednapenny TITLE ednapenny (EMBOSS) INFO Penny algorithm for DNA (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ednapenny.html msf input input biosequence/genbank ednapenny.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain ednapenny.out -outfile=$value numgroups 1000 -numgroups=$value howoften 100 -howoften=$value simple -simple og false -og outgnum 1 -outgnum=$value thresh false -thresh valthresh 1.0 -valthresh=$value printdata false -printdata progress false -progress steps false -steps seqatnodes false -seqatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain ednapenny.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ednapenny -auto $msf $outfile $numgroups $howoften $simple $og $outgnum $thresh $valthresh $printdata $progress $steps $seqatnodes $drawtree $trout $treefile edollop phylip/edollop TITLE edollop (EMBOSS) INFO Dollo and polymorphism parsimony algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/edollop.html datafile input input text/plain edollop.in -datafile=$value outfile output output text/plain edollop.out -outfile=$value besttree true -nobesttree random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value dollo true -nodollo thresh false -thresh valthresh 1.0 -valthresh=$value ancest false -ancest multsets false -multsets datasets 2 -datasets=$value printdata false -printdata 
progress false -progress steps false -steps statesatnodes false -statesatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain edollop.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}edollop -auto $datafile $outfile $besttree $random $randseed $randtimes $dollo $thresh $valthresh $ancest $multsets $datasets $printdata $progress $steps $statesatnodes $drawtree $trout $treefile edolpenny phylip/edolpenny TITLE edolpenny (EMBOSS) INFO Penny algorithm Dollo or polymorphism (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/edolpenny.html datafile input input text/plain edolpenny.in -datafile=$value outfile output output text/plain edolpenny.out -outfile=$value dollo true -nodollo numgroups 1000 -numgroups=$value howoften 100 -howoften=$value simple -simple thresh false -thresh valthresh 1.0 -valthresh=$value ancest false -ancest multsets false -multsets datasets 2 -datasets=$value printdata false -printdata progress false -progress steps false -steps statesatnodes false -statesatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain edolpenny.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}edolpenny -auto $datafile $outfile $dollo $numgroups $howoften $simple $thresh $valthresh $ancest $multsets $datasets $printdata $progress $steps $statesatnodes $drawtree $trout $treefile efactor phylip/efactor TITLE efactor (EMBOSS) INFO multistate to binary recoding program (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/efactor.html datafile input input text/plain efactor.in -datafile=$value outfile output output text/plain efactor.out -outfile=$value anc false -anc factors false -factors progress true -noprogress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}efactor -auto $datafile $outfile $anc $factors $progress efitch 
phylip/efitch TITLE efitch (EMBOSS) INFO Fitch-Margoliash and Least-Squares Distance Methods (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/efitch.html infile input input text/plain efitch.in -infile=$value outfile output output text/plain efitch.out -outfile=$value besttree true -nobesttree length false -length power 3.0 -power=$value negbranch false -negbranch random false -random randseed -randseed=$value randtimes -randtimes=$value global false -global og false -og outgnum 1 -outgnum=$value lt false -lt ut false -ut replicates false -replicates multsets false -multsets datasets -datasets=$value printdata false -printdata progress false -progress trout true -notrout drawtree true -nodrawtree treefile output output text/plain efitch.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}efitch -auto $infile $outfile $besttree $length $power $negbranch $random $randseed $randtimes $global $og $outgnum $lt $ut $replicates $multsets $datasets $printdata $progress $trout $drawtree $treefile egendist phylip/egendist TITLE egendist (EMBOSS) INFO Genetic Distance Matrix program (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/egendist.html infile input input text/plain egendist.in -infile=$value outfile output output text/plain egendist.out -outfile=$value all false -all STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}egendist -auto $infile $outfile $all ehmmalign HMM/ehmmalign TITLE ehmmalign (EMBOSS) INFO Align sequences with an HMM (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmalign.html hmmfile input input text/plain ehmmalign.in -hmmfile=$value sequences input input biosequence/genbank ehmmalign.in.gb -sequences=$value -sformat=genbank mapali -mapali=$value withali -withali=$value matchonly false -matchonly outfile output output text/plain ehmmalign.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output 
text/plain stderr $stderr ${empath}ehmmalign -auto $hmmfile $sequences $mapali $withali $matchonly $outfile ehmmbuild HMM/ehmmbuild TITLE ehmmbuild (EMBOSS) INFO Build HMM (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmbuild.html sequences input input biosequence/genbank ehmmbuild.in.gb -sequences=$value -sformat=genbank strategy L -strategy=$value name Ehmm -name=$value resave -resave=$value append false -append force false -force amino false -amino nucleic false -nucleic archpri 0.85 -archpri=$value binary false -binary cfile -cfile=$value cstrategy F -cstrategy=$value fast false -fast gapmax 0.5 -gapmax=$value hand false -hand idlevel 0.62 -idlevel=$value efficiency true -noefficiency null -null=$value pam -pam=$value pamweight 20.0 -pamweight=$value prior -prior=$value swentry 0.5 -swentry=$value swexit 0.5 -swexit=$value more false -more weighting G -weighting=$value outfile output output text/plain ehmmbuild.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmbuild -auto $sequences $strategy $name $resave $append $force $amino $nucleic $archpri $binary $cfile $cstrategy $fast $gapmax $hand $idlevel $efficiency $null $pam $pamweight $prior $swentry $swexit $more $weighting $outfile ehmmcalibrate HMM/ehmmcalibrate TITLE ehmmcalibrate (EMBOSS) INFO Calibrate a hidden Markov model (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmcalibrate.html infile input input text/plain ehmmcalibrate.in -infile=$value cpu 0 -cpu=$value fixed 0 -fixed=$value histogram -histogram=$value mean 350. -mean=$value num 5000 -num=$value pvm false -pvm sd 350. 
-sd=$value seed 0 -seed=$value outfile output output text/plain ehmmcalibrate.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmcalibrate -auto $infile $cpu $fixed $histogram $mean $num $pvm $sd $seed $outfile ehmmconvert HMM/ehmmconvert TITLE ehmmconvert (EMBOSS) INFO Convert between HMM formats (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmconvert.html infile input input text/plain ehmmconvert.in -infile=$value format A -format=$value append false -append force false -force outfile output output text/plain ehmmconvert.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmconvert -auto $infile $format $append $force $outfile ehmmemit HMM/ehmmemit TITLE ehmmemit (EMBOSS) INFO Extract HMM sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmemit.html infile input input text/plain ehmmemit.in -infile=$value selex false -selex consensus false -consensus number 10 -number=$value seed 0 -seed=$value outfile output output text/plain ehmmemit.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmemit -auto $infile $selex $consensus $number $seed $outfile ehmmfetch HMM/ehmmfetch TITLE ehmmfetch (EMBOSS) INFO Extract HMM from a database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmfetch.html database -database=$value consensus -consensus=$value outfile output output text/plain ehmmfetch.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmfetch -auto $database $consensus $outfile ehmmindex HMM/ehmmindex TITLE ehmmindex (EMBOSS) INFO Index an HMM database (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmindex.html infile input input text/plain ehmmindex.in -infile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr 
${empath}ehmmindex -auto $infile ehmmpfam HMM/ehmmpfam TITLE ehmmpfam (EMBOSS) INFO Align single sequence with an HMM (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmpfam.html sequences input input biosequence/genbank ehmmpfam.in.gb -sequences=$value -sformat=genbank hmmfile -hmmfile=$value nucleic false -nucleic nalign 100 -nalign=$value evalue 10. -evalue=$value hitcut -1000000. -hitcut=$value dbsize 59021 -dbsize=$value cpu 0 -cpu=$value dome 1000000. -dome=$value domt -1000000. -domt=$value forward false -forward nulltwo false -nulltwo pvm false -pvm xnu false -xnu outfile output output text/plain ehmmpfam.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmpfam -auto $sequences $hmmfile $nucleic $nalign $evalue $hitcut $dbsize $cpu $dome $domt $forward $nulltwo $pvm $xnu $outfile ehmmsearch HMM/ehmmsearch TITLE ehmmsearch (EMBOSS) INFO Search sequence database with an HMM (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ehmmsearch.html sequences input input biosequence/genbank ehmmsearch.in.gb -sequences=$value -sformat=genbank hmmfile -hmmfile=$value nalign 100 -nalign=$value evalue 10. -evalue=$value hitcut -1000000. -hitcut=$value dbsize 59021 -dbsize=$value cpu 0 -cpu=$value dome 1000000. -dome=$value domt -1000000. 
-domt=$value forward false -forward nulltwo false -nulltwo pvm false -pvm xnu false -xnu outfile output output text/plain ehmmsearch.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ehmmsearch -auto $sequences $hmmfile $nalign $evalue $hitcut $dbsize $cpu $dome $domt $forward $nulltwo $pvm $xnu $outfile einverted Nucleic/Repeats/einverted Nucleic/2D structure/einverted TITLE einverted (EMBOSS) INFO Finds DNA inverted repeats (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/einverted.html sequence input input biosequence/genbank einverted.in.gb -sequence=$value -sformat=genbank seqtype dna outfile output output text/plain einverted.out -outfile=$value gap 12 -gap=$value threshold 50 -threshold=$value match 3 -match=$value mismatch -4 -mismatch=$value maxrepeat 4000 -maxrepeat=$value 0 Maximum separation between the start of repeat and the end of the inverted repeat (the default is 4000 bases). STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}einverted -auto $sequence $outfile $gap $threshold $match $mismatch $maxrepeat ekitsch phylip/ekitsch TITLE ekitsch (EMBOSS) INFO Fitch-Margoliash method with contemporary tips (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/ekitsch.html infile input input text/plain ekitsch.in -infile=$value outfile output output text/plain ekitsch.out -outfile=$value besttree true -nobesttree power 3.0 -power=$value negbranch false -negbranch random false -random randseed -randseed=$value randtimes -randtimes=$value lt false -lt ut false -ut replicates false -replicates multsets false -multsets datasets -datasets=$value printdata false -printdata progress false -progress drawtree true -nodrawtree treefile output output text/plain ekitsch.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}ekitsch -auto $infile $outfile $besttree $power $negbranch $random $randseed 
$randtimes $lt $ut $replicates $multsets $datasets $printdata $progress $drawtree $treefile embossdata Utilities/Miscellaneous Utilities/embossdata TITLE embossdata (EMBOSS) INFO Finds or fetches the data files read in by the EMBOSS programs (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/embossdata.html outf output output text/plain embossdata.out -outf=$value showall -showall fetch -fetch filename -filename=$value 0 This specifies the name of the file that should be fetched into the current directory or searched for in all of the directories that EMBOSS programs search when looking for a data file. The name of the file is not altered when it is fetched. reject 2||3||4 -reject=$value 1 This specifies the names of the sub-directories of the EMBOSS data directory that should be ignored when displaying data directories. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}embossdata -auto $outf $showall $fetch $filename $reject embossversion Utilities/Miscellaneous Utilities/embossversion TITLE embossversion (EMBOSS) INFO Writes the current EMBOSS version number (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/embossversion.html outfile output output text/plain embossversion.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}embossversion -auto $outfile emix phylip/emix TITLE emix (EMBOSS) INFO Mixed parsimony algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/emix.html infile input input text/plain emix.in -infile=$value outfile output output text/plain emix.out -outfile=$value besttree true -nobesttree STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}emix -auto $infile $outfile $besttree emma Alignment/Multiple/emma TITLE emma (EMBOSS) INFO Multiple alignment program - interface to ClustalW program (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/emma.html inseqs input input 
biosequence/genbank emma.in.gb -inseqs=$value -sformat=genbank seqtype gapany outseq output output biosequence/genbank emma.out.gb -outseq=$value dendoutfile output output text/plain emma.out -dendoutfile=$value onlydend false -onlydend dend false -dend dendfile NULL -dendfile=$value insist false -insist prot -prot slowfast s -slowfast=$value 0 A distance is calculated between every pair of sequences and these are used to construct the dendrogram which guides the final multiple alignment. The scores are calculated from separate pairwise alignments. These can be calculated using 2 methods: dynamic programming (slow but accurate) or by the method of Wilbur and Lipman (extremely fast but approximate). The slow-accurate method is fine for short sequences but will be VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences. pwgapc 10.0 -pwgapc=$value 1 The penalty for opening a gap in the pairwise alignments. pwgapv 0.1 -pwgapv=$value 2 The penalty for extending a gap by 1 residue in the pairwise alignments. pwmatrix b -pwmatrix=$value 3 The scoring table which describes the similarity of each amino acid to each other. There are three 'in-built' series of weight matrices offered. Each consists of several matrices which work differently at different evolutionary distances. To see the exact details, read the documentation. Crudely, we store several matrices in memory, spanning the full range of amino acid distance (from almost identical sequences to highly divergent ones). For very similar sequences, it is best to use a strict weight matrix which only gives a high score to identities and the most favoured conservative substitutions. For more divergent sequences, it is appropriate to use 'softer' matrices which give a high score to many other frequent substitutions. 1) BLOSUM (Henikoff). These matrices appear to be the best available for carrying out data base similarity (homology searches). The matrices used are: Blosum80, 62, 45 and 30. 2) PAM (Dayhoff). 
These have been extremely widely used since the late '70s. We use the PAM 120, 160, 250 and 350 matrices. 3) GONNET. These matrices were derived using almost the same procedure as the Dayhoff one (above) but are much more up to date and are based on a far larger data set. They appear to be more sensitive than the Dayhoff series. We use the GONNET 40, 80, 120, 160, 250 and 350 matrices. We also supply an identity matrix which gives a score of 1.0 to two identical amino acids and a score of zero otherwise. This matrix is not very useful. pwdnamatrix i -pwdnamatrix=$value 4 The scoring table which describes the scores assigned to matches and mismatches (including IUB ambiguity codes). pairwisedata NULL -pairwisedata=$value ktup -ktup=$value 5 This is the size of exactly matching fragment that is used. INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity. For longer sequences (e.g. >1000 residues) you may need to increase the default. Allowed values: integer from 0 to 4 gapw -gapw=$value 6 This is a penalty for each gap in the fast alignments. It has little effect on the speed or sensitivity except for extreme values. Allowed values: Positive integer topdiags -topdiags=$value 7 The number of k-tuple matches on each diagonal (in an imaginary dot-matrix plot) is calculated. Only the best ones (with most matches) are used in the alignment. This parameter specifies how many. Decrease for speed; increase for sensitivity. Allowed values: Positive integer window -window=$value 8 This is the number of diagonals around each of the 'best' diagonals that will be used. Decrease for speed; increase for sensitivity. Allowed values: Positive integer nopercent false -nopercent matrix b -matrix=$value 9 This gives a menu where you are offered a choice of weight matrices. The default for proteins is the PAM series derived by Gonnet and colleagues. Note, a series is used! 
The actual matrix that is used depends on how similar the sequences to be aligned at this alignment step are. Different matrices work differently at each evolutionary distance. There are three 'in-built' series of weight matrices offered. Each consists of several matrices which work differently at different evolutionary distances. To see the exact details, read the documentation. Crudely, we store several matrices in memory, spanning the full range of amino acid distance (from almost identical sequences to highly divergent ones). For very similar sequences, it is best to use a strict weight matrix which only gives a high score to identities and the most favoured conservative substitutions. For more divergent sequences, it is appropriate to use 'softer' matrices which give a high score to many other frequent substitutions. 1) BLOSUM (Henikoff). These matrices appear to be the best available for carrying out data base similarity (homology searches). The matrices used are: Blosum80, 62, 45 and 30. 2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We use the PAM 120, 160, 250 and 350 matrices. 3) GONNET. These matrices were derived using almost the same procedure as the Dayhoff one (above) but are much more up to date and are based on a far larger data set. They appear to be more sensitive than the Dayhoff series. We use the GONNET 40, 80, 120, 160, 250 and 350 matrices. We also supply an identity matrix which gives a score of 1.0 to two identical amino acids and a score of zero otherwise. This matrix is not very useful. Alternatively, you can read in your own (just one matrix, not a series). dnamatrix i -dnamatrix=$value 10 This gives a menu where a single matrix (not a series) can be selected. mamatrix NULL -mamatrix=$value gapc 10.0 -gapc=$value 11 The penalty for opening a gap in the alignment. Increasing the gap opening penalty will make gaps less frequent. 
Allowed values: Positive floating point number gapv 5.0 -gapv=$value 12 The penalty for extending a gap by 1 residue. Increasing the gap extension penalty will make gaps shorter. Terminal gaps are not penalised. Allowed values: Positive floating point number unweighted false -unweighted 13 The 'Transition weight' gives transitions (A <--> G or C <--> T i.e. purine-purine or pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero means that the transitions are scored as mismatches, while a weight of 1 gives the transitions the match score. For distantly related DNA sequences, the weight should be near to zero; for closely related sequences it can be useful to assign a higher score. endgaps false -endgaps 14 'End gap separation' treats end gaps just like internal gaps for the purposes of avoiding gaps that are too close (set by 'gap separation distance'). If you turn this off, end gaps will be ignored for this purpose. This is useful when you wish to align fragments where the end gaps are not biologically meaningful. gapdist 8 -gapdist=$value 15 'Gap separation distance' tries to decrease the chances of gaps being too close to each other. Gaps that are less than this distance apart are penalised more than other gaps. This does not prevent close gaps; it makes them less frequent, promoting a block-like appearance of the alignment. Allowed values: Positive integer norgap false -norgap 16 'Residue specific penalties' are amino acid specific gap penalties that reduce or increase the gap opening penalties at each position in the alignment or sequence. As an example, positions that are rich in glycine are more likely to have an adjacent gap than positions that are rich in valine. hgapres GPSNDQEKR -hgapres=$value 17 This is a set of the residues 'considered' to be hydrophilic. It is used when introducing Hydrophilic gap penalties. 
nohgap false -nohgap 18 'Hydrophilic gap penalties' are used to increase the chances of a gap within a run (5 or more residues) of hydrophilic amino acids; these are likely to be loop or random coil regions where gaps are more common. The residues that are 'considered' to be hydrophilic are set by '-hgapres'. maxdiv 30 -maxdiv=$value 19 This switch delays the alignment of the most distantly related sequences until after the most closely related sequences have been aligned. The setting shows the percent identity level required to delay the addition of a sequence; sequences that are less identical than this level to any other sequences will be aligned later. Allowed values: Integer from 0 to 100 STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}emma -auto $inseqs $outseq $dendoutfile $onlydend $dend $dendfile $insist $prot $slowfast $pwgapc $pwgapv $pwmatrix $pwdnamatrix $pairwisedata $ktup $gapw $topdiags $window $nopercent $matrix $dnamatrix $mamatrix $gapc $gapv $unweighted $endgaps $gapdist $norgap $hgapres $nohgap $maxdiv emowse Protein/Composition/emowse TITLE emowse (EMBOSS) INFO Protein identification by mass spectrometry (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/emowse.html sequences input input biosequence/genbank emowse.in.gb -sequences=$value -sformat=genbank seqtype protein infile input input text/plain emowse.in -infile=$value enzyme 1 -enzyme=$value aadata Eamino.dat -aadata=$value 0 Molecular weight data for amino acids weight 0 -weight=$value pcrange 25 -pcrange=$value frequencies Efreqs.dat -frequencies=$value tolerance 0.1 -tolerance=$value partials 0.4 -partials=$value outfile output output text/plain emowse.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}emowse -auto $sequences $infile $enzyme $aadata $weight $pcrange $frequencies $tolerance $partials $outfile eneighbor phylip/eneighbor TITLE eneighbor (EMBOSS) INFO Phylogenies 
from distance matrix by N-J or UPGMA method (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/eneighbor.html infile input input text/plain eneighbor.in -infile=$value outfile output output text/plain eneighbor.out -outfile=$value trout true -notrout drawtree true -nodrawtree treefile output output text/plain eneighbor.out -treefile=$value nj true -nonj og false -og outgnum -outgnum=$value lt false -lt ut false -ut sr true -nosr random false -random randseed -randseed=$value multsets false -multsets datasets -datasets=$value printdata false -printdata progress false -progress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}eneighbor -auto $infile $outfile $trout $drawtree $treefile $nj $og $outgnum $lt $ut $sr $random $randseed $multsets $datasets $printdata $progress entrails Utilities/Miscellaneous Utilities/entrails TITLE entrails (EMBOSS) INFO Reports the internal data from the EMBOSS code (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/entrails.html outfile output output text/plain entrails.out -outfile=$value fullreport false -fullreport 0 By default, only the essential details are reported STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}entrails -auto $outfile $fullreport entret Sequence Edit/entret TITLE entret (EMBOSS) INFO Reads and writes (returns) flatfile entries (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/entret.html sequence input input biosequence/genbank entret.in.gb -sequence=$value -sformat=genbank outfile output output text/plain entret.out -outfile=$value firstonly -firstonly STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}entret -auto $sequence $outfile $firstonly epenny phylip/epenny TITLE epenny (EMBOSS) INFO Penny algorithm, branch-and-bound to find all most parsimonious trees (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/epenny.html infile input input text/plain 
epenny.in -infile=$value outfile output output text/plain epenny.out -outfile=$value method Wag -method=$value numgroups 1000 -numgroups=$value howoften 100 -howoften=$value simple -simple og false -og outgnum 1 -outgnum=$value thresh false -thresh valthresh 1.0 -valthresh=$value multsets false -multsets datasets -datasets=$value printdata false -printdata progress false -progress steps false -steps seqatnodes false -seqatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain epenny.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}epenny -auto $infile $outfile $method $numgroups $howoften $simple $og $outgnum $thresh $valthresh $multsets $datasets $printdata $progress $steps $seqatnodes $drawtree $trout $treefile eprotdist phylip/eprotdist TITLE eprotdist (EMBOSS) INFO Protein distance algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/eprotdist.html msf input input biosequence/genbank eprotdist.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain eprotdist.out -outfile=$value method Pam -method=$value categ G -categ=$value gencode U -gencode=$value prob 0.457 -prob=$value tranrate 2.0 -tranrate=$value basefrequency true -nobasefrequency freqa 0.25 -freqa=$value freqc 0.25 -freqc=$value freqg 0.25 -freqg=$value freqt 0.25 -freqt=$value printdata false -printdata progress false -progress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}eprotdist -auto $msf $outfile $method $categ $gencode $prob $tranrate $basefrequency $freqa $freqc $freqg $freqt $printdata $progress eprotpars phylip/eprotpars TITLE eprotpars (EMBOSS) INFO Protein parsimony algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/eprotpars.html msf input input biosequence/genbank eprotpars.in.gb -msf=$value -sformat=genbank seqtype gapany outfile output output text/plain eprotpars.out -outfile=$value besttree 
true -nobesttree random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value og false -og outgnum 1 -outgnum=$value thresh false -thresh valthresh 1.0 -valthresh=$value printdata false -printdata progress false -progress steps false -steps seqatnodes false -seqatnodes drawtree true -nodrawtree trout true -notrout treefile output output text/plain eprotpars.out -treefile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}eprotpars -auto $msf $outfile $besttree $random $randseed $randtimes $og $outgnum $thresh $valthresh $printdata $progress $steps $seqatnodes $drawtree $trout $treefile equicktandem Nucleic/Repeats/equicktandem TITLE equicktandem (EMBOSS) INFO Finds tandem repeats (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/equicktandem.html sequence input input biosequence/genbank equicktandem.in.gb -sequence=$value -sformat=genbank seqtype dna outfile output output text/plain equicktandem.out -outfile=$value maxrepeat 600 -maxrepeat=$value threshold 20 -threshold=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}equicktandem -auto $sequence $outfile $maxrepeat $threshold erestml phylip/erestml TITLE erestml (EMBOSS) INFO Restriction site Maximum Likelihood method (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/erestml.html datafile input input text/plain erestml.in -datafile=$value outfile output output text/plain erestml.out -outfile=$value besttree true -nobesttree allsites true -noallsites lengths false -lengths sitelen 6 -sitelen=$value extrap 100.0 -extrap=$value global false -global random false -random randseed 3 -randseed=$value randtimes 3 -randtimes=$value og false -og outgnum 1 -outgnum=$value multsets false -multsets datasets 2 -datasets=$value printdata false -printdata progress false -progress drawtree true -nodrawtree trout true -notrout treefile output output text/plain erestml.out -treefile=$value STDOUT output 
text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}erestml -auto $datafile $outfile $besttree $allsites $lengths $sitelen $extrap $global $random $randseed $randtimes $og $outgnum $multsets $datasets $printdata $progress $drawtree $trout $treefile eseqboot phylip/eseqboot TITLE eseqboot (EMBOSS) INFO Bootstrapped sequences algorithm (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/eseqboot.html datafile input input text/plain eseqboot.in -datafile=$value outfile output output text/plain eseqboot.out -outfile=$value inter false -inter randseed 3 -randseed=$value method Seq -method=$value enzymes false -enzymes all false -all test Boot -test=$value reps 100 -reps=$value printdata false -printdata progress false -progress STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}eseqboot -auto $datafile $outfile $inter $randseed $method $enzymes $all $test $reps $printdata $progress est2genome Alignment/Global/est2genome TITLE est2genome (EMBOSS) INFO Align EST and genomic DNA sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/est2genome.html est input input biosequence/genbank est2genome.in.gb -est=$value -sformat=genbank seqtype dna genome input input biosequence/genbank est2genome.in.gb -genome=$value -sformat=genbank seqtype dna match 1 -match=$value mismatch 1 -mismatch=$value gappenalty 2 -gappenalty=$value 0 Cost for deleting a single base in either sequence, excluding introns intronpenalty 40 -intronpenalty=$value 1 Cost for an intron, independent of length. splicepenalty 20 -splicepenalty=$value 2 Cost for an intron, independent of length and starting/ending on donor-acceptor sites minscore 30 -minscore=$value 3 You can exclude alignments with scores below a threshold by setting this to be false. reverse -reverse 4 Reverse the orientation of the EST sequence splice true -nosplice 5 Use donor and acceptor splice sites. 
If you want to ignore donor-acceptor sites then set this to be false. align -align 6 Show the alignment. The alignment includes the first and last 5 bases of each intron, together with the intron width. The direction of splicing is indicated by angle brackets (forward or reverse) or ???? (unknown). width 50 -width=$value mode both -mode=$value 7 This determines the comparison mode. The default value is 'both', in which case both strands of the est are compared assuming a forward gene direction (ie GT/AG splice sites), and the best comparison redone assuming a reversed (CT/AC) gene splicing direction. The other allowed modes are 'forward', when just the forward strand is searched, and 'reverse', ditto for the reverse strand. best true -nobest 8 You can print out all comparisons instead of just the best one by setting this to be false. space 10.0 -space=$value 9 for linear-space recursion. If product of sequence lengths divided by 4 exceeds this then a divide-and-conquer strategy is used to control the memory requirements. In this way very long sequences can be aligned. 
If you have a machine with plenty of memory you can raise this parameter (but do not exceed the machine's physical RAM) shuffle -shuffle=$value seed 20825 -seed=$value outfile output output text/plain est2genome.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}est2genome -auto $est $genome $match $mismatch $gappenalty $intronpenalty $splicepenalty $minscore $reverse $splice $align $width $mode $best $space $shuffle $seed $outfile etandem Nucleic/Repeats/etandem TITLE etandem (EMBOSS) INFO Looks for tandem repeats in a nucleotide sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/etandem.html sequence input input biosequence/genbank etandem.in.gb -sequence=$value -sformat=genbank seqtype dna outfile output output text/plain etandem.out -outfile=$value minrepeat 10 -minrepeat=$value 0 Allowed values: Integer, 2 or higher maxrepeat -maxrepeat=$value 1 Allowed values: Integer, same as -minrepeat or higher threshold 20 -threshold=$value mismatch -mismatch uniform -uniform STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}etandem -auto $sequence $outfile $minrepeat $maxrepeat $threshold $mismatch $uniform extractfeat Sequence Edit/extractfeat Feature tables/extractfeat TITLE extractfeat (EMBOSS) INFO Extract features from a sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/extractfeat.html sequence input input biosequence/genbank extractfeat.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank extractfeat.out.gb -outseq=$value before 0 -before=$value 0 If this value is greater than 0 then that number of bases or residues before the feature are included in the extracted sequence. This allows you to get the context of the feature. If this value is negative then the start of the extracted sequence will be this number of bases/residues before the end of the feature. 
So a value of '10' will start the extraction 10 bases/residues before the start of the sequence, and a value of '-10' will start the extraction 10 bases/residues before the end of the feature. The output sequence will be padded with 'N' or 'X' characters if the sequence starts after the required start of the extraction. after 0 -after=$value 1 If this value is greater than 0 then that number of bases or residues after the feature are included in the extracted sequence. This allows you to get the context of the feature. If this value is negative then the end of the extracted sequence will be this number of bases/residues after the start of the feature. So a value of '10' will end the extraction 10 bases/residues after the end of the sequence, and a value of '-10' will end the extraction 10 bases/residues after the start of the feature. The output sequence will be padded with 'N' or 'X' characters if the sequence ends before the required end of the extraction. source * -source=$value 2 By default any feature source in the feature table is shown. You can set this to match any feature source you wish to show. The source name is usually either the name of the program that detected the feature or it is the feature table (eg: EMBL) that the feature came from. The source may be wildcarded by using '*'. If you wish to show more than one source, separate their names with the character '|', eg: gene* | embl type * -type=$value 3 By default every feature in the feature table is extracted. You can set this to be any feature type you wish to extract. See http://www3.ebi.ac.uk/Services/WebFeat/ for a list of the EMBL feature types and see Appendix A of the Swissprot user manual in http://www.expasy.ch/txt/userman.txt for a list of the Swissprot feature types. The type may be wildcarded by using '*'. 
If you wish to extract more than one type, separate their names with the character '|', eg: *UTR | intron sense 0 -sense=$value 4 By default any feature type in the feature table is extracted. You can set this to match any feature sense you wish. 0 - any sense, 1 - forward sense, -1 - reverse sense Default: 0 - any sense, 1 - forward sense, -1 - reverse sense minscore 0.0 -minscore=$value 5 If this is greater than or equal to the maximum score, then any score is permitted maxscore 0.0 -maxscore=$value 6 If this is less than or equal to the minimum score, then any score is permitted tag * -tag=$value 7 Tags are the types of extra values that a feature may have. For example in the EMBL feature table, a 'CDS' type of feature may have the tags '/codon', '/codon_start', '/db_xref', '/EC_number', '/evidence', '/exception', '/function', '/gene', '/label', '/map', '/note', '/number', '/partial', '/product', '/protein_id', '/pseudo', '/standard_name', '/translation', '/transl_except', '/transl_table', or '/usedin'. Some of these tags also have values, for example '/gene' can have the value of the gene name. By default any feature tag in the feature table is extracted. You can set this to match any feature tag you wish to show. The tag may be wildcarded by using '*'. If you wish to extract more than one tag, separate their names with the character '|', eg: gene | label value * -value=$value 8 Tag values are the values associated with a feature tag. Tags are the types of extra values that a feature may have. For example in the EMBL feature table, a 'CDS' type of feature may have the tags '/codon', '/codon_start', '/db_xref', '/EC_number', '/evidence', '/exception', '/function', '/gene', '/label', '/map', '/note', '/number', '/partial', '/product', '/protein_id', '/pseudo', '/standard_name', '/translation', '/transl_except', '/transl_table', or '/usedin'. Only some of these tags can have values, for example '/gene' can have the value of the gene name. 
By default any feature tag value in the feature table is shown. You can set this to match any feature tag value you wish to show. The tag value may be wildcarded by using '*'. If you wish to show more than one tag value, separate their names with the character '|', eg: pax* | 10 STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}extractfeat -auto $sequence $outseq $before $after $source $type $sense $minscore $maxscore $tag $value extractseq Sequence Edit/extractseq TITLE extractseq (EMBOSS) INFO Extract regions from a sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/extractseq.html sequence input input biosequence/genbank extractseq.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank extractseq.out.gb -outseq=$value regions -regions=$value 0 Regions to extract. A set of regions is specified by a set of pairs of positions. The positions are integers. They are separated by any non-digit, non-alpha character. Examples of region specifications are: 24-45, 56-78 1:45, 67=99;765..888 1,5,8,10,23,45,57,99 separate false -separate 1 If this is set true then each specified region is written out as a separate sequence. 
The name of the sequence is created from the name of the original sequence with the start and end positions of the range appended with underscore characters between them, eg: XYZ region 2 to 34 is written as: XYZ_2_34 STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}extractseq -auto $sequence $outseq $regions $separate findkm Enzyme Kinetics/findkm TITLE findkm (EMBOSS) INFO Find Km and Vmax for an enzyme reaction by a Hanes/Woolf plot (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/findkm.html infile input input text/plain findkm.in -infile=$value outfile output output text/plain findkm.out -outfile=$value plot true -noplot graphlb png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graphlb=$value goutfile output output image/pict findkm.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}findkm -auto $infile $outfile $plot $graphlb $goutfile freak Nucleic/Composition/freak TITLE freak (EMBOSS) INFO Residue/base frequency table or plot (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/freak.html seqall input input biosequence/genbank freak.in.gb -seqall=$value -sformat=genbank seqtype any plot false -plot letters gc -letters=$value step 1 -step=$value window 30 -window=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain freak.out -outfile=$value goutfile output output image/pict freak.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}freak -auto $seqall $plot $letters $step $window $graph $outfile $goutfile fuzznuc Nucleic/Motifs/fuzznuc TITLE fuzznuc (EMBOSS) INFO Nucleic acid pattern search (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/fuzznuc.html sequence input input biosequence/genbank fuzznuc.in.gb -sequence=$value -sformat=genbank seqtype dna pattern -pattern=$value 
mismatch 0 -mismatch=$value mmshow false -mmshow accshow false -accshow descshow false -descshow usashow false -usashow 0 Showing the USA (Uniform Sequence Address) of the matching sequences will turn your output file into a 'list' file that can then be read in by many other EMBOSS programs by specifying it with a '@' in front of the filename. complement false -complement outf output output text/plain fuzznuc.out -outf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}fuzznuc -auto $sequence $pattern $mismatch $mmshow $accshow $descshow $usashow $complement $outf fuzzpro Protein/Motifs/fuzzpro TITLE fuzzpro (EMBOSS) INFO Protein pattern search (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/fuzzpro.html sequence input input biosequence/genbank fuzzpro.in.gb -sequence=$value -sformat=genbank seqtype protein pattern -pattern=$value mismatch 0 -mismatch=$value mmshow false -mmshow accshow false -accshow usashow false -usashow 0 Showing the USA (Uniform Sequence Address) of the matching sequences will turn your output file into a 'list' file that can then be read in by many other EMBOSS programs by specifying it with a '@' in front of the filename. 
descshow false -descshow outf output output text/plain fuzzpro.out -outf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}fuzzpro -auto $sequence $pattern $mismatch $mmshow $accshow $usashow $descshow $outf fuzztran Nucleic/Motifs/fuzztran Protein/Motifs/fuzztran TITLE fuzztran (EMBOSS) INFO Protein pattern search after translation (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/fuzztran.html sequence input input biosequence/genbank fuzztran.in.gb -sequence=$value -sformat=genbank seqtype dna frame 1 -frame=$value table 0 -table=$value pattern -pattern=$value mismatch 0 -mismatch=$value mmshow false -mmshow accshow false -accshow usashow false -usashow 0 Showing the USA (Uniform Sequence Address) of the matching sequences will turn your output file into a 'list' file that can then be read in by many other EMBOSS programs by specifying it with a '@' in front of the filename. descshow false -descshow outf output output text/plain fuzztran.out -outf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}fuzztran -auto $sequence $frame $table $pattern $mismatch $mmshow $accshow $usashow $descshow $outf garnier Protein/2D Structure/garnier TITLE garnier (EMBOSS) INFO Predicts protein secondary structure (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/garnier.html sequencea input input biosequence/genbank garnier.in.gb -sequencea=$value -sformat=genbank seqtype PureProtein STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}garnier -auto $sequencea geecee Nucleic/CpG Islands/geecee TITLE geecee (EMBOSS) INFO Calculates the fractional GC content of nucleic acid sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/geecee.html sequence input input biosequence/genbank geecee.in.gb -sequence=$value -sformat=genbank seqtype dna outfile output output text/plain geecee.out -outfile=$value STDOUT output text/plain 
stdout $stdout STDERR output text/plain stderr $stderr ${empath}geecee -auto $sequence $outfile getorf Nucleic/Gene finding/getorf TITLE getorf (EMBOSS) INFO Finds and extracts open reading frames (ORFs) (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/getorf.html sequence input input biosequence/genbank getorf.in.gb -sequence=$value -sformat=genbank seqtype DNA outseq output output biosequence/genbank getorf.out.gb -outseq=$value table 0 -table=$value minsize 30 -minsize=$value find 0 -find=$value 0 This is a small menu of possible output options. The first four options are to select either the protein translation or the original nucleic acid sequence of the open reading frame. There are two possible definitions of an open reading frame: it can either be a region that is free of STOP codons or a region that begins with a START codon and ends with a STOP codon. The last three options are probably only of interest to people who wish to investigate the statistical properties of the regions around potential START or STOP codons. The last option assumes that ORF lengths are calculated between two STOP codons. methionine true -nomethionine 1 START codons at the beginning of protein products will usually code for Methionine, despite what the codon will code for when it is internal to a protein. This qualifier sets all such START codons to code for Methionine by default. circular false -circular reverse true -noreverse 2 Set this to be false if you do not wish to find ORFs in the reverse complement of the sequence. flanking 100 -flanking=$value 3 If you have chosen one of the options of the type of sequence to find that gives the flanking sequence around a STOP or START codon, this allows you to set the number of nucleotides either side of that codon to output. If the region of flanking nucleotides crosses the start or end of the sequence, no output is given for this codon. 
STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}getorf -auto $sequence $outseq $table $minsize $find $methionine $circular $reverse $flanking helixturnhelix Protein/2D Structure/helixturnhelix Protein/Motifs/helixturnhelix TITLE helixturnhelix (EMBOSS) INFO Report nucleic acid binding motifs (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/helixturnhelix.html sequence input input biosequence/genbank helixturnhelix.in.gb -sequence=$value -sformat=genbank seqtype PureProtein mean 238.71 -mean=$value sd 293.61 -sd=$value minsd 2.5 -minsd=$value eightyseven -eightyseven STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}helixturnhelix -auto $sequence $mean $sd $minsd $eightyseven histogramtest Test/histogramtest TITLE histogramtest (EMBOSS) INFO test of graphics (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/histogramtest.html sets 1 -sets=$value points 10 -points=$value bins -bins=$value sidebyside 1 -sidebyside=$value xstart 0 -xstart=$value xend -xend=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value goutfile output output image/pict histogramtest.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}histogramtest -auto $sets $points $bins $sidebyside $xstart $xend $graph $goutfile hmoment Protein/2D Structure/hmoment TITLE hmoment (EMBOSS) INFO Hydrophobic moment calculation (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/hmoment.html seqall input input biosequence/genbank hmoment.in.gb -seqall=$value -sformat=genbank seqtype pureprotein plot false -plot window 10 -window=$value aangle 100 -aangle=$value bangle 160 -bangle=$value baseline 0.35 -baseline=$value double false -double graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain hmoment.out -outfile=$value goutfile output output 
image/pict hmoment.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}hmoment -auto $seqall $plot $window $aangle $bangle $baseline $double $graph $outfile $goutfile iep Protein/Composition/iep TITLE iep (EMBOSS) INFO Calculates the isoelectric point of a protein (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/iep.html sequencea input input biosequence/genbank iep.in.gb -sequencea=$value -sformat=genbank seqtype pureprotein plot false -plot report true -noreport step .5 -step=$value amino 1 -amino=$value termini true -notermini graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain iep.out -outfile=$value goutfile output output image/pict iep.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}iep -auto $sequencea $plot $report $step $amino $termini $graph $outfile $goutfile infoalign Alignment/Multiple/infoalign Information/infoalign TITLE infoalign (EMBOSS) INFO Information on a multiple sequence alignment (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/infoalign.html sequence input input biosequence/genbank infoalign.in.gb -sequence=$value -sformat=genbank seqtype gapany outfile output output text/plain infoalign.out -outfile=$value refseq 0 -refseq=$value 0 If you give the number in the alignment or the name of a sequence, it will be taken to be the reference sequence. The reference sequence is the one against which all the other sequences are compared. If this is set to 0 then the consensus sequence will be used as the reference sequence. By default the consensus sequence is used as the reference sequence. matrix EDNAMAT -matrix=$value plurality 50.0 -plurality=$value 1 Set a cut-off for the % of positive scoring matches below which there is no consensus. The default plurality is taken as 50% of the total weight of all the sequences in the alignment. 
identity 0.0 -identity=$value 2 Provides the facility of setting the required number of identities at a position for it to give a consensus. Therefore, if this is set to 100% only columns of identities contribute to the consensus. html false -html only false -only 3 This is a way of shortening the command line if you only want a few things to be displayed. Instead of specifying: '-nohead -nousa -noname -noalign -nogaps -nogapcount -nosimcount -noidcount -nodiffcount' to get only the sequence length output, you can specify '-only -seqlength' heading -heading usa -usa name -name seqlength -seqlength alignlength -alignlength gaps -gaps gapcount -gapcount idcount -idcount simcount -simcount diffcount -diffcount change -change description -description STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}infoalign -auto $sequence $outfile $refseq $matrix $plurality $identity $html $only $heading $usa $name $seqlength $alignlength $gaps $gapcount $idcount $simcount $diffcount $change $description infoseq Information/infoseq TITLE infoseq (EMBOSS) INFO Displays some simple information about sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/infoseq.html sequence input input biosequence/genbank infoseq.in.gb -sequence=$value -sformat=genbank outfile output output text/plain infoseq.out -outfile=$value html false -html only false -only 0 This is a way of shortening the command line if you only want a few things to be displayed. 
Instead of specifying: '-nohead -noname -noacc -notype -nopgc -nodesc' to get only the length output, you can specify '-only -length' heading -heading usa -usa name -name accession -accession type -type length -length pgc -pgc description -description STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}infoseq -auto $sequence $outfile $html $only $heading $usa $name $accession $type $length $pgc $description isochore Nucleic/Composition/isochore TITLE isochore (EMBOSS) INFO Plots isochores in large DNA sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/isochore.html sequence input input biosequence/genbank isochore.in.gb -sequence=$value -sformat=genbank seqtype dna out output output text/plain isochore.out -out=$value window 1000 -window=$value shift 100 -shift=$value graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value goutfile output output image/pict isochore.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}isochore -auto $sequence $out $window $shift $graph $goutfile lindna Display/lindna TITLE lindna (EMBOSS) INFO Draws linear maps of DNA constructs (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/lindna.html graphout png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graphout=$value inputfile input input text/plain lindna.in -inputfile=$value intersymbol Straight -intersymbol=$value intercolor 1 -intercolor=$value interticks N -interticks=$value gapsize 500 -gapsize=$value ticklines N -ticklines=$value tickheight 1 -tickheight=$value blockheight 1 -blockheight=$value rangeheight 1 -rangeheight=$value gapgroup 1 -gapgroup=$value postext 1 -postext=$value goutfile output output image/pict lindna.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}lindna -auto $graphout $inputfile $intersymbol $intercolor $interticks 
$gapsize $ticklines $tickheight $blockheight $rangeheight $gapgroup $postext $goutfile listor Sequence Edit/listor TITLE listor (EMBOSS) INFO Writes a list file of the logical OR of two sets of sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/listor.html firstset input input biosequence/genbank listor.in.gb -firstset=$value -sformat=genbank secondset input input biosequence/genbank listor.in.gb -secondset=$value -sformat=genbank outlist output output text/plain listor.out -outlist=$value operator OR -operator=$value 0 The following logical operators combine the sequences in the following ways: OR - gives all that occur in one set or the other AND - gives only those which occur in both sets XOR - gives those which only occur in one set or the other, but not in both NOT - gives those which occur in the first set except for those that also occur in the second STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}listor -auto $firstset $secondset $outlist $operator marscan Nucleic/Gene finding/marscan Nucleic/Motifs/marscan TITLE marscan (EMBOSS) INFO Finds MAR/SAR sites in nucleic sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/marscan.html sequence input input biosequence/genbank marscan.in.gb -sequence=$value -sformat=genbank seqtype dna outf output output text/plain marscan.out -outf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}marscan -auto $sequence $outf maskfeat Sequence Edit/maskfeat Feature tables/maskfeat TITLE maskfeat (EMBOSS) INFO Mask off features of a sequence. (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/maskfeat.html sequence input input biosequence/genbank maskfeat.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank maskfeat.out.gb -outseq=$value type repeat* -type=$value 0 By default any feature in the feature table with a type starting 'repeat' is masked. 
You can set this to be any feature type you wish to mask. See http://www3.ebi.ac.uk/Services/WebFeat/ for a list of the EMBL feature types and see Appendix A of the Swissprot user manual in http://www.expasy.ch/txt/userman.txt for a list of the Swissprot feature types. The type may be wildcarded by using '*'. If you wish to mask more than one type, separate their names with spaces or commas, eg: *UTR repeat* maskchar -maskchar=$value 1 Character to use when masking. Default is 'X' for protein sequences, 'N' for nucleic sequences. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}maskfeat -auto $sequence $outseq $type $maskchar maskseq Sequence Edit/maskseq TITLE maskseq (EMBOSS) INFO Mask off regions of a sequence. (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/maskseq.html sequence input input biosequence/genbank maskseq.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank maskseq.out.gb -outseq=$value regions -regions=$value 0 Regions to mask. A set of regions is specified by a set of pairs of positions. The positions are integers. They are separated by any non-digit, non-alpha character. Examples of region specifications are: 24-45, 56-78 1:45, 67=99;765..888 1,5,8,10,23,45,57,99 maskchar -maskchar=$value 1 Character to use when masking. Default is 'X' for protein sequences, 'N' for nucleic sequences. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}maskseq -auto $sequence $outseq $regions $maskchar matcher Alignment/Local/matcher TITLE matcher (EMBOSS) INFO Finds the best local alignments between two sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/matcher.html sequencea input input biosequence/genbank matcher.in.gb -sequencea=$value -sformat=genbank seqtype any sequenceb input input biosequence/genbank matcher.in.gb -sequenceb=$value -sformat=genbank seqtype @($(sequencea.protein) ? 
protein : nucleotide) STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}matcher -auto $sequencea $sequenceb megamerger Alignment/Consensus/megamerger TITLE megamerger (EMBOSS) INFO Merge two large overlapping nucleic acid sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/megamerger.html seqa input input biosequence/genbank megamerger.in.gb -seqa=$value -sformat=genbank seqtype DNA seqb input input biosequence/genbank megamerger.in.gb -seqb=$value -sformat=genbank seqtype DNA outseq output output biosequence/genbank megamerger.out.gb -outseq=$value report output output text/plain megamerger.out -report=$value wordsize 20 -wordsize=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}megamerger -auto $seqa $seqb $outseq $report $wordsize merger Alignment/Consensus/merger TITLE merger (EMBOSS) INFO Merge two overlapping nucleic acid sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/merger.html seqa input input biosequence/genbank merger.in.gb -seqa=$value -sformat=genbank seqtype DNA seqb input input biosequence/genbank merger.in.gb -seqb=$value -sformat=genbank seqtype DNA outseq output output biosequence/genbank merger.out.gb -outseq=$value datafile EDNAMAT -datafile=$value gapopen 50.0 -gapopen=$value gapextend 5 -gapextend=$value outfile stdout -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}merger -auto $seqa $seqb $outseq $datafile $gapopen $gapextend $outfile msbar Nucleic/Mutation/msbar Protein/Mutation/msbar TITLE msbar (EMBOSS) INFO Mutate sequence beyond all recognition (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/msbar.html sequence input input biosequence/genbank msbar.in.gb -sequence=$value -sformat=genbank seqtype any outseq output output biosequence/genbank msbar.out.gb -outseq=$value count 1 -count=$value inframe false -inframe point 0 -point=$value codon 0 
-codon=$value 0 Types of codon mutations to perform. These are only done if the sequence is nucleic. block 0 -block=$value minimum 1 -minimum=$value maximum 10 -maximum=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}msbar -auto $sequence $outseq $count $inframe $point $codon $block $minimum $maximum mwfilter Protein/Composition/mwfilter TITLE mwfilter (EMBOSS) INFO Filter noisy molwts from mass spec output (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/mwfilter.html infile input input text/plain mwfilter.in -infile=$value tolerance 50.0 -tolerance=$value datafile Emwfilter.dat -datafile=$value outfile output output text/plain mwfilter.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}mwfilter -auto $infile $tolerance $datafile $outfile needle Alignment/Global/needle TITLE needle (EMBOSS) INFO Needleman-Wunsch global alignment. (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/needle.html sequencea input input biosequence/genbank needle.in.gb -sequencea=$value -sformat=genbank seqtype any seqall input input biosequence/genbank needle.in.gb -seqall=$value -sformat=genbank seqtype @($(sequencea.protein) ? protein : nucleotide) datafile EDNAMAT -datafile=$value gapopen -gapopen=$value 0 The gap open penalty is the score taken away when a gap is created. The best value depends on the choice of comparison matrix. The default value assumes you are using the EBLOSUM62 matrix for protein sequences, and the EDNAFULL matrix for nucleotide sequences. Allowed values: Floating point number from 1.0 to 100.0 gapextend -gapextend=$value 1 The gap extension penalty is added to the standard gap penalty for each base or residue in the gap. This is how long gaps are penalized. Usually you will expect a few long gaps rather than many short gaps, so the gap extension penalty should be lower than the gap penalty. 
An exception is where one or both sequences are single reads with possible sequencing errors in which case you would expect many single base gaps. You can get this result by setting the gap open penalty to zero (or very low) and using the gap extension penalty to control gap scoring. Allowed values: Floating point number from 0.0 to 10.0 similarity true -nosimilarity 2 Display percent identity and similarity fasta false -fasta 3 Output overlap as fasta sequences STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}needle -auto $sequencea $seqall $datafile $gapopen $gapextend $similarity $fasta newcpgreport Nucleic/CpG Islands/newcpgreport TITLE newcpgreport (EMBOSS) INFO Report CpG rich areas (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/newcpgreport.html sequence input input biosequence/genbank newcpgreport.in.gb -sequence=$value -sformat=genbank seqtype DNA window 100 -window=$value shift 1 -shift=$value minlen 200 -minlen=$value minoe 0.6 -minoe=$value minpc 50. -minpc=$value outfile output output text/plain newcpgreport.out -outfile=$value obsexp true -noobsexp cg true -nocg pc true -nopc STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}newcpgreport -auto $sequence $window $shift $minlen $minoe $minpc $outfile $obsexp $cg $pc newcpgseek Nucleic/CpG Islands/newcpgseek TITLE newcpgseek (EMBOSS) INFO Reports CpG rich regions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/newcpgseek.html sequence input input biosequence/genbank newcpgseek.in.gb -sequence=$value -sformat=genbank seqtype DNA score 17 -score=$value outfile output output text/plain newcpgseek.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}newcpgseek -auto $sequence $score $outfile newseq Sequence Edit/newseq TITLE newseq (EMBOSS) INFO Type in a short new sequence. 
(EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/newseq.html outseq output output biosequence/genbank newseq.out.gb -outseq=$value name -name=$value 0 The name of the sequence should be a single word that you will use to identify the sequence. It should have no (or few) punctuation characters in it. description -description=$value 1 Enter any description of the sequence that you require. type N -type=$value sequence -sequence=$value 2 The sequence itself. Because of the limitation of the operating system, you will only be able to type in a short sequence of (typically) 250 characters, or so. The keyboard will beep at you when you have reached this limit and you will not be able to press the RETURN/ENTER key until you have deleted a few characters. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}newseq -auto $outseq $name $description $type $sequence noreturn Sequence Edit/noreturn TITLE noreturn (EMBOSS) INFO Removes carriage return from ASCII files (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/noreturn.html infile input input text/plain noreturn.in -infile=$value outfile output output text/plain noreturn.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}noreturn -auto $infile $outfile notseq Sequence Edit/notseq TITLE notseq (EMBOSS) INFO Excludes a set of sequences and writes out the remaining ones (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/notseq.html sequence input input biosequence/genbank notseq.in.gb -sequence=$value -sformat=genbank outseq output output biosequence/genbank notseq.out.gb -outseq=$value exclude -exclude=$value 0 Enter a list of sequence names or accession numbers to exclude from the sequences read in. The excluded sequences will be written to the file specified in the 'junkout' parameter. The remainder will be written out to the file specified in the 'outseq' parameter. 
The list of sequence names can be separated by either spaces or commas. The sequence names can be wildcarded. The sequence names are case independent. An example of a list of sequences to be excluded is: myseq, hs*, one two three junkout output output biosequence/genbank notseq.out.gb -junkout=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}notseq -auto $sequence $outseq $exclude $junkout nrscope Utilities/Database creation/nrscope TITLE nrscope (EMBOSS) INFO Converts redundant EMBL-format SCOP file to non-redundant one (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/nrscope.html scopin input input text/plain nrscope.in -scopin=$value scopout output output text/plain nrscope.out -scopout=$value dpdb ./ -dpdb=$value extn .pxyz -extn=$value thresh 95.0 -thresh=$value datafile EDNAMAT -datafile=$value gapopen 10 -gapopen=$value 0 The gap insertion penalty is the score taken away when a gap is created. The best value depends on the choice of comparison matrix. The default value assumes you are using the EBLOSUM62 matrix for protein sequences, and the EDNAFULL matrix for nucleotide sequences. Allowed values: Floating point number from 1.0 to 100.0 gapextend 0.5 -gapextend=$value 1 The gap extension penalty is added to the standard gap penalty for each base or residue in the gap. This is how long gaps are penalized. Usually you will expect a few long gaps rather than many short gaps, so the gap extension penalty should be lower than the gap penalty. An exception is where one or both sequences are single reads with possible sequencing errors in which case you would expect many single base gaps. You can get this result by setting the gap open penalty to zero (or very low) and using the gap extension penalty to control gap scoring. 
Allowed values: Floating point number from 0.0 to 10.0 errf output output text/plain nrscope.out -errf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}nrscope -auto $scopin $scopout $dpdb $extn $thresh $datafile $gapopen $gapextend $errf nthseq Sequence Edit/nthseq TITLE nthseq (EMBOSS) INFO Writes one sequence from a multiple set of sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/nthseq.html sequence input input biosequence/genbank nthseq.in.gb -sequence=$value -sformat=genbank number 1 -number=$value outseq output output biosequence/genbank nthseq.out.gb -outseq=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}nthseq -auto $sequence $number $outseq octanol Protein/Composition/octanol TITLE octanol (EMBOSS) INFO Displays protein hydropathy (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/octanol.html sequencea input input biosequence/genbank octanol.in.gb -sequencea=$value -sformat=genbank seqtype pureprotein graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value datafile input input text/plain octanol.in -datafile=$value width 19 -width=$value octanolplot false -octanolplot interfaceplot false -interfaceplot differenceplot true -nodifferenceplot goutfile output output image/pict octanol.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}octanol -auto $sequencea $graph $datafile $width $octanolplot $interfaceplot $differenceplot $goutfile oddcomp Protein/Motifs/oddcomp TITLE oddcomp (EMBOSS) INFO Finds protein sequence regions with a biased composition (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/oddcomp.html sequence input input biosequence/genbank oddcomp.in.gb -sequence=$value -sformat=genbank seqtype Protein outfile output output text/plain oddcomp.out -outfile=$value compdata input input text/plain oddcomp.in -compdata=$value 
window 30 -window=$value 0 This is the size of window in which to count. Thus if you want to count frequencies in a 40 aa stretch you should enter 40 here. ignorebz true -noignorebz 1 The amino acid code B represents Asparagine or Aspartic acid and the code Z represents Glutamine or Glutamic acid. These are not commonly used codes and you may wish not to count words containing them, just noting them in the count of 'Other' words. STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}oddcomp -auto $sequence $outfile $compdata $window $ignorebz palindrome Nucleic/Repeats/palindrome TITLE palindrome (EMBOSS) INFO Looks for inverted repeats in a nucleotide sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/palindrome.html insequence input input biosequence/genbank palindrome.in.gb -insequence=$value -sformat=genbank seqtype nucleotide minpallen 10 -minpallen=$value maxpallen 100 -maxpallen=$value gaplimit 100 -gaplimit=$value nummismatches 0 -nummismatches=$value 0 Allowed values: Positive integer outfile output output text/plain palindrome.out -outfile=$value overlap true -nooverlap STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}palindrome -auto $insequence $minpallen $maxpallen $gaplimit $nummismatches $outfile $overlap pasteseq Sequence Edit/pasteseq TITLE pasteseq (EMBOSS) INFO Insert one sequence into another (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pasteseq.html sequence input input biosequence/genbank pasteseq.in.gb -sequence=$value -sformat=genbank seqtype any insseq input input biosequence/genbank pasteseq.in.gb -insseq=$value -sformat=genbank seqtype @($(sequence.protein) ? protein : nucleotide) outseq output output biosequence/genbank pasteseq.out.gb -outseq=$value pos -pos=$value 0 The position in the main input sequence to insert after. To insert before the start use the position 0. 
STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pasteseq -auto $sequence $insseq $outseq $pos patmatdb Protein/Motifs/patmatdb TITLE patmatdb (EMBOSS) INFO Search a protein sequence with a motif (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/patmatdb.html sequence input input biosequence/genbank patmatdb.in.gb -sequence=$value -sformat=genbank seqtype Protein motif -motif=$value 0 Patterns for patmatdb are based on the format of pattern used in the PROSITE database. For example: '[DE](2)HS{P}X(2)PX(2,4)C' means two Asps or Glus in any order followed by His, Ser, any residue other than Pro, then two of any residue followed by Pro followed by two to four of any residue followed by Cys. The search is case-independent, so 'AAA' matches 'aaa'. outfile output output text/plain patmatdb.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}patmatdb -auto $sequence $motif $outfile patmatmotifs Protein/Motifs/patmatmotifs TITLE patmatmotifs (EMBOSS) INFO Search a PROSITE motif database with a protein sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/patmatmotifs.html sequence input input biosequence/genbank patmatmotifs.in.gb -sequence=$value -sformat=genbank seqtype Protein outfile output output text/plain patmatmotifs.out -outfile=$value full false -full prune true -noprune 0 Ignore simple patterns. If this is true then these simple post-translational modification sites are not reported: myristyl, asn_glycosylation, camp_phospho_site, pkc_phospho_site, ck2_phospho_site, and tyr_phospho_site. 
STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}patmatmotifs -auto $sequence $outfile $full $prune patmattest Test/patmattest TITLE patmattest (EMBOSS) INFO test of pattern matching (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/patmattest.html sequence input input biosequence/genbank patmattest.in.gb -sequence=$value -sformat=genbank seqtype any expression -expression=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}patmattest -auto $sequence $expression pepcoil Protein/2D Structure/pepcoil Protein/Motifs/pepcoil TITLE pepcoil (EMBOSS) INFO Predicts coiled coil regions (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepcoil.html sequence input input biosequence/genbank pepcoil.in.gb -sequence=$value -sformat=genbank seqtype PureProtein window 28 -window=$value coil true -nocoil frame -frame other true -noother outfile output output text/plain pepcoil.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepcoil -auto $sequence $window $coil $frame $other $outfile pepinfo Protein/Composition/pepinfo TITLE pepinfo (EMBOSS) INFO Plots simple amino acid properties in parallel (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepinfo.html inseq input input biosequence/genbank pepinfo.in.gb -inseq=$value -sformat=genbank seqtype protein graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain pepinfo.out -outfile=$value generalplot true -nogeneralplot hydropathyplot true -nohydropathyplot hwindow 9 -hwindow=$value aaproperties Eaa_properties.dat -aaproperties=$value aahydropathy Eaa_hydropathy.dat -aahydropathy=$value goutfile output output image/pict pepinfo.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepinfo -auto $inseq $graph $outfile $generalplot 
$hydropathyplot $hwindow $aaproperties $aahydropathy $goutfile pepnet Display/pepnet Protein/2D Structure/pepnet TITLE pepnet (EMBOSS) INFO Displays proteins as a helical net (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepnet.html sequence input input biosequence/genbank pepnet.in.gb -sequence=$value -sformat=genbank seqtype Protein amphipathic -amphipathic 0 If this is true then the residues ACFGILMVWY are marked as squares and all other residues are unmarked. This overrides any other markup that you may have specified using the qualifiers '-squares', '-diamonds' and '-octags'. squares ILVM -squares=$value 1 By default the aliphatic residues ILVM are marked with squares. diamonds DENQST -diamonds=$value 2 By default the residues DENQST are marked with diamonds. octags HKR -octags=$value 3 By default the positively charged residues HKR are marked with octagons. data false -data 4 Output the data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value goutfile output output image/pict pepnet.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepnet -auto $sequence $amphipathic $squares $diamonds $octags $data $graph $goutfile pepstats Protein/Composition/pepstats TITLE pepstats (EMBOSS) INFO Protein statistics (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepstats.html sequencea input input biosequence/genbank pepstats.in.gb -sequencea=$value -sformat=genbank seqtype PureProtein termini true -notermini aadata Eamino.dat -aadata=$value 0 Molecular weight data for amino acids outfile output output text/plain pepstats.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepstats -auto $sequencea $termini $aadata $outfile pepwheel Display/pepwheel Protein/2D Structure/pepwheel TITLE pepwheel (EMBOSS) INFO Shows protein sequences as helices (EMBOSS) HELP 
http://www.uk.embnet.org/Software/EMBOSS/Apps/pepwheel.html sequence input input biosequence/genbank pepwheel.in.gb -sequence=$value -sformat=genbank seqtype Protein amphipathic -amphipathic 0 If this is true then the residues ACFGILMVWY are marked as squares and all other residues are unmarked. This overrides any other markup that you may have specified using the qualifiers '-squares', '-diamonds' and '-octags'. wheel true -nowheel steps 18 -steps=$value 1 The number of residues plotted per turn is this value divided by the 'turns' value. turns 5 -turns=$value 2 The number of residues plotted per turn is the 'steps' value divided by this value. squares ILVM -squares=$value 3 By default the aliphatic residues ILVM are marked with squares. diamonds DENQST -diamonds=$value 4 By default the residues DENQST are marked with diamonds. octags HKR -octags=$value 5 By default the positively charged residues HKR are marked with octagons. data false -data 6 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain pepwheel.out -outfile=$value goutfile output output image/pict pepwheel.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepwheel -auto $sequence $amphipathic $wheel $steps $turns $squares $diamonds $octags $data $graph $outfile $goutfile pepwindow Protein/Composition/pepwindow TITLE pepwindow (EMBOSS) INFO Displays protein hydropathy (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepwindow.html sequencea input input biosequence/genbank pepwindow.in.gb -sequencea=$value -sformat=genbank seqtype pureprotein graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value datafile input input text/plain pepwindow.in -datafile=$value length 7 -length=$value goutfile output output image/pict pepwindow.pict -goutfile=$value STDOUT output text/plain stdout 
$stdout STDERR output text/plain stderr $stderr ${empath}pepwindow -auto $sequencea $graph $datafile $length $goutfile pepwindowall Protein/Composition/pepwindowall TITLE pepwindowall (EMBOSS) INFO Displays protein hydropathy of a set of sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/pepwindowall.html msf input input biosequence/genbank pepwindowall.in.gb -msf=$value -sformat=genbank seqtype gapprotein graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value datafile input input text/plain pepwindowall.in -datafile=$value length 7 -length=$value goutfile output output image/pict pepwindowall.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}pepwindowall -auto $msf $graph $datafile $length $goutfile plotcon Alignment/Multiple/plotcon TITLE plotcon (EMBOSS) INFO Plots the quality of conservation of a sequence alignment (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/plotcon.html msf input input biosequence/genbank plotcon.in.gb -msf=$value -sformat=genbank seqtype gapany winsize 4 -winsize=$value 0 Number of columns to average alignment quality over. The larger this value is, the smoother the plot will be. 
scorefile EDNAMAT -scorefile=$value data false -data 1 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value outfile output output text/plain plotcon.out -outfile=$value goutfile output output image/pict plotcon.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}plotcon -auto $msf $winsize $scorefile $data $graph $outfile $goutfile plotorf Nucleic/Gene finding/plotorf Nucleic/Translation/plotorf TITLE plotorf (EMBOSS) INFO Plot potential open reading frames (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/plotorf.html sequence input input biosequence/genbank plotorf.in.gb -sequence=$value -sformat=genbank seqtype dna graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value start ATG -start=$value stop TAA,TAG,TGA -stop=$value goutfile output output image/pict plotorf.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}plotorf -auto $sequence $graph $start $stop $goutfile polydot Alignment/Dot plots/polydot TITLE polydot (EMBOSS) INFO Displays all-against-all dotplots of a set of sequences (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/polydot.html sequences input input biosequence/genbank polydot.in.gb -sequences=$value -sformat=genbank seqtype any wordsize 6 -wordsize=$value data false -data 0 Output the match data to a file instead of plotting it graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value gap 10 -gap=$value 1 This specifies the size of the gap that is used to separate the individual dotplots in the display. The size is measured in residues, as displayed in the output. 
boxit true -noboxit dumpfeat false -dumpfeat format gff -format=$value ext gff -ext=$value outfile output output text/plain polydot.out -outfile=$value goutfile output output image/pict polydot.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}polydot -auto $sequences $wordsize $data $graph $gap $boxit $dumpfeat $format $ext $outfile $goutfile preg Protein/Motifs/preg TITLE preg (EMBOSS) INFO Regular expression search of a protein sequence (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/preg.html sequence input input biosequence/genbank preg.in.gb -sequence=$value -sformat=genbank seqtype protein outfile output output text/plain preg.out -outfile=$value pattern -pattern=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}preg -auto $sequence $outfile $pattern prettyplot Alignment/Multiple/prettyplot Display/prettyplot TITLE prettyplot (EMBOSS) INFO Displays aligned sequences, with colouring and boxing (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/prettyplot.html msf input input biosequence/genbank prettyplot.in.gb -msf=$value -sformat=genbank seqtype gapany residuesperline 50 -residuesperline=$value 0 The number of residues to be displayed on each line resbreak -resbreak=$value ccolours true -noccolours cidentity RED -cidentity=$value csimilarity GREEN -csimilarity=$value cother BLACK -cother=$value docolour false -docolour title true -notitle shade -shade=$value 1 Set to BPLW for normal shading so for pair = 1.5,1.0,0.5 and shade = BPLW Residues score Colour 1.5 or over....... BLACK (B) 1.0 to 1.5 ....... BROWN (P) 0.5 to 1.0 ....... WHEAT (L) under 0.5 ....... WHITE (W) The only four letters allowed are BPLW, in any order. 
pair 1.5,1.0,0.5 -pair=$value identity 0 -identity=$value box true -nobox boxcol false -boxcol boxcolval GREY -boxcolval=$value consensus false -consensus name true -noname maxnamelen 10 -maxnamelen=$value number true -nonumber listoptions true -nolistoptions plurality -plurality=$value collision true -nocollision alternative 0 -alternative=$value 2 Use alternative collisions routine 0) Normal collision check. (default) 1) checks identical scores with the max score found. So if any other residue matches the identical score then a collision has occurred. 2) If another residue has a greater than or equal to matching score and these do not match then a collision has occurred. 3) Checks all those not in the current consensus. If any of these give a top score for matching or identical scores then a collision has occurred. portrait false -portrait matrixfile EDNAMAT -matrixfile=$value showscore -1 -showscore=$value data false -data graph png|postscript|mac|mac8|colourps|hpgl |meta|tektronics|x11|none|data -graph=$value goutfile output output image/pict prettyplot.pict -goutfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}prettyplot -auto $msf $residuesperline $resbreak $ccolours $cidentity $csimilarity $cother $docolour $title $shade $pair $identity $box $boxcol $boxcolval $consensus $name $maxnamelen $number $listoptions $plurality $collision $alternative $portrait $matrixfile $showscore $data $graph $goutfile prettyseq Display/prettyseq Nucleic/Translation/prettyseq TITLE prettyseq (EMBOSS) INFO Output sequence with translated ranges (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/prettyseq.html sequence input input biosequence/genbank prettyseq.in.gb -sequence=$value -sformat=genbank seqtype DNA cfile Eacc.cut|Eadenovirus5.cut|Eadenovirus7.cut|Eaidlav.cut|Eanasp.cut|Eani.cut |Eanidmit.cut|Easn.cut|Eath.cut|Eatu.cut|Eavi.cut|Ebja.cut |Ebly.cut|Ebme.cut|Ebmo.cut|Ebna.cut|Ebov.cut|Ebovsp.cut 
|Ebst.cut|Ebsu.cut|Ecac.cut|Ecal.cut|Eccr.cut|Ecel.cut |Echi.cut|Echicken.cut|Echisp.cut|Echk.cut|Echmp.cut|Echnt.cut |Echos.cut|Echzm.cut|Echzmrubp.cut|Ecpx.cut|Ecre.cut|Ecrisp.cut |Ectr.cut|Edayhoff.cut|Eddi.cut|Edog.cut|Edro.cut|Edrosophila.cut |Eeca.cut|Eeco.cut|Eecoli.cut|Ef1.cut|Efish.cut|Efmdvpolyp.cut |Eham.cut|Ehha.cut|Ehin.cut|Ehma.cut|Ehum.cut|Ehuman.cut |Ekla.cut|Ekpn.cut|Ella.cut|Emac.cut|Emaize.cut|Emixlg.cut |Emouse.cut|Emsa.cut|Emse.cut|Emta.cut|Emtu.cut|Emus.cut |Emussp.cut|Emva.cut|Emze.cut|Emzecp.cut|Encr.cut|Eneu.cut |Engo.cut|Eoncsp.cut|Epae.cut|Epea.cut|Epet.cut|Epfa.cut |Ephix174.cut|Ephv.cut|Ephy.cut|Epig.cut|Epolyomaa2.cut|Epombe.cut |Epombecai.cut|Epot.cut|Eppu.cut|Epse.cut|Epsy.cut|Epvu.cut |Erab.cut|Erabbit.cut|Erabsp.cut|Erat.cut|Eratsp.cut|Erca.cut |Erhm.cut|Eric.cut|Erle.cut|Erme.cut|Ersp.cut|Esalsp.cut |Esau.cut|Esco.cut|Esgi.cut|Eshp.cut|Eshpsp.cut|Esli.cut |Eslm.cut|Esma.cut|Esmi.cut|Esmu.cut|Esoy.cut|Espi.cut |Espn.cut|Espo.cut|Espu.cut|Esta.cut|Esty.cut|Esus.cut |Esv40.cut|Esyhsp.cut|Esynsp.cut|Etbr.cut|Etcr.cut|Eter.cut |Etetsp.cut|Etob.cut|Etobcp.cut|Etom.cut|Etrb.cut|Evco.cut |Ewht.cut|Exel.cut|Exenopus.cut|Eyeast.cut|Eyeastcai.cut|Eyen.cut |Eysc.cut|Eyscmt.cut|Eysp.cut|Ezebrafish.cut|Ezma.cut -cfile=$value range -range=$value width 60 -width=$value ruler true -noruler plabel true -noplabel nlabel true -nonlabel outfile output output text/plain prettyseq.out -outfile=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}prettyseq -auto $sequence $cfile $range $width $ruler $plabel $nlabel $outfile prima Nucleic/Primers/prima TITLE prima (EMBOSS) INFO Selects primers for PCR and DNA amplification. 
(EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/prima.html sequence input input biosequence/genbank prima.in.gb -sequence=$value -sformat=genbank targetrange false -targetrange targetstart -targetstart=$value targetend -targetend=$value overlap 50 -overlap=$value minprimerlen 18 -minprimerlen=$value maxprimerlen 22 -maxprimerlen=$value minpmgccont .40 -minpmgccont=$value maxpmgccont .55 -maxpmgccont=$value minprimertm 53 -minprimertm=$value maxprimertm 58 -maxprimertm=$value minprodlen 100 -minprodlen=$value maxprodlen 300 -maxprodlen=$value minprodgccont .40 -minprodgccont=$value maxprodgccont .55 -maxprodgccont=$value saltconc 50 -saltconc=$value dnaconc 50 -dnaconc=$value list false -list outf output output text/plain prima.out -outf=$value STDOUT output text/plain stdout $stdout STDERR output text/plain stderr $stderr ${empath}prima -auto $sequence $targetrange $targetstart $targetend $overlap $minprimerlen $maxprimerlen $minpmgccont $maxpmgccont $minprimertm $maxprimertm $minprodlen $maxprodlen $minprodgccont $maxprodgccont $saltconc $dnaconc $list $outf primer3 Nucleic/Primers/primer3 TITLE primer3 (EMBOSS) INFO Picks PCR primers and hybridization oligos (EMBOSS) HELP http://www.uk.embnet.org/Software/EMBOSS/Apps/primer3.html input true sequence input input biosequence/genbank primer3.in.gb -sequence=$value -sformat=genbank seqtype dna advanced true program true explainflag false -explainflag 0 If this flag is non-0, produce LEFT-EXPLAIN, RIGHT-EXPLAIN, and INTERNAL-OLIGO-EXPLAIN output tags, which are intended to provide information on the number of oligos and primer pairs that Primer3 examined, and statistics on the number discarded for various reasons. fileflag false -fileflag 1 If the associated value is non-0, then Primer3 creates two output files for each input SEQUENCE. 
File (sequence-id).for lists all acceptable forward primers for (sequence-id), and (sequence-id).rev lists all acceptable reverse primers for (sequence-id), where (sequence-id) is the value of the SEQUENCE-ID tag (which must be supplied). In addition, if the input tag TASK is 1 or 4, Primer3 produces a file (sequence-id).int, which lists all acceptable internal oligos. task 0 -task=$value 2 Tell Primer3 what task to perform. Legal values are 0: 'Pick PCR primers', 1: 'Pick PCR primers and hybridization probe', 2: 'Pick forward primer only', 3: 'Pick reverse primer only', 4: 'Pick hybridization probe only'. The tasks should be self explanatory. Briefly, an 'internal oligo' is intended to be used as a hybridization probe (hyb probe) to detect the PCR product after amplification. numreturn 5 -numreturn=$value 3 The maximum number of primer pairs to return. Primer pairs returned are sorted by their 'quality', in other words by the value of the objective function (where a lower number indicates a better primer pair). Caution: setting this parameter to a large value will increase running time. firstbaseindex 1 -firstbaseindex=$value 4 This parameter is the index of the first base in the input sequence. For input and output using 1-based indexing (such as that used in GenBank and to which many users are accustomed) set this parameter to 1. For input and output using 0-based indexing set this parameter to 0. (This parameter also affects the indexes in the contents of the files produced when the primer file flag is set.) sequenceopt true includedregion -includedregion=$value 5 A sub-region of the given sequence in which to pick primers. For example, often the first dozen or so bases of a sequence are vector, and should be excluded from consideration. The value for this parameter has the form (start),(end) where (start) is the index of the first base to consider, and (end) is the last in the primer-picking region. 
target -target=$value 6 If one or more Targets is specified then a legal primer pair must flank at least one of them. A Target might be a simple sequence repeat site (for example a CA repeat) or a single-base-pair polymorphism. The value should be a space-separated list of (start),(end) pairs where (start) is the index of the first base of a Target, and (end) is the last. E.g. 50,51 requires primers to surround the 2 bases at positions 50 and 51. excludedregion -excludedregion=$value 7 Primer oligos may not overlap any region specified in this tag. The associated value must be a space-separated list of (start),(end) pairs where (start) is the index of the first base of the excluded region, and (end) is the last. This tag is useful for tasks such as excluding regions of low sequence quality or for excluding regions containing repetitive elements such as ALUs or LINEs. E.g. 401,407 68,70 forbids selection of primers in the 7 bases starting at 401 and the 3 bases at 68. forwardinput -forwardinput=$value 8 The sequence of a forward primer to check and around which to design reverse primers and optional internal oligos. Must be a substring of SEQUENCE. reverseinput -reverseinput=$value 9 The sequence of a reverse primer to check and around which to design forward primers and optional internal oligos. Must be a substring of the reverse strand of SEQUENCE. primer true pickanyway false -pickanyway 10 If true pick a primer pair even if LEFT-INPUT, RIGHT-INPUT, or INTERNAL-OLIGO-INPUT violates specific constraints. mispriminglibrary input input text/plain primer3.in -mispriminglibrary=$value maxmispriming 12.00 -maxmispriming=$value 11 The maximum allowed weighted similarity with any sequence in MISPRIMING-LIBRARY. pairmaxmispriming 24.00 -pairmaxmispriming=$value 12 The maximum allowed sum of weighted similarities of a primer pair (one similarity for each primer) with any single sequence in MISPRIMING-LIBRARY. 
gcclamp 0 -gcclamp=$value 13 Require the specified number of consecutive Gs and Cs at the 3' end of both the forward and reverse primer. (This parameter has no effect on the internal oligo if one is requested.) osize 20 -osize=$value 14 Optimum length (in bases) of a primer oligo. Primer3 will attempt to pick primers close to this length. minsize 18 -minsize=$value 15 Minimum acceptable length of a primer. Must be greater than 0 and less than or equal to MAX-SIZE. maxsize 27 -maxsize=$value 16 Maximum acceptable length (in bases) of a primer. Currently this parameter cannot be larger than 35. This limit is governed by the maximum oligo size for which Primer3's melting-temperature is valid. otm 60.0 -otm=$value 17 Optimum melting temperature(Celsius) for a primer oligo. Primer3 will try to pick primers with melting temperatures that are close to this temperature. The oligo melting temperature formula in Primer3 is that given in Rychlik, Spencer and Rhoads, Nucleic Acids Research, vol 18, num 12, pp 6409-6412 and Breslauer, Frank, Bloeker and Marky, Proc. Natl. Acad. Sci. USA, vol 83, pp 3746-3750. Please refer to the former paper for background discussion. mintm 57.0 -mintm=$value 18 Minimum acceptable melting temperature(Celsius) for a primer oligo. maxtm 63.0 -maxtm=$value 19 Maximum acceptable melting temperature(Celsius) for a primer oligo. maxdifftm 100.0 -maxdifftm=$value 20 Maximum acceptable (unsigned) difference between the melting temperatures of the forward and reverse primers. ogcpercent 50.0 -ogcpercent=$value 21 Primer optimum GC percent. mingc 20.0 -mingc=$value 22 Minimum allowable percentage of Gs and Cs in any primer. maxgc 80.0 -maxgc=$value 23 Maximum allowable percentage of Gs and Cs in any primer generated by Primer. saltconc 50.0 -saltconc=$value 24 The millimolar concentration of salt (usually KCl) in the PCR. Primer3 uses this argument to calculate oligo melting temperatures. 
dnaconc 50.0 -dnaconc=$value 25 The nanomolar concentration of annealing oligos in the PCR. Primer3 uses this argument to calculate oligo melting temperatures. The default (50nM) works well with the standard protocol used at the Whitehead/MIT Center for Genome Research--0.5 microliters of 20 micromolar concentration for each primer oligo in a 20 microliter reaction with 10 nanograms template, 0.025 units/microliter Taq polymerase in 0.1 mM each dNTP, 1.5mM MgCl2, 50mM KCl, 10mM Tris-HCL (pH 9.3) using 35 cycles with an annealing temperature of 56 degrees Celsius. This parameter corresponds to 'c' in Rychlik, Spencer and Rhoads' equation (ii) (Nucleic Acids Research, vol 18, num 12) where a suitable value (for a lower initial concentration of template) is 'empirically determined'. The value of this parameter is less than the actual concentration of oligos in the reaction because it is the concentration of annealing oligos, which in turn depends on the amount of template (including PCR product) in a given cycle. This concentration increases a great deal during a PCR; fortunately PCR seems quite robust for a variety of oligo melting temperatures. See ADVICE FOR PICKING PRIMERS. numnsaccepted 0 -numnsaccepted=$value 26 Maximum number of unknown bases (N) allowable in any primer. selfany 8.00 -selfany=$value 27 The maximum allowable local alignment score when testing a single primer for (local) self-complementarity and the maximum allowable local alignment score when testing for complementarity between forward and reverse primers. Local self-complementarity is taken to predict the tendency of primers to anneal to each other without necessarily causing self-priming in the PCR. The scoring system gives 1.00 for complementary bases, -0.25 for a match of any base (or N) with an N, -1.00 for a mismatch, and -2.00 for a gap. Only single-base-pair gaps are allowed. 
For example, the alignment 5' ATCGNA 3' ...|| | | 3' TA-CGT 5' is allowed (and yields a score of 1.75), but the alignment 5' ATCCGNA 3' ...|| | | 3' TA--CGT 5' is not considered. Scores are non-negative, and a score of 0.00 indicates that there is no reasonable local alignment between two oligos. selfend 3.00 -selfend=$value 28 The maximum allowable 3'-anchored global alignment score when testing a single primer for self-complementarity, and the maximum allowable 3'-anchored global alignment score when testing for complementarity between forward and reverse primers. The 3'-anchored global alignment score is taken to predict the likelihood of PCR-priming primer-dimers, for example 5' ATGCCCTAGCTTCCGGATG 3' .............||| ||||| ..........3' AAGTCCTACATTTAGCCTAGT 5' or 5' AGGCTATGGGCCTCGCGA 3' ...............|||||| ............3' AGCGCTCCGGGTATCGGA 5' The scoring system is as for the Maximum Complementarity argument. In the examples above the scores are 7.00 and 6.00 respectively. Scores are non-negative, and a score of 0.00 indicates that there is no reasonable 3'-anchored global alignment between two oligos. In order to estimate 3'-anchored global alignments for candidate primers and primer pairs, Primer assumes that the sequence from which to choose primers is presented 5' to 3'. It is nonsensical to provide a larger value for this parameter than for the Maximum (local) Complementarity parameter because the score of a local alignment will always be at least as great as the score of a global alignment. maxpolyx 5 -maxpolyx=$value 29 The maximum allowable length of a mononucleotide repeat in a primer, for example AAAAAA. product true productosize 200 -productosize=$value 30 The optimum size for the PCR product. 0 indicates that there is no optimum product size. 
productsizerange 100-300 -productsizerange=$value 31 The associated values specify the lengths of the product that the user wants the primers to create, and is a space separated list of elements of the form (x)-(y) where an (x)-(y) pair is a legal range of lengths for the product. For example, if one wants PCR products to be between 100 to 150 bases (inclusive) then one would set this parameter to 100-150. If one desires PCR products in either the range from 100 to 150 bases or in the range from 200 to 250 bases then one would set this parameter to 100-150 200-250. Primer3 favors ranges to the left side of the parameter string. Primer3 will return legal primer pairs in the first range regardless of the value of the objective function for these pairs. Only if there are an insufficient number of primers in the first range will Primer3 return primers in a subsequent range. productotm 0.0 -productotm=$value 32 The optimum melting temperature for the PCR product. 0 indicates that there is no optimum temperature. productmintm -1000000.0 -productmintm=$value 33 The minimum allowed melting temperature of the amplicon. Please see the documentation on the maximum melting temperature of the product for details. productmaxtm 1000000.0 -productmaxtm=$value 34 The maximum allowed melting temperature of the amplicon. Product Tm is calculated using the formula from Bolton and McCarthy, PNAS 84:1390 (1962) as presented in Sambrook, Fritsch and Maniatis, Molecular Cloning, p 11.46 (1989, CSHL Press). Tm = 81.5 + 16.6(log10([Na+])) + .41*(%GC) - 600/length Where [Na+] is the molar sodium concentration, (%GC) is the percent of Gs and Cs in the sequence, and length is the length of the sequence. A similar formula is used by the prime primer selection program in GCG (http://www.gcg.com), which instead uses 675.0/length in the last term (after F. Baldino, Jr, M.-F. Chesselet, and M.E. Lewis, Methods in Enzymology 168:766 (1989) eqn (1) on page 766 without the mismatch and formamide terms). 
The formulas here and in Baldino et al. assume Na+ rather than K+. According to J.G. Wetmur, Critical Reviews in BioChem. and Mol. Bio. 26:227 (1991) 50 mM K+ should be equivalent in these formulae to .2 M Na+. Primer3 uses the same salt concentration value for calculating both the primer melting temperature and the oligo melting temperature. If you are planning to use the PCR product for hybridization later this behavior will not give you the Tm under hybridization conditions. primerweights true maxendstability 9.0 -maxendstability=$value 35 The maximum stability for the five 3' bases of a forward or reverse primer. Bigger numbers mean more stable 3' ends. The value is the maximum delta G for duplex disruption for the five 3' bases as calculated using the nearest neighbor parameters published in Breslauer, Frank, Bloeker and Marky, Proc. Natl. Acad. Sci. USA, vol 83, pp 3746-3750. Primer3 uses a completely permissive default value for backward compatibility (which we may change in the next release). Rychlik recommends a maximum value of 9 (Wojciech Rychlik, 'Selection of Primers for Polymerase Chain Reaction' in BA White, Ed., 'Methods in Molecular Biology, Vol. 15: PCR Protocols: Current Methods and Applications', 1993, pp 31-40, Humana Press, Totowa NJ). oligosinput true oligoexcludedregion -oligoexcludedregion=$value 36 Middle oligos may not overlap any region specified by this tag. The associated value must be a space-separated list of (start),(end) pairs, where (start) is the index of the first base of an excluded region, and (end) is the last. Often one would make Target regions excluded regions for internal oligos. oligoinput -oligoinput=$value 37 The sequence of an internal oligo to check and around which to design forward and reverse primers. Must be a substring of SEQUENCE. oligos true oligoosize 20 -oligoosize=$value 38 Optimum length (in bases) of an internal oligo. Primer3 will attempt to pick primers close to this length. 
oligominsize 18 -oligominsize=$value 39 Minimum acceptable length of an internal oligo. Must be greater than 0 and less than or equal to INTERNAL-OLIGO-MAX-SIZE. oligomaxsize 27 -oligomaxsize=$value 40 Maximum acceptable length (in bases) of an internal oligo. Currently this parameter cannot be larger than 35. This limit is governed by maximum oligo size for which Primer3's melting-temperature is valid. oligootm 60.0 -oligootm=$value 41 Optimum melting temperature (Celsius) for an internal oligo. Primer3 will try to pick oligos with melting temperatures that are close to this temperature. The oligo melting temperature formula in Primer3 is that given in Rychlik, Spencer and Rhoads, Nucleic Acids Research, vol 18, num 12, pp 6409-6412 and Breslauer, Frank, Bloeker and Marky, Proc. Natl. Acad. Sci. USA, vol 83, pp 3746-3750. Please refer to the former paper for background discussion. oligomintm 57.0 -oligomintm=$value 42 Minimum acceptable melting temperature(Celsius) for an internal oligo. oligomaxtm 63.0 -oligomaxtm=$value 43 Maximum acceptable melting temperature (Celsius) for an internal oligo. oligoogcpercent 50.0 -oligoogcpercent=$value 44 Intern