# This is the InterProScan configuration file

##
## Temporary files and directory
##
# The text [UNIQUE], if present, will be replaced by a value unique to your running instance
# Temporary files used by the analyses will be placed in directories here:
temporary.file.directory.suffix=[UNIQUE]
temporary.file.directory=temp/${temporary.file.directory.suffix}

##
## H2 database
##
# The H2 database is copied by the standalone version of InterProScan
i5.h2.database.original.location=work/template/interpro.zip
# LOCK_TIMEOUT: Sets the lock timeout (in milliseconds) for the current session
i5.database.connection.url=jdbc:h2:mem:interpro;LOCK_TIMEOUT=10000000

##
## binary paths
##
# Configure which Perl and Python interpreters are used to run the member databases' Perl/Python binaries
perl.command=perl
python3.command=python3

# Binary file locations
#rpsblast
binary.rpsblast.path=bin/blast/ncbi-blast-2.6.0+/rpsblast
#rpsbproc
binary.rpsbproc.path=bin/blast/ncbi-blast-2.6.0+/rpsbproc
#hmmer 3
binary.hmmer3.path=bin/hmmer/hmmer3/3.1b1
binary.hmmer3.hmmscan.path=bin/hmmer/hmmer3/3.1b1/hmmscan
binary.hmmer3.hmmsearch.path=bin/hmmer/hmmer3/3.1b1/hmmsearch
#hmmer 2
binary.hmmer2.hmmsearch.path=bin/hmmer/hmmer2/2.3.2/hmmsearch
binary.hmmer2.hmmpfam.path=bin/hmmer/hmmer2/2.3.2/hmmpfam
binary.fingerprintscan.path=bin/prints/fingerPRINTScan
binary.coils.path=bin/ncoils/2.2.1/ncoils
binary.prodom.2006.1.prodomblast3i.pl.path=bin/prodom/2006.1/ProDomBlast3i.pl
# Note: the correct PROSITE binary distribution for your platform can be downloaded from ftp://ftp.expasy.org/databases/prosite/ps_scan/
binary.prosite.psscan.pl.path=bin/prosite/ps_scan.pl
binary.prosite.pfscan.path=bin/prosite/pfscan
binary.prosite.pfsearch.path=bin/prosite/pfsearch
binary.prosite.pfsearch.wrapperpath=bin/prosite/pfsearch_wrapper.py
#CATH-Gene3D
cath.resolve.hits.path=bin/gene3d/4.2.0/cath-resolve-hits
#panther
binary.pantherscore.path=bin/panther/panther_score.py
#superfamily
binary.superfamily.1.75.ass3.pl.path=bin/superfamily/1.75/ass3_single_threaded.pl
#PIRSF
binary.pirsf.pl.path=bin/pirsf/3.02/pirsf.pl
binary.blast.2.2.19.path=bin/blast/2.2.19
binary.getorf.path=bin/nucleotide/getorf
#SFLD
sfld.postprocess.command=bin/sfld/sfld_postprocess
#signalp
# Note: SignalP binary not distributed with InterProScan 5, please install separately e.g. in bin/signalp/4.1/signalp
binary.signalp.path=bin/signalp/4.1/signalp
signalp.perl.library.dir=bin/signalp/4.1/lib
#TMHMM 2.0
# Note: TMHMM binary not distributed with InterProScan 5, please install separately e.g. in bin/tmhmm/2.0c/decodeanhmm
binary.tmhmm.path=bin/tmhmm/2.0c/decodeanhmm
#PHOBIUS
# Note: Phobius binary not distributed with InterProScan 5, please install separately e.g. in bin/phobius/1.01/phobius.pl
binary.phobius.pl.path=bin/phobius/1.01/phobius.pl
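# Binary locations may also be given as absolute paths. For example, if SignalP were installed
# system-wide, the properties could point at it like this (illustrative paths, not defaults):
#binary.signalp.path=/usr/local/signalp-4.1/signalp
#signalp.perl.library.dir=/usr/local/signalp-4.1/lib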
#
##
## Member database model / data file locations (alphabetically sorted)
##
#CDD
cdd.signature.list.path=data/cdd/3.16/data/cddid.tbl
cdd.library.path=data/cdd/3.16/db/Cdd_NCBI
cdd.data.path=data/cdd/3.16/data
#Gene3D 4.2.0
gene3d.hmm.path=data/gene3d/4.2.0/gene3d_main.hmm
gene3d.model2sf_map.path=data/gene3d/4.2.0/model_to_family_map.tsv
gene3d.hmmsearch.force=true
# HAMAP
hamap.profile.models.path=data/hamap/2018_03/hamap.prf
hamap.profile.models.dir=data/hamap/2018_03/profiles
hamap.hmm.lib.path=data/hamap/2018_03/hamap.hmm.lib
#MobiDB
binary.mobidb.path=bin/mobidb/2.0/mobidb_lite.py
binary.mobidb.binx.path=bin/mobidb/2.0/binx
#PANTHER
panther.temporary.file.directory=
panther.hmm.path=data/panther/12.0/panther.hmm
panther.names.tab=data/panther/12.0/names.tab
#Pfam
pfam-a.hmm.path=data/pfam/31.0/pfam_a.hmm
pfam-a.seed.path=data/pfam/31.0/pfam_a.seed
pfam-clans.path=data/pfam/31.0/pfam_clans
pfam-a.dat.path=data/pfam/31.0/pfam_a.dat
##
#PIRSF 3.02
pirsf.sfhmm.path=data/pirsf/3.02/sf_hmm_all
pirsf.dat.path=data/pirsf/3.02/pirsf.dat
#PRINTS 42.0
prints.kdat.path.42.0=data/prints/42.0/prints42_0.kdat
prints.pval.path.42.0=data/prints/42.0/prints.pval
prints.hierarchy.path.42.0=data/prints/42.0/FingerPRINTShierarchy.db
#ProDom 2006.1
prodom.ipr.path.2006.1=data/prodom/2006.1/prodom.ipr
#Prosite
prosite.models.path=data/prosite/2018_02/prosite.dat
prosite.evaluator.models.path=data/prosite/2018_02/evaluator.dat
prosite.models.dir=data/prosite/2018_02/prosite_models
psscan.prositeprofiles.usepfsearch=true
#
#SFLD
sfld.hmm.path=data/sfld/4/sfld.hmm
sfld.sites.annotation.file.path=data/sfld/4/sfld_sites.annot
sfld.hierarchy.file.path=data/sfld/4/sfld_hierarchy_flat.txt
#smart 7.1
smart.hmm.path=data/smart/7.1/smart.HMMs
smart.hmm.bin.path=data/smart/7.1/smart.HMMs.bin
smart.overlapping.path=
smart.threshold.path=
#SuperFamily 1.75
superfamily.hmm.path.3.0=data/superfamily/1.75/hmmlib_1.75
superfamily.self.hits.path.1.75=data/superfamily/1.75/self_hits.tab
superfamily.cla.path.1.75=data/superfamily/1.75/dir.cla.scop.txt_1.75
superfamily.model.tab.path.1.75=data/superfamily/1.75/model.tab
superfamily.pdbj95d.path.1.75=data/superfamily/1.75/pdbj95d
#tigrfam 15.0
tigrfam.hmm.path=data/tigrfam/15.0/TIGRFAMs_HMM.LIB
#TMHMM 2.0
# Note: TMHMM model files not distributed with InterProScan 5, please install separately e.g. in data/tmhmm/2.0c/TMHMM2.0c.model
tmhmm.model.path=data/tmhmm/2.0c/TMHMM2.0c.model

##
## cpu options for parallel processing
##
#hmmer cpu options for the different jobs
hmmer3.hmmsearch.cpu.switch.gene3d=--cpu 1
hmmer3.hmmsearch.cpu.switch.panther=--cpu 1
hmmer3.hmmsearch.cpu.switch.pfama=--cpu 1
hmmer3.hmmsearch.cpu.switch.pirsf=--cpu 1
hmmer3.hmmsearch.cpu.switch.sfld=--cpu 1
hmmer3.hmmsearch.cpu.switch.superfamily=--cpu 1
hmmer3.hmmsearch.cpu.switch.tigrfam=--cpu 1
hmmer3.hmmsearch.cpu.switch.hmmfilter=--cpu 1
hmmer2.hmmpfam.cpu.switch.smart=--cpu 1
#panther binary cpu options (for blastall and hmmsearch)
panther.binary.cpu.switch=-c 1
#pirsf binary cpu options (for hmmscan)
pirsf.pl.binary.cpu.switch=-cpu 1

##
## max number of proteins per analysis batch
##
# These values control the maximum number of proteins put through
# an analysis in one go - different algorithms have different optimum values.
# Note that if you suffer from out-of-memory errors, reducing these values
# will almost certainly help, but may reduce the speed of analysis.
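# For example, if PANTHER jobs fail with out-of-memory errors, a smaller batch size such as
# the following may help (illustrative value only, not a tuned recommendation):
#analysis.max.sequence.count.PANTHER=250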
analysis.max.sequence.count.CDD=1000
analysis.max.sequence.count.GENE3D=4000
analysis.max.sequence.count.SFLD=32000
analysis.max.sequence.count.TMHMM=16000
analysis.max.sequence.count.PANTHER=500
analysis.max.sequence.count.SMART=500
analysis.max.sequence.count.TIGRFAM=4000
analysis.max.sequence.count.PRINTS=500
analysis.max.sequence.count.PROSITE_PROFILES=1000
analysis.max.sequence.count.PROSITE_PATTERNS=4000
analysis.max.sequence.count.PIRSF=4000
analysis.max.sequence.count.PRODOM=4000
analysis.max.sequence.count.SSF=2000
analysis.max.sequence.count.HAMAP=32000
analysis.max.sequence.count.PFAM=4000
analysis.max.sequence.count.COILS=32000
analysis.max.sequence.count.PHOBIUS=16000
# The SignalP 4.1 binary only allows a maximum of 10,000 sequences
analysis.max.sequence.count.SIGNALP=8000
analysis.max.sequence.count.MOBIDB_LITE=1000

##
## General settings
##
# If multiple hosts are sharing the same file system, a delay may be required to
# avoid stale NFS handles
#nfs.delay.milliseconds=0
# Instructs I5 to completely clean up after itself - leave set to true.
delete.temporary.directory.on.completion=true

##
## Broker TCP Connection
##
# A list of TCP ports that should not be used for messaging. (Apart from this, only ports > 1024 and < 65535 will be used.)
tcp.port.exclusion.list=3879,3878,3881,3882

##
## precalculated match lookup service
##
# By default, if the sequence already has matches available from the EBI, this service will look them
# up for you. Note - at present it will always return all the available matches, ignoring any -appl options
# set on the command line.
precalculated.match.lookup.service.url=http://www.ebi.ac.uk/interpro/match-lookup
#proxy set up
precalculated.match.lookup.service.proxy.host=
precalculated.match.lookup.service.proxy.port=3128
precalculated.match.protein.lookup.batch.size=100
precalculated.match.protein.insert.batch.size=500
precalculated.match.protein.insert.batch.size.nolookup=4000
#Exclude sites from output (residue level annotations)
exclude.sites.from.output=false

##
## getorf configuration for nucleic acid sequences
##
# The following are roughly the times getorf takes to find open reading frames (ORFs) in n nucleotide sequences:
#number of sequences -> approx. time it takes in our tests
#   600000 -> 10 minutes
#  3600000 -> 1 hour
#  7200000 -> 2 hours
# 43200000 -> 12 hours
# JOB: jobLoadNucleicAcidSequence
getorf.minsize=75
# Set InterProScan to only process the N longest ORFs for each nucleotide sequence
binary.getorf.parser.filtersize=12

##
## Output format
##
# TRUE by default, which means all generated graphical output documents (only SVG at the moment)
# will be archived (using the Linux command tar).
# This simple switch allows you to switch the archive mode off (simply set it to FALSE).
archiveSVGOutput=true

##
## Master/Stand alone embedded workers
##
# Set the number of embedded workers to the number of processors that you would like to employ
# on the machine you are using to run InterProScan.
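# For example, on an 8-core machine you might set these to values such as the following
# (illustrative values only, not a recommendation):
#number.of.embedded.workers=6
#maxnumber.of.embedded.workers=8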
#number of embedded workers a master process can have
number.of.embedded.workers=2
maxnumber.of.embedded.workers=28

##
## Distributed mode (Cluster mode)
##
#grid name
grid.name=lsf
#grid.name=other-cluster
#project name for this run - use user.digest
user.digest=i5GridRun
#grid jobs limit: the number of jobs you are allowed to run on the cluster
grid.jobs.limit=1000
#time between each bjobs or qstat command to check the status of jobs on the cluster
grid.check.interval.seconds=120
#allow the master InterProScan process to run binaries
master.can.run.binaries=true
#deal with unknown step states
recover.unknown.step.state=false
#Grid submission commands (e.g. LSF bsub or SGE qsub) for starting remote workers
#(an illustrative SGE-style alternative is sketched at the end of this file)
#commands the master uses to start new remote workers
grid.master.submit.command=bsub -q QUEUE_NAME
grid.master.submit.high.memory.command=bsub -q QUEUE_NAME -M 8192
#commands a worker uses to start new remote workers
grid.worker.submit.command=bsub -q QUEUE_NAME
grid.worker.submit.high.memory.command=bsub -q QUEUE_NAME -M 8192
# command to start a new worker (new jvm)
worker.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# This may be identical to the worker.command argument above; however, you may choose to select
# a machine with much more available memory, for use when a StepExecution fails.
worker.high.memory.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# Set the number of embedded workers to the number of processors that you would like to employ
# on the node machine on which the worker will run.
#number of embedded workers in a remote worker
worker.number.of.embedded.workers=4
worker.maxnumber.of.embedded.workers=4
# max number of connections to the master
master.maxconsumers=48
#number of connections to the worker
worker.maxconsumers=32
#throttled network?
grid.throttle=true
# max number of jobs a tier 1 worker is allowed on its queue
worker.maxunfinished.jobs=32
#network tier depth
max.tier.depth=1
# ActiveMQ JMS broker temporary data directory
jms.broker.temp.directory=activemq-data/localhost/tmp_storage
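##
## Example: SGE-style submission commands (illustrative only)
##
# The defaults above assume an LSF scheduler (bsub). On a Grid Engine cluster the equivalent
# submission commands might look like the following; the queue name and memory resource flag
# depend entirely on your local SGE configuration, so treat these as a sketch rather than
# working values.
#grid.name=other-cluster
#grid.master.submit.command=qsub -q QUEUE_NAME
#grid.master.submit.high.memory.command=qsub -q QUEUE_NAME -l h_vmem=8G
#grid.worker.submit.command=qsub -q QUEUE_NAME
#grid.worker.submit.high.memory.command=qsub -q QUEUE_NAME -l h_vmem=8G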