# This is the InterProScan configuration file

##
## Temporary files and directory
##
# The text [UNIQUE], if present, will be replaced by a value unique to your running instance
# Temporary files used by the analyses will be placed in directories here:
temporary.file.directory.suffix=[UNIQUE]
temporary.file.directory=temp/${temporary.file.directory.suffix}

##
## H2 database
##
# The H2 database is copied by the standalone version of InterProScan
i5.h2.database.original.location=work/template/interpro.zip
# LOCK_TIMEOUT: Sets the lock timeout (in milliseconds) for the current session
i5.database.connection.url=jdbc:h2:mem:interpro;LOCK_TIMEOUT=10000000

##
## binary paths
##
# Configure which Perl and Python interpreters are used to run the member databases' Perl/Python binaries
perl.command=perl
python3.command=python3

# Binary file locations
#rpsblast
binary.rpsblast.path=bin/blast/ncbi-blast-2.6.0+/rpsblast
#rpsbproc
binary.rpsbproc.path=bin/blast/ncbi-blast-2.6.0+/rpsbproc
#hmmer 3
binary.hmmer3.path=bin/hmmer/hmmer3/3.1b1
binary.hmmer3.hmmscan.path=bin/hmmer/hmmer3/3.1b1/hmmscan
binary.hmmer3.hmmsearch.path=bin/hmmer/hmmer3/3.1b1/hmmsearch
#hmmer 2
binary.hmmer2.hmmsearch.path=bin/hmmer/hmmer2/2.3.2/hmmsearch
binary.hmmer2.hmmpfam.path=bin/hmmer/hmmer2/2.3.2/hmmpfam
binary.fingerprintscan.path=bin/prints/fingerPRINTScan
binary.coils.path=bin/ncoils/2.2.1/ncoils
binary.prodom.2006.1.prodomblast3i.pl.path=bin/prodom/2006.1/ProDomBlast3i.pl
# Note: the correct PROSITE binary distribution for your platform can be downloaded from ftp://ftp.expasy.org/databases/prosite/ps_scan/
binary.prosite.psscan.pl.path=bin/prosite/ps_scan.pl
binary.prosite.pfscan.path=bin/prosite/pfscan
binary.prosite.pfsearch.path=bin/prosite/pfsearch
binary.prosite.pfsearch.wrapperpath=bin/prosite/pfsearch_wrapper.py
#CATH-Gene3D
cath.resolve.hits.path=bin/gene3d/4.2.0/cath-resolve-hits
#panther
binary.pantherscore.path=bin/panther/panther_score.py
#superfamily
binary.superfamily.1.75.ass3.pl.path=bin/superfamily/1.75/ass3_single_threaded.pl
#PIRSF
binary.pirsf.pl.path=bin/pirsf/3.02/pirsf.pl
binary.blast.2.2.19.path=bin/blast/2.2.19
binary.getorf.path=bin/nucleotide/getorf
#SFLD
sfld.postprocess.command=bin/sfld/sfld_postprocess
#signalp
# Note: SignalP binary not distributed with InterProScan 5, please install separately e.g. in bin/signalp/4.1/signalp
binary.signalp.path=bin/signalp/4.1/signalp
signalp.perl.library.dir=bin/signalp/4.1/lib
#TMHMM 2.0
# Note: TMHMM binary not distributed with InterProScan 5, please install separately e.g. in bin/tmhmm/2.0c/decodeanhmm
binary.tmhmm.path=bin/tmhmm/2.0c/decodeanhmm
#PHOBIUS
# Note: Phobius binary not distributed with InterProScan 5, please install separately e.g. in bin/phobius/1.01/phobius.pl
binary.phobius.pl.path=bin/phobius/1.01/phobius.pl
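# Binary locations may also be given as absolute paths. For example, if SignalP were installed
# system-wide, the properties could point at it like this (illustrative paths, not defaults):
#binary.signalp.path=/usr/local/signalp-4.1/signalp
#signalp.perl.library.dir=/usr/local/signalp-4.1/lib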
#
##
## Member database model / data file locations (alphabetically sorted)
##
#CDD
cdd.signature.list.path=data/cdd/3.16/data/cddid.tbl
cdd.library.path=data/cdd/3.16/db/Cdd_NCBI
cdd.data.path=data/cdd/3.16/data
#Gene3D 4.2.0
gene3d.hmm.path=data/gene3d/4.2.0/gene3d_main.hmm
gene3d.model2sf_map.path=data/gene3d/4.2.0/model_to_family_map.tsv
gene3d.hmmsearch.force=true
# HAMAP
hamap.profile.models.path=data/hamap/2018_03/hamap.prf
hamap.profile.models.dir=data/hamap/2018_03/profiles
hamap.hmm.lib.path=data/hamap/2018_03/hamap.hmm.lib
#MobiDB
binary.mobidb.path=bin/mobidb/2.0/mobidb_lite.py
binary.mobidb.binx.path=bin/mobidb/2.0/binx
#PANTHER
panther.temporary.file.directory=
panther.hmm.path=data/panther/12.0/panther.hmm
panther.names.tab=data/panther/12.0/names.tab
#Pfam
pfam-a.hmm.path=data/pfam/31.0/pfam_a.hmm
pfam-a.seed.path=data/pfam/31.0/pfam_a.seed
pfam-clans.path=data/pfam/31.0/pfam_clans
pfam-a.dat.path=data/pfam/31.0/pfam_a.dat
##
#PIRSF 3.02
pirsf.sfhmm.path=data/pirsf/3.02/sf_hmm_all
pirsf.dat.path=data/pirsf/3.02/pirsf.dat
#PRINTS 42.0
prints.kdat.path.42.0=data/prints/42.0/prints42_0.kdat
prints.pval.path.42.0=data/prints/42.0/prints.pval
prints.hierarchy.path.42.0=data/prints/42.0/FingerPRINTShierarchy.db
#ProDom 2006.1
prodom.ipr.path.2006.1=data/prodom/2006.1/prodom.ipr
#Prosite
prosite.models.path=data/prosite/2018_02/prosite.dat
prosite.evaluator.models.path=data/prosite/2018_02/evaluator.dat
prosite.models.dir=data/prosite/2018_02/prosite_models
psscan.prositeprofiles.usepfsearch=true
#
#SFLD
sfld.hmm.path=data/sfld/4/sfld.hmm
sfld.sites.annotation.file.path=data/sfld/4/sfld_sites.annot
sfld.hierarchy.file.path=data/sfld/4/sfld_hierarchy_flat.txt
#smart 7.1
smart.hmm.path=data/smart/7.1/smart.HMMs
smart.hmm.bin.path=data/smart/7.1/smart.HMMs.bin
smart.overlapping.path=
smart.threshold.path=
#SuperFamily 1.75
superfamily.hmm.path.3.0=data/superfamily/1.75/hmmlib_1.75
superfamily.self.hits.path.1.75=data/superfamily/1.75/self_hits.tab
superfamily.cla.path.1.75=data/superfamily/1.75/dir.cla.scop.txt_1.75
superfamily.model.tab.path.1.75=data/superfamily/1.75/model.tab
superfamily.pdbj95d.path.1.75=data/superfamily/1.75/pdbj95d
#tigrfam 15.0
tigrfam.hmm.path=data/tigrfam/15.0/TIGRFAMs_HMM.LIB
#TMHMM 2.0
# Note: TMHMM model files not distributed with InterProScan 5, please install separately e.g. in data/tmhmm/2.0c/TMHMM2.0c.model
tmhmm.model.path=data/tmhmm/2.0c/TMHMM2.0c.model

##
## cpu options for parallel processing
##
#hmmer cpu options for the different jobs
hmmer3.hmmsearch.cpu.switch.gene3d=--cpu 1
hmmer3.hmmsearch.cpu.switch.panther=--cpu 1
hmmer3.hmmsearch.cpu.switch.pfama=--cpu 1
hmmer3.hmmsearch.cpu.switch.pirsf=--cpu 1
hmmer3.hmmsearch.cpu.switch.sfld=--cpu 1
hmmer3.hmmsearch.cpu.switch.superfamily=--cpu 1
hmmer3.hmmsearch.cpu.switch.tigrfam=--cpu 1
hmmer3.hmmsearch.cpu.switch.hmmfilter=--cpu 1
hmmer2.hmmpfam.cpu.switch.smart=--cpu 1
#panther binary cpu options (for blastall and hmmsearch)
panther.binary.cpu.switch=-c 1
#pirsf binary cpu options (for hmmscan)
pirsf.pl.binary.cpu.switch=-cpu 1

##
## max number of proteins per analysis batch
##
# These values control the maximum number of proteins put through
# an analysis in one go - different algorithms have different optimum values.
# Note that if you suffer from out-of-memory errors, reducing these values
# will almost certainly help, but may reduce the speed of analysis.
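# For example, if PANTHER jobs fail with out-of-memory errors, a smaller batch size such as
# the following may help (illustrative value only, not a tuned recommendation):
#analysis.max.sequence.count.PANTHER=250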
analysis.max.sequence.count.CDD=1000
analysis.max.sequence.count.GENE3D=4000
analysis.max.sequence.count.SFLD=32000
analysis.max.sequence.count.TMHMM=16000
analysis.max.sequence.count.PANTHER=500
analysis.max.sequence.count.SMART=500
analysis.max.sequence.count.TIGRFAM=4000
analysis.max.sequence.count.PRINTS=500
analysis.max.sequence.count.PROSITE_PROFILES=1000
analysis.max.sequence.count.PROSITE_PATTERNS=4000
analysis.max.sequence.count.PIRSF=4000
analysis.max.sequence.count.PRODOM=4000
analysis.max.sequence.count.SSF=2000
analysis.max.sequence.count.HAMAP=32000
analysis.max.sequence.count.PFAM=4000
analysis.max.sequence.count.COILS=32000
analysis.max.sequence.count.PHOBIUS=16000
# The SignalP 4.1 binary only allows a maximum of 10,000 sequences
analysis.max.sequence.count.SIGNALP=8000
analysis.max.sequence.count.MOBIDB_LITE=1000

##
## General settings
##
# If multiple hosts are sharing the same file system, a delay may be required to
# avoid stale NFS handles
#nfs.delay.milliseconds=0
# Instructs I5 to completely clean up after itself - leave set to true.
delete.temporary.directory.on.completion=true

##
## Broker TCP Connection
##
# A list of TCP ports that should not be used for messaging. (Apart from this, only ports > 1024 and < 65535 will be used.)
tcp.port.exclusion.list=3879,3878,3881,3882

##
## precalculated match lookup service
##
# By default, if the sequence already has matches available from the EBI, this service will look them
# up for you. Note - at present it will always return all the available matches, ignoring any -appl options
# set on the command line.
precalculated.match.lookup.service.url=http://www.ebi.ac.uk/interpro/match-lookup
#proxy set up
precalculated.match.lookup.service.proxy.host=
precalculated.match.lookup.service.proxy.port=3128
precalculated.match.protein.lookup.batch.size=100
precalculated.match.protein.insert.batch.size=500
precalculated.match.protein.insert.batch.size.nolookup=4000
#Exclude sites from output (residue level annotations)
exclude.sites.from.output=false

##
## getorf configuration for nucleic acid sequences
##
# The following are roughly the times getorf takes to find open reading frames (ORFs) in n nucleotide sequences:
#number of sequences -> approx. time it takes in our tests
#   600000 -> 10 minutes
#  3600000 -> 1 hour
#  7200000 -> 2 hours
# 43200000 -> 12 hours
# JOB: jobLoadNucleicAcidSequence
getorf.minsize=75
# Set InterProScan to only process the N longest ORFs for each nucleotide sequence
binary.getorf.parser.filtersize=12

##
## Output format
##
# TRUE by default, which means all generated graphical output documents (only SVG at the moment)
# will be archived (using the Linux command tar).
# This simple switch allows you to switch the archive mode off (simply set it to FALSE).
archiveSVGOutput=true

##
## Master/Stand alone embedded workers
##
# Set the number of embedded workers to the number of processors that you would like to employ
# on the machine you are using to run InterProScan.
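# For example, on an 8-core machine you might set these to values such as the following
# (illustrative values only, not a recommendation):
#number.of.embedded.workers=6
#maxnumber.of.embedded.workers=8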
#number of embedded workers a master process can have
number.of.embedded.workers=2
maxnumber.of.embedded.workers=28

##
## Distributed mode (Cluster mode)
##
#grid name
grid.name=lsf
#grid.name=other-cluster
#project name for this run - use user.digest
user.digest=i5GridRun
#grid jobs limit: the number of jobs you are allowed to run on the cluster
grid.jobs.limit=1000
#time between each bjobs or qstat command to check the status of jobs on the cluster
grid.check.interval.seconds=120
#allow the master InterProScan process to run binaries
master.can.run.binaries=true
#deal with unknown step states
recover.unknown.step.state=false
#Grid submission commands (e.g. LSF bsub or SGE qsub) for starting remote workers
#(an illustrative SGE-style alternative is sketched at the end of this file)
#commands the master uses to start new remote workers
grid.master.submit.command=bsub -q QUEUE_NAME
grid.master.submit.high.memory.command=bsub -q QUEUE_NAME -M 8192
#commands a worker uses to start new remote workers
grid.worker.submit.command=bsub -q QUEUE_NAME
grid.worker.submit.high.memory.command=bsub -q QUEUE_NAME -M 8192
# command to start a new worker (new jvm)
worker.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# This may be identical to the worker.command argument above; however, you may choose to select
# a machine with much more available memory, for use when a StepExecution fails.
worker.high.memory.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# Set the number of embedded workers to the number of processors that you would like to employ
# on the node machine on which the worker will run.
#number of embedded workers in a remote worker
worker.number.of.embedded.workers=4
worker.maxnumber.of.embedded.workers=4
# max number of connections to the master
master.maxconsumers=48
#number of connections to the worker
worker.maxconsumers=32
#throttled network?
grid.throttle=true
# max number of jobs a tier 1 worker is allowed on its queue
worker.maxunfinished.jobs=32
#network tier depth
max.tier.depth=1
# ActiveMQ JMS broker temporary data directory
jms.broker.temp.directory=activemq-data/localhost/tmp_storage
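##
## Example: SGE-style submission commands (illustrative only)
##
# The defaults above assume an LSF scheduler (bsub). On a Grid Engine cluster the equivalent
# submission commands might look like the following; the queue name and memory resource flag
# depend entirely on your local SGE configuration, so treat these as a sketch rather than
# working values.
#grid.name=other-cluster
#grid.master.submit.command=qsub -q QUEUE_NAME
#grid.master.submit.high.memory.command=qsub -q QUEUE_NAME -l h_vmem=8G
#grid.worker.submit.command=qsub -q QUEUE_NAME
#grid.worker.submit.high.memory.command=qsub -q QUEUE_NAME -l h_vmem=8G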