From 8980f498f05ad63dbffa3241842d3d38e939531b Mon Sep 17 00:00:00 2001 From: oushujun Date: Fri, 12 Jan 2024 11:58:12 -0500 Subject: [PATCH] replace local AnnoSINEv2 with the conda version --- EDTA.pl | 4 +- EDTA_2.2.x.yml | 135 ++++++++++++++++++++++++++++--------------------- EDTA_raw.pl | 10 ++-- 3 files changed, 83 insertions(+), 66 deletions(-) diff --git a/EDTA.pl b/EDTA.pl index 5cf0be9..285fa07 100755 --- a/EDTA.pl +++ b/EDTA.pl @@ -269,11 +269,11 @@ $genometools="$genometools/" if $genometools ne '' and $genometools !~ /\/$/; die "Error: gt is not found in the genometools path $genometools!\n" unless -X "${genometools}gt"; # AnnoSINE -chomp ($annosine=`which annosine2 2>/dev/null`) if $annosine eq ''; +chomp ($annosine=`which AnnoSINE_v2 2>/dev/null`) if $annosine eq ''; $annosine =~ s/\s+$//; $annosine = dirname($annosine) unless -d $annosine; $annosine="$annosine/" if $annosine ne '' and $annosine !~ /\/$/; -die "Error: AnnoSINE is not found in the AnnoSINE path $annosine!\n" unless (-X "${annosine}AnnoSINE_v2.py" or -X "${annosine}/bin/AnnoSINE_v2.py" or -X "${annosine}annosine2"); +die "Error: AnnoSINE is not found in the AnnoSINE path $annosine!\n" unless (-X "${annosine}AnnoSINE_v2"); # LTR_retriever chomp ($LTR_retriever=`which LTR_retriever 2>/dev/null`) if $LTR_retriever eq ''; $LTR_retriever =~ s/\s+$//; diff --git a/EDTA_2.2.x.yml b/EDTA_2.2.x.yml index a594427..ced98bc 100644 --- a/EDTA_2.2.x.yml +++ b/EDTA_2.2.x.yml @@ -1,4 +1,4 @@ -name: EDTA2.2 +name: EDTA2 channels: - conda-forge - bioconda @@ -12,35 +12,24 @@ dependencies: - aiohttp=3.9.1=py39hd1e30aa_0 - aiosignal=1.3.1=pyhd8ed1ab_0 - alsa-lib=1.2.9=hd590300_0 - - arrow-cpp=10.0.1=h3e2b116_4_cpu + - annosine2=1.0.5=pyh7cba7a3_0 - astor=0.8.1=pyh9f0ad1d_0 - astunparse=1.6.3=pyhd8ed1ab_0 - async-timeout=4.0.3=pyhd8ed1ab_0 - attrs=23.2.0=pyh71513ae_0 - - aws-c-auth=0.6.21=hd93a3ba_3 - - aws-c-cal=0.5.20=hd3b2fe5_3 - - aws-c-common=0.8.5=h166bdaf_0 - - aws-c-compression=0.2.16=hf5f93bc_0 - - aws-c-event-stream=0.2.17=h57874a7_1 - - aws-c-http=0.7.0=h96ef541_0 - - aws-c-io=0.13.12=h57ca295_1 - - aws-c-mqtt=0.7.13=h0b5698f_12 - - aws-c-s3=0.2.1=h5d7e488_4 - - aws-c-sdkutils=0.1.7=hf5f93bc_0 - - aws-checksums=0.1.14=h6027aba_0 - - aws-crt-cpp=0.18.16=h26430d7_8 - - aws-sdk-cpp=1.9.379=h56135f1_7 - bedtools=2.31.1=hf5e1c6e_0 - binutils_impl_linux-64=2.40=hf600244_0 - - biopython=1.82=py39hd1e30aa_0 + - biopython=1.83=py39hd1e30aa_0 - blast=2.10.1=pl526he19e7b1_3 - blinker=1.7.0=pyhd8ed1ab_0 - - bokeh=3.3.2=pyhd8ed1ab_0 - - brotli-python=1.0.9=py39h5a03fae_9 + - bokeh=2.4.3=pyhd8ed1ab_3 + - brotli=1.1.0=hd590300_1 + - brotli-bin=1.1.0=hd590300_1 + - brotli-python=1.1.0=py39h3d6467e_1 - bwidget=1.9.14=ha770c72_1 - bz2file=0.98=py_0 - bzip2=1.0.8=hd590300_5 - - c-ares=1.24.0=hd590300_0 + - c-ares=1.25.0=hd590300_0 - ca-certificates=2023.11.17=hbcca054_0 - cached-property=1.5.2=hd8ed1ab_1 - cached_property=1.5.2=pyha770c72_1 @@ -62,10 +51,10 @@ dependencies: - curl=7.88.1=h37d81fd_2 - cycler=0.12.1=pyhd8ed1ab_0 - cytoolz=0.12.2=py39hd1e30aa_1 - - dask=2023.12.1=pyhd8ed1ab_0 - - dask-core=2023.12.1=pyhd8ed1ab_0 + - dask=2023.3.0=pyhd8ed1ab_0 + - dask-core=2023.3.0=pyhd8ed1ab_0 - dill=0.3.7=pyhd8ed1ab_0 - - distributed=2023.12.1=pyhd8ed1ab_0 + - distributed=2023.3.0=pyhd8ed1ab_0 - drmaa=0.7.9=py_1000 - entrez-direct=16.2=he881be0_1 - expat=2.5.0=hcb278e6_1 @@ -77,30 +66,29 @@ dependencies: - fontconfig=2.14.2=h14ed4e7_0 - fonts-conda-ecosystem=1=0 - fonts-conda-forge=1=0 + - fonttools=4.47.2=py39hd1e30aa_0 - freetype=2.12.1=h267a509_2 - fribidi=1.0.10=h36c2ea0_0 - frozenlist=1.4.1=py39hd1e30aa_0 - fsspec=2023.12.2=pyhca7485f_0 - gast=0.4.0=pyh9f0ad1d_0 - gawk=5.1.0=h7f98852_0 - - gcc_impl_linux-64=12.3.0=he2b93b0_3 + - gcc_impl_linux-64=13.2.0=h338b0a0_3 - genericrepeatfinder=1.0.2=h4ac6f70_0 - genometools-genometools=1.6.4=py39h58cc16e_0 - gettext=0.21.1=h27087fc_0 - - gflags=2.2.2=he1b5a44_1004 - - gfortran_impl_linux-64=12.3.0=hfcedea8_3 + - gfortran_impl_linux-64=13.2.0=h76e1118_3 - giflib=5.2.1=h0b41bf4_3 - glob2=0.7=py_0 - - glog=0.6.0=h6f12383_0 - gmp=6.1.2=hf484d3e_1000 - gnutls=3.5.19=h2a4e5f8_1 - - google-auth=2.25.2=pyhca7485f_0 + - google-auth=2.26.2=pyhca7485f_0 - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 - google-pasta=0.2.0=pyh8c360ce_0 - graphite2=1.3.13=h58526e2_1001 - grpcio=1.51.1=py39h712372c_0 - gsl=2.7=he838d99_0 - - gxx_impl_linux-64=12.3.0=he2b93b0_3 + - gxx_impl_linux-64=13.2.0=h338b0a0_3 - h5py=3.9.0=nompi_py39h4dfffb9_100 - harfbuzz=6.0.0=h8e241bc_0 - hdf5=1.14.0=nompi_h5231ba7_103 @@ -108,9 +96,10 @@ dependencies: - icu=70.1=h27087fc_0 - idna=3.6=pyhd8ed1ab_0 - importlib-metadata=7.0.1=pyha770c72_0 - - importlib_metadata=7.0.1=hd8ed1ab_0 - - irfinder=1.3.1=h031d066_4 - - jinja2=3.1.2=pyhd8ed1ab_1 + - importlib-resources=6.1.1=pyhd8ed1ab_0 + - importlib_resources=6.1.1=pyhd8ed1ab_0 + - irf=3.08=hdbdd923_0 + - jinja2=3.1.3=pyhd8ed1ab_0 - joblib=1.3.2=pyhd8ed1ab_0 - jpeg=9e=h166bdaf_2 - k8=0.2.5=hdcf5f25_4 @@ -125,30 +114,26 @@ dependencies: - lerc=4.0.0=h27087fc_0 - libabseil=20220623.0=cxx17_h05df665_6 - libaec=1.1.2=h59595ed_1 - - libarrow=10.0.1=hee49ebd_4_cpu - libblas=3.9.0=20_linux64_openblas - - libbrotlicommon=1.0.9=h166bdaf_9 - - libbrotlidec=1.0.9=h166bdaf_9 - - libbrotlienc=1.0.9=h166bdaf_9 + - libbrotlicommon=1.1.0=hd590300_1 + - libbrotlidec=1.1.0=hd590300_1 + - libbrotlienc=1.1.0=hd590300_1 - libcblas=3.9.0=20_linux64_openblas - - libcrc32c=1.1.2=h9c3ff4c_0 - libcups=2.3.3=h36d4200_3 - libcurl=7.88.1=h91b91d3_2 - libdeflate=1.17=h0b41bf4_0 - libedit=3.1.20191231=he28a2e2_2 - libev=4.33=hd590300_2 - - libevent=2.1.10=h9b69904_4 - libexpat=2.5.0=hcb278e6_1 - libffi=3.4.2=h7f98852_5 - libgcc=7.2.0=h69d50b8_2 - - libgcc-devel_linux-64=12.3.0=h8bca6fd_103 + - libgcc-devel_linux-64=13.2.0=ha9c7c90_103 - libgcc-ng=13.2.0=h807b86a_3 - libgd=2.3.3=h5aea950_4 - libgfortran-ng=13.2.0=h69a702a_3 - libgfortran5=13.2.0=ha4646dd_3 - libglib=2.78.1=hebfc3b9_0 - libgomp=13.2.0=h807b86a_3 - - libgoogle-cloud=2.5.0=h5d50b59_1 - libgrpc=1.51.1=h05bd8bd_0 - libiconv=1.17=hd590300_2 - libidn2=2.3.4=h166bdaf_0 @@ -159,15 +144,13 @@ dependencies: - libopenblas=0.3.25=pthreads_h413a1c8_0 - libpng=1.6.39=h753d276_0 - libprotobuf=3.21.12=hfc55251_2 - - libsanitizer=12.3.0=h0f45ef3_3 + - libsanitizer=13.2.0=h7e041cc_3 - libsqlite=3.44.2=h2797004_0 - libssh2=1.10.0=haa6b8db_3 - - libstdcxx-devel_linux-64=12.3.0=h8bca6fd_103 - - libstdcxx-ng=12.3.0=h0f45ef3_3 - - libthrift=0.16.0=h491838f_2 + - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_103 + - libstdcxx-ng=13.2.0=h7e041cc_3 - libtiff=4.5.0=h6adf6a1_2 - libunistring=0.9.10=h7f98852_0 - - libutf8proc=2.8.0=h166bdaf_0 - libuuid=2.38.1=h0b41bf4_0 - libwebp=1.2.4=h1daa5a0_1 - libwebp-base=1.2.4=h166bdaf_0 @@ -176,20 +159,20 @@ dependencies: - libzlib=1.2.13=hd590300_5 - locket=1.0.0=pyhd8ed1ab_0 - ltr_finder=1.07=h4ac6f70_3 - - ltr_retriever=2.9.5=hdfd78af_0 - - lz4=4.3.2=py39h79d96da_1 + - ltr_retriever=2.9.9=hdfd78af_0 + - lz4=4.3.3=py39h79d96da_0 - lz4-c=1.9.4=hcb278e6_0 - mafft=7.520=h031d066_3 - make=4.3=hd18ef5c_1 - - markdown=3.5.1=pyhd8ed1ab_0 + - markdown=3.5.2=pyhd8ed1ab_0 - markupsafe=2.1.3=py39hd1e30aa_1 - - matplotlib=3.3.2=0 - - matplotlib-base=3.3.2=py39h98787fa_1 + - matplotlib-base=3.8.2=py39he9076e7_0 - mdust=2006.10.17=h031d066_6 - minimap2=2.26=he4a0461_2 - msgpack-python=1.0.7=py39h7633fee_0 - multidict=6.0.4=py39hd1e30aa_1 - multiprocess=0.70.15=py39hd1e30aa_1 + - munkres=1.0.7=py_1 - muscle=5.1=h4ac6f70_3 - mysql-connector-c=6.1.11=h6eb9d5d_1007 - nccl=2.19.4.1=h6103f9b_0 @@ -197,17 +180,15 @@ dependencies: - nettle=3.3=0 - nodejs=12.4.0=he1b5a44_0 - nseg=1.0.1=h031d066_4 - - numpy=1.26.2=py39h474f0d3_0 + - numpy=1.26.3=py39h474f0d3_0 - oauthlib=3.2.2=pyhd8ed1ab_0 - openjdk=11.0.1=h516909a_1016 - openjpeg=2.5.0=hfec8fc6_2 - openssl=1.1.1w=hd590300_0 - opt_einsum=3.3.0=pyhc1e730c_2 - - orc=1.8.2=hfdbbad2_2 - packaging=23.2=pyhd8ed1ab_0 - pandas=2.1.4=py39hddac248_0 - pango=1.50.14=hd33c08f_0 - - parquet-cpp=1.5.1=2 - partd=1.4.1=pyhd8ed1ab_0 - pcre=8.45=h9c3ff4c_0 - pcre2=10.40=hc3806b6_0 @@ -243,13 +224,11 @@ dependencies: - perl-xsloader=0.24=pl526_0 - pillow=9.4.0=py39h2320bf1_1 - pip=23.3.2=pyhd8ed1ab_0 - - pixman=0.42.2=h59595ed_0 + - pixman=0.43.0=h59595ed_0 - pp=1.6.4.4=py_0 - protobuf=4.21.12=py39h227be39_0 - psutil=5.9.7=py39hd1e30aa_0 - pthread-stubs=0.4=h36c2ea0_1001 - - pyarrow=10.0.1=py39hacc6ce7_4_cpu - - pyarrow-hotfix=0.6=pyhd8ed1ab_0 - pyasn1=0.5.1=pyhd8ed1ab_0 - pyasn1-modules=0.3.0=pyhd8ed1ab_0 - pycparser=2.21=pyhd8ed1ab_0 @@ -266,6 +245,45 @@ dependencies: - pyu2f=0.1.5=pyhd8ed1ab_0 - pyyaml=6.0.1=py39hd1e30aa_1 - r-base=4.2.3=ha7d60f8_0 + - r-cli=3.6.2=r42ha503ecb_0 + - r-colorspace=2.1_0=r42h57805ef_1 + - r-crayon=1.5.2=r42hc72bb7e_2 + - r-dplyr=1.1.4=r42ha503ecb_0 + - r-ellipsis=0.3.2=r42h57805ef_2 + - r-fansi=1.0.6=r42h57805ef_0 + - r-farver=2.1.1=r42ha503ecb_2 + - r-generics=0.1.3=r42hc72bb7e_2 + - r-ggplot2=3.4.4=r42hc72bb7e_0 + - r-glue=1.7.0=r42h57805ef_0 + - r-gtable=0.3.4=r42hc72bb7e_0 + - r-here=1.0.1=r42hc72bb7e_2 + - r-isoband=0.2.7=r42ha503ecb_2 + - r-labeling=0.4.3=r42hc72bb7e_0 + - r-lattice=0.22_5=r42h57805ef_0 + - r-lifecycle=1.0.4=r42hc72bb7e_0 + - r-magrittr=2.0.3=r42h57805ef_2 + - r-mass=7.3_60=r42h57805ef_1 + - r-matrix=1.6_5=r42h316c678_0 + - r-mgcv=1.9_1=r42h316c678_0 + - r-munsell=0.5.0=r42hc72bb7e_1006 + - r-nlme=3.1_164=r42h61816a4_0 + - r-pillar=1.9.0=r42hc72bb7e_1 + - r-pkgconfig=2.0.3=r42hc72bb7e_3 + - r-purrr=1.0.2=r42h57805ef_0 + - r-r6=2.5.1=r42hc72bb7e_2 + - r-rcolorbrewer=1.1_3=r42h785f33e_2 + - r-rlang=1.1.3=r42ha503ecb_0 + - r-rprojroot=2.0.4=r42hc72bb7e_0 + - r-scales=1.3.0=r42hc72bb7e_0 + - r-stringi=1.7.12=r42h1ae9187_0 + - r-stringr=1.5.1=r42h785f33e_0 + - r-tibble=3.2.1=r42h57805ef_2 + - r-tidyr=1.3.0=r42ha503ecb_1 + - r-tidyselect=1.2.0=r42hc72bb7e_1 + - r-utf8=1.2.4=r42h57805ef_0 + - r-vctrs=0.6.5=r42ha503ecb_0 + - r-viridislite=0.4.2=r42hc72bb7e_1 + - r-withr=2.5.2=r42hc72bb7e_0 - re2=2022.06.01=h27087fc_1 - readline=8.2=h8228510_1 - recon=1.08=h031d066_6 @@ -277,12 +295,11 @@ dependencies: - requests-oauthlib=1.3.1=pyhd8ed1ab_0 - rmblast=2.10.0=h2d02072_0 - rsa=4.9=pyhd8ed1ab_0 - - s2n=1.3.31=hae46d1a_0 - scikit-learn=1.3.2=py39ha22ef79_2 - scipy=1.11.4=py39h474f0d3_0 - sed=4.8=he412f7d_0 - seqtk=1.4=he4a0461_1 - - setuptools=68.2.2=pyhd8ed1ab_0 + - setuptools=69.0.3=pyhd8ed1ab_0 - six=1.16.0=pyh6c4a22f_0 - snappy=1.1.10=h9fff704_0 - sortedcontainers=2.4.0=pyhd8ed1ab_0 @@ -296,7 +313,7 @@ dependencies: - tensorflow=2.11.0=cuda112py39h01bd6f0_0 - tensorflow-base=2.11.0=cuda112py39h1c230a5_0 - tensorflow-estimator=2.11.0=cuda112py39hd320b7a_0 - - termcolor=2.3.0=pyhd8ed1ab_0 + - termcolor=2.4.0=pyhd8ed1ab_0 - tesorter=1.4.6=pyhdfd78af_0 - threadpoolctl=3.2.0=pyha21a80b_0 - tk=8.6.13=noxft_h4845f30_101 @@ -311,6 +328,7 @@ dependencies: - ucsc-fatotwobit=447=h954228d_0 - ucsc-twobitinfo=447=h954228d_0 - ucsc-twobittofa=447=h954228d_0 + - unicodedata2=15.1.0=py39hd1e30aa_0 - urllib3=2.1.0=pyhd8ed1ab_0 - werkzeug=3.0.1=pyhd8ed1ab_0 - wget=1.20.3=ha56f1ee_1 @@ -335,7 +353,6 @@ dependencies: - xorg-renderproto=0.11.1=h7f98852_1002 - xorg-xextproto=7.3.0=h0b41bf4_1003 - xorg-xproto=7.0.31=h7f98852_1007 - - xyzservices=2023.10.1=pyhd8ed1ab_0 - xz=5.2.6=h166bdaf_0 - yaml=0.2.5=h7f98852_2 - yarl=1.9.3=py39hd1e30aa_0 diff --git a/EDTA_raw.pl b/EDTA_raw.pl index ac42531..36b997e 100755 --- a/EDTA_raw.pl +++ b/EDTA_raw.pl @@ -48,7 +48,6 @@ --mdust [path] Path to the mdust program. (default: find from ENV) --repeatmasker [path] Path to the RepeatMasker program. (default: find from ENV) --repeatmodeler [path] Path to the RepeatModeler2 program. (default: find from ENV) - --annosine [path] The directory containing AnnoSINE (default: read from ENV) --threads|-t [int] Number of theads to run this script. Default: 4 --help|-h Display this help info \n"; @@ -195,11 +194,11 @@ $repeatmodeler="$repeatmodeler/" if $repeatmodeler ne '' and $repeatmodeler !~ /\/$/; die "Error: RepeatModeler is not found in the RepeatModeler path $repeatmodeler!\n" unless -X "${repeatmodeler}RepeatModeler"; # AnnoSINE -chomp ($annosine=`which annosine2 2>/dev/null`) if $annosine eq ''; +chomp ($annosine=`which AnnoSINE_v2 2>/dev/null`) if $annosine eq ''; $annosine =~ s/\s+$//; $annosine = dirname($annosine) unless -d $annosine; $annosine="$annosine/" if $annosine ne '' and $annosine !~ /\/$/; -die "Error: AnnoSINE is not found in the AnnoSINE path $annosine!\n" unless (-X "${annosine}AnnoSINE_v2.py" or -X "${annosine}/bin/AnnoSINE_v2.py" or -X "${annosine}annosine2"); +die "Error: AnnoSINE is not found in the AnnoSINE path $annosine!\n" unless (-X "${annosine}AnnoSINE_v2"); # LTR_retriever chomp ($LTR_retriever=`which LTR_retriever 2>/dev/null`) if $LTR_retriever eq ''; $LTR_retriever =~ s/\s+$//; @@ -426,7 +425,7 @@ if (-s "Seed_SINE.fa"){ print STDERR "$date\tExisting result file Seed_SINE.fa found!\n\t\t\t\tWill keep this file without rerunning this module.\n\t\t\t\tPlease specify --overwrite 1 if you want to rerun AnnoSINE_v2.\n\n"; } else { - $status = system("python3 ${annosine}annosine2 -t $threads -a 2 --num_alignments 50000 -rpm 0 --copy_number 3 --shift 100 -auto 1 3 $genome ./ > /dev/null 2>&1"); + $status = system("python3 ${annosine}AnnoSINE_v2 -t $threads -a 2 --num_alignments 50000 -rpm 0 --copy_number 3 --shift 100 -auto 1 3 $genome ./ > /dev/null 2>&1"); #`rm $_` for grep { /^.\/${genome}_([0-9a-f]{32})\.mod$/i } glob("./*"); # remove duplicated genome file } @@ -573,7 +572,8 @@ if ($overwrite eq 0 and -s "./TIR-Learner-Result/TIR-Learner_FinalAnn.fa"){ print STDERR "$date\tExisting raw result TIR-Learner_FinalAnn.fa found!\n\t\t\t\tWill use this for further analyses.\n\t\t\t\tPlease specify --overwrite 1 if you want to rerun this module.\n\n"; } else { - `python3 $TIR_Learner/TIR-Learner3.0.py -f $genome_file_real_path -s $species -t $threads -l $maxint -c -o $genome_file_real_path.EDTA.raw/TIR --grf_path $grfp --gt_path $genometools`; + #`python3 $TIR_Learner/TIR-Learner3.0.py -f $genome_file_real_path -s $species -t $threads -l $maxint -c -o $genome_file_real_path.EDTA.raw/TIR --grf_path $grfp --gt_path $genometools`; + `python3 $TIR_Learner/TIR-Learner3.0.py -f $genome_file_real_path -s $species -t $threads -l $maxint -o $genome_file_real_path.EDTA.raw/TIR --grf_path $grfp --gt_path $genometools`; } # clean raw predictions with flanking alignment