From 298312e57b3955e7470b68dc4a9aaa4742ac2ccd Mon Sep 17 00:00:00 2001
From: Benjamin Cretois <benjamin.cretois@nina.no>
Date: Mon, 16 Dec 2024 13:23:55 +0100
Subject: [PATCH] [ADD] more description to README

---
 README.md        | 10 ++++++++--
 src/analysefs.py | 10 ++++++----
 src/config.py    | 16 ++++++++--------
 src/extract.py   | 28 ++++++++++++++++------------
 src/utils.py     | 18 +++++++++++-------
 5 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 03c73d8..fe28142 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,14 @@ This repository is made so that we can pull the changes made to the [BirdNET](ht
 # Clone this repository:
 git clone https://github.com/NINAnor/birdnetfs.git
 cd birdnetfs
+python -m venv .venv
+source .venv/bin/activate
 pip install requirements.txt
 ```
 
 2- Clone the BirdNET repository:
+  
+`birdnetfs` attempts to reuse `BirdNET` functions as much as possible.
 
 ```bash
 git clone https://github.com/kahst/BirdNET-Analyzer.git
@@ -26,12 +30,14 @@ mv BirdNET-Analyzer birdnetsrc
 
 3- Analyze
 
+First, edit the **BirdNET config file** located at `src/config.py` (**NOT** `config_connection.yaml`). In particular, you will probably want to change the `OUTPUT_PATH` parameter, which is the path where the output files will be created.
+
 There is two options.
 
 - First you can analyze a file of your chosing using:
 
 ```bash
-export PYTHONPATH="${PYTHONPATH}:./birdnetsrc"
+export PYTHONPATH="${PYTHONPATH}:./src/birdnetsrc:src:birdnetsrc"
 python analyse.py filecache::ssh://$USER:$PASSWORD@HOST:/PATH/TO/AUDIO/FILE1.mp3
 ```
 
@@ -63,7 +69,7 @@ python3 src/parse_results.py
 3- Extract the detections!
 
 ```bash
-python3 src/extract.py
+./extract.sh
 ```
 
 
diff --git a/src/analysefs.py b/src/analysefs.py
index 3df12f1..eaf7be6 100644
--- a/src/analysefs.py
+++ b/src/analysefs.py
@@ -13,7 +13,7 @@
 )
 from birdnetsrc.audio import splitSignal
 from birdnetsrc.utils import readLines, save_result_file
-from utils import clean_tmp, read_audio_data
+from utils import read_audio_data
 
 RAVEN_TABLE_HEADER = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
 
@@ -60,6 +60,7 @@ def generate_raven_table(
         selection_id += 1
         out_string += f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"
 
+    print(f"FILE SAVED IN {result_path}")
     save_result_file(result_path, out_string)
 
 
@@ -94,7 +95,7 @@ def analyzeFile(fpath: pathlib.Path):
         item: Tuple containing (file path, config)
 
     Returns:
-        The `True` if the file was analyzed successfully.
+        True if the file was analyzed successfully.
     """
 
     # Start time
@@ -106,7 +107,7 @@ def analyzeFile(fpath: pathlib.Path):
     result_file_name = get_result_file_names(fpath)
 
     # Open file and split into chunks:
-    wave, sr, fileLengthSeconds, tmpdir = read_audio_data(fpath, sr=cfg.SAMPLE_RATE)
+    wave, sr, fileLengthSeconds = read_audio_data(fpath, sr=cfg.SAMPLE_RATE)
 
     # Status
     print(f"Analyzing {fpath}", flush=True)
@@ -158,7 +159,8 @@ def analyzeFile(fpath: pathlib.Path):
     saveResultFiles(results, result_file_name, fpath, cfg.SAMPLE_RATE)
     delta_time = (datetime.datetime.now() - start_time).total_seconds()
     print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
-    clean_tmp(tmpdir)
+    print(f"OUTPUT file saved in {result_file_name}")
+
     return True
 
 
diff --git a/src/config.py b/src/config.py
index f301f70..41e1c7f 100644
--- a/src/config.py
+++ b/src/config.py
@@ -13,13 +13,13 @@
 ##########################
 
 MODEL_VERSION: str = "V2.4"
-PB_MODEL: str = "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model"
+PB_MODEL: str = "./checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model"
 # MODEL_PATH = PB_MODEL # This will load the protobuf model
-MODEL_PATH: str = "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite"
-MDATA_MODEL_PATH: str = "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_MData_Model_V2_FP16.tflite"
-LABELS_FILE: str = "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels.txt"
+MODEL_PATH: str = "./checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite"
+MDATA_MODEL_PATH: str = "./checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_MData_Model_V2_FP16.tflite"
+LABELS_FILE: str = "./checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels.txt"
 TRANSLATED_LABELS_PATH: str = (
-    "/home/benjamin.cretois/Code/birdnetfs/birdnetfs/src/birdnetsrc/labels/V2.4"
+    "./labels/V2.4"
 )
 
 # Path to custom trained classifier
@@ -71,15 +71,15 @@
 # Note: Entries in this list have to match entries from the LABELS_FILE
 # We use the 2021 eBird taxonomy for species names (Clements list)
 CODES_FILE: str = (
-    "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/eBird_taxonomy_codes_2021E.json"
+    "./eBird_taxonomy_codes_2021E.json"
 )
 SPECIES_LIST_FILE: str = (
-    "/home/benjamin.cretois/Code/birdnetfs/src/birdnetsrc/example/species_list.txt"
+    "./example/species_list.txt"
 )
 
 # File input path and output path for selection tables
 INPUT_PATH: str = "example/"
-OUTPUT_PATH: str = "results/"
+OUTPUT_PATH: str = "/data/Prosjekter3/824001_05_metodesats_gis_24_41_flittie_kleiven/birdnetResults"
 
 # Supported file types
 ALLOWED_FILETYPES: list[str] = [
diff --git a/src/extract.py b/src/extract.py
index c2e4dfd..1c37816 100644
--- a/src/extract.py
+++ b/src/extract.py
@@ -1,14 +1,12 @@
 import argparse
-import glob
 import logging
 import os
-import traceback
 
 import fs
-import numpy as np
 import pyarrow.parquet as pq
 import yaml
 from tenacity import retry, wait_exponential
+
 from utils import openAudioFile, openCachedFile, saveSignal
 
 
@@ -28,16 +26,19 @@ def do_connection(connection_string):
         if connection_string:
             return fs.open_fs(connection_string)
         return False
-    except Exception as e:
-        #logging.error(f"Attempt failed to connect to filesystem: {e}")
-        #logging.info("Retrying connection...")
+    except Exception:
+        # logging.error(f"Attempt failed to connect to filesystem: {e}")
+        # logging.info("Retrying connection...")
         raise
 
+
 # @retry(wait=wait_exponential(multiplier=5, min=60, max=600))
-def extract_segments(item, sample_rate, out_path, filesystem, seg_length=3):
+def extract_segments(
+    item, sample_rate, out_path, filesystem, connection_string, seg_length=3
+):
     """Extract segments from the audio file and save them."""
     segments = item
-    audio_file = item["audio"]
+    audio_file = os.path.join(connection_string, item["audio"])
 
     signal, rate = (
         openAudioFile(audio_file, sample_rate)
@@ -46,7 +47,7 @@ def extract_segments(item, sample_rate, out_path, filesystem, seg_length=3):
     )
 
     save_extracted_segments(signal, rate, segments, out_path, seg_length)
-    #logging.info(f"Segments extracted from {audio_file}")
+    # logging.info(f"Segments extracted from {audio_file}")
 
 
 def save_extracted_segments(signal, rate, segment, out_path, seg_length):
@@ -62,7 +63,6 @@ def save_extracted_segments(signal, rate, segment, out_path, seg_length):
         save_segment(segment_signal, segment, out_path)
 
 
-
 def save_segment(segment_signal, segment, out_path):
     """Save an individual segment."""
     species_path = os.path.join(out_path, segment["species"])
@@ -75,7 +75,6 @@ def save_segment(segment_signal, segment, out_path):
 
 
 if __name__ == "__main__":
-
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--config",
@@ -96,5 +95,10 @@ def save_segment(segment_signal, segment, out_path):
     for item in items.to_pylist():
         print(f"Extracting segments from {item}")
         extract_segments(
-            item, config["SAMPLE_RATE"], config["OUT_PATH_SEGMENTS"], myfs, seg_length=3
+            item,
+            config["SAMPLE_RATE"],
+            config["OUT_PATH_SEGMENTS"],
+            myfs,
+            config["CONNECTION_STRING"],
+            seg_length=3,
         )
diff --git a/src/utils.py b/src/utils.py
index f836d3b..55fc533 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -10,15 +10,20 @@
 
 def read_file(filepath, sr):
     # Step 1: Create a temporary directory for this process
-    temp_dir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
-    print(f"Created temp directory: {temp_dir}")
+    # temp_dir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
+    # print(f"Created temp directory: {temp_dir}")
 
     # Step 2: Use fsspec and configure it to use the temp_dir for caching
-    with fsspec.open(filepath, block_cache_dir=temp_dir) as f:
+    with fsspec.open(
+        filepath
+    ) as f:  #     with fsspec.open(filepath, block_cache_dir=temp_dir) as f:
         wave, fs = librosa.load(f, sr=sr, mono=True, res_type="kaiser_fast")
 
     # Return the data and the temp directory path (for later cleanup)
-    return wave, fs, temp_dir
+    return (
+        wave,
+        fs,
+    )  # temp_dir
 
 
 def clean_tmp(temp_dir):
@@ -32,16 +37,15 @@ def clean_tmp(temp_dir):
 
 def read_audio_data(path, sr):
     try:
-        ndarray, rate, tmpdir = read_file(path, sr)
+        ndarray, rate = read_file(path, sr)  # , tmpdir
         duration = librosa.get_duration(y=ndarray, sr=sr)
     except audioread.exceptions.NoBackendError as e:
         print(e)
-    return ndarray, rate, duration, tmpdir
+    return ndarray, rate, duration  # , tmpdir
 
 
 def openCachedFile(filesystem, path, sample_rate=48000, offset=0.0, duration=None):
     import shutil
-    import tempfile
 
     bin = filesystem.openbin(path)