diff --git a/.github/workflows/python-testing.yml b/.github/workflows/python-testing.yml
new file mode 100644
index 0000000..be75fb0
--- /dev/null
+++ b/.github/workflows/python-testing.yml
@@ -0,0 +1,33 @@
+name: Skelly Synchronize Tests
+
+on:
+  pull_request:
+    branches: [ main ]
+
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.x
+        uses: actions/setup-python@v4
+        with:
+          # Semantic version range syntax or exact version of a Python version
+          python-version: '3.9'
+          # Optional - x64 or x86 architecture, defaults to x64
+          architecture: 'x64'
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Run tests with pytest
+        run: |
+          pip install pytest
+          pytest skelly_synchronize/test_test.py --junitxml=junit/test-results-3.9.xml
+      - name: Upload pytest test results
+        uses: actions/upload-artifact@v3
+        with:
+          name: pytest-results-3.9
+          path: junit/test-results-3.9.xml
diff --git a/README.md b/README.md
index 184e2e3..d27d21d 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,17 @@ This package synchronizes a set of videos of the same event by cross-correlating
 
 # How to run
 
-Synchronize your videos by setting the path to your freemocap data folder, your sessionID, and the file types of your videos into __main__.py, then run the file.
+Synchronize your videos by setting the path to your freemocap data folder, your sessionID, and the file types of your videos in `__main__.py`, then run the file. The sessionID should be the name of a subfolder of your freemocap data folder, and that session folder should contain a subfolder called `RawVideos` holding the videos that need syncing.
+
+![Main](https://user-images.githubusercontent.com/24758117/220470598-580360ef-8d4f-447c-820e-cc4d2d544c07.png)
+
+The terminal output while running should look like this:
+
+TerminalOutput
+
+A `SyncedVideos` folder will be created in the session folder and filled with the synchronized video files. The session folder will also have an `AudioFiles` folder containing audio files of the raw videos, which are used in processing.
+
+FileStructureAfterRunning
 
 ## Installation
 
@@ -17,4 +27,18 @@ The following requirements must be met for the script to function:
 1. Videos must have audio
 2. Videos must be in the same file format
 3. Videos must have overlapping audio from the same real world event
-4. Videos must be in a folder titled "RawVideos", with no other videos in the folder
\ No newline at end of file
+4. Videos must be in a folder titled "RawVideos", with no other videos in the folder
+
+# Expected File Structure
+
+To function correctly, Skelly Synchronize expects the following folder structure:
+```
+freemocap_data_folder:
+    sessionID:
+        RawVideos:
+            Cam0.mp4
+            Cam1.mp4
+            ...
+    ...
+```
+The camera names can be changed, and the file format may be changed as well, although freemocap currently only uses `.mp4`.
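Note on configuration: the settings described in the README map onto the `if __name__ == "__main__":` block at the bottom of `skelly_synchronize.py` later in this diff. A minimal sketch of a filled-in configuration (the session name and path are placeholders, and the import line assumes this repo's module layout):

```python
from pathlib import Path

from skelly_synchronize.skelly_synchronize import main  # assumed import path

sessionID = "my_session"  # placeholder: subfolder of the freemocap data folder that holds RawVideos
freemocap_data_path = Path("/path/to/FreeMocap_Data")  # placeholder path
file_type = "MP4"  # either case works, with or without a leading period

main(sessionID, freemocap_data_path, file_type)
```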
diff --git a/skelly_synchronize/__init__.py b/skelly_synchronize/__init__.py
index 8c30e37..8faf4db 100644
--- a/skelly_synchronize/__init__.py
+++ b/skelly_synchronize/__init__.py
@@ -1,7 +1,7 @@
 """Top-level package for basic_template_repo."""
 
 __package_name__ = "skelly_synchronize"
-__version__ = "v2023.01.1004"
+__version__ = "v2023.01.1001"
 
 __author__ = """Philip Queen"""
 __email__ = "info@freemocap.org"
diff --git a/skelly_synchronize/skelly_synchronize.py b/skelly_synchronize/skelly_synchronize.py
index ca88420..f16954b 100644
--- a/skelly_synchronize/skelly_synchronize.py
+++ b/skelly_synchronize/skelly_synchronize.py
@@ -7,7 +7,7 @@ from scipy import signal
 from pathlib import Path
 
-logging.basicConfig(level = logging.DEBUG)
+logging.basicConfig(level = logging.INFO)
 
 class VideoSynchronize:
     '''Class of functions for time synchronizing and trimming video files based on cross correlation of their audio.'''
@@ -19,12 +19,12 @@ def __init__(self, sessionID: str, fmc_data_path: Path) -> None:
         self.raw_video_folder_name = "RawVideos"
         self.raw_video_path = self.base_path / self.raw_video_folder_name
         self.synchronized_video_folder_name = "SyncedVideos"
-        self.synchronized_video_path = self.base_path / self.synchronized_video_folder_name
+        self.synchronized_folder_path = self.base_path / self.synchronized_video_folder_name
         self.audio_folder_name = "AudioFiles"
         self.audio_folder_path = self.base_path / self.audio_folder_name
 
         # create synchronizeded video and audio file folders
-        self.synchronized_video_path.mkdir(parents = False, exist_ok=True)
+        self.synchronized_folder_path.mkdir(parents = False, exist_ok=True)
         self.audio_folder_path.mkdir(parents = False, exist_ok=True)
 
@@ -39,70 +39,43 @@ def get_video_file_list(self, file_type: str) -> list:
         video_filepath_list = list(self.raw_video_path.glob(file_extension_upper)) + list(self.raw_video_path.glob(file_extension_lower)) #if two capitalization standards are used, the videos may not be in original order
         # because glob behaves differently on windows vs. mac/linux, we collect all files both upper and lowercase, and remove redundant files that appear on windows
-        unique_video_filepath_list = self.get_unique_list(video_filepath_list)
+        unique_video_filepath_list = self._get_unique_list(video_filepath_list)
 
         return unique_video_filepath_list
-
-    def get_video_files(self, video_filepath_list: list) -> dict:
-        '''Get video files from clip_list and return a dictionary with keys as the name of the video and values as the video files'''
-        # create empty list for storing audio and video files, will contain sublists formatted like [video_file_name,video_file,audio_file_name,audio_file]
+    def get_video_file_dict(self, video_filepath_list: list) -> dict:
         video_file_dict = dict()
-
-        # iterate through clip_list, open video files and audio files, and store in file_list
         for video_filepath in video_filepath_list:
-            # take vid_name and change extension to create audio file name
-            video_name = str(video_filepath).split("/")[-1] #get just the name of the video file
-            camera_name = video_name.split(".")[0]
-
-            # open video files
-            video_file = mp.VideoFileClip(str(video_filepath), audio=True)
-            logging.debug(f"video size is {video_file.size}")
-            # waiting on moviepy to fix issue related to portrait mode videos having height and width swapped
-            #video_file = video_file.resize((1080,1920)) #hacky workaround for iPhone portrait mode videos
-            #logging.debug(f"resized video is {video_file.size}")
-
-            vid_length = video_file.duration
+            video_dict = dict()
+            video_dict["video filepath"] = video_filepath
+            video_dict["video pathstring"] = str(video_filepath)
+            video_name = str(video_filepath).split("/")[-1]
+            video_dict["camera name"] = video_name.split(".")[0]
 
-            video_file_dict[video_name] = {"video file": video_file, "camera name": camera_name, "video duration": vid_length}
-
-            logging.info(f"video_name: {video_name}, video length: {vid_length} seconds")
+            video_dict["video duration"] = self._extract_video_duration_ffmpeg(str(video_filepath))
+            video_dict["video fps"] = self._extract_video_fps_ffmpeg(str(video_filepath))
+            video_file_dict[video_name] = video_dict
 
         return video_file_dict
 
-    def get_audio_files(self, video_file_dict: dict) -> dict:
-        '''Extract audio files from videos and return a dictionary with keys as the name of the audio and values as the audio files'''
+    def get_audio_files(self, video_file_dict: dict, audio_extension: str) -> dict:
         audio_signal_dict = dict()
-
         for video_dict in video_file_dict.values():
-            audio_name = video_dict["camera name"] + '.wav'
-
-            # create .wav file of clip audio
-            video_dict["video file"].audio.write_audiofile(str(self.audio_folder_path / audio_name))
-
-            # extract raw audio from Wav file
+            self._extract_audio_from_video_ffmpeg(file_pathstring=video_dict["video pathstring"],
+                                                  file_name=video_dict["camera name"],
+                                                  output_folder_path=self.audio_folder_path,
+                                                  output_extension=audio_extension)
+
+            audio_name = video_dict["camera name"] + "." + audio_extension
+
             audio_signal, audio_rate = librosa.load(self.audio_folder_path / audio_name, sr = None)
 
             audio_signal_dict[audio_name] = {"audio file": audio_signal, "sample rate": audio_rate, "camera name": video_dict["camera name"]}
 
         return audio_signal_dict
 
-    def get_audio_sample_rates(self, audio_signal_dict:dict) -> list:
-        '''Get the sample rates of each audio file and return them in a list'''
-        audio_sample_rate_list = [single_audio_dict["sample rate"] for single_audio_dict in audio_signal_dict.values()]
-
-        return audio_sample_rate_list
-
-    def get_unique_list(self, list: list) -> list:
-        '''Return a list of the unique elements from input list'''
-        unique_list = []
-        [unique_list.append(clip) for clip in list if clip not in unique_list]
-
-        return unique_list
-
-    def get_fps_list(self, video_file_dict: dict) -> list:
-        '''Retrieve frames per second of each video clip in video_file_dict and return the list'''
-        return [video_dict["video file"].fps for video_dict in video_file_dict.values()]
-
+    def get_fps_list(self, video_file_dict: dict):
+        return [video_dict["video fps"] for video_dict in video_file_dict.values()]
+
     def check_rates(self, rate_list: list):
         '''Check if audio sample rates or video frame rates are equal, throw an exception if not (or if no rates are given).'''
         if len(rate_list) == 0:
@@ -113,32 +86,13 @@
             return rate_list[0]
         else:
             raise Exception(f"rates are not equal, rates are {rate_list}")
-
-    def normalize_audio(self, audio_file):
-        '''Perform z-score normalization on an audio file and return the normalized audio file - this is best practice for correlating.'''
-        return ((audio_file - np.mean(audio_file))/np.std(audio_file - np.mean(audio_file)))
-
-    def cross_correlate(self, audio1, audio2):
-        '''Take two audio files, synchronize them using cross correlation, and trim them to the same length.
-        Inputs are two WAV files to be synchronizeded. Return the lag expressed in terms of the audio sample rate of the clips.
-        '''
-
-        # compute cross correlation with scipy correlate function, which gives the correlation of every different lag value
-        # mode='full' makes sure every lag value possible between the two signals is used, and method='fft' uses the fast fourier transform to speed the process up
-        correlation = signal.correlate(audio1, audio2, mode='full', method='fft')
-        # lags gives the amount of time shift used at each index, corresponding to the index of the correlate output list
-        lags = signal.correlation_lags(audio1.size, audio2.size, mode="full")
-        # lag is the time shift used at the point of maximum correlation - this is the key value used for shifting our audio/video
-        lag = lags[np.argmax(correlation)]
-
-        return lag
-
+
     def find_lags(self, audio_signal_dict: dict, sample_rate: int) -> dict:
         '''Take a file list containing video and audio files, as well as the sample rate of the audio, cross correlate the audio files, and output a lag list.
         The lag list is normalized so that the lag of the latest video to start in time is 0, and all other lags are positive.
        '''
         comparison_file_key = next(iter(audio_signal_dict))
-        lag_dict = {single_audio_dict["camera name"]: self.cross_correlate(audio_signal_dict[comparison_file_key]["audio file"],single_audio_dict["audio file"])/sample_rate for single_audio_dict in audio_signal_dict.values()} # cross correlates all audio to the first audio file in the list
+        lag_dict = {single_audio_dict["camera name"]: self._cross_correlate(audio_signal_dict[comparison_file_key]["audio file"],single_audio_dict["audio file"])/sample_rate for single_audio_dict in audio_signal_dict.values()} # cross correlates all audio to the first audio file in the list
         #also divides by the audio sample rate in order to get the lag in seconds
 
         #now that we have our lag array, we subtract every value in the array from the max value
@@ -150,82 +104,150 @@ def find_lags(self, audio_signal_dict: dict, sample_rate: int) -> dict:
 
         return normalized_lag_dict
 
-    def find_minimum_video_duration(self, video_file_dict: dict, lag_list: list) -> float:
-        '''Take a list of video files and a list of lags, and find what the shortest video is starting from each videos lag offset'''
-
-        min_duration = min([video_dict["video duration"] - lag_list[video_dict["camera name"]] for video_dict in video_file_dict.values()])
-
-        return min_duration
-
-    def trim_videos(self, video_file_dict: dict, lag_list: list) -> list:
-        '''Take a list of video files and a list of lags, and make all videos start and end at the same time.
-        Must be in folder of file list'''
+    def trim_videos(self, video_file_dict: dict, lag_dict: dict) -> list:
+        '''Take a dictionary of video files and a dictionary of lags, and make all videos start and end at the same time.'''
 
-        min_duration = self.find_minimum_video_duration(video_file_dict, lag_list)
+        min_duration = self._find_minimum_video_duration(video_file_dict, lag_dict)
         trimmed_video_filenames = [] # can be used for plotting
 
         for video_dict in video_file_dict.values():
             logging.debug(f"trimming video file {video_dict['camera name']}")
-            trimmed_video = video_dict["video file"].subclip(lag_list[video_dict["camera name"]],lag_list[video_dict["camera name"]] + min_duration)
             if video_dict["camera name"].split("_")[0] == "raw":
-                video_name = "synced_" + video_dict["camera name"][4:] + ".mp4"
+                synced_video_name = "synced_" + video_dict["camera name"][4:] + ".mp4"
             else:
-                video_name = "synced_" + video_dict["camera name"] + ".mp4"
-            trimmed_video_filenames.append(video_name) #add new name to list to reference for plotting
-            logging.debug(f"video size is {trimmed_video.size}")
-            trimmed_video.write_videofile(str(self.synchronized_video_path / video_name))
-            logging.info(f"Video Saved - Cam name: {video_dict['camera name']}, Video Duration: {trimmed_video.duration}")
+                synced_video_name = "synced_" + video_dict["camera name"] + ".mp4"
+            trimmed_video_filenames.append(synced_video_name) #add new name to list to reference for plotting
+            logging.info(f"Saving video - Cam name: {video_dict['camera name']}")
+            self._trim_single_video_ffmpeg(input_video_pathstring = video_dict["video pathstring"],
+                                           start_time = lag_dict[video_dict["camera name"]],
+                                           desired_duration = min_duration,
+                                           output_video_pathstring = str(self.synchronized_folder_path / synced_video_name))
+            logging.info(f"Video Saved - Cam name: {video_dict['camera name']}, Video Duration: {min_duration}")
 
         return trimmed_video_filenames
 
+    def _extract_audio_from_video_ffmpeg(self, file_pathstring, file_name, output_folder_path, output_extension="wav"):
+        '''Run a subprocess call to extract the audio from a video file using ffmpeg'''
+
+        subprocess.run(["ffmpeg", "-y", "-i", file_pathstring, f"{output_folder_path}/{file_name}.{output_extension}"],
+                       stdout=subprocess.PIPE,
+                       stderr=subprocess.STDOUT)
+
+    def _extract_video_duration_ffmpeg(self, file_pathstring):
+        '''Run a subprocess call to get the duration from a video file using ffmpeg'''
+
+        extract_duration_subprocess = subprocess.run(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_pathstring],
+                                                     stdout=subprocess.PIPE,
+                                                     stderr=subprocess.STDOUT)
+        video_duration = float(extract_duration_subprocess.stdout)
-def synchronize_videos(sessionID: str, fmc_data_path: Path, file_type: str) -> None:
+        return video_duration
+
+    def _extract_video_fps_ffmpeg(self, file_pathstring):
+        '''Run a subprocess call to get the fps of a video file using ffmpeg'''
+
+        extract_fps_subprocess=subprocess.run(['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=r_frame_rate', '-of', 'default=noprint_wrappers=1:nokey=1', file_pathstring],
+                                              stdout=subprocess.PIPE,
+                                              stderr=subprocess.STDOUT)
+        # get the results, then remove the excess characters to get something like '####/###'
+        cleaned_stdout = str(extract_fps_subprocess.stdout).split("'")[1].split("\\")[0]
+        # separate out numerator and denominator to calculate the fraction
+        numerator, denominator = cleaned_stdout.split("/")
+        video_fps = float(int(numerator)/int(denominator))
+
+        return video_fps
+
+    def _trim_single_video_ffmpeg(self, input_video_pathstring, start_time, desired_duration, output_video_pathstring):
+        '''Run a subprocess call to trim a video from start time to last as long as the desired duration'''
+
+        trim_video_subprocess = subprocess.run(["ffmpeg", "-i", f"{input_video_pathstring}", "-ss", f"{start_time}", "-t", f"{desired_duration}", "-y", f"{output_video_pathstring}"],
+                                               stdout=subprocess.PIPE,
+                                               stderr=subprocess.STDOUT)
+
+    def get_audio_sample_rates(self, audio_signal_dict:dict) -> list:
+        '''Get the sample rates of each audio file and return them in a list'''
+        audio_sample_rate_list = [single_audio_dict["sample rate"] for single_audio_dict in audio_signal_dict.values()]
+
+        return audio_sample_rate_list
+
+    def _get_unique_list(self, list: list) -> list:
+        '''Return a list of the unique elements from input list'''
+        unique_list = []
+        [unique_list.append(clip) for clip in list if clip not in unique_list]
+
+        return unique_list
+
+    def _normalize_audio(self, audio_file):
+        '''Perform z-score normalization on an audio file and return the normalized audio file - this is best practice for correlating.'''
+        return ((audio_file - np.mean(audio_file))/np.std(audio_file - np.mean(audio_file)))
+
+    def _cross_correlate(self, audio1, audio2):
+        '''Take two audio files and synchronize them using cross correlation.
+        Inputs are two WAV files to be synchronized. Return the lag expressed in terms of the audio sample rate of the clips.
+        '''
+
+        # compute cross correlation with scipy correlate function, which gives the correlation of every different lag value
+        # mode='full' makes sure every lag value possible between the two signals is used, and method='fft' uses the fast fourier transform to speed the process up
+        correlation = signal.correlate(audio1, audio2, mode='full', method='fft')
+        # lags gives the amount of time shift used at each index, corresponding to the index of the correlate output list
+        lags = signal.correlation_lags(audio1.size, audio2.size, mode="full")
+        # lag is the time shift used at the point of maximum correlation - this is the key value used for shifting our audio/video
+        lag = lags[np.argmax(correlation)]
+
+        return lag
+
+    def _find_minimum_video_duration(self, video_file_dict: dict, lag_dict: dict) -> float:
+        '''Take a dictionary of video files and a dictionary of lags, and find the duration of the shortest video, starting from each video's lag offset'''
+
+        min_duration = min([video_dict["video duration"] - lag_dict[video_dict["camera name"]] for video_dict in video_file_dict.values()])
+
+        return min_duration
+
+def synchronize_videos(sessionID: str, fmc_data_path: Path, file_type: str) -> None:
     '''Run the functions from the VideoSynchronize class to synchronize all videos with the given file type in the base path folder.
-    file_type can be given in either case, with or without a leading period
+    file_type can be given in either case, with or without a leading period.
+    Uses FFmpeg to handle the video files.
     '''
 
     # instantiate class
     synchronize = VideoSynchronize(sessionID, fmc_data_path)
-    # the rest of this could theoretically be put in the init function, don't know which is best practice...
 
     # create list of video clips in raw video folder
     clip_list = synchronize.get_video_file_list(file_type)
-
-    # get the files and sample rate of videos in raw video folder, and store in list
-    video_file_dict = synchronize.get_video_files(clip_list)
-    audio_signal_dict = synchronize.get_audio_files(video_file_dict)
-
-    # find the frames per second of each video
-    fps_list = synchronize.get_fps_list(video_file_dict)
+    # create dictionaries with video and audio information
+    video_file_dict = synchronize.get_video_file_dict(clip_list)
+    audio_signal_dict = synchronize.get_audio_files(video_file_dict, audio_extension="wav")
+
+    # get video fps and audio sample rate
+    fps_list = synchronize.get_fps_list(video_file_dict)
     audio_sample_rates = synchronize.get_audio_sample_rates(audio_signal_dict)
-
+
     # frame rates and audio sample rates must be the same duration for the trimming process to work correctly
    synchronize.check_rates(fps_list)
     synchronize.check_rates(audio_sample_rates)
-
+
     # find the lags between starting times
-    lag_list = synchronize.find_lags(audio_signal_dict, audio_sample_rates[0])
-
-    # use lags to trim the videos
-    trimmed_videos = synchronize.trim_videos(video_file_dict, lag_list)
+    lag_dict = synchronize.find_lags(audio_signal_dict, audio_sample_rates[0])
+
+    synchronize.trim_videos(video_file_dict, lag_dict)
 
 def main(sessionID: str, fmc_data_path: Path, file_type: str):
     # start timer to measure performance
     start_timer = time.time()
     synchronize_videos(sessionID, fmc_data_path, file_type)
     # end performance timer
     end_timer = time.time()
     #calculate and display elapsed processing time
     elapsed_time = end_timer - start_timer
-    logging.info(f"elapsed processing time in seconds: {elapsed_time}")
+    logging.info(f"Elapsed processing time in seconds: {elapsed_time}")
 
 
 if __name__ == "__main__":
-    sessionID = "iPhoneTesting"
-    fmc_data_path = Path("/Users/philipqueen/Documents/Humon Research Lab/FreeMocap_Data")
+    sessionID = "your_session_id"
+    freemocap_data_path = Path("path_to_your_freemocap_data_folder")
     file_type = "MP4"
-    main(sessionID, fmc_data_path, file_type)
\ No newline at end of file
+    main(sessionID, freemocap_data_path, file_type)
\ No newline at end of file
diff --git a/skelly_synchronize/system/logging_configuration.py b/skelly_synchronize/system/logging_configuration.py
index a2d7bd0..e3bf404 100644
--- a/skelly_synchronize/system/logging_configuration.py
+++ b/skelly_synchronize/system/logging_configuration.py
@@ -19,13 +19,13 @@ def get_logging_handlers(log_file_path: Optional[str] = ""):
     )
 
     console_handler = logging.StreamHandler(sys.stdout)
-    console_handler.setLevel(logging.DEBUG)
+    console_handler.setLevel(logging.INFO)
     console_handler.setFormatter(default_formatter)
     handlers = [console_handler]
 
     if log_file_path:
         file_handler = logging.FileHandler(log_file_path)
         file_handler.setFormatter(default_formatter)
-        file_handler.setLevel(logging.DEBUG)
+        file_handler.setLevel(logging.INFO)
         handlers.append(file_handler)
 
     return handlers
@@ -35,7 +35,7 @@ def configure_logging(log_file_path: Optional[str] = ""):
     if len(logging.getLogger().handlers) == 0:
         handlers = get_logging_handlers(log_file_path)
         logging.getLogger("").handlers.extend(handlers)
-        logging.root.setLevel(logging.DEBUG)
+        logging.root.setLevel(logging.INFO)
         logger = logging.getLogger(__name__)
         logger.info(f"Added logging handlers: {handlers}")
     else:
diff --git a/skelly_synchronize/test_test.py b/skelly_synchronize/test_test.py
index 4e29f36..cfb9158 100644
--- a/skelly_synchronize/test_test.py
+++ b/skelly_synchronize/test_test.py
@@ -6,4 +6,4 @@ def returnTrue(num):
 
 
 def test_test():
-    assert returnTrue() == True
\ No newline at end of file
+    assert returnTrue(6) == True
\ No newline at end of file