diff --git a/notebooks/Optimus_VIRTUOSO.ipynb b/notebooks/Optimus_VIRTUOSO.ipynb new file mode 100644 index 0000000..d763e62 --- /dev/null +++ b/notebooks/Optimus_VIRTUOSO.ipynb @@ -0,0 +1,864 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "colab": { + "name": "Optimus_VIRTUOSO.ipynb", + "private_outputs": true, + "provenance": [], + "collapsed_sections": [], + "machine_shape": "hm" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QA0W-VK1JVQl" + }, + "source": [ + "# Optimus VIRTUOSO (ver. 4.0)\n", + "\n", + "## \"Music never allows falsehoods for even the deaf hear flat notes!\" ---OV\n", + "\n", + "***\n", + "\n", + "Powered by tegridy-tools TMIDIX Optimus Processors: https://github.com/asigalov61/tegridy-tools\n", + "\n", + "***\n", + "\n", + "Credit for char-based GPT2 code used in this colab goes out to Andrej Karpathy: https://github.com/karpathy/minGPT\n", + "\n", + "***\n", + "\n", + "WARNING: This complete implementation is a functioning model of the Artificial Intelligence. Please excercise great humility, care, and respect. https://www.nscai.gov/\n", + "\n", + "***\n", + "\n", + "#### Project Los Angeles\n", + "\n", + "#### Tegridy Code 2021\n", + "\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eftzIVKrqR5S" + }, + "source": [ + "# Setup Environment, clone needed repos, and install all required dependencies" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OCjG6albmof3", + "cellView": "form" + }, + "source": [ + "#@title nvidia-smi gpu check\n", + "!nvidia-smi" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "HsUtsJGNz6f2", + "cellView": "form" + }, + "source": [ + "#@title Install all dependencies (run only once per session)\n", + "\n", + "!git clone https://github.com/asigalov61/tegridy-tools\n", + "\n", + "!pip install torch\n", + "!pip install tqdm" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "pf8B3p6QySmE", + "cellView": "form" + }, + "source": [ + "#@title Import all needed modules\n", + "\n", + "print('Loading needed modules. Please wait...')\n", + "import os\n", + "from datetime import datetime\n", + "import secrets\n", + "\n", + "import tqdm\n", + "from tqdm import auto\n", + "\n", + "if not os.path.exists('/content/Dataset'):\n", + " os.makedirs('/content/Dataset')\n", + "\n", + "print('Loading TMIDIX module...')\n", + "os.chdir('/content/tegridy-tools/tegridy-tools')\n", + "import TMIDIX\n", + "\n", + "print('Loading minGPT module...')\n", + "os.chdir('/content/tegridy-tools/tegridy-tools')\n", + "from minGPT import *\n", + "\n", + "from IPython.display import display, Javascript, HTML, Audio\n", + "\n", + "from google.colab import output, drive\n", + "\n", + "os.chdir('/content/')\n", + "print('Loading complete. Enjoy! :)')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "627-AmFZayac" + }, + "source": [ + "# (QUICK DEMO) Download ready-to-use pre-trained model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6QchPlW-YOGZ", + "cellView": "form" + }, + "source": [ + "#@title Download latest best pre-trained model checkpoint and TXT dataset\n", + "\n", + "#@markdown NOTE: You can go straight to the setup/load sections and then to music generation\n", + "\n", + "#@markdown NOTE: You do not need to change any settings to run this model. Just use the colab defaults to run it.\n", + "%cd /content/\n", + "\n", + "print('=' * 70)\n", + "print('Downloading pre-trained dataset-model...Please wait...')\n", + "print('=' * 70)\n", + "\n", + "!wget https://github.com/asigalov61/Optimus-VIRTUOSO/raw/main/Dataset-Model/MuseNet/Optimus-VIRTUOSO-Dataset-Model.zip.001\n", + "!wget https://github.com/asigalov61/Optimus-VIRTUOSO/raw/main/Dataset-Model/MuseNet/Optimus-VIRTUOSO-Dataset-Model.zip.002\n", + "!wget https://github.com/asigalov61/Optimus-VIRTUOSO/raw/main/Dataset-Model/MuseNet/Optimus-VIRTUOSO-Dataset-Model.zip.003\n", + "\n", + "!cat Optimus-VIRTUOSO-Dataset-Model.zip* > Optimus-VIRTUOSO-Dataset-Model.zip\n", + "print('=' * 70)\n", + "\n", + "!unzip -j Optimus-VIRTUOSO-Dataset-Model.zip\n", + "print('=' * 70)\n", + "\n", + "print('Done! Enjoy! :)')\n", + "print('=' * 70)\n", + "%cd /content/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N2Pv5eNRqiyr" + }, + "source": [ + "# (TRAIN FROM SCRATCH) Download and process MIDI dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EHuggjW7etzZ", + "cellView": "form" + }, + "source": [ + "#@title Download special Tegridy Piano MIDI dataset (Recommended)\n", + "\n", + "#@markdown Solo Piano\n", + "\n", + "#@markdown Works best stand-alone/as-is for the optimal results\n", + "%cd /content/Dataset/\n", + "\n", + "!wget 'https://github.com/asigalov61/Tegridy-MIDI-Dataset/raw/master/Tegridy-Piano-CC-BY-NC-SA.zip'\n", + "!unzip -j '/content/Dataset/Tegridy-Piano-CC-BY-NC-SA.zip'\n", + "!rm '/content/Dataset/Tegridy-Piano-CC-BY-NC-SA.zip'\n", + "\n", + "%cd /content/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "7RoOfsF_z6UN", + "cellView": "form" + }, + "source": [ + "#@title Download special Tegridy Piano Violin MIDI dataset\n", + "\n", + "#@markdown Piano-Violin Duo\n", + "\n", + "#@markdown Works best stand-alone/as-is for the optimal results\n", + "\n", + "#@markdown NOTE: Do not forget to enable MIDI channels and velocities TXT encoding for most optimal results\n", + "\n", + "%cd /content/Dataset/\n", + "\n", + "!wget 'https://github.com/asigalov61/Tegridy-MIDI-Dataset/raw/master/Tegridy-Piano-Violin-CC-BY-NC-SA.zip'\n", + "!unzip -j '/content/Dataset/Tegridy-Piano-Violin-CC-BY-NC-SA.zip'\n", + "!rm '/content/Dataset/Tegridy-Piano-Violin-CC-BY-NC-SA.zip'\n", + "\n", + "%cd /content/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "cellView": "form", + "id": "_OFd-_1COifV" + }, + "source": [ + "#@title Download Named Select Tegridy Children Songs MIDI dataset\n", + "\n", + "#@markdown Multi-instrumental with drums\n", + "\n", + "#@markdown Works best stand-alone/as-is for the optimal results\n", + "\n", + "#@markdown NOTE: Do not forget to enable MIDI channels and velocities TXT encoding for most optimal results\n", + "\n", + "%cd /content/Dataset/\n", + "\n", + "!wget 'https://github.com/asigalov61/Tegridy-MIDI-Dataset/raw/master/Named-Select-Tegridy-Children-Songs-CC-BY-NC-SA.zip'\n", + "!unzip -j '/content/Dataset/Named-Select-Tegridy-Children-Songs-CC-BY-NC-SA.zip'\n", + "!rm '/content/Dataset/Named-Select-Tegridy-Children-Songs-CC-BY-NC-SA.zip'\n", + "\n", + "%cd /content/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1ypXZoySkHJ" + }, + "source": [ + "# If you are not sure where to start or what settings to select, please use original defaults" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YsE8z4jcR-o4", + "cellView": "form" + }, + "source": [ + "#@title Process MIDIs to special MIDI dataset with Tegridy MIDI Processor\n", + "#@markdown NOTES:\n", + "\n", + "#@markdown 1) Dataset MIDI file names are used as song names. Feel free to change it to anything you like.\n", + "\n", + "#@markdown 2) Best results are achieved with the single-track, single-channel, single-instrument MIDI 0 files with plain English names (avoid special or sys/foreign chars)\n", + "\n", + "#@markdown 3) MIDI Channel = -1 means all MIDI channels except the drums. MIDI Channel = 16 means all channels will be processed. Otherwise, only single indicated MIDI channel will be processed.\n", + "\n", + "desired_dataset_name = \"Optimus-VIRTUOSO-Music-Dataset\" #@param {type:\"string\"}\n", + "file_name_to_output_dataset_to = \"/content/Optimus-VIRTUOSO-Music-Dataset\" #@param {type:\"string\"}\n", + "desired_MIDI_channel_to_process = 0 #@param {type:\"slider\", min:-1, max:16, step:1}\n", + "sorted_or_random_file_loading_order = True #@param {type:\"boolean\"}\n", + "encode_velocities = False #@param {type:\"boolean\"}\n", + "encode_MIDI_channels = False #@param {type:\"boolean\"}\n", + "add_transposed_dataset_by_this_many_pitches = 0 #@param {type:\"slider\", min:-12, max:12, step:1}\n", + "add_transposed_and_flipped_dataset = False #@param {type:\"boolean\"}\n", + "chordify_input_MIDIs = False #@param {type:\"boolean\"}\n", + "melody_conditioned_chords = False #@param {type:\"boolean\"}\n", + "melody_pitch_baseline = 60 #@param {type:\"slider\", min:0, max:127, step:1}\n", + "time_denominator = 1 #@param {type:\"slider\", min:1, max:50, step:1}\n", + "transform_to_pitch = 0 #@param {type:\"slider\", min:0, max:127, step:1}\n", + "perfect_timings = True #@param {type:\"boolean\"}\n", + "MuseNet_encoding = True #@param {type:\"boolean\"}\n", + "chars_encoding_offset = 33#@param {type:\"number\"}\n", + "\n", + "print('TMIDI Optimus MIDI Processor')\n", + "print('Starting up...')\n", + "###########\n", + "\n", + "average_note_pitch = 0\n", + "min_note = 127\n", + "max_note = 0\n", + "\n", + "files_count = 0\n", + "\n", + "gfiles = 0\n", + "\n", + "chords_list_f = []\n", + "melody_list_f = []\n", + "\n", + "chords_list = []\n", + "chords_count = 0\n", + "\n", + "melody_chords = []\n", + "melody_count = 0\n", + "\n", + "TXT_String = ''\n", + "\n", + "TXT = ''\n", + "melody = []\n", + "chords = []\n", + "\n", + "###########\n", + "\n", + "print('Loading MIDI files...')\n", + "print('This may take a while on a large dataset in particular.')\n", + "\n", + "dataset_addr = \"/content/Dataset/\"\n", + "os.chdir(dataset_addr)\n", + "filez = list()\n", + "for (dirpath, dirnames, filenames) in os.walk(dataset_addr):\n", + " filez += [os.path.join(dirpath, file) for file in filenames]\n", + "print('=' * 70)\n", + "\n", + "if filez == []:\n", + " print('Could not find any MIDI files. Please check Dataset dir...')\n", + " print('=' * 70)\n", + "\n", + "if sorted_or_random_file_loading_order:\n", + " print('Sorting files...')\n", + " filez.sort()\n", + " print('Done!')\n", + " print('=' * 70)\n", + "\n", + "# Stamping the dataset info\n", + "print('Stamping the dataset info...')\n", + "\n", + "TXT_String += 'DATASET=' + str(desired_dataset_name) + chr(10)\n", + "TXT_String += 'CREATED_ON=' + str(datetime.now()).replace(' ', '-').replace(':', '-').replace('.', '-') + chr(10)\n", + "\n", + "TXT_String += 'CHARS_ENCODING_OFFSET=' + str(chars_encoding_offset) + chr(10)\n", + "TXT_String += 'TIME_DENOMINATOR=' + str(time_denominator) + chr(10)\n", + "TXT_String += 'TRANSFORM=' + str(transform_to_pitch) + chr(10)\n", + "TXT_String += 'PERFECT_TIMINGS=' + str(perfect_timings) + chr(10)\n", + "TXT_String += 'MUSENET_ENCODING=' + str(MuseNet_encoding) + chr(10)\n", + "TXT_String += 'TRANSPOSED_BY=' + str(add_transposed_dataset_by_this_many_pitches) + chr(10)\n", + "TXT_String += 'TRANSPOSED_AND_FLIPPED=' + str(add_transposed_and_flipped_dataset) + chr(10)\n", + "\n", + "TXT_String += 'LEGEND=STA-DUR-PTC'\n", + "if encode_velocities:\n", + " TXT_String += '-VEL'\n", + "if encode_MIDI_channels:\n", + " TXT_String += '-CHA'\n", + "TXT_String += chr(10)\n", + "\n", + "print('Processing MIDI files. Please wait...')\n", + "for f in tqdm.auto.tqdm(filez):\n", + " try:\n", + " fn = os.path.basename(f)\n", + " fn1 = fn.split('.')[0]\n", + "\n", + " files_count += 1\n", + " TXT, melody, chords, bass_melody, karaokez, INTS, aux1, aux2 = TMIDIX.Optimus_MIDI_TXT_Processor(f, chordify_TXT=chordify_input_MIDIs, output_MIDI_channels=encode_MIDI_channels, char_offset=chars_encoding_offset, dataset_MIDI_events_time_denominator=time_denominator, output_velocity=encode_velocities, MIDI_channel=desired_MIDI_channel_to_process, MIDI_patch=range(0, 127), melody_conditioned_encoding=melody_conditioned_chords, melody_pitch_baseline=melody_pitch_baseline, perfect_timings=perfect_timings, musenet_encoding=MuseNet_encoding, transform=transform_to_pitch)\n", + " TXT_String += TXT\n", + " melody_list_f += melody\n", + " chords_list_f += chords\n", + " gfiles += 1\n", + "\n", + " if add_transposed_dataset_by_this_many_pitches != 0:\n", + "\n", + " TXT, melody, chords, bass_melody, karaokez, INTS, aux1, aux2 = TMIDIX.Optimus_MIDI_TXT_Processor(f, chordify_TXT=chordify_input_MIDIs, output_MIDI_channels=encode_MIDI_channels, char_offset=chars_encoding_offset, dataset_MIDI_events_time_denominator=time_denominator, output_velocity=encode_velocities, MIDI_channel=desired_MIDI_channel_to_process, transpose_by=add_transposed_dataset_by_this_many_pitches, MIDI_patch=range(0, 127), melody_conditioned_encoding=melody_conditioned_chords, melody_pitch_baseline=melody_pitch_baseline, perfect_timings=perfect_timings, musenet_encoding=MuseNet_encoding, transform=transform_to_pitch)\n", + " TXT_String += TXT\n", + " melody_list_f += melody\n", + " chords_list_f += chords\n", + " gfiles += 1\n", + "\n", + " if add_transposed_and_flipped_dataset == True:\n", + "\n", + " TXT, melody, chords, bass_melody, karaokez, INTS, aux1, aux2 = TMIDIX.Optimus_MIDI_TXT_Processor(f, chordify_TXT=chordify_input_MIDIs, output_MIDI_channels=encode_MIDI_channels, char_offset=chars_encoding_offset, dataset_MIDI_events_time_denominator=time_denominator, output_velocity=encode_velocities, MIDI_channel=desired_MIDI_channel_to_process, transpose_by=-12, MIDI_patch=range(0, 127), flip=True, melody_conditioned_encoding=melody_conditioned_chords, melody_pitch_baseline=melody_pitch_baseline, perfect_timings=perfect_timings, musenet_encoding=MuseNet_encoding, transform=transform_to_pitch)\n", + " TXT_String += TXT\n", + " melody_list_f += melody\n", + " chords_list_f += chords\n", + " gfiles += 1\n", + "\n", + " except KeyboardInterrupt:\n", + " print('Saving current progress and quitting...')\n", + " break \n", + " \n", + " except:\n", + " print('Bad MIDI:', f)\n", + " continue\n", + "\n", + "TXT_String += 'TOTAL_SONGS_IN_DATASET=' + str(gfiles)\n", + "\n", + "try:\n", + " print('Task complete :)')\n", + " print('==================================================')\n", + " if add_transposed_dataset_by_this_many_pitches != 0:\n", + " print('NOTE: Transposed dataset was added per users request.')\n", + " print('==================================================')\n", + " if add_transposed_and_flipped_dataset == True:\n", + " print('NOTE: Flipped dataset was added per users request.') \n", + " print('==================================================')\n", + " print('Number of processed dataset MIDI files:', files_count)\n", + " print('Number of MIDI chords recorded:', len(chords_list_f))\n", + " print('First chord event:', chords_list_f[0], 'Last chord event:', chords_list_f[-1]) \n", + " print('Number of recorded melody events:', len(melody_list_f))\n", + " print('First melody event:', melody_list_f[0], 'Last Melody event:', melody_list_f[-1])\n", + " print('Total number of MIDI events recorded:', len(chords_list_f) + len(melody_list_f))\n", + " print('==================================================')\n", + "\n", + " # Writing dataset to TXT file\n", + " with open(file_name_to_output_dataset_to + '.txt', 'wb') as f:\n", + " f.write(TXT_String.encode('utf-8', 'replace'))\n", + " f.close\n", + "\n", + " # Dataset\n", + " MusicDataset = [chords_list_f, melody_list_f]\n", + "\n", + " # Writing dataset to pickle file\n", + " TMIDIX.Tegridy_Any_Pickle_File_Writer(MusicDataset, file_name_to_output_dataset_to)\n", + "\n", + "except:\n", + " print('=' * 70)\n", + " print('IO Error!')\n", + " print('Please check that Dataset dir is not empty/check other IO code.')\n", + " print('=' * 70)\n", + " print('Shutting down...')\n", + " print('=' * 70)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WXVPTtq_PBVu", + "cellView": "form" + }, + "source": [ + "#@title Create a 3D Scatter-plot of the processed dataset\n", + "\n", + "chords_flat = []\n", + "st = []\n", + "du = []\n", + "pt = []\n", + "\n", + "for c in chords_list_f:\n", + " st.append(c[1])\n", + " du.append(c[2])\n", + " pt.append(c[4])\n", + "\n", + "# Creating dataset\n", + "x1 = np.array(st)\n", + "y1 = np.array(du)\n", + "z1 = np.array(pt)\n", + "\n", + "#z = np.random.randint(100, size =(50))\n", + "#x = np.random.randint(80, size =(50))\n", + "#y = np.random.randint(60, size =(50))\n", + " \n", + "# Creating figure\n", + "fig = plt.figure(figsize = (15,12))\n", + "ax = plt.axes(projection =\"3d\")\n", + " \n", + "# Creating plot\n", + "ax.scatter3D(x1, y1, z1, s = 10, c = z1)\n", + "#ax.set_position()\n", + "ax.set_xlabel('Start Times')\n", + "ax.set_ylabel('Durations')\n", + "ax.set_zlabel('Pitches')\n", + "plt.title(str(desired_dataset_name))\n", + "ax.view_init(60, 30)\n", + "# show plot\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0cj2xl5xqwea" + }, + "source": [ + "# Setup and Intialize the Model\n", + "\n", + "## YOU MUST RUN THE CELL/CODE IN THE SECTION BELOW to init the model. Does not matter if the model is empty or pre-trained.\n", + "\n", + "## NOTE: You can include the .checkpoint path if you like to resume training" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "m4QIgbe3ySmN", + "cellView": "form" + }, + "source": [ + "#@title Create/prepare GPT2 model and load the dataset\n", + "\n", + "full_path_to_training_text_file = \"/content/Optimus-VIRTUOSO-Music-Dataset.txt\" #@param {type:\"string\"}\n", + "model_attention_span_in_tokens = 512 #@param {type:\"slider\", min:0, max:1024, step:16}\n", + "model_embed_size = 512 #@param {type:\"slider\", min:0, max:1024, step:16}\n", + "number_of_heads = 8 #@param {type:\"slider\", min:1, max:16, step:1}\n", + "number_of_layers = 6 #@param {type:\"slider\", min:1, max:16, step:1}\n", + "number_of_training_epochs = 5 #@param {type:\"slider\", min:1, max:5, step:1}\n", + "training_batch_size = 48 #@param {type:\"slider\", min:4, max:256, step:4}\n", + "number_of_dataloader_threads = 4 #@param {type:\"slider\", min:1, max:64, step:1}\n", + "model_learning_rate = 6e-4 #@param {type:\"number\"}\n", + "checkpoint_full_path = \"\" #@param {type:\"string\"}\n", + "\n", + "if checkpoint_full_path == '':\n", + " checkpoint_full_path = None\n", + "\n", + "\n", + "trainer, model, train_dataset = MainLoader(full_path_to_training_text_file,\n", + " None,\n", + " number_of_dataloader_threads,\n", + " model_attention_span_in_tokens,\n", + " model_embed_size,\n", + " number_of_heads,\n", + " number_of_layers,\n", + " number_of_training_epochs,\n", + " training_batch_size,\n", + " model_learning_rate,\n", + " ckpt_path=checkpoint_full_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B_18H-M-q4CB" + }, + "source": [ + "# Train the model and plot positional embeddings" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cRffqT9WFBHB", + "cellView": "form" + }, + "source": [ + "#@title Train the model\n", + "%cd /content/\n", + "trainer.train()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p4pdg5wcRTXn" + }, + "source": [ + "# Visual check" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YVWEhUj1cg7N", + "cellView": "form" + }, + "source": [ + "#@title Plot Positional Embeddings\n", + "\n", + "# visualize some of the learned positional embeddings, maybe they contain structure\n", + "PlotPositionalEmbeddings(model, model_attention_span_in_tokens)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1_Lg6gbjKQKh" + }, + "source": [ + "# Save/Load/Reload" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pMkyEMghC-KR", + "cellView": "form" + }, + "source": [ + "#@title Save/Resave the model from memory\n", + "\n", + "#@markdown Standard PyTorch AI models file extension is PTH\n", + "\n", + "#@markdown NOTE: This code/cell will save 2 (TWO) files: model checkpoint (.pth) and model checkpoint with state dict (.pth.checkpoint) to resume training\n", + "\n", + "full_path_to_save_model_to = \"/content/Optimus-VIRTUOSO-Trained-Model.pth\" #@param {type:\"string\"}\n", + "\n", + "%cd /content/\n", + "\n", + "torch.save(model, full_path_to_save_model_to)\n", + "torch.save(model.state_dict(), full_path_to_save_model_to + '.checkpoint')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CmD7VRZhDcnJ", + "cellView": "form" + }, + "source": [ + "#@title Load/Reload existing model/checkpoint\n", + "\n", + "#@markdown NOTE on models' extensions:\n", + "\n", + "#@markdown .pth == model checkpoint only\n", + "\n", + "#@markdown .pth.checkpoint == model checkpoint with state dict to restore training\n", + "\n", + "full_path_to_model_checkpoint = \"/content/Optimus-VIRTUOSO-Trained-Model.pth\" #@param {type:\"string\"}\n", + "model = torch.load(full_path_to_model_checkpoint)\n", + "model.eval()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FfgeQl8_rEox" + }, + "source": [ + "# Generate and download the output" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6ZEqKJ6NySmV", + "cellView": "form" + }, + "source": [ + "#@title Generate and download the composition as TXT file.\n", + "#@markdown PLEASE NOTE IMPORTANT POINTS: \n", + "\n", + "#@markdown 0) If you are not sure where to start/what settings to set, please use original defaults.\n", + "\n", + "#@markdown 1) Model primes from the dataset !!!\n", + "\n", + "#@markdown 2) Model's first output may be empty or garbled so please try several times before discarting the model\n", + "\n", + "#@markdown 3) You can now communicate to the model desired length of the output composition by suffixing input_prompt with number of notes.\n", + "\n", + "#@markdown I.e. SONG=Relax_with_900_notes\n", + "\n", + "#@markdown 3) Self-continuation option overrides the SONG input prompt. Self-continuation is by random 300 tokens (~100 notes) from the dataset\n", + "\n", + "print('Optimus VIRTUOSO Model Generator')\n", + "print('Starting up...')\n", + "number_of_tokens_to_generate = 8192 #@param {type:\"slider\", min:0, max:32768, step:128}\n", + "creativity_temperature = 1 #@param {type:\"slider\", min:0.05, max:4, step:0.05}\n", + "top_k_prob = 64 #@param {type:\"slider\", min:0, max:128, step:1}\n", + "input_prompt = \"SONG=\" #@param {type:\"string\"}\n", + "self_continuation = False #@param {type:\"boolean\"}\n", + "\n", + "os.chdir('/content/')\n", + "\n", + "if self_continuation:\n", + " with open(full_path_to_training_text_file) as f:\n", + " dataset = f.read()\n", + "\n", + " idx = secrets.randbelow(len(dataset))\n", + " input_prompt = 'SONG=Self-Continuation' + chr(10)\n", + " input_prompt += dataset[idx:idx+300]\n", + "\n", + "completion = Generate(model,\n", + " train_dataset,\n", + " trainer,\n", + " number_of_tokens_to_generate,\n", + " creativity_temperature,\n", + " top_k_prob,\n", + " input_prompt)\n", + "\n", + "# Stuff for datetime stamp\n", + "filename = '/content/Optimus-VIRTUOSO-Composition-' + 'generated-on-' \n", + "fname = TMIDIX.Tegridy_File_Time_Stamp(filename)\n", + "\n", + "print('Done!')\n", + "print('Saving to', str(fname + '.txt'))\n", + "with open(fname + '.txt', \"w\") as text_file:\n", + " print(completion, file=text_file)\n", + "\n", + "print('Downloading TXT file...')\n", + "from google.colab import files\n", + "files.download(fname + '.txt')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "22qDcd4NO2bs", + "cellView": "form" + }, + "source": [ + "#@title Convert to MIDI from TXT (w/Tegridy MIDI-TXT Processor)\n", + "\n", + "#@markdown Standard MIDI timings are 400/120(80)\n", + "\n", + "#@markdown Please note that only the first generated composition is being converted to MIDI by default. Please check the output TXT file for extra generated compositions.\n", + "number_of_ticks_per_quarter = 400 #@param {type:\"slider\", min:10, max:500, step:10}\n", + "dataset_time_denominator = 1 #@param {type:\"slider\", min:1, max:20, step:1}\n", + "melody_conditioned_encoding = False\n", + "encoding_has_MIDI_channels = False #@param {type:\"boolean\"}\n", + "encoding_has_velocities = False #@param {type:\"boolean\"}\n", + "simulate_velocity = True #@param {type:\"boolean\"}\n", + "save_only_first_composition = False #@param {type:\"boolean\"}\n", + "chars_encoding_offset_used_for_dataset = 33 #@param {type:\"number\"}\n", + "\n", + "print('Converting TXT to MIDI. Please wait...')\n", + "\n", + "'''For debug:'''\n", + "# fname = '/content/Optimus-VIRTUOSO-Music-Dataset'\n", + "\n", + "with open(fname + '.txt', 'r') as f:\n", + " completion = f.read()\n", + "\n", + "output_list, song_name = TMIDIX.Optimus_TXT_to_Notes_Converter(completion, \n", + " has_MIDI_channels=encoding_has_MIDI_channels, \n", + " simulate_velocity=simulate_velocity,\n", + " char_encoding_offset=chars_encoding_offset_used_for_dataset,\n", + " save_only_first_composition=save_only_first_composition,\n", + " dataset_MIDI_events_time_denominator=dataset_time_denominator,\n", + " has_velocities=encoding_has_velocities\n", + " )\n", + "\n", + "print('Converting Song to MIDI...')\n", + "\n", + "output_signature = 'Optimus VIRTUOSO'\n", + "\n", + "detailed_stats = TMIDIX.Tegridy_SONG_to_MIDI_Converter(output_list,\n", + " output_signature = output_signature, \n", + " output_file_name = fname, \n", + " track_name=song_name, \n", + " number_of_ticks_per_quarter=number_of_ticks_per_quarter)\n", + "\n", + "print('Done!')\n", + "\n", + "print('Downloading your composition now...')\n", + "from google.colab import files\n", + "files.download(fname + '.mid')\n", + "\n", + "print('Detailed MIDI stats:')\n", + "detailed_stats" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yk8oOD_oBbWN" + }, + "source": [ + "# Plot and listen to the last output" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mqhjlVjJBaXi", + "cellView": "form" + }, + "source": [ + "#@title Install prerequisites\n", + "!apt install fluidsynth #Pip does not work for some reason. Only apt works\n", + "!pip install midi2audio\n", + "!pip install pretty_midi" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kILhoHR7JmmS", + "cellView": "form" + }, + "source": [ + "#@title Plot and listen to the last generated composition\n", + "#@markdown NOTE: May be very slow with the long compositions\n", + "from midi2audio import FluidSynth\n", + "from IPython.display import display, Javascript, HTML, Audio\n", + "import pretty_midi\n", + "import librosa.display\n", + "import matplotlib.pyplot as plt\n", + "from mpl_toolkits import mplot3d\n", + "import numpy as np\n", + "\n", + "print('Synthesizing the last output MIDI... ')\n", + "# fname = '/content/Endless-Piano-Music-Composition'\n", + "\n", + "fn = os.path.basename(fname + '.mid')\n", + "fn1 = fn.split('.')[0]\n", + "\n", + "print('Plotting the composition. Please wait...')\n", + "\n", + "pm = pretty_midi.PrettyMIDI(fname + '.mid')\n", + "\n", + "# Retrieve piano roll of the MIDI file\n", + "piano_roll = pm.get_piano_roll()\n", + "\n", + "plt.figure(figsize=(14, 5))\n", + "librosa.display.specshow(piano_roll, x_axis='time', y_axis='cqt_note', fmin=1, hop_length=160, sr=16000, cmap=plt.cm.hot)\n", + "plt.title(fn1)\n", + "\n", + "FluidSynth(\"/usr/share/sounds/sf2/FluidR3_GM.sf2\", 16000).midi_to_audio(str(fname + '.mid'), str(fname + '.wav'))\n", + "Audio(str(fname + '.wav'), rate=16000)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Snu3fb4N-Nd" + }, + "source": [ + "# Congrats! You did it! :)" + ] + } + ] +} \ No newline at end of file