diff --git a/pca-utils/README.md b/pca-utils/README.md
new file mode 100644
index 00000000..787d2e2b
--- /dev/null
+++ b/pca-utils/README.md
@@ -0,0 +1,78 @@
+# pca-synthetic-data
+
+## About
+### Description
+This project is designed to create synthetic data (mp3 recordings) for PCA solution 
+https://aws.amazon.com/blogs/machine-learning/post-call-analytics-for-your-contact-center-with-amazon-language-ai-services/
+
+
+## Getting started
+
+### Installation
+
+Create a virtual environment and install the required packages there. Here's how you can do it:
+
+1. Install `virtualenv` if you haven't already:
+
+   ```
+   pip install virtualenv
+   ```
+
+2. Create a new virtual environment:
+
+   ```
+   virtualenv env
+   ```
+
+3. Activate the virtual environment:
+
+   ```
+   source env/bin/activate
+   ```
+
+4. Install the required packages:
+
+   ```
+   pip install -r requirements.txt
+   ```
+
+### Usage
+
+Define the number of recordings in Python code
+   ```
+   number_of_recordings = 1
+   ```
+
+
+Now you're ready to run the Python code.
+
+   ```
+   streamlit run calls-creation-polly.py
+   ```
+
+![PCA Synthetic Data](images/pca.png)
+
+## Authors and acknowledgment
+This project was created and is maintained by @chadaws and @orvital 
+
+## Contributing
+We welcome contributions from the community! If you'd like to contribute to this project, please follow these guidelines:
+
+1. Fork the repository and create a new branch for your feature or bug fix.
+2. Make your changes and ensure that the project still builds and tests pass.
+3. Submit a merge request with a clear description of your changes and the problem they solve.
+
+We'll review your contribution and provide feedback or merge it into the main branch if it meets our standards.
+
+## Support
+Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
+
+## Roadmap
+TBD
+
+
+## License
+For open source projects, say how it is licensed.
+
+## Project status
+If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
diff --git a/pca-utils/calls-creation-polly.py b/pca-utils/calls-creation-polly.py
new file mode 100644
index 00000000..52de7498
--- /dev/null
+++ b/pca-utils/calls-creation-polly.py
@@ -0,0 +1,279 @@
+import logging
+import boto3
+import json
+import random
+import os
+from pydub import AudioSegment
+import uuid
+import datetime
+import streamlit as st
+from botocore.exceptions import ClientError
+
+# Initialize Bedrock client
+bedrock_client = boto3.client('bedrock-runtime', region_name='us-west-2')
+polly_client = boto3.client('polly', region_name='us-west-2')
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def generate_conversation(bedrock_client,
+                          model_id,
+                          system_prompts,
+                          messages,
+                          temperature,
+                          top_k):
+    """
+    Sends messages to a model.
+    Args:
+        bedrock_client: The Boto3 Bedrock runtime client.
+        model_id (str): The model ID to use.
+        system_prompts (JSON) : The system prompts for the model to use.
+        messages (JSON) : The messages to send to the model.
+        temperature (float): The temperature parameter for the model.
+        top_k (int): The top_k parameter for the model.
+
+    Returns:
+        response (JSON): The conversation that the model generated.
+
+    """
+
+    logger.info("Generating message with model %s", model_id)
+
+    # Inference parameters to use.
+    inference_config = {"temperature": temperature}
+    # Additional inference parameters to use.
+    additional_model_fields = {"top_k": top_k}
+
+    # Send the message.
+    response = bedrock_client.converse(
+        modelId=model_id,
+        messages=messages,
+        system=system_prompts,
+        inferenceConfig=inference_config,
+        additionalModelRequestFields=additional_model_fields
+    )
+
+    # Log token usage.
+    token_usage = response['usage']
+    logger.info("Input tokens: %s", token_usage['inputTokens'])
+    logger.info("Output tokens: %s", token_usage['outputTokens'])
+    logger.info("Total tokens: %s", token_usage['totalTokens'])
+    logger.info("Stop reason: %s", response['stopReason'])
+
+    return response['output']['message']['content'][0]['text']
+
+def create_polly_json(dialogue):
+    # Define Spanish voices for Polly
+    agent_voices = ['Mia']
+    customer_voices = ['Andres']
+    agent_voice = random.choice(agent_voices)
+    customer_voice = random.choice(customer_voices)
+
+    polly_json = []
+    for entry in dialogue:
+        voice = agent_voice if entry["role"] == "agent" else customer_voice
+        polly_json.append({
+            "voice": voice,
+            "text": entry["text"]
+        })
+    return polly_json
+
+def add_background_noise(polly_json):
+    background_sounds = [
+        {"type": "typing", "frequency": 0.3},
+        {"type": "office_ambience", "frequency": 0.1},
+        {"type": "phone_ring", "frequency": 0.05}
+    ]
+
+    enhanced_json = []
+    for entry in polly_json:
+        enhanced_json.append(entry)
+        for sound in background_sounds:
+            if random.random() < sound["frequency"]:
+                enhanced_json.append({
+                    "sound_effect": sound["type"]
+                })
+
+    return enhanced_json
+
+def create_audio_with_polly(polly_json, filename):
+    # Initialize two mono tracks
+    agent_track = AudioSegment.silent(duration=0)
+    customer_track = AudioSegment.silent(duration=0)
+    
+    # Define a pause between turns (e.g., 500 ms)
+    pause = AudioSegment.silent(duration=500)
+    
+    for entry in polly_json:
+        if "voice" in entry:
+            response = polly_client.synthesize_speech(
+                Text=entry["text"],
+                OutputFormat='mp3',
+                VoiceId=entry["voice"],
+                Engine='neural',
+                LanguageCode='es-MX'
+            )
+
+            with open("temp_audio.mp3", 'wb') as file:
+                file.write(response['AudioStream'].read())
+
+            segment = AudioSegment.from_mp3("temp_audio.mp3")
+
+            # Add the segment to the appropriate track with a pause
+            if entry["voice"] == 'Mia':  # Assuming Mia is the agent
+                agent_track += segment + pause
+                customer_track += AudioSegment.silent(duration=len(segment) + len(pause))
+            else:
+                customer_track += segment + pause
+                agent_track += AudioSegment.silent(duration=len(segment) + len(pause))
+
+        elif "sound_effect" in entry:
+            sound_file = f"{entry['sound_effect']}.mp3"
+            if os.path.exists(sound_file):
+                sound = AudioSegment.from_mp3(sound_file)
+                # Add sound effect to both tracks
+                agent_track += sound
+                customer_track += sound
+
+    # Ensure both tracks are exactly the same length
+    max_length = max(len(agent_track), len(customer_track))
+    agent_track = agent_track.set_frame_rate(44100).set_channels(1)
+    customer_track = customer_track.set_frame_rate(44100).set_channels(1)
+
+    # Pad the shorter track with silence
+    if len(agent_track) < max_length:
+        agent_track += AudioSegment.silent(duration=max_length - len(agent_track))
+    elif len(customer_track) < max_length:
+        customer_track += AudioSegment.silent(duration=max_length - len(customer_track))
+
+    # Ensure both tracks have the exact same number of frames
+    min_frames = min(len(agent_track.get_array_of_samples()), len(customer_track.get_array_of_samples()))
+    agent_track = agent_track[:min_frames]
+    customer_track = customer_track[:min_frames]
+
+    # Check if both tracks have content
+    if len(agent_track) == 0 or len(customer_track) == 0:
+        logger.warning("One or both audio tracks are empty. Skipping stereo creation.")
+        stereo_audio = agent_track if len(agent_track) > 0 else customer_track
+    else:
+        # Combine into stereo
+        stereo_audio = AudioSegment.from_mono_audiosegments(agent_track, customer_track)
+
+    mp3_filename = filename + ".mp3"
+    stereo_audio.export(mp3_filename, format="mp3")
+    os.remove("temp_audio.mp3")  # Clean up temporary file
+
+    return mp3_filename
+
+def main():
+    st.title("Generate Call Transcripts")
+
+    # Updated to include only Anthropic models
+    models = [
+        {"provider": "Anthropic", "name": "Claude 3 Sonnet", "version": "1.0", "id": "anthropic.claude-3-sonnet-20240229-v1:0"},
+        {"provider": "Anthropic", "name": "Claude 3.5 Sonnet", "version": "1.0", "id": "anthropic.claude-3-5-sonnet-20240620-v1:0"},
+        {"provider": "Anthropic", "name": "Claude 3 Haiku", "version": "1.0", "id": "anthropic.claude-3-haiku-20240307-v1:0"},
+        {"provider": "Anthropic", "name": "Claude 3 Opus", "version": "1.0", "id": "anthropic.claude-3-opus-20240229-v1:0"}
+    ]
+
+    model_id = st.selectbox("Select Model", [model["provider"] + " - " + model["name"] + " (" + model["version"] + ")" for model in models])
+    model_id = [model["id"] for model in models if model["provider"] + " - " + model["name"] + " (" + model["version"] + ")" == model_id][0]
+
+    max_tokens = st.slider("Max Tokens", 100, 4096, 2000, 100)
+    temperature = st.slider("Temperature", 0.0, 1.0, 0.7, 0.1)
+    top_k = st.slider("Top K", 0, 500, 250, 10)
+    
+    # New slider for number of calls
+    num_calls = st.slider("Number of Calls to Generate", 1, 100, 1)
+
+    system_prompt = st.text_area("System Prompt", value="""
+    You are an assistant for a telecommunications company helping to generate realistic call transcripts.
+    """, height=100)
+    
+    # Move the prompt and system prompt to pre-filled text fields
+    prompt = st.text_area("Prompt", value="""
+    Generate a detailed contact center transcript in Spanish for company named AT&T Mexico (wireless provider) with flow of agent, customer back and forth. Be verbose. Randomly choose a telecommunications service issue from billing, account cycle, mobile call issues, data plans issues, balance query issues or general questions. The conversation should include:
+    1. Greeting and identification
+    2. Problem description by the customer
+    3. Troubleshooting steps suggested by the agent
+    4. Some back-and-forth dialogue as they work through the issue
+    5. Resolution or next steps
+    6. Closing of the call
+    7. Write the response as formatted as a json array that looks like this:
+        [
+        {"role": "agent", "text": "Welcome to ATT Mexico billing support. How can I assist you today?"},
+        {"role": "customer", "text": "Yes I have a question about my recent mobile phone charges."},
+        {"role": "agent", "text": "No problem, I'd be happy to review your mobile billing with you."}
+        ]
+        It is very important to get the formatting correct. It must be a valid JSON array that consists of only key-value pairs for each turn of the conversation
+    8. The agent's name is Mia
+    9. The customer's name is Andres
+    10. Show only the JSON
+    Please make the conversation natural, including some hesitations, interruptions, and casual language where appropriate.
+    """, height=200)
+
+
+
+    if st.button("Generate"):
+        for i in range(num_calls):
+            st.write(f"Generating call {i+1} of {num_calls}")
+            
+            system_prompts = [
+                {
+                    "text": system_prompt
+                }
+            ]
+
+            messages = [
+                {
+                    "role": "user",
+                    "content": [{"text": prompt}]
+                }
+            ]
+
+            # Generate conversation with retry logic
+            max_retries = 3
+            retry_count = 0
+            while retry_count < max_retries:
+                transcript = generate_conversation(bedrock_client, model_id, system_prompts, messages, temperature, top_k)
+
+                if transcript is None:
+                    st.error(f"Failed to generate transcript for call {i+1}. Skipping to next call.")
+                    break
+
+                try:
+                    # Parse the transcript into a structured format
+                    dialogue = json.loads(transcript)
+
+                    # Create the Polly JSON
+                    polly_json = create_polly_json(dialogue)
+
+                    # Add background noise
+                    enhanced_polly_json = add_background_noise(polly_json)
+
+                    # Save the JSON to a file
+                    json_filename = f"MobilePhone_{uuid.uuid4()}_AGENT_OscarR_DT_{datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}.json"
+                    with open(json_filename, 'w') as f:
+                        json.dump(enhanced_polly_json, f, indent=2)
+                    st.success(f"Polly JSON for call {i+1} has been saved to {json_filename}")
+
+                    # Create the audio recording
+                    audio_filename = create_audio_with_polly(enhanced_polly_json, json_filename.replace(".json", ""))
+                    st.success(f"Call recording {i+1} has been created as {audio_filename}")
+
+                    # Display the transcript
+                    st.json(dialogue)
+
+                    break  # Break out of the retry loop if successful
+
+                except json.JSONDecodeError as e:
+                    st.warning(f"Failed to parse transcript for call {i+1} (attempt {retry_count + 1}): {e}")
+                    retry_count += 1
+
+            if retry_count == max_retries:
+                st.error(f"Failed to generate a valid transcript for call {i+1} after {max_retries} attempts. Skipping to next call.")
+
+        st.success(f"All {num_calls} calls have been generated.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/pca-utils/images/pca.png b/pca-utils/images/pca.png
new file mode 100644
index 00000000..2a434302
Binary files /dev/null and b/pca-utils/images/pca.png differ
diff --git a/pca-utils/requirements.txt b/pca-utils/requirements.txt
new file mode 100644
index 00000000..6d73c8a4
--- /dev/null
+++ b/pca-utils/requirements.txt
@@ -0,0 +1,3 @@
+boto3
+pydub
+streamlit
\ No newline at end of file