From 68ddad55b61c9a5e7202d439e2d76a860f74a22e Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Mon, 27 Jun 2022 14:54:01 +0100 Subject: [PATCH 01/13] Add the services --- .gitignore | 160 ++++++++++++++++++ .../DialogFlowService/requirements.txt | 2 + .../Dialog Flow/DialogFlowService/service.py | 128 ++++++++++++++ Applications-Python/Dialog Flow/README.md | 26 +++ .../Dialog Flow/VoskClient/service.py | 73 ++++++++ .../Dialog Flow/VoskServer/requirements.txt | 1 + .../Dialog Flow/VoskServer/server.py | 80 +++++++++ 7 files changed, 470 insertions(+) create mode 100644 .gitignore create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/requirements.txt create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/service.py create mode 100644 Applications-Python/Dialog Flow/README.md create mode 100644 Applications-Python/Dialog Flow/VoskClient/service.py create mode 100644 Applications-Python/Dialog Flow/VoskServer/requirements.txt create mode 100644 Applications-Python/Dialog Flow/VoskServer/server.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/requirements.txt b/Applications-Python/Dialog Flow/DialogFlowService/requirements.txt new file mode 100644 index 0000000..5e31212 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/requirements.txt @@ -0,0 +1,2 @@ +dialogflow==1.1.1 +grpcio==1.26.0 \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/service.py b/Applications-Python/Dialog Flow/DialogFlowService/service.py new file mode 100644 index 0000000..101b5bb --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/service.py @@ -0,0 +1,128 @@ +# This exposes the basics of the Dialog Flow API to NAOqi through a module. +# Due to NAOqi broker weirdness, the responses are passed back to the caller as a JSON string. +# This has to be loaded into a dict and run through the function in the example listener called "byteify" +# This converts the unicode strings to ASCII and makes them work with the rest of your code and NAOqi. + +import dialogflow_v2 as dialogflow +from dialogflow_v2.proto.session_pb2 import QueryInput, TextInput +from dialogflow_v2.proto.audio_config_pb2 import InputAudioConfig, AudioEncoding +from google.protobuf.json_format import MessageToJson +from naoqi import ALBroker, ALModule + +import argparse +import sys +import time + + +def log_response(response): + """ + Print the dialogflow response to the console for debugging purposes. + """ + + print("=" * 20) + print("Query text: {}".format(response.query_result.query_text)) + print( + "Detected intent: {} (confidence: {})\n".format( + response.query_result.intent.display_name, + response.query_result.intent_detection_confidence, + ) + ) + print("Fulfillment text: {}\n".format(response.query_result.fulfillment_text.encode('utf8'))) + + +class DialogFlowService(ALModule): + """NAOqi remote module that interfaces with Google DialogFlow.""" + + def __init__(self, name): + ALModule.__init__(self, name) + + # Gross imports because of py2 + self.session_client = dialogflow.SessionsClient() + self.session = None + self.language_code = "en-GB" + pass + + def begin_session(self, project_id, session_id, language_code): + """ + Start the dialog flow session. + :param project_id: The google project ID, used to find the agent. + :param session_id: The session ID (for contexts). + :param language_code: The language being understood. 
+ :return: + """ + if self.session is not None: + print('[DialogFlow] Session already active.') + return + self.session = self.session_client.session_path(project_id, session_id) + self.language_code = language_code + print("[DialogFlow] Session path: {}\n".format(self.session)) + + def end_session(self): + """ + Clear the session ready for reuse. + """ + print('[DialogFlow] Ending session.') + self.session = None + + def detect_intent_text(self, text): + """Detect intent from the given text string.""" + + # Collect input + text_input = TextInput(text=text, language_code=self.language_code) + + # Build and send query + query_input = QueryInput(text=text_input) + response = self.session_client.detect_intent(self.session, query_input) + log_response(response) + + # Send it as json because naoqi doesnt like objects + return str(MessageToJson(response)) + + def detect_intent_audio(self, input_audio): + """Detect intent from the given PWM audio""" + # Hardcoded Pepper's values + audio_encoding = AudioEncoding.AUDIO_ENCODING_LINEAR_16 + sample_rate_hertz = 16000 + + # Build audio config and inputs + audio_config = InputAudioConfig( + audio_encoding=audio_encoding, + language_code=self.language_code, + sample_rate_hertz=sample_rate_hertz, + ) + query_input = QueryInput(audio_config=audio_config) + + # Fetch intent. + response = self.session_client.detect_intent(self.session, query_input, input_audio=input_audio) + log_response(response) + + # Send it as json because naoqi doesnt like objects + return str(MessageToJson(response)) + + +if __name__ == '__main__': + # Nifty way of grabbing terminal arguments. + parser = argparse.ArgumentParser() + parser.add_argument('--ip', type=str, default='127.0.0.1', help='Robot IP Address. For local bot use 127.0.0.1.') + parser.add_argument('--port', type=int, default=9559, help='NaoQI port number.') + parser.add_argument('--project_id', type=str, required=True, help='Google Cloud Project ID.') + args = parser.parse_args() + + try: + # Setup a bi-directional broker to communicate with Pepper. + pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, args.ip, args.port) + except RuntimeError: + print('Failed to connect to Naoqi at %s:%d. Please check script arguments. Run with -h for help.' % ( + args.ip, args.port)) + sys.exit(1) + + # Register the module. + DialogFlowService = DialogFlowService('DialogFlowService') + + # Keep program running until we tell it to quit. + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + print("Interrupted by user, stopping application.") + sys.exit(0) diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md new file mode 100644 index 0000000..5daec2e --- /dev/null +++ b/Applications-Python/Dialog Flow/README.md @@ -0,0 +1,26 @@ +# Google Dialog Flow +This folder contains four projects that are used to make Pepper utilise Dialog Flow. + +This project has been inspired and influenced by [this blog post](https://blogemtech.medium.com/pepper-integration-with-dialogflow-1d7f1582da1a). + +## Projects +- DialogFlowService: This is a NAOqi service that runs on a laptop, it exposes some of the dialog flow API to Pepper. This is done because it is currently not possible to install the API on Pepper using pip. +- VoskClient: This is a socket client for a python 3 vosk server (see below). +- VoskServer: This is a Python 3 server hosting access to the Vosk Speech Recognition API. 
It was used during an experiment and can be optionally toggled in DialogFlowExample's demonstration listener service. +- DialogFlowExample: This Choregraphe project ties all of the above services together to create a basic dialog flow program. It contains the barebones and can be used as a template to create further applications. + +## Setup/Configuration +`requirements.txt` files have been provided where necessary to pin dependencies to the correct versions. Entire pip dumps weren't provided as they may have been polluted however the important libraries are in these files. + +Both services that can be run on the laptop (DialogFlowService and VoskClient) accept command line arguments to configure the target robot: +``` +service.py --ip --port +``` + +In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup) + +TODO: +- Configuring the behaviour + +## Customisation +TODO: Customising the listener. \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/VoskClient/service.py b/Applications-Python/Dialog Flow/VoskClient/service.py new file mode 100644 index 0000000..3e2993b --- /dev/null +++ b/Applications-Python/Dialog Flow/VoskClient/service.py @@ -0,0 +1,73 @@ +# This is a prototype helper for using Vosk from Python 2 +# It could be cleaner to write a Vosk Module in C++, however it was done like this for prototyping sakes. + +from naoqi import ALModule + +import socket +import struct + + +class VoskClient(ALModule): + def __init__(self, name): + ALModule.__init__(self, name) + + # Streaming may work better than dumping. + def transcribe(self, audio): + try: + # Connect to the server/ + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect(('127.0.0.1', 9090)) + + # Send the audio + s.sendall(struct.pack('!i', len(audio))) + s.sendall(audio) + + # Read response length + buf = bytes() + while len(buf) < 4: + buf += s.recv(4) + response_len = struct.unpack('!i', buf[:4])[0] + + print(response_len) + + # Read response + response = bytes() + while len(response) < response_len: + response += s.recv(response_len - len(response)) + print(len(response)) + + resp = response.decode('utf8') + s.close() + return str(resp) + except Exception as ex: + print('=' * 20) + print('Ex: %s' % ex.message) + + +if __name__ == '__main__': + # Nifty way of grabbing terminal arguments. + parser = argparse.ArgumentParser() + parser.add_argument('--ip', type=str, default='127.0.0.1', help='Robot IP Address. For local bot use 127.0.0.1.') + parser.add_argument('--port', type=int, default=9559, help='NaoQI port number.') + parser.add_argument('--project_id', type=str, required=True, help='Google Cloud Project ID.') + args = parser.parse_args() + + try: + # Setup a bi-directional broker to communicate with Pepper. + pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, args.ip, args.port) + except RuntimeError: + print('Failed to connect to Naoqi at %s:%d. Please check script arguments. Run with -h for help.' % ( + args.ip, args.port)) + sys.exit(1) + + # Register the module. + VoskClient = VoskClient('VoskClient') + + # Keep program running until we tell it to quit. 
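+    # Note: this __main__ block mirrors the one in DialogFlowService/service.py and uses
+    # argparse, ALBroker, sys and time, which are not imported at the top of this file as written.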
+ try: + while True: + time.sleep(1) + except KeyboardInterrupt: + print("Interrupted by user, stopping application.") + sys.exit(0) + diff --git a/Applications-Python/Dialog Flow/VoskServer/requirements.txt b/Applications-Python/Dialog Flow/VoskServer/requirements.txt new file mode 100644 index 0000000..cc9ecf4 --- /dev/null +++ b/Applications-Python/Dialog Flow/VoskServer/requirements.txt @@ -0,0 +1 @@ +vosk==0.3.42 \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/VoskServer/server.py b/Applications-Python/Dialog Flow/VoskServer/server.py new file mode 100644 index 0000000..410836f --- /dev/null +++ b/Applications-Python/Dialog Flow/VoskServer/server.py @@ -0,0 +1,80 @@ +# Python 3 Server for vosk. +# This is a proof of concept, it ignores disconnect handling and doesn't gracefully exit when pressing ctrl+c (press return instead). + +import os +import struct +import time +import json + +from vosk import Model, KaldiRecognizer, SetLogLevel +import sys +import wave + +import socket + +import signal + + +from threading import Thread + +model = Model(lang="en-us") + + +class SocketServer(Thread): + def run(self): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 9090)) + s.listen() + while True: + conn, addr = s.accept() + with conn: + print(f"Connected by {addr}") + + # rec = KaldiRecognizer(model, 44100) # TODO Don't hard code this. + rec = KaldiRecognizer(model, 16000) # TODO Don't hard code this. + rec.SetWords(True) + rec.SetPartialWords(True) + + buf = bytes() + while len(buf) < 4: + buf += conn.recv(4) + response_len = struct.unpack('!i', buf[:4])[0] + + print(response_len) + + read_count = 0 + datbuf = bytes() + while read_count < response_len: + data = conn.recv(4096) + datbuf += data + read_count += len(data) + + rec.AcceptWaveform(data) + + print('generate response') + + # rec.AcceptWaveform(datbuf) + + print('send response') + resp = json.loads(rec.FinalResult())['text'] + print(resp) + conn.sendall(struct.pack("!i", len(resp))) + conn.sendall(resp.encode('utf8')) + + # Wait for client to close. 
+ # conn.recv(1) + + time.sleep(1) + + print('end') + + # if not data: + # break + + +# https://stackoverflow.com/questions/15189888/python-socket-accept-in-the-main-thread-prevents-quitting +pid = os.getpid() +sl = SocketServer() +sl.start() +input('Socket is listening, press any key to abort...') +os.kill(pid, 9) From f500fd2a7138a17bdd760c99305be092f9280673 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:05:45 +0100 Subject: [PATCH 02/13] Added the choregraphe project --- .../DialogFlowExample/DialogFlowExample.pml | 16 + .../DialogFlowExample/behavior_1/behavior.xar | 151 +++++++ .../DialogFlowExample/manifest.xml | 22 + .../scripts/ListenerService.py | 388 ++++++++++++++++++ .../DialogFlowExample/scripts/stk/__init__.py | 3 + .../scripts/stk/coroutines.py | 279 +++++++++++++ .../DialogFlowExample/scripts/stk/events.py | 188 +++++++++ .../DialogFlowExample/scripts/stk/logging.py | 65 +++ .../DialogFlowExample/scripts/stk/runner.py | 153 +++++++ .../DialogFlowExample/scripts/stk/services.py | 39 ++ .../translations/translation_en_US.ts | 32 ++ .../DialogFlowService/.idea/.gitignore | 8 + .../.idea/DialogFlowService.iml | 14 + .../inspectionProfiles/Project_Default.xml | 6 + .../inspectionProfiles/profiles_settings.xml | 6 + .../DialogFlowService/.idea/misc.xml | 4 + .../DialogFlowService/.idea/modules.xml | 8 + .../DialogFlowService/.idea/vcs.xml | 6 + .../Dialog Flow/DialogFlowService/service.py | 1 - 19 files changed, 1388 insertions(+), 1 deletion(-) create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/__init__.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/coroutines.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/events.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/logging.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/runner.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/services.py create mode 100644 Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/.gitignore create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/DialogFlowService.iml create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/Project_Default.xml create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/misc.xml create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/modules.xml create mode 100644 Applications-Python/Dialog Flow/DialogFlowService/.idea/vcs.xml diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml b/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml new file mode 100644 index 0000000..9efa790 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml @@ -0,0 +1,16 @@ + + + + 
+ + + + + + + + + + + + diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar new file mode 100644 index 0000000..52d2291 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -0,0 +1,151 @@ +media/images/box/root.pngmedia/images/box/box-python-script.pngmedia/images/box/box-script.pngmedia/images/box/interaction/say.pngmedia/images/box/interaction/choice.png"terminate"5media/images/box/interaction/say.png \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml b/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml new file mode 100644 index 0000000..273177f --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml @@ -0,0 +1,22 @@ + + + + Untitled + + + en_US + + + en_US + + + + + interactive + + + + + + + diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py new file mode 100644 index 0000000..a3f310f --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -0,0 +1,388 @@ +import qi +from naoqi import ALBroker, ALModule, ALProxy +import numpy + +import json +import os +import StringIO +import sys +import time +import uuid + +# Determines how many times we wait for audio peaking before deciding the user has finished speaking. +LISTENING_RETRY_COUNT = 15 + +# The minimum audio peak to trigger a recording. +AUDIO_PEAK_THRESHOLD = 4000 + +# Cap at 10 seconds, speech rec gets funny after this. +MAX_RECORD_TIME = 10 + + +def byteify(input): + """ + Convert a dictionary from using Unicode strings to ASCII. + This is unfortunately necessary so that naoqi plays nice with our JSON. + + https://stackoverflow.com/a/13105359/11265569 + :param input: The "Unicode dict" + :return: The "ASCII dict" + """ + if isinstance(input, dict): + return {byteify(key): byteify(value) + for key, value in input.iteritems()} + elif isinstance(input, list): + return [byteify(element) for element in input] + elif isinstance(input, unicode): + return input.encode('utf-8') + else: + return input + + +def get_logger(session, app_id): + """Returns a qi logger object.""" + logger = qi.logging.Logger(app_id) + try: + qicore = qi.module("qicore") + log_manager = session.service("LogManager") + provider = qicore.createObject("LogProvider", log_manager) + log_manager.addProvider(provider) + except RuntimeError: + # no qicore, we're not running on a robot, it doesn't matter + pass + except AttributeError: + # old version of NAOqi - logging will probably not work. + pass + return logger + + +class ListenerModule(ALModule): + """ + Audio processing module. + Detect peaks in users voices, then record what they are saying to transmit it to Dialog Flow. + """ + + def __init__(self, name): + # TODO: For when we're dismantling, does this need to be an ALModule? + try: + ALModule.__init__(self, name) + except Exception as e: + print(str(e)) + pass + + # Project ID for dialog flow, populated later. 
+ self.google_project_id = None + + # Get robot memory so we can attach to some events + self.mem = ALProxy('ALMemory') + + # Get a session handle also + self.session = self.mem.session() + + # Get logger + self.logger = get_logger(self.session, 'uk.ac.rgu.ListenerService') + + # Get robot LEDs so we can control eye colour + self.leds = ALProxy('ALLeds') + + # Get Pepper's audio device + self.audio_device = self.session.service('ALAudioDevice') + self.speaker_hook = None + + # Get the speech recognition module so we can disable it. + self.speech = ALProxy('ALSpeechRecognition') + + # Get dialog flow and vosk modules. + self.dialogflow = self.session.service('DialogFlowService') + self.vosk = None # initialized if vosk is enabled + + # Properties for voice detection and recording + self.is_listening = False + self.is_paused = False + self.is_recording = False + self.sound_file = None + self.previous_data = None + self.retries = LISTENING_RETRY_COUNT + + # Save a timestamp of when recording started to enforce a maximum length. + self.record_start = None + + # Set a UUID for the session ID. + # Ensures multiple bots can run the same agent and not have colliding contexts + self.session_id = uuid.uuid4() + + # Package UUID for filesystem access + self.package_uuid = None + + # Enable vosk api for transcription? + self.vosk_api = False + + # Proxies for handling responses + self.tts = ALProxy('ALTextToSpeech') + self.tablet = ALProxy('ALTabletService') + self.behavior_manager = ALProxy('ALBehaviorManager') + + # TODO: Is there a way to avoid needing a package_uuid passing in? + def start_listening(self, google_project_id, package_uuid): + # Save package uuid + self.google_project_id = google_project_id + self.package_uuid = package_uuid + + # Configure audio device. 16000 sample rate, 3 = Front Mic, 0 = no deinterlacing, we do that ourselves + # TODO: Future: Might be worth investigating using a higher sample rate and filtering the audio channels. + self.audio_device.setClientPreferences(self.getName(), 16000, 3, 0) + + # Subscribe to audio processing events + self.audio_device.subscribe(self.getName()) + + # Disable speech recognition + self.speech.pause(True) + + # Mark as listening + self.is_listening = True + + # Hook the speaker so we don't listen to our own output. + self.speaker_hook = self.audio_device.speakersPlaying.connect(self.speakers_playing) + + # Start dialogflow session + self.dialogflow.begin_session(str(self.google_project_id), str(self.session_id), 'en-GB') + + def cleanup(self): + """Use this to tidy up any event subscriptions and to resume the built-in text to speech.""" + # Turn speech recognition back on as normal + self.speech.pause(False) + + # Unsubscribe from audio processing + self.audio_device.unsubscribe(self.getName()) + + # Not listening + self.is_listening = False + + self.audio_device.speakersPlaying.disconnect(self.speaker_hook) + + # End the dialog flow session. + self.dialogflow.end_session() + + def set_vosk_enabled(self, enabled): + """ + Enable the VOSK transcription API. + This is experimental and underdeveloped and thus has some accuracy issues. + However, if worked on more could produce better latency results. + """ + self.vosk_api = enabled + + # If we're enabling, try and find the vosk service. + if enabled: + self.vosk = self.session.service('VoskClient') + + # TODO: Ensure this is fully working as expected. 
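+    # Connected to ALAudioDevice.speakersPlaying in start_listening: while Pepper's own
+    # speakers are active we pause listening (is_paused) so the robot does not record and
+    # transcribe its own speech, and the eye colour switches between ignoring and idle.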
+ def speakers_playing(self, playing): + self.is_paused = playing + if self.is_paused: + self.eyes_ignoring() + else: + self.eyes_idle() + + def begin_record(self, previous_sound_data): + # Initialize a "memory file". I believe StringIO is used so that it can be passed through the naoqi broker + # without serialization issues (numpy.int16)? + self.sound_file = StringIO.StringIO() + self.is_recording = True + self.record_start = time.time() + + # Write the last frame of data too if we have it. + if previous_sound_data is not None: + self.sound_file.write(previous_sound_data[0].tostring()) + + # Set eyes indicator TODO: Without lag please Pepper? + self.eyes_listening() + + self.logger.info('Recording has started.') + + def stop_record(self): + # Clear last saved data + self.previous_data = None + self.is_recording = False + + # Clear eye indicator + self.eyes_idle() + + def process_audio(self): + """ + Process the recorded audio and send it to dialogflow for intent processing. + """ + + # Send buffer pointer back to the start + self.sound_file.seek(0, os.SEEK_END) + length = self.sound_file.tell() + self.sound_file.seek(0) + + # Read all audio data into a single buffer and send it to dialogflow + input_audio = self.sound_file.read(length) + + # Send to dialogflow + response_json = None + start = time.clock() + if self.vosk_api: + # Transcribe audio with vosk + text = self.vosk.transcribe(input_audio) + self.logger.info('Vosk heard %s' % text) + + # If we heard something, send it to google for processing. + if text is not None and text != '': + response_json = self.dialogflow.detect_intent_text(text) + else: + response_json = self.dialogflow.detect_intent_audio(input_audio) + + end = time.clock() + self.logger.info("Request took %f" % (end - start)) + + # Convert JSON string to object and process it. + response = byteify(json.loads(response_json)) + if response is not None: + self.handle_actions(response) + + # noinspection PyPep8Naming + def processRemote(self, channels, samples, _timestamp, audio_buffer): + """Callback for audio processing.""" + + # TODO: It seems some of the data loss could be due to inefficiencies in this function? + # Worth further investigation.. + + # Load the data by casting to an array of 16-bit ints. + interleaved_data = numpy.fromstring(str(audio_buffer), dtype=numpy.int16) + + # Deinterleave the data by splitting by channel + sound_data = numpy.reshape(interleaved_data, (channels, samples), 'F') + + # Save this last frame in case next frame we begin recording + self.previous_data = sound_data + + # If we ain't listening, don't process + if self.is_paused: + if self.is_recording: + self.stop_record() + return + + # Calculate audio peak for speech detection + peak = numpy.max(sound_data) + + # If we peak, reset the counter and start recording if we haven't + # If we have, we reset the listen count. + if peak >= AUDIO_PEAK_THRESHOLD: + self.retries = LISTENING_RETRY_COUNT + if not self.is_recording: + self.logger.info('START') + self.begin_record(self.previous_data) + + # If we are recording, knock the retry counter down and save this data. + if self.is_recording: + self.retries -= 1 + self.sound_file.write(sound_data[0].tostring()) + + # If we've been recording too long, cut them short. + if self.is_recording and time.time() - self.record_start > MAX_RECORD_TIME: + self.logger.warn('Sentence was too long.') + self.stop_record() + # self.tts.say('Sorry, that sentence was too big. 
Could you try again?') + + # If the user has stopped speaking, process the audio + if self.is_recording and self.retries <= 0: + self.logger.info('Stopping') + self.stop_record() + self.process_audio() + + def handle_actions(self, response): + """ + Handle the actions of a dialogflow response + :param response: The dialogflow response as a dict. Must be accessed as response[...] + """ + + query_result = response['queryResult'] + + # Iterate over the additional payloads + if 'fulfillmentMessages' in query_result: + for message in query_result['fulfillmentMessages']: + # Handle custom payloads + if 'payload' in message and 'action' in message['payload']: + payload = message['payload'] + action = payload['action'] + if action == 'show_url': + + self.tablet.showWebview(payload['url']) + + elif action == 'show_local': # TODO: Collapse into show_url... + + path = payload['path'] + url = 'http://%s/apps/%s' % (self.tablet.robotIp(), + os.path.join(self.package_uuid, + os.path.normpath(path).lstrip("\\/")) + .replace(os.path.sep, "/")) + + self.tablet.showWebview(str(url)) + + elif action == 'clear_tablet': + + self.tablet.hideWebview() + + elif action == 'behavior': + + name = str(payload['behavior']) + self.logger.info('Attempt to start behaviour "%s"' % name) + try: + self.behavior_manager.stopBehavior(name) + except: + pass + + try: + self.behavior_manager.runBehavior(str(name)) + except Exception as ex: + self.logger.error('Failed to start "%s"' % name, ex) + # TODO: If an application developer wants a "rich" event, add it here. + else: + # Pass a generic action "bang" event. + self.mem.raiseEvent('DialogFlowAction', str(action)) + + # Say the fulfilment text + if 'fulfillmentText' in query_result: + self.tts.say(query_result['fulfillmentText']) + + # TODO: Fix the eyes? + + def eyes_listening(self): + """Makes Pepper's eyes blue to indicate listening""" + self.set_eyes(0, 0, 255) + + def eyes_idle(self): + """Makes Pepper's eyes white to indicate idling""" + self.set_eyes(255, 255, 255) + + def eyes_ignoring(self): + """Makes Pepper's eyes red to indicate ignorance""" + self.set_eyes(255, 0, 0) + + def set_eyes(self, r, g, b): + """Set Pepper's face LEDs""" + self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) + + +if __name__ == '__main__': + try: + # Set up a bidirectional broker to communicate with Pepper. + pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, '127.0.0.1', 9559) + #pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, 'pepper.local.', 9559) # For testing + except RuntimeError: + print('Failed to connect to Naoqi. Please check script arguments.') + sys.exit(1) + + # Create audio processor + ListenerService = ListenerModule('ListenerService') + # ListenerService = ListenerModule('ListenerService', args.project_id) + + # Keep program running until we tell it to quit. + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + ListenerService.cleanup() + sys.exit(0) \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/__init__.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/__init__.py new file mode 100644 index 0000000..dd8b741 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/__init__.py @@ -0,0 +1,3 @@ +""" +STK - A collection of libraries useful for making apps with NAOqi. 
+""" diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/coroutines.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/coroutines.py new file mode 100644 index 0000000..0a70ee0 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/coroutines.py @@ -0,0 +1,279 @@ +""" +Helper for easily doing async tasks with coroutines. + +It's mostly syntactic sugar that removes the need for .then and .andThen. + +Simply: + - make a generator function that yields futures (e.g. from qi.async) + - add the decorator async_generator + +For example: + +@stk.coroutines.async_generator +def run_test(self): + yield ALTextToSpeech.say("ready", _async=True) + yield ALTextToSpeech.say("steady", _async=True) + time.sleep(1) + yield ALTextToSpeech.say("go", _async=True) + +... this will turn run_test into a function that returns a future that is +valid when the call is done - and that is still cancelable (your robot will +start speaking). + +As your function now returns a future, it can be used in "yield run_test()" in +another function wrapped with this decorator. +""" + +__version__ = "0.1.2" + +__copyright__ = "Copyright 2017, Aldebaran Robotics / Softbank Robotics Europe" +__author__ = 'ekroeger' +__email__ = 'ekroeger@softbankrobotics.com' + +import functools +import time +import threading + +import qi + +class _MultiFuture(object): + """Internal helper for handling lists of futures. + + The callback will only be called once, with either an exception or a + list of the right type and size. + """ + def __init__(self, futures, callback, returntype): + self.returntype = returntype + self.callback = callback + self.expecting = len(futures) + self.values = [None] * self.expecting + self.failed = False + self.futures = futures + for i, future in enumerate(futures): + future.then(lambda fut: self.__handle_part_done(i, fut)) + + def __handle_part_done(self, index, future): + "Internal callback for when a sub-function is done." + if self.failed: + # We already raised an exception, don't do anything else. + return + assert self.expecting, "Got more callbacks than expected!" + try: + self.values[index] = future.value() + except Exception as exception: + self.failed = True + self.callback(exception=exception) + return + self.expecting -= 1 + if not self.expecting: + # We have all the values + self.callback(self.returntype(self.values)) + + def cancel(self): + "Cancel all subfutures." + for future in self.futures: + future.cancel() + +class FutureWrapper(object): + "Abstract base class for objects that pretend to be a future." + def __init__(self): + self.running = True + self.promise = qi.Promise(self._on_future_cancelled) + self.future = self.promise.future() + self._exception = "" + self.lock = threading.Lock() + + def _on_future_cancelled(self, promise): + """If someone from outside cancelled our future - propagate.""" + promise.setCanceled() + + def then(self, callback): + """Add function to be called when the future is done; returns a future. + + The callback will be called with a (finished) future. + """ + if self.running: # We might want a mutex here... + return self.future.then(callback) + else: + callback(self) + # return something? (to see when we have a testcase for this...) + + def andThen(self, callback): + """Add function to be called when the future is done; returns a future. + + The callback will be called with a return value (for now, None). + """ + if self.running: # We might want a mutex here... 
+ return self.future.andThen(callback) + else: + callback(self.future.value()) #? + # return something? (to see when we have a testcase for this...) + + def hasError(self): + "Was there an error in one of the generator calls?" + return bool(self._exception) + + def wait(self): + "Blocks the thread until everything is finished." + self.future.wait() + + def isRunning(self): + "Is the sequence of generators still running?" + return self.future.isRunning() + + def value(self): + """Blocks the thread, and returns the final generator return value. + + For now, always returns None.""" + if self._exception: + raise self._exception + else: + return self.future.value() + + def hasValue(self): + "Tells us whether the generator 1) is finished and 2) has a value." + # For some reason this doesn't do what I expected + # self.future.hasValue() returns True even if we're not finished (?) + if self.running: + return False + elif self._exception: + return False + else: + return self.future.hasValue() + + def isFinished(self): + "Is the generator finished?" + return self.future.isFinished() + + def error(self): + "Returns the error of the future." + return self.future.error() + + def isCancelable(self): + "Is this future cancelable? Yes, it always is." + return True + + def cancel(self): + "Cancel the future, and stop executing the sequence of actions." + with self.lock: + self.running = False + self.promise.setCanceled() + + def isCanceled(self): + "Has this already been cancelled?" + return not self.running + + def addCallback(self, callback): + "Add function to be called when the future is done." + self.then(callback) + + # You know what? I'm not implementing unwrap() because I don't see a + # use case. + + +class GeneratorFuture(FutureWrapper): + "Future-like object (same interface) made for wrapping a generator." + def __init__(self, generator): + FutureWrapper.__init__(self) + self.generator = generator + self.future.addCallback(self.__handle_finished) + self.sub_future = None + self.__ask_for_next() + + def __handle_finished(self, future): + "Callback for when our future finished for any reason." + if self.running: + # promise was directly finished by someone else - cancel all! + self.running = False + if self.sub_future: + self.sub_future.cancel() + + def __handle_done(self, future): + "Internal callback for when the current sub-function is done." + try: + self.__ask_for_next(future.value()) + except Exception as exception: + self.__ask_for_next(exception=exception) + + def __finish(self, value): + "Finish and return." + with self.lock: + self.running = False + self.promise.setValue(value) + + def __ask_for_next(self, arg=None, exception=None): + "Internal - get the next function in the generator." + if self.running: + try: + self.sub_future = None + if exception: + future = self.generator.throw(exception) + else: + future = self.generator.send(arg) + if isinstance(future, list): + self.sub_future = _MultiFuture(future, self.__ask_for_next, + list) + elif isinstance(future, tuple): + self.sub_future = _MultiFuture(future, self.__ask_for_next, + tuple) + elif isinstance(future, Return): + # Special case: we returned a special "Return" object + # in this case, stop execution. + self.__finish(future.value) + else: + future.then(self.__handle_done) + self.sub_future = future + except StopIteration: + self.__finish(None) + except Exception as exc: + with self.lock: + self._exception = exc + self.running = False + self.promise.setError(str(exc)) +# self.__finish(None) # May not be best way of finishing? 
+ +def async_generator(func): + """Decorator that turns a future-generator into a future. + + This allows having a function that does a bunch of async actions one + after the other without awkward "then/andThen" syntax, returning a + future-like object (actually a GeneratorFuture) that can be cancelled, etc. + """ + @functools.wraps(func) + def function(*args, **kwargs): + "Wrapped function" + return GeneratorFuture(func(*args, **kwargs)) + return function + +def public_async_generator(func): + """Variant of async_generator that returns an actual future. + + This allows you to expose it through a qi interface (on a service), but + that means cancel will not stop the whole chain. + """ + @functools.wraps(func) + def function(*args, **kwargs): + "Wrapped function" + return GeneratorFuture(func(*args, **kwargs)).future + return function + +class Return(object): + "Use to wrap a return function " + def __init__(self, value): + self.value = value + +MICROSECONDS_PER_SECOND = 1000000 + +class _Sleep(FutureWrapper): + "Helper class that behaves like an async 'sleep' function" + def __init__(self, time_in_secs): + FutureWrapper.__init__(self) + time_in_microseconds = int(MICROSECONDS_PER_SECOND * time_in_secs) + self.fut = qi.async(self.set_finished, delay=time_in_microseconds) + + def set_finished(self): + "Inner callback, finishes the future." + with self.lock: + self.promise.setValue(None) + +sleep = _Sleep diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/events.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/events.py new file mode 100644 index 0000000..d0e4c52 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/events.py @@ -0,0 +1,188 @@ +""" +stk.events.py + +Provides misc. wrappers for ALMemory and Signals (using the same syntax for +handling both). +""" + +__version__ = "0.1.1" + +__copyright__ = "Copyright 2015, Aldebaran Robotics" +__author__ = 'ekroeger' +__email__ = 'ekroeger@aldebaran.com' + +import qi + + +def on(*keys): + """Decorator for connecting a callback to one or several events. + + Usage: + + class O: + @on("MyMemoryKey") + def my_callback(self,value): + print "I was called!", value + + o = O() + events = EventHelper() + events.connect_decorators(o) + + After that, whenever MyMemoryKey is raised, o.my_callback will be called + with the value. + """ + def decorator(func): + func.__event_keys__ = keys + return func + return decorator + + +class EventHelper(object): + "Helper for ALMemory; takes care of event connections so you don't have to" + + def __init__(self, session=None): + self.session = None + self.almemory = None + if session: + self.init(session) + self.handlers = {} # a handler is (subscriber, connections) + self.subscriber_names = {} + self.wait_value = None + self.wait_promise = None + + def init(self, session): + "Sets the NAOqi session, if it wasn't passed to the constructor" + self.session = session + self.almemory = session.service("ALMemory") + + def connect_decorators(self, obj): + "Connects all decorated methods of target object." + for membername in dir(obj): + member = getattr(obj, membername) + if hasattr(member, "__event_keys__"): + for event in member.__event_keys__: + self.connect(event, member) + + def connect(self, event, callback): + """Connects an ALMemory event or signal to a callback. + + Note that some events trigger side effects in services when someone + subscribes to them (such as WordRecognized). 
Those will *not* be + triggered by this function, for those, use .subscribe(). + """ + if event not in self.handlers: + if "." in event: + # if we have more than one ".": + service_name, signal_name = event.split(".") + service = self.session.service(service_name) + self.handlers[event] = (getattr(service, signal_name), []) + else: + # It's a "normal" ALMemory event. + self.handlers[event] = ( + self.almemory.subscriber(event).signal, []) + signal, connections = self.handlers[event] + connection_id = signal.connect(callback) + connections.append(connection_id) + return connection_id + + def subscribe(self, event, attachedname, callback): + """Subscribes to an ALMemory event so as to notify providers. + + This is necessary for things like WordRecognized.""" + connection_id = self.connect(event, callback) + dummyname = "on_" + event.replace("/", "") + self.almemory.subscribeToEvent(event, attachedname, dummyname) + self.subscriber_names[event] = attachedname + return connection_id + + def disconnect(self, event, connection_id=None): + "Disconnects a connection, or all if no connection is specified." + if event in self.handlers: + signal, connections = self.handlers[event] + if connection_id: + if connection_id in connections: + signal.disconnect(connection_id) + connections.remove(connection_id) + else: + # Didn't specify a connection ID: remove all + for connection_id in connections: + signal.disconnect(connection_id) + del connections[:] + if event in self.subscriber_names: + name = self.subscriber_names[event] + self.almemory.unsubscribeToEvent(event, name) + del self.subscriber_names[event] + + def clear(self): + "Disconnect all connections" + for event in list(self.handlers): + self.disconnect(event) + + def get(self, key): + "Gets ALMemory value." + return self.almemory.getData(key) + + def get_int(self, key): + "Gets ALMemory value, cast as int." + try: + return int(self.get(key)) + except RuntimeError: + # Key doesn't exist + return 0 + except ValueError: + # Key exists, but can't be parsed to int + return 0 + + def set(self, key, value): + "Sets value of ALMemory key." + return self.almemory.raiseEvent(key, value) + + def remove(self, key): + "Remove key from ALMemory." + try: + self.almemory.removeData(key) + except RuntimeError: + pass + + def _on_wait_event(self, value): + "Internal - callback for an event." + if self.wait_promise: + self.wait_promise.setValue(value) + self.wait_promise = None + + def _on_wait_signal(self, *args): + "Internal - callback for a signal." + if self.wait_promise: + self.wait_promise.setValue(args) + self.wait_promise = None + + def cancel_wait(self): + "Cancel the current wait (raises an exception in the waiting thread)" + if self.wait_promise: + self.wait_promise.setCanceled() + self.wait_promise = None + + def wait_for(self, event, subscribe=False): + """Block until a certain event is raised, and returns it's value. + + If you pass subscribe=True, ALMemory.subscribeToEvent will be called + (sometimes necessary for side effects, i.e. WordRecognized). + + This will block a thread so you should avoid doing this too often! + """ + if self.wait_promise: + # there was already a wait in progress, cancel it! + self.wait_promise.setCanceled() + self.wait_promise = qi.Promise() + if subscribe: + connection_id = self.subscribe(event, "EVENTHELPER", + self._on_wait_event) + elif "." 
in event: # it's a signal + connection_id = self.connect(event, self._on_wait_signal) + else: + connection_id = self.connect(event, self._on_wait_event) + try: + result = self.wait_promise.future().value() + finally: + self.disconnect(event, connection_id) + return result diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/logging.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/logging.py new file mode 100644 index 0000000..d717f07 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/logging.py @@ -0,0 +1,65 @@ +""" +stk.logging.py + +Utility library for logging with qi. +""" + +__version__ = "0.1.2" + +__copyright__ = "Copyright 2015, Aldebaran Robotics" +__author__ = 'ekroeger' +__email__ = 'ekroeger@aldebaran.com' + +import functools +import traceback + +import qi + + +def get_logger(session, app_id): + """Returns a qi logger object.""" + logger = qi.logging.Logger(app_id) + try: + qicore = qi.module("qicore") + log_manager = session.service("LogManager") + provider = qicore.createObject("LogProvider", log_manager) + log_manager.addProvider(provider) + except RuntimeError: + # no qicore, we're not running on a robot, it doesn't matter + pass + except AttributeError: + # old version of NAOqi - logging will probably not work. + pass + return logger + + +def log_exceptions(func): + """Catches all exceptions in decorated method, and prints them. + + Attached function must be on an object with a "logger" member. + """ + @functools.wraps(func) + def wrapped(self, *args): + try: + return func(self, *args) + except Exception as exc: + self.logger.error(traceback.format_exc()) + raise exc + return wrapped + + +def log_exceptions_and_return(default_value): + """If an exception occurs, print it and return default_value. + + Attached function must be on an object with a "logger" member. + """ + def decorator(func): + @functools.wraps(func) + def wrapped(self, *args): + try: + return func(self, *args) + except Exception: + self.logger.error(traceback.format_exc()) + return default_value + return wrapped + return decorator diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/runner.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/runner.py new file mode 100644 index 0000000..0b1c9a7 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/runner.py @@ -0,0 +1,153 @@ +""" +stk.runner.py + +A helper library for making simple standalone python scripts as apps. + +Wraps some NAOqi and system stuff, you could do all this by directly using the +Python SDK, these helper functions just isolate some frequently used/hairy +bits so you don't have them mixed in your logic. +""" + +__version__ = "0.1.3" + +__copyright__ = "Copyright 2015, Aldebaran Robotics" +__author__ = 'ekroeger' +__email__ = 'ekroeger@aldebaran.com' + +import sys +import qi +from distutils.version import LooseVersion + +# +# Helpers for making sure we have a robot to connect to +# + + +def check_commandline_args(description): + "Checks whether command-line parameters are enough" + import argparse + parser = argparse.ArgumentParser(description=description) + parser.add_argument('--qi-url', help='connect to specific NAOqi instance') + + args = parser.parse_args() + return args + + +def is_on_robot(): + "Returns whether this is being executed on an Aldebaran robot." 
+ import platform + return "aldebaran" in platform.platform() + + +def get_debug_robot(): + "Returns IP address of debug robot, complaining if not found" + try: + import qiq.config + qiqrobot = qiq.config.defaultHost() + if qiqrobot: + robot = raw_input( + "connect to which robot? (default is {0}) ".format(qiqrobot)) + if robot: + return robot + else: + return qiqrobot + else: + print "qiq found, but it has no default robot configured." + except ImportError: + # qiq not installed + print "qiq not installed (you can use it to set a default robot)." + return raw_input("connect to which robot? ") + + +def init(qi_url=None): + "Returns a QiApplication object, possibly with interactive input." + if qi_url: + sys.argv.extend(["--qi-url", qi_url]) + else: + args = check_commandline_args('Run the app.') + if bool(args.qi_url): + qi_url = args.qi_url + elif not is_on_robot(): + print "no --qi-url parameter given; interactively getting debug robot." + debug_robot = get_debug_robot() + if debug_robot: + sys.argv.extend(["--qi-url", debug_robot]) + qi_url = debug_robot + else: + raise RuntimeError("No robot, not running.") + + qiapp = None + sys.argv[0] = str(sys.argv[0]) + + # In versions bellow 2.3, look for --qi-url in the arguemnts and call accordingly the Application + if qi_url and hasattr(qi, "__version__") and LooseVersion(qi.__version__) < LooseVersion("2.3"): + qiapp = qi.Application(url="tcp://"+qi_url+":9559") + # In versions greater than 2.3 the ip can simply be passed through argv[0] + else: + # In some environments sys.argv[0] has unicode, which qi rejects + qiapp = qi.Application() + + qiapp.start() + return qiapp + + +# Main runner + +def run_activity(activity_class, service_name=None): + """Instantiate the given class, and runs it. + + The given class must take a qiapplication object as parameter, and may also + have on_start and on_stop methods, that will be called before and after + running it.""" + qiapp = init() + activity = activity_class(qiapp) + service_id = None + + try: + # if it's a service, register it + if service_name: + # Note: this will fail if there is already a service. Unregistering + # it would not be a good practice, because it's process would still + # be running. + service_id = qiapp.session.registerService(service_name, activity) + + if hasattr(activity, "on_start"): + def handle_on_start_done(on_start_future): + "Custom callback, for checking errors" + if on_start_future.hasError(): + try: + msg = "Error in on_start(), stopping application: %s" \ + % on_start_future.error() + if hasattr(activity, "logger"): + activity.logger.error(msg) + else: + print msg + finally: + qiapp.stop() + qi.async(activity.on_start).addCallback(handle_on_start_done) + + # Run the QiApplication, which runs until someone calls qiapp.stop() + qiapp.run() + + finally: + # Cleanup + if hasattr(activity, "on_stop"): + # We need a qi.async call so that if the class is single threaded, + # it will wait for callbacks to be finished. + qi.async(activity.on_stop).wait() + if service_id: + qiapp.session.unregisterService(service_id) + + +def run_service(service_class, service_name=None): + """Instantiate the given class, and registers it as a NAOqi service. + + The given class must take a qiapplication object as parameter, and may also + have on_start and on_stop methods, that will be called before and after + running it. + + If the service_name parameter is not given, the classes' name will be used. 
+ """ + if not service_name: + service_name = service_class.__name__ + run_activity(service_class, service_name) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/services.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/services.py new file mode 100644 index 0000000..5c831ef --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/stk/services.py @@ -0,0 +1,39 @@ +""" +stk.services.py + +Syntactic sugar for accessing NAOqi services. +""" + +__version__ = "0.1.2" + +__copyright__ = "Copyright 2015, Aldebaran Robotics" +__author__ = 'ekroeger' +__email__ = 'ekroeger@aldebaran.com' + + +class ServiceCache(object): + "A helper for accessing NAOqi services." + + def __init__(self, session=None): + self.session = None + self.services = {} + if session: + self.init(session) + + def init(self, session): + "Sets the session object, if it wasn't passed to constructor." + self.session = session + + def __getattr__(self, servicename): + "We overload this so (instance).ALMotion returns the service, or None." + if (not servicename in self.services) or ( + servicename == "ALTabletService"): + # ugly hack: never cache ALtabletService, always ask for a new one + if servicename.startswith("__"): + # Behave like a normal python object for those + raise AttributeError + try: + self.services[servicename] = self.session.service(servicename) + except RuntimeError: # Cannot find service + self.services[servicename] = None + return self.services[servicename] diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts b/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts new file mode 100644 index 0000000..96eb8e9 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts @@ -0,0 +1,32 @@ + + + + + behavior_1/behavior.xar:/Say + + Hello + Text + Hello + + + + An error occurred starting my listener. + Text + An error occurred starting my listener. + + + + behavior_1/behavior.xar:/Say (1) + + Hello + Text + Hello + + + + Let's talk! + Text + Let's talk! 
+ + + diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/.gitignore b/Applications-Python/Dialog Flow/DialogFlowService/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/DialogFlowService.iml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/DialogFlowService.iml new file mode 100644 index 0000000..8e5446a --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/DialogFlowService.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/Project_Default.xml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..df7825d --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/profiles_settings.xml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/misc.xml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/misc.xml new file mode 100644 index 0000000..dbf8223 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/modules.xml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/modules.xml new file mode 100644 index 0000000..512e506 --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/.idea/vcs.xml b/Applications-Python/Dialog Flow/DialogFlowService/.idea/vcs.xml new file mode 100644 index 0000000..c2365ab --- /dev/null +++ b/Applications-Python/Dialog Flow/DialogFlowService/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowService/service.py b/Applications-Python/Dialog Flow/DialogFlowService/service.py index 101b5bb..22f380d 100644 --- a/Applications-Python/Dialog Flow/DialogFlowService/service.py +++ b/Applications-Python/Dialog Flow/DialogFlowService/service.py @@ -105,7 +105,6 @@ def detect_intent_audio(self, input_audio): parser = argparse.ArgumentParser() parser.add_argument('--ip', type=str, default='127.0.0.1', help='Robot IP Address. 
For local bot use 127.0.0.1.') parser.add_argument('--port', type=int, default=9559, help='NaoQI port number.') - parser.add_argument('--project_id', type=str, required=True, help='Google Cloud Project ID.') args = parser.parse_args() try: From e41a88015c13517d5558e18503b9d3fc91bb1de7 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:31:03 +0100 Subject: [PATCH 03/13] Port listener to stk. --- .../DialogFlowExample/DialogFlowExample.pml | 11 ++ .../DialogFlowExample/behavior_1/behavior.xar | 30 +++-- .../scripts/ListenerService.py | 107 +++++++----------- 3 files changed, 69 insertions(+), 79 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml b/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml index 9efa790..0b36d6b 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml +++ b/Applications-Python/Dialog Flow/DialogFlowExample/DialogFlowExample.pml @@ -7,6 +7,17 @@ + + + + + + + + + + + diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar index 52d2291..53ba6a0 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -19,23 +19,31 @@ class MyClass(GeneratedClass): def onInput_onStart(self): #self.onStopped() #activate the output of the box self.serviceMan.startService('ListenerService') - time.sleep(5) + time.sleep(3) - if self.serviceMan.isServiceRunning('ListenerService'): - # Start listening. - listener = ALProxy('ListenerService') - listener.start_listening('soc-pepper-summer', self.packageUid()) + # TODO: Need a better way of sleeping then checking. Maybe try and hook one of the signals? + #if self.serviceMan.isServiceRunning('ListenerService'): + # # Start listening. + # listener = ALProxy('ListenerService') + # listener.start_listening('soc-pepper-summer', self.packageUid()) - # Fire any actions afterward. - self.onStarted() - else: - self.onError() + # # Fire any actions afterward. + # self.onStarted() + #else: + # self.onError() + + # Start listening. + listener = ALProxy('ListenerService') + listener.start_listening('soc-pepper-summer', self.packageUid()) + + # Fire any actions afterward. 
+ self.onStarted() pass def onInput_onStop(self): self.onUnload() #it is recommended to reuse the clean-up as the box is stopped - self.onStopped() #activate the output of the box]]>media/images/box/box-script.pngmedia/images/box/box-script.png \ No newline at end of file + self.onUnload()]]> \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index a3f310f..a799e3b 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -1,5 +1,10 @@ import qi -from naoqi import ALBroker, ALModule, ALProxy + +import stk.runner +import stk.events +import stk.services +import stk.logging + import numpy import json @@ -39,61 +44,37 @@ def byteify(input): return input -def get_logger(session, app_id): - """Returns a qi logger object.""" - logger = qi.logging.Logger(app_id) - try: - qicore = qi.module("qicore") - log_manager = session.service("LogManager") - provider = qicore.createObject("LogProvider", log_manager) - log_manager.addProvider(provider) - except RuntimeError: - # no qicore, we're not running on a robot, it doesn't matter - pass - except AttributeError: - # old version of NAOqi - logging will probably not work. - pass - return logger - - -class ListenerModule(ALModule): +class ListenerService(object): """ Audio processing module. Detect peaks in users voices, then record what they are saying to transmit it to Dialog Flow. """ - def __init__(self, name): - # TODO: For when we're dismantling, does this need to be an ALModule? - try: - ALModule.__init__(self, name) - except Exception as e: - print(str(e)) - pass + def __init__(self, qiapp): + # generic activity boilerplate + self.qiapp = qiapp + self.events = stk.events.EventHelper(qiapp.session) + self.s = stk.services.ServiceCache(qiapp.session) + self.logger = stk.logging.get_logger(qiapp.session, 'uk.ac.rgu.ListenerService') # Project ID for dialog flow, populated later. self.google_project_id = None # Get robot memory so we can attach to some events - self.mem = ALProxy('ALMemory') - - # Get a session handle also - self.session = self.mem.session() - - # Get logger - self.logger = get_logger(self.session, 'uk.ac.rgu.ListenerService') + self.mem = self.s.ALMemory # Get robot LEDs so we can control eye colour - self.leds = ALProxy('ALLeds') + self.leds = self.s.ALLeds # Get Pepper's audio device - self.audio_device = self.session.service('ALAudioDevice') + self.audio_device = self.s.ALAudioDevice self.speaker_hook = None # Get the speech recognition module so we can disable it. - self.speech = ALProxy('ALSpeechRecognition') + self.speech = self.s.ALSpeechRecognition # Get dialog flow and vosk modules. - self.dialogflow = self.session.service('DialogFlowService') + self.dialogflow = self.s.DialogFlowService self.vosk = None # initialized if vosk is enabled # Properties for voice detection and recording @@ -118,11 +99,12 @@ def __init__(self, name): self.vosk_api = False # Proxies for handling responses - self.tts = ALProxy('ALTextToSpeech') - self.tablet = ALProxy('ALTabletService') - self.behavior_manager = ALProxy('ALBehaviorManager') + self.tts = self.s.ALTextToSpeech + self.tablet = self.s.ALTabletService + self.behavior_manager = self.s.ALBehaviorManager # TODO: Is there a way to avoid needing a package_uuid passing in? 
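+    # qi.bind declares an explicit NAOqi signature for this method so that, once the
+    # service is registered, it can be called from Choregraphe via ALProxy('ListenerService');
+    # helpers marked @qi.nobind further down are kept off the public service interface.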
+ @qi.bind(returnType=qi.Void, paramsType=[qi.String, qi.String]) def start_listening(self, google_project_id, package_uuid): # Save package uuid self.google_project_id = google_project_id @@ -130,10 +112,10 @@ def start_listening(self, google_project_id, package_uuid): # Configure audio device. 16000 sample rate, 3 = Front Mic, 0 = no deinterlacing, we do that ourselves # TODO: Future: Might be worth investigating using a higher sample rate and filtering the audio channels. - self.audio_device.setClientPreferences(self.getName(), 16000, 3, 0) + self.audio_device.setClientPreferences('ListenerService', 16000, 3, 0) # Subscribe to audio processing events - self.audio_device.subscribe(self.getName()) + self.audio_device.subscribe('ListenerService') # Disable speech recognition self.speech.pause(True) @@ -147,6 +129,7 @@ def start_listening(self, google_project_id, package_uuid): # Start dialogflow session self.dialogflow.begin_session(str(self.google_project_id), str(self.session_id), 'en-GB') + @qi.bind(returnType=qi.Void, paramsType=[]) def cleanup(self): """Use this to tidy up any event subscriptions and to resume the built-in text to speech.""" # Turn speech recognition back on as normal @@ -163,17 +146,15 @@ def cleanup(self): # End the dialog flow session. self.dialogflow.end_session() - def set_vosk_enabled(self, enabled): + @qi.bind(returnType=qi.Void) + def enable_vosk(self): """ Enable the VOSK transcription API. This is experimental and underdeveloped and thus has some accuracy issues. However, if worked on more could produce better latency results. """ - self.vosk_api = enabled - - # If we're enabling, try and find the vosk service. - if enabled: - self.vosk = self.session.service('VoskClient') + self.vosk_api = True + self.vosk = self.session.service('VoskClient') # TODO: Ensure this is fully working as expected. def speakers_playing(self, playing): @@ -183,6 +164,7 @@ def speakers_playing(self, playing): else: self.eyes_idle() + @qi.nobind def begin_record(self, previous_sound_data): # Initialize a "memory file". I believe StringIO is used so that it can be passed through the naoqi broker # without serialization issues (numpy.int16)? @@ -199,6 +181,7 @@ def begin_record(self, previous_sound_data): self.logger.info('Recording has started.') + @qi.nobind def stop_record(self): # Clear last saved data self.previous_data = None @@ -207,6 +190,7 @@ def stop_record(self): # Clear eye indicator self.eyes_idle() + @qi.nobind def process_audio(self): """ Process the recorded audio and send it to dialogflow for intent processing. @@ -292,6 +276,7 @@ def processRemote(self, channels, samples, _timestamp, audio_buffer): self.stop_record() self.process_audio() + @qi.nobind def handle_actions(self, response): """ Handle the actions of a dialogflow response @@ -349,40 +334,26 @@ def handle_actions(self, response): # TODO: Fix the eyes? + @qi.nobind def eyes_listening(self): """Makes Pepper's eyes blue to indicate listening""" self.set_eyes(0, 0, 255) + @qi.nobind def eyes_idle(self): """Makes Pepper's eyes white to indicate idling""" self.set_eyes(255, 255, 255) + @qi.nobind def eyes_ignoring(self): """Makes Pepper's eyes red to indicate ignorance""" self.set_eyes(255, 0, 0) + @qi.nobind def set_eyes(self, r, g, b): """Set Pepper's face LEDs""" self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) -if __name__ == '__main__': - try: - # Set up a bidirectional broker to communicate with Pepper. 
- pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, '127.0.0.1', 9559) - #pythonBroker = ALBroker('pythonBroker', '0.0.0.0', 9999, 'pepper.local.', 9559) # For testing - except RuntimeError: - print('Failed to connect to Naoqi. Please check script arguments.') - sys.exit(1) - - # Create audio processor - ListenerService = ListenerModule('ListenerService') - # ListenerService = ListenerModule('ListenerService', args.project_id) - - # Keep program running until we tell it to quit. - try: - while True: - time.sleep(1) - except KeyboardInterrupt: - ListenerService.cleanup() - sys.exit(0) \ No newline at end of file +if __name__ == "__main__": + stk.runner.run_service(ListenerService) \ No newline at end of file From 351e35ee3a05eecdba419b323170d6cf6d8de623 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:49:04 +0100 Subject: [PATCH 04/13] More documentation --- .../DialogFlowExample/behavior_1/behavior.xar | 72 +++---------------- .../scripts/ListenerService.py | 7 +- .../translations/translation_en_US.ts | 16 +---- Applications-Python/Dialog Flow/README.md | 9 ++- 4 files changed, 26 insertions(+), 78 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar index 53ba6a0..f1470c3 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -6,91 +6,41 @@ class MyClass(GeneratedClass): GeneratedClass.__init__(self) def onLoad(self): - #put initialization code here self.serviceMan = ALProxy('ALServiceManager') - pass def onUnload(self): - #put clean-up code here + # Stop our service from running in the background once the behaviour ends. self.serviceMan.stopService('ListenerService') pass def onInput_onStart(self): - #self.onStopped() #activate the output of the box + # Start our listener service. self.serviceMan.startService('ListenerService') - time.sleep(3) - - # TODO: Need a better way of sleeping then checking. Maybe try and hook one of the signals? - #if self.serviceMan.isServiceRunning('ListenerService'): - # # Start listening. - # listener = ALProxy('ListenerService') - # listener.start_listening('soc-pepper-summer', self.packageUid()) + time.sleep(3) # TODO: Proper way of waiting - # # Fire any actions afterward. - # self.onStarted() - #else: - # self.onError() + # TODO: Graceful error exits. # Start listening. listener = ALProxy('ListenerService') + + # Start listening. Initialises dialogflow with a project id. Change this to your own. listener.start_listening('soc-pepper-summer', self.packageUid()) - # Fire any actions afterward. + # Fire any program init now. 
self.onStarted() pass def onInput_onStop(self): - self.onUnload() #it is recommended to reuse the clean-up as the box is stopped - self.onStopped() #activate the output of the box]]>media/images/box/box-script.pngmedia/images/box/box-script.pngmedia/images/box/interaction/say.pngmedia/images/box/interaction/choice.pngmedia/images/box/interaction/choice.png \ No newline at end of file + self.onUnload()]]> \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index a799e3b..074e081 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -1,3 +1,8 @@ +# This is a microphone listener that connects with Dialog Flow (and optionally Vosk) +# This listens to the microphone input, and when a peak is detected (hardcoded below) it will start recording. +# Once the audio volume subsides for long enough, recording is stopped and the sound generated is processed. +# Then the actions returned by dialogflow are carried out. + import qi import stk.runner @@ -51,7 +56,7 @@ class ListenerService(object): """ def __init__(self, qiapp): - # generic activity boilerplate + # STK Boilerplate self.qiapp = qiapp self.events = stk.events.EventHelper(qiapp.session) self.s = stk.services.ServiceCache(qiapp.session) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts b/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts index 96eb8e9..3eac650 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts +++ b/Applications-Python/Dialog Flow/DialogFlowExample/translations/translation_en_US.ts @@ -1,20 +1,6 @@ - - behavior_1/behavior.xar:/Say - - Hello - Text - Hello - - - - An error occurred starting my listener. - Text - An error occurred starting my listener. - - behavior_1/behavior.xar:/Say (1) @@ -26,7 +12,7 @@ Let's talk! Text - Let's talk! + Let's talk! diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 5daec2e..f38c1a4 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -3,6 +3,9 @@ This folder contains four projects that are used to make Pepper utilise Dialog F This project has been inspired and influenced by [this blog post](https://blogemtech.medium.com/pepper-integration-with-dialogflow-1d7f1582da1a). +## TODOs +- Maybe merge VoskClient and DialogFlowService into one bundle with a flag to enable vosk? That way they could also utilise stk and simplify their code? Not urgent however + ## Projects - DialogFlowService: This is a NAOqi service that runs on a laptop, it exposes some of the dialog flow API to Pepper. This is done because it is currently not possible to install the API on Pepper using pip. - VoskClient: This is a socket client for a python 3 vosk server (see below). @@ -10,6 +13,9 @@ This project has been inspired and influenced by [this blog post](https://blogem - DialogFlowExample: This Choregraphe project ties all of the above services together to create a basic dialog flow program. It contains the barebones and can be used as a template to create further applications. ## Setup/Configuration +You must install the NAOqi Python 2.7 SDK from [here](http://doc.aldebaran.com/2-5/dev/python/install_guide.html). 
+[Python 2.7](https://www.python.org/downloads/release/python-2718/) is required for the services however Python 3 is required for the Vosk Server. + `requirements.txt` files have been provided where necessary to pin dependencies to the correct versions. Entire pip dumps weren't provided as they may have been polluted however the important libraries are in these files. Both services that can be run on the laptop (DialogFlowService and VoskClient) accept command line arguments to configure the target robot: @@ -23,4 +29,5 @@ TODO: - Configuring the behaviour ## Customisation -TODO: Customising the listener. \ No newline at end of file +You can customise the listener's respones to actions by editing handle_actions in ListenerService.py. +By default it can open urls, display local assets, clear the tablet and speak. It can also fire events to ALMemory if you haven't hard-coded a custom response for it. Hardcoding a response or custom action is only really necessary for when you need parameters to be returned via your action. \ No newline at end of file From 6f4880c9258db531f9c38abe50131a4866aa80dc Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:59:53 +0100 Subject: [PATCH 05/13] Ensure cleanup --- .../Dialog Flow/DialogFlowExample/behavior_1/behavior.xar | 7 +++++-- .../DialogFlowExample/scripts/ListenerService.py | 2 +- Applications-Python/Dialog Flow/README.md | 7 ++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar index f1470c3..3ba1bfc 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -7,10 +7,13 @@ class MyClass(GeneratedClass): def onLoad(self): self.serviceMan = ALProxy('ALServiceManager') + self.listener = None pass def onUnload(self): # Stop our service from running in the background once the behaviour ends. + if self.listener is not None: + self.listener.cleanup() self.serviceMan.stopService('ListenerService') pass @@ -22,10 +25,10 @@ class MyClass(GeneratedClass): # TODO: Graceful error exits. # Start listening. - listener = ALProxy('ListenerService') + self.listener = ALProxy('ListenerService') # Start listening. Initialises dialogflow with a project id. Change this to your own. - listener.start_listening('soc-pepper-summer', self.packageUid()) + self.listener.start_listening('soc-pepper-summer', self.packageUid()) # Fire any program init now. 
self.onStarted() diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index 074e081..251be4e 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -141,7 +141,7 @@ def cleanup(self): self.speech.pause(False) # Unsubscribe from audio processing - self.audio_device.unsubscribe(self.getName()) + self.audio_device.unsubscribe('ListenerService') # Not listening self.is_listening = False diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index f38c1a4..244b762 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -23,11 +23,8 @@ Both services that can be run on the laptop (DialogFlowService and VoskClient) a service.py --ip --port ``` -In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup) - -TODO: -- Configuring the behaviour +In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup). ## Customisation -You can customise the listener's respones to actions by editing handle_actions in ListenerService.py. +You can customise the listener's respones to actions by editing `handle_actions` in `ListenerService.py`. By default it can open urls, display local assets, clear the tablet and speak. It can also fire events to ALMemory if you haven't hard-coded a custom response for it. Hardcoding a response or custom action is only really necessary for when you need parameters to be returned via your action. \ No newline at end of file From 05700bcaf7bc1b03ca476f0ab50f9d8436d1e856 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Tue, 28 Jun 2022 15:32:21 +0100 Subject: [PATCH 06/13] More listener work. --- .../scripts/ListenerService.py | 103 ++++++++++-------- 1 file changed, 55 insertions(+), 48 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index 251be4e..ab98903 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -3,15 +3,15 @@ # Once the audio volume subsides for long enough, recording is stopped and the sound generated is processed. # Then the actions returned by dialogflow are carried out. -import qi +# A future improvement would be to monitor the ambient audio volume and listen for peaks +import numpy +import qi import stk.runner import stk.events import stk.services import stk.logging -import numpy - import json import os import StringIO @@ -23,7 +23,7 @@ LISTENING_RETRY_COUNT = 15 # The minimum audio peak to trigger a recording. -AUDIO_PEAK_THRESHOLD = 4000 +AUDIO_PEAK_THRESHOLD = 3500 # Cap at 10 seconds, speech rec gets funny after this. 
MAX_RECORD_TIME = 10 @@ -78,7 +78,7 @@ def __init__(self, qiapp): # Get the speech recognition module so we can disable it. self.speech = self.s.ALSpeechRecognition - # Get dialog flow and vosk modules. + # Dialog Flow and Vosk self.dialogflow = self.s.DialogFlowService self.vosk = None # initialized if vosk is enabled @@ -161,18 +161,15 @@ def enable_vosk(self): self.vosk_api = True self.vosk = self.session.service('VoskClient') - # TODO: Ensure this is fully working as expected. def speakers_playing(self, playing): + """Callback for Audio Device speakers. Prevents Pepper listening to itself.""" self.is_paused = playing - if self.is_paused: - self.eyes_ignoring() - else: - self.eyes_idle() @qi.nobind def begin_record(self, previous_sound_data): - # Initialize a "memory file". I believe StringIO is used so that it can be passed through the naoqi broker - # without serialization issues (numpy.int16)? + """Begin recording by initialising a buffer for audio and writing any previous data""" + # Initialize some memory for us to record to + # StringIO is used so that it can be passed through the naoqi broker without serialization issues self.sound_file = StringIO.StringIO() self.is_recording = True self.record_start = time.time() @@ -181,24 +178,21 @@ def begin_record(self, previous_sound_data): if previous_sound_data is not None: self.sound_file.write(previous_sound_data[0].tostring()) - # Set eyes indicator TODO: Without lag please Pepper? - self.eyes_listening() - self.logger.info('Recording has started.') @qi.nobind def stop_record(self): + """Stop recording, clear any previous data""" # Clear last saved data self.previous_data = None self.is_recording = False - # Clear eye indicator - self.eyes_idle() - @qi.nobind def process_audio(self): """ Process the recorded audio and send it to dialogflow for intent processing. + If Vosk API is enabled, audio will be processed first then sent as text to dialogflow. + Otherwise, audio data will be sent to dialogflow. """ # Send buffer pointer back to the start @@ -235,20 +229,20 @@ def process_audio(self): def processRemote(self, channels, samples, _timestamp, audio_buffer): """Callback for audio processing.""" - # TODO: It seems some of the data loss could be due to inefficiencies in this function? - # Worth further investigation.. - - # Load the data by casting to an array of 16-bit ints. - interleaved_data = numpy.fromstring(str(audio_buffer), dtype=numpy.int16) + # If you are using inteleaved data, you'll want to use this commented block instead of just converting from a + #interleaved_data = numpy.fromstring(str(audio_buffer), dtype=numpy.int16) # Load from a string + #sound_data = numpy.reshape(interleaved_data, (channels, samples), 'F') # Split data by channels - # Deinterleave the data by splitting by channel - sound_data = numpy.reshape(interleaved_data, (channels, samples), 'F') + # Load the single-channel sound data. + sound_data = numpy.fromstring(str(audio_buffer), dtype=numpy.int16) # Save this last frame in case next frame we begin recording self.previous_data = sound_data # If we ain't listening, don't process if self.is_paused: + # Show that Pepper isn't listening. + self.eyes_ignoring() if self.is_recording: self.stop_record() return @@ -257,8 +251,8 @@ def processRemote(self, channels, samples, _timestamp, audio_buffer): peak = numpy.max(sound_data) # If we peak, reset the counter and start recording if we haven't - # If we have, we reset the listen count. if peak >= AUDIO_PEAK_THRESHOLD: + # Reset the retry count. 
We use this to determine when the user finishes speaking. self.retries = LISTENING_RETRY_COUNT if not self.is_recording: self.logger.info('START') @@ -266,20 +260,30 @@ def processRemote(self, channels, samples, _timestamp, audio_buffer): # If we are recording, knock the retry counter down and save this data. if self.is_recording: + # Change eyes to indicate listening + self.eyes_listening() + self.retries -= 1 - self.sound_file.write(sound_data[0].tostring()) + #self.sound_file.write(sound_data[0].tostring()) + self.sound_file.write(sound_data.tostring()) # TODO: Test new audio data stuff + + # Don't listen for too long + if time.time() - self.record_start > MAX_RECORD_TIME: + self.logger.warn('Sentence was too long.') + self.stop_record() - # If we've been recording too long, cut them short. - if self.is_recording and time.time() - self.record_start > MAX_RECORD_TIME: - self.logger.warn('Sentence was too long.') - self.stop_record() - # self.tts.say('Sorry, that sentence was too big. Could you try again?') + # User may have stopped speaking + if self.retries <= 0: + self.logger.info('Stopping') + self.stop_record() + + # Pepper will likely not be listening while we process. + self.eyes_ignoring() + self.process_audio() + else: + # Change to indicate idling. + self.eyes_idle() - # If the user has stopped speaking, process the audio - if self.is_recording and self.retries <= 0: - self.logger.info('Stopping') - self.stop_record() - self.process_audio() @qi.nobind def handle_actions(self, response): @@ -288,6 +292,12 @@ def handle_actions(self, response): :param response: The dialogflow response as a dict. Must be accessed as response[...] """ + # If we have no result, don't run. + # This doesn't tend to happen but its a nice safeguard. + if not 'queryResult' in response: + return + + # Easy access to the query result. query_result = response['queryResult'] # Iterate over the additional payloads @@ -299,17 +309,15 @@ def handle_actions(self, response): action = payload['action'] if action == 'show_url': - self.tablet.showWebview(payload['url']) + url = payload['url'] - elif action == 'show_local': # TODO: Collapse into show_url... - - path = payload['path'] - url = 'http://%s/apps/%s' % (self.tablet.robotIp(), + if not url.startswith('http'): + url = 'http://%s/apps/%s' % (self.tablet.robotIp(), os.path.join(self.package_uuid, - os.path.normpath(path).lstrip("\\/")) + os.path.normpath(url).lstrip("\\/")) .replace(os.path.sep, "/")) - - self.tablet.showWebview(str(url)) + + self.tablet.showWebview(url) elif action == 'clear_tablet': @@ -328,6 +336,7 @@ def handle_actions(self, response): self.behavior_manager.runBehavior(str(name)) except Exception as ex: self.logger.error('Failed to start "%s"' % name, ex) + # TODO: If an application developer wants a "rich" event, add it here. else: # Pass a generic action "bang" event. @@ -337,8 +346,6 @@ def handle_actions(self, response): if 'fulfillmentText' in query_result: self.tts.say(query_result['fulfillmentText']) - # TODO: Fix the eyes? 
- @qi.nobind def eyes_listening(self): """Makes Pepper's eyes blue to indicate listening""" @@ -357,7 +364,7 @@ def eyes_ignoring(self): @qi.nobind def set_eyes(self, r, g, b): """Set Pepper's face LEDs""" - self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) + self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) # 0 seconds fade to not freeze our listener if __name__ == "__main__": From 6535708eb837385b0bbbfc5028f8417dab604c05 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Tue, 28 Jun 2022 15:41:37 +0100 Subject: [PATCH 07/13] More readme stuff --- Applications-Python/Dialog Flow/README.md | 53 ++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 244b762..a43dd3c 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -3,12 +3,9 @@ This folder contains four projects that are used to make Pepper utilise Dialog F This project has been inspired and influenced by [this blog post](https://blogemtech.medium.com/pepper-integration-with-dialogflow-1d7f1582da1a). -## TODOs -- Maybe merge VoskClient and DialogFlowService into one bundle with a flag to enable vosk? That way they could also utilise stk and simplify their code? Not urgent however - ## Projects - DialogFlowService: This is a NAOqi service that runs on a laptop, it exposes some of the dialog flow API to Pepper. This is done because it is currently not possible to install the API on Pepper using pip. -- VoskClient: This is a socket client for a python 3 vosk server (see below). +- VoskClient: This is a socket client for a python 3 vosk server (see below). This has been kept in a separate project to DialogFlowService even though they have a lot of duplicated code as VoskClient isn't ready for prime use, it is still very much a prototype. - VoskServer: This is a Python 3 server hosting access to the Vosk Speech Recognition API. It was used during an experiment and can be optionally toggled in DialogFlowExample's demonstration listener service. - DialogFlowExample: This Choregraphe project ties all of the above services together to create a basic dialog flow program. It contains the barebones and can be used as a template to create further applications. @@ -26,5 +23,49 @@ service.py --ip --port In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup). ## Customisation -You can customise the listener's respones to actions by editing `handle_actions` in `ListenerService.py`. -By default it can open urls, display local assets, clear the tablet and speak. It can also fire events to ALMemory if you haven't hard-coded a custom response for it. Hardcoding a response or custom action is only really necessary for when you need parameters to be returned via your action. \ No newline at end of file +You can customise the listener's respones to actions by editing `handle_actions` in `ListenerService.py`. More on that later in the README. +By default it can open urls, display local assets, clear the tablet and speak. It can also fire events to ALMemory if you haven't hard-coded a custom response for it. 
Hardcoding a response or custom action is only really necessary for when you need parameters to be returned via your action. + +## Creating a new project +To create a new project with dialog flow, you'll want to follow the setup above, as well as create a new Dialog Flow Agent. +To create a new Choregraphe program, create it as you would normally, then copy and paste `DialogFlowExample/scripts` into your new project. Then add the following to your `manifest.xml`: +```xml + + + +``` +This tells NAOqi to install the ListenerService. Then you'll want to copy the "Start Listener" block from the graph into your own project. This just promps NAOqi to launch this service and starts it's listener. Remember to have the Dialog Flow server running on your PC before you do, otherwise it will not work. + +## Payloads +This implementation supports many pre-defined payloads and you can add your own too. + +### Speech +Adding text responses adds to the pool of potential lines to say. One of these will be picked by dialogflow to be said. +You could add a payload to add more speech if you'd like, but that was out of scope for what this was designed for. +Multiple speech will cause large delay in execution and the custom payloads would execute before the speech. + +### Show URL +```json +{ + "action": "show_url", + "url": "" +} +``` + +### Clear Tablet +```json +{ + "action": "clear_tablet" +} +``` + +### Run Behaviour +```json +{ + "action": "behavior", + "behavior": "" +} +``` + +### Custom Actions +Custom actions can either be implemented by adding them in the `ListenerService.py` `handle_actions` method, or they can be added in your Choregraphe (if they are "bang" type actions). This can be done by adding a switch onto the ALMemory event `DialogFlowAction` which will fire with the `action` component of the payload. \ No newline at end of file From e7aac116487eda755d2380affe77687090b84b83 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Tue, 28 Jun 2022 15:46:04 +0100 Subject: [PATCH 08/13] More readme --- .../DialogFlowExample/behavior_1/behavior.xar | 2 +- Applications-Python/Dialog Flow/README.md | 52 +++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar index 3ba1bfc..40c67a0 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -24,7 +24,7 @@ class MyClass(GeneratedClass): # TODO: Graceful error exits. - # Start listening. + # Get the listener service self.listener = ALProxy('ListenerService') # Start listening. Initialises dialogflow with a project id. Change this to your own. diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index a43dd3c..9258f15 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -22,10 +22,6 @@ service.py --ip --port In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup). -## Customisation -You can customise the listener's respones to actions by editing `handle_actions` in `ListenerService.py`. 
More on that later in the README. -By default it can open urls, display local assets, clear the tablet and speak. It can also fire events to ALMemory if you haven't hard-coded a custom response for it. Hardcoding a response or custom action is only really necessary for when you need parameters to be returned via your action. - ## Creating a new project To create a new project with dialog flow, you'll want to follow the setup above, as well as create a new Dialog Flow Agent. To create a new Choregraphe program, create it as you would normally, then copy and paste `DialogFlowExample/scripts` into your new project. Then add the following to your `manifest.xml`: @@ -36,7 +32,53 @@ To create a new Choregraphe program, create it as you would normally, then copy ``` This tells NAOqi to install the ListenerService. Then you'll want to copy the "Start Listener" block from the graph into your own project. This just promps NAOqi to launch this service and starts it's listener. Remember to have the Dialog Flow server running on your PC before you do, otherwise it will not work. -## Payloads +Then add a new Python Box with the following code in it. You will also need to add an output named `onStarted`. +This is only example code and in this example it simply waits 3 seconds for the service to start and does not gracefully deal with problems such as the Dialog Flow server not being found. More robust solutions should be found. I'll do this if I have the time left. + +```py +import time + + +class MyClass(GeneratedClass): + def __init__(self): + GeneratedClass.__init__(self) + + def onLoad(self): + self.serviceMan = ALProxy('ALServiceManager') + self.listener = None + pass + + def onUnload(self): + # Stop our service from running in the background once the behaviour ends. + if self.listener is not None: + self.listener.cleanup() + self.serviceMan.stopService('ListenerService') + pass + + def onInput_onStart(self): + # Start our listener service. + self.serviceMan.startService('ListenerService') + time.sleep(3) # TODO: Proper way of waiting + + # TODO: Graceful error exits. + + # Get the listener service + self.listener = ALProxy('ListenerService') + + # Start listening. Initialises dialogflow with a project id. Change this to your own. + self.listener.start_listening('soc-pepper-summer', self.packageUid()) + + # Fire any program init now. + self.onStarted() + + pass + + def onInput_onStop(self): + self.onUnload() + self.onStopped() +``` + +## Dialog Flow Response Payloads This implementation supports many pre-defined payloads and you can add your own too. 
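Each payload is a JSON object added to the intent as a custom payload response; the listener reads its `action` field, and any action it does not recognise is forwarded to ALMemory as a `DialogFlowAction` event (see Custom Actions below). As a minimal illustration, a hypothetical `wave` action with no extra parameters would look like this:

```json
{
    "action": "wave"
}
```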
### Speech From ffbbe4237e1dafda0f360254b89e4c28b8f79d9a Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:10:34 +0100 Subject: [PATCH 09/13] More robust starting block --- .../DialogFlowExample/behavior_1/behavior.xar | 230 ++++++++++++++++-- .../DialogFlowExample/manifest.xml | 2 +- .../scripts/ListenerService.py | 8 +- .../Dialog Flow/DialogFlowService/service.py | 4 +- Applications-Python/Dialog Flow/README.md | 42 ++-- 5 files changed, 252 insertions(+), 34 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar index 40c67a0..4ddd84c 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar +++ b/Applications-Python/Dialog Flow/DialogFlowExample/behavior_1/behavior.xar @@ -1,4 +1,4 @@ -media/images/box/root.pngmedia/images/box/box-python-script.pngmedia/images/box/box-python-script.pngmedia/images/box/box-script.pngmedia/images/box/box-script.png \ No newline at end of file + self.onUnload()]]>media/images/box/interaction/learn_face.pngmedia/images/box/interaction/face.pngmedia/images/box/box-script.pngmedia/images/box/box-script.pngmedia/images/box/interaction/LED.pngmedia/images/box/interaction/LED.pngmedia/images/box/interaction/vocabulary.png#00ff00media/images/box/interaction/LED.pngmedia/images/box/interaction/LED.pngmedia/images/box/interaction/vocabulary.png#ff0000media/images/box/interaction/LED.pngmedia/images/box/interaction/LED.pngmedia/images/box/interaction/vocabulary.png#00aaffmedia/images/box/wait.png \ No newline at end of file diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml b/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml index 273177f..4d03b57 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml +++ b/Applications-Python/Dialog Flow/DialogFlowExample/manifest.xml @@ -17,6 +17,6 @@ - + diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index ab98903..cb19c75 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -79,7 +79,7 @@ def __init__(self, qiapp): self.speech = self.s.ALSpeechRecognition # Dialog Flow and Vosk - self.dialogflow = self.s.DialogFlowService + self.dialogflow = self.s.DialogFlowAPI self.vosk = None # initialized if vosk is enabled # Properties for voice detection and recording @@ -108,7 +108,9 @@ def __init__(self, qiapp): self.tablet = self.s.ALTabletService self.behavior_manager = self.s.ALBehaviorManager - # TODO: Is there a way to avoid needing a package_uuid passing in? + # Tell Choregraphe we're ready to rock 'n' roll! 
+ self.mem.raiseEvent('ListenerServiceStarted', True) + @qi.bind(returnType=qi.Void, paramsType=[qi.String, qi.String]) def start_listening(self, google_project_id, package_uuid): # Save package uuid @@ -265,7 +267,7 @@ def processRemote(self, channels, samples, _timestamp, audio_buffer): self.retries -= 1 #self.sound_file.write(sound_data[0].tostring()) - self.sound_file.write(sound_data.tostring()) # TODO: Test new audio data stuff + self.sound_file.write(sound_data.tostring()) # Don't listen for too long if time.time() - self.record_start > MAX_RECORD_TIME: diff --git a/Applications-Python/Dialog Flow/DialogFlowService/service.py b/Applications-Python/Dialog Flow/DialogFlowService/service.py index 22f380d..4abf67b 100644 --- a/Applications-Python/Dialog Flow/DialogFlowService/service.py +++ b/Applications-Python/Dialog Flow/DialogFlowService/service.py @@ -30,7 +30,7 @@ def log_response(response): print("Fulfillment text: {}\n".format(response.query_result.fulfillment_text.encode('utf8'))) -class DialogFlowService(ALModule): +class DialogFlowAPI(ALModule): """NAOqi remote module that interfaces with Google DialogFlow.""" def __init__(self, name): @@ -116,7 +116,7 @@ def detect_intent_audio(self, input_audio): sys.exit(1) # Register the module. - DialogFlowService = DialogFlowService('DialogFlowService') + DialogFlowAPI = DialogFlowAPI('DialogFlowAPI') # Keep program running until we tell it to quit. try: diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 9258f15..4d4802d 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -30,10 +30,9 @@ To create a new Choregraphe program, create it as you would normally, then copy ``` -This tells NAOqi to install the ListenerService. Then you'll want to copy the "Start Listener" block from the graph into your own project. This just promps NAOqi to launch this service and starts it's listener. Remember to have the Dialog Flow server running on your PC before you do, otherwise it will not work. +This tells NAOqi to install the ListenerService. Then you'll want to copy the "Start Listener" block from the graph into your own project. This just promps NAOqi to launch this service and starts it's listener. Remember to have the Dialog Flow server running on your PC before you do, otherwise the program will stop immediately. -Then add a new Python Box with the following code in it. You will also need to add an output named `onStarted`. -This is only example code and in this example it simply waits 3 seconds for the service to start and does not gracefully deal with problems such as the Dialog Flow server not being found. More robust solutions should be found. I'll do this if I have the time left. +Then add a new Python Box with the following code in it. You will also need to add an input named `listenerStarted` and an output named `onStarted`. Then add a memory event on the left of the graph attached to the event `ListenerServiceStarted`, you'll likely have to use the `Create new key` button. Plug this into `listenerStarted`. This lets the script know we're about ready to begin. We then wait a couple of seconds for the service manager to keep up then start our program. ```py import time @@ -44,35 +43,50 @@ class MyClass(GeneratedClass): GeneratedClass.__init__(self) def onLoad(self): + # Initialize our fields self.serviceMan = ALProxy('ALServiceManager') self.listener = None + + # Store the service name. 
We use the packageUid to make sure we don't collide. + self.serviceName = self.packageUid() + ".ListenerService" pass def onUnload(self): # Stop our service from running in the background once the behaviour ends. if self.listener is not None: self.listener.cleanup() - self.serviceMan.stopService('ListenerService') + self.logger.info('Stopping listener service.') + + if self.serviceMan.isServiceRunning(self.serviceName): + self.serviceMan.stopService(self.serviceName) pass def onInput_onStart(self): + # Try to get Dialog Flow. + try: + ALProxy('DialogFlowAPI') + except RuntimeError: + # Server isn't loaded! + self.logger.error('Dialog Flow Server must be started first!') + self.onStopped() + return + # Start our listener service. - self.serviceMan.startService('ListenerService') - time.sleep(3) # TODO: Proper way of waiting + self.logger.info('Starting listener service.') + self.serviceMan.startService(self.serviceName) + pass - # TODO: Graceful error exits. - # Get the listener service - self.listener = ALProxy('ListenerService') + def onInput_listenerStarted(self, *_args): + # Wait for the service manager to catch up. + time.sleep(2) - # Start listening. Initialises dialogflow with a project id. Change this to your own. + # Grab the listener and start our program. + self.listener = ALProxy('ListenerService') + self.logger.info('Starting listener.') self.listener.start_listening('soc-pepper-summer', self.packageUid()) - - # Fire any program init now. self.onStarted() - pass - def onInput_onStop(self): self.onUnload() self.onStopped() From d8475047fff69ebc6b66fd9265909ac40bac8e43 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:17:42 +0100 Subject: [PATCH 10/13] Update README.md --- Applications-Python/Dialog Flow/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 4d4802d..146cc0f 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -1,9 +1,9 @@ -# Google Dialog Flow -This folder contains four projects that are used to make Pepper utilise Dialog Flow. +# Pepper Google Dialog Flow Integration +This folder contains four projects that are used to make Pepper utilise Dialog Flow. The dialog flow API has been optimised to ensure near-realtime responses. This project has been inspired and influenced by [this blog post](https://blogemtech.medium.com/pepper-integration-with-dialogflow-1d7f1582da1a). -## Projects +## Project Structure - DialogFlowService: This is a NAOqi service that runs on a laptop, it exposes some of the dialog flow API to Pepper. This is done because it is currently not possible to install the API on Pepper using pip. - VoskClient: This is a socket client for a python 3 vosk server (see below). This has been kept in a separate project to DialogFlowService even though they have a lot of duplicated code as VoskClient isn't ready for prime use, it is still very much a prototype. - VoskServer: This is a Python 3 server hosting access to the Vosk Speech Recognition API. It was used during an experiment and can be optionally toggled in DialogFlowExample's demonstration listener service. 
@@ -124,4 +124,10 @@ Multiple speech will cause large delay in execution and the custom payloads woul ``` ### Custom Actions -Custom actions can either be implemented by adding them in the `ListenerService.py` `handle_actions` method, or they can be added in your Choregraphe (if they are "bang" type actions). This can be done by adding a switch onto the ALMemory event `DialogFlowAction` which will fire with the `action` component of the payload. \ No newline at end of file +Custom actions can either be implemented by adding them in the `ListenerService.py` `handle_actions` method, or they can be added in your Choregraphe (if they are "bang" type actions). This can be done by adding a switch onto the ALMemory event `DialogFlowAction` which will fire with the `action` component of the payload. + +## Future Steps +As part of further development of this system, the following could be investigated: +- Adapting the system that detects speech to be more sensitive and to account for background noise, allowing for single-word responses to be captured easier. +- The Vosk API could be promising, however this same approach could be used to support virtually any speech recognition system. +- Look into using a higher sample rate from the microphones, that could lend itself to better clarity and therefore better recognition. \ No newline at end of file From 2abc3c20aa07cff240d48e7d0b413a94c1a4fc19 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:23:27 +0100 Subject: [PATCH 11/13] Update README.md --- Applications-Python/Dialog Flow/README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 146cc0f..7bc2702 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -5,9 +5,9 @@ This project has been inspired and influenced by [this blog post](https://blogem ## Project Structure - DialogFlowService: This is a NAOqi service that runs on a laptop, it exposes some of the dialog flow API to Pepper. This is done because it is currently not possible to install the API on Pepper using pip. -- VoskClient: This is a socket client for a python 3 vosk server (see below). This has been kept in a separate project to DialogFlowService even though they have a lot of duplicated code as VoskClient isn't ready for prime use, it is still very much a prototype. -- VoskServer: This is a Python 3 server hosting access to the Vosk Speech Recognition API. It was used during an experiment and can be optionally toggled in DialogFlowExample's demonstration listener service. - DialogFlowExample: This Choregraphe project ties all of the above services together to create a basic dialog flow program. It contains the barebones and can be used as a template to create further applications. +- VoskClient: This is a socket client for a python 3 vosk server (see below). This has been kept in a separate project to DialogFlowService even though they have a lot of duplicated code as VoskClient isn't ready for primetime, it is still very much a prototype. +- VoskServer: This is a Python 3 server hosting access to the Vosk Speech Recognition API. It was used during an experiment and can be optionally toggled in DialogFlowExample's demonstration listener service. Remember that both this and the VoskClient must be running in addition to the DialogFlowService for this to be available. 
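For a concrete picture of how these pieces fit together, a typical laptop-side startup looks roughly like the sketch below; the relative paths and the `<robot-ip>` placeholder are assumptions for illustration, and the Vosk pair is only needed if you call `enable_vosk` on the listener.

```shell
# Dialog Flow bridge (Python 2.7; needs GOOGLE_APPLICATION_CREDENTIALS set as described below)
python DialogFlowService/service.py --ip <robot-ip> --port 9559

# Optional Vosk transcription: start the Python 3 server first, then its NAOqi client
python3 VoskServer/server.py
python VoskClient/service.py --ip <robot-ip> --port 9559
```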
## Setup/Configuration You must install the NAOqi Python 2.7 SDK from [here](http://doc.aldebaran.com/2-5/dev/python/install_guide.html). @@ -16,12 +16,14 @@ You must install the NAOqi Python 2.7 SDK from [here](http://doc.aldebaran.com/2 `requirements.txt` files have been provided where necessary to pin dependencies to the correct versions. Entire pip dumps weren't provided as they may have been polluted however the important libraries are in these files. Both services that can be run on the laptop (DialogFlowService and VoskClient) accept command line arguments to configure the target robot: -``` -service.py --ip --port +```shell +python service.py --ip --port ``` In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_APPLICATION_CREDENTIALS in the environment variables to the correct path to your JSON token. I'd recommend reading the setup steps for Dialog Flow [here](https://cloud.google.com/dialogflow/es/docs/quick/setup). +During testing if you would like to run the `ListenerService` on its own (for example the one included in the example project), you can simply run it with `python ListenerService.py` and it will prompt you for the connection details for the robot. Note that you'll need to make modifications to your initial python block (as discussed below) to support an already-running service. + ## Creating a new project To create a new project with dialog flow, you'll want to follow the setup above, as well as create a new Dialog Flow Agent. To create a new Choregraphe program, create it as you would normally, then copy and paste `DialogFlowExample/scripts` into your new project. Then add the following to your `manifest.xml`: @@ -128,6 +130,6 @@ Custom actions can either be implemented by adding them in the `ListenerService. ## Future Steps As part of further development of this system, the following could be investigated: -- Adapting the system that detects speech to be more sensitive and to account for background noise, allowing for single-word responses to be captured easier. +- Adapting the system that detects speech to be more sensitive and to account for background noise, allowing for single-word responses to be captured easier. Maybe integrating some kind of voice activity detection API. - The Vosk API could be promising, however this same approach could be used to support virtually any speech recognition system. - Look into using a higher sample rate from the microphones, that could lend itself to better clarity and therefore better recognition. \ No newline at end of file From 9f966ea502ebb9a15485527174b28c9275230c34 Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:25:44 +0100 Subject: [PATCH 12/13] Tidy the vosk server --- .../Dialog Flow/VoskServer/server.py | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/Applications-Python/Dialog Flow/VoskServer/server.py b/Applications-Python/Dialog Flow/VoskServer/server.py index 410836f..a91b89e 100644 --- a/Applications-Python/Dialog Flow/VoskServer/server.py +++ b/Applications-Python/Dialog Flow/VoskServer/server.py @@ -1,22 +1,19 @@ # Python 3 Server for vosk. -# This is a proof of concept, it ignores disconnect handling and doesn't gracefully exit when pressing ctrl+c (press return instead). - -import os -import struct -import time -import json +# This is a proof of concept, it ignores socket disconnections and doesn't gracefully exit when pressing ctrl+c (press return instead). 
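+# The server thread feeds each received audio chunk straight into a KaldiRecognizer and
+# extracts the final transcript once the whole utterance has arrived.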
from vosk import Model, KaldiRecognizer, SetLogLevel -import sys -import wave - -import socket +import json +import os import signal - - +import socket +import struct +import sys from threading import Thread +import time +import wave +# Init en-us Vosk model. model = Model(lang="en-us") @@ -49,12 +46,11 @@ def run(self): datbuf += data read_count += len(data) + # Accepting each chunk like this seems to yield better results rec.AcceptWaveform(data) print('generate response') - # rec.AcceptWaveform(datbuf) - print('send response') resp = json.loads(rec.FinalResult())['text'] print(resp) @@ -63,14 +59,10 @@ def run(self): # Wait for client to close. # conn.recv(1) - time.sleep(1) print('end') - # if not data: - # break - # https://stackoverflow.com/questions/15189888/python-socket-accept-in-the-main-thread-prevents-quitting pid = os.getpid() From 7ec603fc9c68328183ed22f506b02cc2123a63bc Mon Sep 17 00:00:00 2001 From: Reece Mackie <20544390+Rover656@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:51:55 +0100 Subject: [PATCH 13/13] More cleaning --- .../DialogFlowExample/scripts/ListenerService.py | 4 ++-- Applications-Python/Dialog Flow/README.md | 8 +++++--- Applications-Python/Dialog Flow/VoskServer/server.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py index cb19c75..0990ba8 100644 --- a/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py +++ b/Applications-Python/Dialog Flow/DialogFlowExample/scripts/ListenerService.py @@ -366,8 +366,8 @@ def eyes_ignoring(self): @qi.nobind def set_eyes(self, r, g, b): """Set Pepper's face LEDs""" - self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) # 0 seconds fade to not freeze our listener + self.leds.fadeRGB("FaceLeds", r / 255, g / 255, b / 255, 0) # 0 seconds fade as this is a blocking call. if __name__ == "__main__": - stk.runner.run_service(ListenerService) \ No newline at end of file + stk.runner.run_service(ListenerService)# diff --git a/Applications-Python/Dialog Flow/README.md b/Applications-Python/Dialog Flow/README.md index 7bc2702..d5024f1 100644 --- a/Applications-Python/Dialog Flow/README.md +++ b/Applications-Python/Dialog Flow/README.md @@ -24,6 +24,8 @@ In addition, to authorise to Google Cloud for Dialog Flow, you must set GOOGLE_A During testing if you would like to run the `ListenerService` on its own (for example the one included in the example project), you can simply run it with `python ListenerService.py` and it will prompt you for the connection details for the robot. Note that you'll need to make modifications to your initial python block (as discussed below) to support an already-running service. +To run the example, you must set up the dialog flow service correctly, configure an agent on Google Dialog Flow and setup its intents. Then you must change the Google project ID in the same place as disclosed below. + ## Creating a new project To create a new project with dialog flow, you'll want to follow the setup above, as well as create a new Dialog Flow Agent. To create a new Choregraphe program, create it as you would normally, then copy and paste `DialogFlowExample/scripts` into your new project. Then add the following to your `manifest.xml`: @@ -34,7 +36,7 @@ To create a new Choregraphe program, create it as you would normally, then copy ``` This tells NAOqi to install the ListenerService. 
Then you'll want to copy the "Start Listener" block from the graph into your own project. This just promps NAOqi to launch this service and starts it's listener. Remember to have the Dialog Flow server running on your PC before you do, otherwise the program will stop immediately. -Then add a new Python Box with the following code in it. You will also need to add an input named `listenerStarted` and an output named `onStarted`. Then add a memory event on the left of the graph attached to the event `ListenerServiceStarted`, you'll likely have to use the `Create new key` button. Plug this into `listenerStarted`. This lets the script know we're about ready to begin. We then wait a couple of seconds for the service manager to keep up then start our program. +Then add a new Python Box with the following code in it. You will also need to add a `"bang"` input named `listenerStarted` and a `"bang"` output named `onStarted`. Then add a memory event on the left of the graph attached to the event `ListenerServiceStarted`, you'll likely have to use the `Create new key` button. Plug this into `listenerStarted`. This lets the script know we're about ready to begin. We then wait a couple of seconds for the service manager to keep up then start our program. ```py import time @@ -86,7 +88,7 @@ class MyClass(GeneratedClass): # Grab the listener and start our program. self.listener = ALProxy('ListenerService') self.logger.info('Starting listener.') - self.listener.start_listening('soc-pepper-summer', self.packageUid()) + self.listener.start_listening('', self.packageUid()) # TODO: Set your project ID here. self.onStarted() def onInput_onStop(self): @@ -132,4 +134,4 @@ Custom actions can either be implemented by adding them in the `ListenerService. As part of further development of this system, the following could be investigated: - Adapting the system that detects speech to be more sensitive and to account for background noise, allowing for single-word responses to be captured easier. Maybe integrating some kind of voice activity detection API. - The Vosk API could be promising, however this same approach could be used to support virtually any speech recognition system. -- Look into using a higher sample rate from the microphones, that could lend itself to better clarity and therefore better recognition. \ No newline at end of file +- Look into using a higher sample rate from the microphones, that could lend itself to better clarity and therefore better recognition. diff --git a/Applications-Python/Dialog Flow/VoskServer/server.py b/Applications-Python/Dialog Flow/VoskServer/server.py index a91b89e..5544096 100644 --- a/Applications-Python/Dialog Flow/VoskServer/server.py +++ b/Applications-Python/Dialog Flow/VoskServer/server.py @@ -68,5 +68,5 @@ def run(self): pid = os.getpid() sl = SocketServer() sl.start() -input('Socket is listening, press any key to abort...') +input('Socket is listening, press any key to abort... Do not hit Ctrl + C or you will have to task kill.') os.kill(pid, 9)