From ae5ef1003a63a21f2f7c2351f54a796d1ee1dd0b Mon Sep 17 00:00:00 2001 From: poleli Date: Mon, 11 Nov 2024 14:37:13 +0800 Subject: [PATCH] [VideoTranslation][ClientSampleCode][Python] Initial check in client sample code for python. (#2655) --- samples/video-translation/.gitignore | 2 + .../python/.vscode/launch_sample.json | 141 +++++ samples/video-translation/python/main.py | 272 +++++++++ .../video_translation_client.py | 561 ++++++++++++++++++ .../video_translation_const.py | 4 + .../video_translation_dataclass.py | 80 +++ .../video_translation_enum.py | 29 + .../video_translation_util.py | 39 ++ samples/video-translation/python/readme.md | 107 ++++ 9 files changed, 1235 insertions(+) create mode 100644 samples/video-translation/.gitignore create mode 100644 samples/video-translation/python/.vscode/launch_sample.json create mode 100644 samples/video-translation/python/main.py create mode 100644 samples/video-translation/python/microsoft_video_translation_client/video_translation_client.py create mode 100644 samples/video-translation/python/microsoft_video_translation_client/video_translation_const.py create mode 100644 samples/video-translation/python/microsoft_video_translation_client/video_translation_dataclass.py create mode 100644 samples/video-translation/python/microsoft_video_translation_client/video_translation_enum.py create mode 100644 samples/video-translation/python/microsoft_video_translation_client/video_translation_util.py create mode 100644 samples/video-translation/python/readme.md diff --git a/samples/video-translation/.gitignore b/samples/video-translation/.gitignore new file mode 100644 index 000000000..cb99fa775 --- /dev/null +++ b/samples/video-translation/.gitignore @@ -0,0 +1,2 @@ +# to avoid check in launch.json due to it contains sub keys. 
+launch.json \ No newline at end of file diff --git a/samples/video-translation/python/.vscode/launch_sample.json b/samples/video-translation/python/.vscode/launch_sample.json new file mode 100644 index 000000000..557cd9ee9 --- /dev/null +++ b/samples/video-translation/python/.vscode/launch_sample.json @@ -0,0 +1,141 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "eus-translate", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "create_translation_and_iteration_and_wait_until_terminated", + "--source_locale", "zh-CN", + "--target_locale", "en-US", + "--voice_kind", "PlatformVoice", + "--speaker_count", "1", + "--subtitle_max_char_count_per_segment", "20", + "--export_subtitle_in_video", "true", + "--video_file_blob_url", "https://xx.blob.core.windows.net/xx.mp4?sv=xx" + ] + }, + { + "name": "eus-api-create-translation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_create_translation_api", + "--translation_id", "xx", + "--operation_id", "xx", + "--translation_display_name", "xx", + "--translation_description", "xx", + "--source_locale", "zh-CN", + "--target_locale", "en-US", + "--voice_kind", "PlatformVoice", + "--speaker_count", "1", + "--subtitle_max_char_count_per_segment", "20", + "--export_subtitle_in_video", "true", + "--video_file_blob_url", "https://xx.blob.core.windows.net/xx.mp4?sv=xx" + ] + }, + { + "name": "eus-api-create-iteration", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_create_iteration_api", 
"--translation_id", "xx", + "--iteration_id", "xx", + "--operation_id", "xx", + "--iteration_description", "xx", + "--speaker_count", "1", + "--subtitle_max_char_count_per_segment", "20", + "--export_subtitle_in_video", "true", + "--webvtt_file_kind", "MetadataJson", + "--webvtt_file_blob_url", "https://xx.blob.core.windows.net/xx.vtt?sv=xx" + ] + }, + { + "name": "eus-api-get-operation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_get_operation_api", + "--operation_location", "https://xx.api.cognitive.microsoft.com/videotranslation/operations/xx?api-version=2024-05-20-preview" + ] + }, + { + "name": "eus-api-get-translation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_get_translation_api", + "--translation_id", "xx" + ] + }, + { + "name": "eus-api-get-iteration", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_get_iteration_api", + "--translation_id", "xx", + "--iteration_id", "xx" + ] + }, + { + "name": "eus-api-list-translations", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + "--api_version", "2024-05-20-preview", + "request_list_translations_api" + ] + }, + { + "name": "eus-api-delete-translation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "console": "integratedTerminal", + "args": [ + "--region", "eastus", + "--sub_key", "xx", + 
"--api_version", "2024-05-20-preview", + "request_delete_translation_api", + "--translation_id", "xx" + ] + } + ] +} \ No newline at end of file diff --git a/samples/video-translation/python/main.py b/samples/video-translation/python/main.py new file mode 100644 index 000000000..f2780ab54 --- /dev/null +++ b/samples/video-translation/python/main.py @@ -0,0 +1,272 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE.md file in the project root for full license information. + +import argparse +from termcolor import colored + +from microsoft_video_translation_client.video_translation_enum import * +from microsoft_video_translation_client.video_translation_dataclass import * +from microsoft_video_translation_client.video_translation_client import * + +def handle_create_translation_and_iteration_and_wait_until_terminated(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, translation, iteration = client.create_translate_and_run_first_iteration_until_terminated( + video_file_url = args.video_file_blob_url, + source_locale = args.source_locale, + target_locale = args.target_locale, + voice_kind = args.voice_kind, + speaker_count = args.speaker_count, + subtitle_max_char_count_per_segment = args.subtitle_max_char_count_per_segment, + export_subtitle_in_video = args.export_subtitle_in_video, + ) + if not success: + return + print(colored("success", 'green')) + +def handle_request_create_translation_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + operation_id = args.operationId + if operation_id is None: + operation_id = str(uuid.uuid4()) + success, error, translation, operation_location = client.request_create_translation( + translation_id = args.translation_id, + video_file_url = args.video_file_blob_url, + source_locale = args.source_locale, 
+ target_locale = args.target_locale, + voice_kind = args.voice_kind, + speaker_count = args.speaker_count, + subtitle_max_char_count_per_segment = args.subtitle_max_char_count_per_segment, + export_subtitle_in_video = args.export_subtitle_in_wideo, + translation_display_name = args.translation_display_name, + translation_description = args.translation_description, + operation_id = operation_id, + ) + if not success: + print(colored(f"Failed to request create translation API with error: {error}", 'red')) + return + print(colored("succesfully created translation:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(translation), indent = 2) + print(json_formatted_str) + print(f"Operation location: {operation_location}") + +def handle_request_create_iteration_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, iteration, operation_location = client.request_create_iteration( + translation_id = args.translation_id, + iteration_id = args.iteration_id, + webvtt_file_kind = args.webvtt_file_kind, + webvtt_file_url = args.webvtt_file_blob_url, + speaker_count = args.speaker_count, + subtitle_max_char_count_per_segment = args.subtitle_max_char_count_per_segment, + export_subtitle_in_video = args.export_subtitle_in_wideo, + iteration_description = args.iteration_description, + operation_id = args.operation_id, + ) + if not success: + print(colored(f"Failed to request create iteration API with error: {error}", 'red')) + return + print(colored("succesfully created iteration:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(iteration), indent = 2) + print(json_formatted_str) + print(f"Operation location: {operation_location}") + +def handle_request_get_operation_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, operation = 
client.request_get_operation( + operation_location = urllib3.util.parse_url(args.operation_location), + printUrl = True, + ) + if not success: + print(colored(f"Failed to request get operation API with error: {error}", 'red')) + return + print(colored("succesfully get operation:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(operation), indent = 2) + print(json_formatted_str) + +def handle_request_get_translation_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, translation = client.request_get_translation( + translation_id = args.translation_id, + ) + if not success: + print(colored(f"Failed to request get translation API with error: {error}", 'red')) + return + if translation is None: + print(colored("Translation not found", 'yellow')) + else: + print(colored("succesfully get translation:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(translation), indent = 2) + print(json_formatted_str) + +def handle_request_get_iteration_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, iteration = client.request_get_iteration( + translation_id = args.translation_id, + iteration_id = args.iteration_id, + ) + if not success: + print(colored(f"Failed to request get iteration API with error: {error}", 'red')) + return + if iteration is None: + print(colored("Iteration not found", 'yellow')) + else: + print(colored("succesfully get iteration:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(iteration), indent = 2) + print(json_formatted_str) + +def handle_request_list_translations_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, translation = client.request_list_translations() + if not success: + print(colored(f"Failed to 
request list translation API with error: {error}", 'red')) + return + print(colored("succesfully list translations:", 'green')) + json_formatted_str = json.dumps(dataclasses.asdict(translation), indent = 2) + print(json_formatted_str) + +def handle_request_delete_translation_api(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error = client.request_delete_translation(args.translationId) + if not success: + print(colored(f"Failed to request delete translation API with error: {error}", 'red')) + return + print(colored("succesfully delete translation.", 'green')) + +def handle_create_iteration_with_webvtt_and_wait_until_terminated(args): + client = VideoTranslationClient( + region = args.region, + sub_key = args.sub_key, + api_version = args.api_version, + ) + + success, error, translation, iteration = client.run_iteration_with_webvtt_until_terminated( + translation_id = args.translation_id, + webvtt_file_kind = args.webvtt_file_kind, + webvtt_file_url = args.webvtt_file_blob_url, + speaker_count = args.speaker_count, + subtitle_max_char_count_per_segment = args.subtitle_max_char_count_per_segment, + export_subtitle_in_video = args.export_subtitle_in_video, + ) + if not success: + print(colored(error, 'red')) + return + print(colored("success", 'green')) + +root_parser = argparse.ArgumentParser( + prog='main.py', + description='Translate video from source locale to target locale with PersonalVoice or PlatformVoice', + epilog='Microsoft VideoTranslation' +) + +root_parser.add_argument("--region", required = True, help="specify speech resource region.") +root_parser.add_argument("--sub_key", required = True, help="specify speech resource subscription key.") +root_parser.add_argument("--api_version", required = True, help="specify API version.") +sub_parsers = root_parser.add_subparsers(required=True, help='subcommand help') + +translate_parser = 
sub_parsers.add_parser('create_translation_and_iteration_and_wait_until_terminated', help='Create translation with video file blob url, and create first iteration for the translation.') +translate_parser.add_argument('--video_file_blob_url', required = True, type=str, help='Video file blob url for video translation.') +translate_parser.add_argument('--source_locale', required = True, type=str, help='Source locale of the video file.') +translate_parser.add_argument('--target_locale', required = True, type=str, help='Target locale of the translation.') +translate_parser.add_argument('--voice_kind', required = True, type=str, help='Voice kind used for TTS synthesizing, value can be one of: PlatformVoice or PersonalVoice.') +translate_parser.add_argument('--speaker_count', required = False, type=int, help='Speaker count of the video, optional, auto detect if not provided.') +translate_parser.add_argument('--subtitle_max_char_count_per_segment', required = False, type=int, help='Subtitle max char count per segment, optional.') +translate_parser.add_argument('--export_subtitle_in_video', required = False, type=bool, help='Whether export subtitle in translated video, optional, False by default.') +translate_parser.set_defaults(func = handle_create_translation_and_iteration_and_wait_until_terminated) + +translate_parser = sub_parsers.add_parser('create_iteration_with_webvtt_and_wait_until_terminated', help='Create iteration with provided webvtt based on created translation.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.add_argument('--webvtt_file_kind', required = True, type=str, help='Webvtt file kind, one of: MetadataJson, SourceLocaleSubtitle, TargetLocaleSubtitle') +translate_parser.add_argument('--webvtt_file_blob_url', required = True, type=str, help='Webvtt file blob url for webvtt file.') +translate_parser.add_argument('--speaker_count', required = False, type=int, help='Speaker count of 
the video, optional, auto detect if not provided.') +translate_parser.add_argument('--subtitle_max_char_count_per_segment', required = False, type=int, help='Subtitle max char count per segment, optional.') +translate_parser.add_argument('--export_subtitle_in_wideo', required = False, type=bool, help='Whether export subtitle in translated video, optional, False by default.') +translate_parser.set_defaults(func = handle_create_iteration_with_webvtt_and_wait_until_terminated) + +translate_parser = sub_parsers.add_parser('request_create_translation_api', help='Request create translation API.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.add_argument('--video_file_blob_url', required = True, type=str, help='Video file blob url for video translation.') +translate_parser.add_argument('--source_locale', required = True, type=str, help='Source locale of the video file.') +translate_parser.add_argument('--target_locale', required = True, type=str, help='Target locale of the translation.') +translate_parser.add_argument('--voice_kind', required = True, type=str, help='Voice kind used for TTS synthesizing, value can be one of: PlatformVoice or PersonalVoice.') +translate_parser.add_argument('--speaker_count', required = False, type=int, help='Speaker count of the video, optional, auto detect if not provided.') +translate_parser.add_argument('--subtitle_max_char_count_per_segment', required = False, type=int, help='Subtitle max char count per segment, optional.') +translate_parser.add_argument('--export_subtitle_in_wideo', required = False, type=bool, help='Whether export subtitle in translated video, optional, False by default.') +translate_parser.add_argument('--translation_display_name', required = False, type=str, help='Translation display name.') +translate_parser.add_argument('--translation_description', required = False, type=str, help='Translation description.') 
+translate_parser.add_argument('--operation_id', required = False, type=str, help='Specify operation ID.') +translate_parser.set_defaults(func = handle_request_create_translation_api) + +translate_parser = sub_parsers.add_parser('request_get_operation_api', help='Request get operation API.') +translate_parser.add_argument('--operation_location', required = True, type=str, help='Operation location.') +translate_parser.set_defaults(func = handle_request_get_operation_api) + +translate_parser = sub_parsers.add_parser('request_get_translation_api', help='Request get translation API.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.set_defaults(func = handle_request_get_translation_api) + +translate_parser = sub_parsers.add_parser('request_list_translations_api', help='Request list translations API.') +translate_parser.set_defaults(func = handle_request_list_translations_api) + +translate_parser = sub_parsers.add_parser('request_delete_translation_api', help='Request delete translation API.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.set_defaults(func = handle_request_delete_translation_api) + +translate_parser = sub_parsers.add_parser('request_create_iteration_api', help='Request create iteration API.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.add_argument('--iteration_id', required = True, type=str, help='Iteration ID.') +translate_parser.add_argument('--webvtt_file_blob_url', required = False, type=str, help='Webvtt file blob url for content editing.') +translate_parser.add_argument('--webvtt_file_kind', required = False, type=str, help='Webvtt file kind, one of: MetadataJson, SourceLocaleSubtitle, TargetLocaleSubtitle.') +translate_parser.add_argument('--speaker_count', required = False, type=int, help='Speaker count of the video, optional, 
auto detect if not provided.') +translate_parser.add_argument('--subtitle_max_char_count_per_segment', required = False, type=int, help='Subtitle max char count per segment, optional.') +translate_parser.add_argument('--export_subtitle_in_video', required = False, type=bool, help='Whether export subtitle in translated video, optional, False by default.') +translate_parser.add_argument('--iteration_description', required = False, type=str, help='Iteration description.') +translate_parser.add_argument('--operation_id', required = False, type=str, help='Specify operation ID.') +translate_parser.set_defaults(func = handle_request_create_iteration_api) + +translate_parser = sub_parsers.add_parser('request_get_iteration_api', help='Request get iteration API.') +translate_parser.add_argument('--translation_id', required = True, type=str, help='Translation ID.') +translate_parser.add_argument('--iteration_id', required = True, type=str, help='Iteration ID.') +translate_parser.set_defaults(func = handle_request_get_iteration_api) + +args = root_parser.parse_args() +args.func(args) diff --git a/samples/video-translation/python/microsoft_video_translation_client/video_translation_client.py b/samples/video-translation/python/microsoft_video_translation_client/video_translation_client.py new file mode 100644 index 000000000..ea7013d24 --- /dev/null +++ b/samples/video-translation/python/microsoft_video_translation_client/video_translation_client.py @@ -0,0 +1,561 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
+ +import urllib3 +import orjson +import uuid +import requests +import datetime +import locale +import json +import dataclasses +from termcolor import colored +from enum import Enum +from datetime import datetime +from typing import List +from urllib3.util import Url +from microsoft_video_translation_client.video_translation_const import * +from microsoft_video_translation_client.video_translation_enum import * +from microsoft_video_translation_client.video_translation_dataclass import * +from microsoft_video_translation_client.video_translation_util import * +from urllib.parse import urlencode +from pydantic import BaseModel +import time + +class VideoTranslationClient: + URL_SEGMENT_NAME_TRANSLATIONS = "translations" + URL_SEGMENT_NAME_ITERATIONS = "iterations" + URL_PATH_ROOT = "videotranslation" + + region = "" + sub_key = "" + api_version = "" + + def __init__(self, region, sub_key, api_version): + if region is None or sub_key is None: + raise ValueError + self.region = region + self.sub_key = sub_key + self.api_version = api_version + + # not retry for below response code: + # OK = 200, + # Created = 201, + # NoContent = 204, + # BadRequest = 400 + # Unauthorized = 401 + # Forbidden = 403 + # NotFound = 404 + # Conflict = 409 + status_forcelist = tuple(set(x for x in requests.status_codes._codes) - set(x for x in [200, 201, 204, 400, 401, 403, 404, 409])) + retries = urllib3.Retry(total=5, status_forcelist=status_forcelist) + timeout = urllib3.util.Timeout(10) + self.http = urllib3.PoolManager(timeout=timeout, retries=retries) + + # For most common scenario, customer not need provide webvtt first iteration. + # Even, it is supported to provide webvtt for the first iteration, customer can customize the client code if they want to run first iteration with webvtt. 
def create_translate_and_run_first_iteration_until_terminated(
    self,
    video_file_url: Url,
    source_locale: locale,
    target_locale: locale,
    voice_kind: VoiceKind,
    speaker_count: int = None,
    subtitle_max_char_count_per_segment: int = None,
    export_subtitle_in_video: bool = None
) -> tuple[bool, str, TranslationDefinition, IterationDefinition]:
    """Create a translation, then create its first iteration, waiting on each.

    The translation is created through ``self.create_translation_until_terminated``
    and the iteration through ``self.create_iteration_until_terminated``. Both
    results are printed as indented JSON when they succeed.

    Args:
        video_file_url: URL of the video file to translate. Required.
        source_locale: Locale of the source video. Required.
        target_locale: Locale to translate into. Required.
        voice_kind: Voice kind passed to translation creation. Required.
        speaker_count: Forwarded to the iteration only; optional.
        subtitle_max_char_count_per_segment: Forwarded to the iteration only;
            optional.
        export_subtitle_in_video: Forwarded to the iteration only; optional.

    Returns:
        ``(success, error, translation, iteration)``. On failure of either
        step the tuple is ``(False, error, None, None)``; on success it is
        ``(True, None, translation, iteration)``.

    Raises:
        ValueError: If any required argument is ``None``. The message lists
            the missing argument names.
    """
    required_arguments = {
        "video_file_url": video_file_url,
        "source_locale": source_locale,
        "target_locale": target_locale,
        "voice_kind": voice_kind,
    }
    missing = [name for name, value in required_arguments.items() if value is None]
    if missing:
        raise ValueError(f"Missing required argument(s): {', '.join(missing)}")

    # A single timestamp is reused for both IDs so the translation and its
    # first iteration can be correlated by prefix.
    timestamp = datetime.now().strftime("%m%d%Y%H%M%S")
    translation_id = f"{timestamp}_{source_locale}_{target_locale}_{voice_kind}"
    success, error, translation = self.create_translation_until_terminated(
        translation_id=translation_id,
        video_file_url=video_file_url,
        source_locale=source_locale,
        target_locale=target_locale,
        voice_kind=voice_kind,
    )
    if not success:
        return False, error, None, None

    print(colored("succesfully created translation:", 'green'))
    print(json.dumps(dataclasses.asdict(translation), indent=2))

    iteration_id = f"{timestamp}_default"
    success, error, iteration = self.create_iteration_until_terminated(
        translation_id=translation_id,
        iteration_id=iteration_id,
        speaker_count=speaker_count,
        subtitle_max_char_count_per_segment=subtitle_max_char_count_per_segment,
        export_subtitle_in_video=export_subtitle_in_video,
    )
    if not success:
        return False, error, None, None

    print(colored("succesfully created iteration:", 'green'))
    print(json.dumps(dataclasses.asdict(iteration), indent=2))

    return True, None, translation, iteration

# For iteration from secondary, webvtt file is required.
+ def run_iteration_with_webvtt_until_terminated( + self, + translation_id: str, + webvtt_file_kind: WebvttFileKind, + webvtt_file_url: Url, + speaker_count: int = None, + subtitle_max_char_count_per_segment: int = None, + export_subtitle_in_video: bool = None + ) -> tuple[bool, str, TranslationDefinition, IterationDefinition]: + if webvtt_file_kind is None or webvtt_file_url is None: + raise ValueError + + success, error, translation = self.request_get_translation( + translation_id = translation_id, + ) + if not success: + return False, error, None + elif translation is None: + return False, f"Not found translation ID: {translation_id}", None + + now = datetime.now() + iteration_id = now.strftime("%m%d%Y%H%M%S") + + success, error, iteration = self.create_iteration_until_terminated( + translation_id = translation_id, + iteration_id = iteration_id, + webvtt_file_kind = webvtt_file_kind, + webvtt_file_url = webvtt_file_url, + speaker_count = speaker_count, + subtitle_max_char_count_per_segment = subtitle_max_char_count_per_segment, + export_subtitle_in_video = export_subtitle_in_video, + ) + if not success: + return False, error, None + + return True, None, translation, iteration + + def create_translation_until_terminated( + self, + translation_id: str, + video_file_url: Url, + source_locale: locale, + target_locale: locale, + voice_kind: VoiceKind, + ) -> tuple[bool, str, TranslationDefinition]: + operation_id = str(uuid.uuid4()) + success, error, response_translation, operation_location = self.request_create_translation( + translation_id = translation_id, + video_file_url = video_file_url, + source_locale = source_locale, + target_locale = target_locale, + voice_kind = voice_kind, + speaker_count = None, + subtitle_max_char_count_per_segment = None, + export_subtitle_in_video = None, + translation_display_name = None, + translation_description = None, + operation_id = operation_id) + if not success or operation_location is None: + print(colored(f"Failed to create 
translation with ID {translation_id} with error: {error}", 'red')) + return False, error, None + + self.request_operation_until_terminated(operation_location) + + success, error, response_translation = self.request_get_translation(translation_id) + if not success: + print(colored(f"Failed to query translation {translation_id} with error: {error}", 'red')) + return False, error, None + if response_translation.status != OperationStatus.Succeeded: + print(colored(f"Translation creation failed with error: {error}", 'red')) + print(json.dumps(dataclasses.asdict(response_translation), indent = 2)) + return False, response_translation.translationFailureReason, None + + return True, None, response_translation + + def request_operation_until_terminated( + self, + operation_location: Url): + success, error, response_operation = self.request_get_operation(operation_location = operation_location, printUrl = True) + if not success or response_operation is None: + print(colored(f"Failed to query operation for translation creation operation from location {operation_location} with error: {error}", 'red')) + return + + lastStatus = None + while response_operation.status in [OperationStatus.Running, OperationStatus.NotStarted]: + success, error, response_operation = self.request_get_operation(operation_location = operation_location, printUrl = False) + if not success or response_operation is None: + print(colored(f"Failed to query operation for translation creation operation from location {operation_location} with error: {error}", 'red')) + return + if lastStatus != response_operation.status: + print(response_operation.status) + lastStatus = response_operation.status + print(".", end="") + # in seconds + time.sleep(5) + + return response_operation.status + + def create_iteration_until_terminated( + self, + translation_id: str, + iteration_id: str, + webvtt_file_kind: WebvttFileKind = None, + webvtt_file_url: Url = None, + speaker_count: int = None, + 
subtitle_max_char_count_per_segment: int = None, + export_subtitle_in_video: bool = None, + ) -> tuple[bool, str, IterationDefinition]: + if translation_id is None or iteration_id is None: + raise ValueError + success, error, response_iteration, operation_location = self.request_create_iteration( + translation_id = translation_id, + iteration_id = iteration_id, + webvtt_file_kind = webvtt_file_kind, + webvtt_file_url = webvtt_file_url, + speaker_count = speaker_count, + subtitle_max_char_count_per_segment = subtitle_max_char_count_per_segment, + export_subtitle_in_video = export_subtitle_in_video, + iteration_description = None, + operation_id = None) + if not success: + print(colored(f"Failed to create iteration with ID {iteration_id} for translation {translation_id} with error: {error}", 'red')) + return False, error, None + + self.request_operation_until_terminated(operation_location) + + success, error, response_iteration = self.request_get_iteration(translation_id, iteration_id) + if not success: + print(colored(f"Failed to query iteration {iteration_id} for translation {translation_id} with error: {error}", 'red')) + return False, error, None + if response_iteration.status != OperationStatus.Succeeded: + print(colored(f"Iteration creation failed with error: {error}", 'red')) + print(json.dumps(dataclasses.asdict(operation_location), indent = 2)) + return False, response_iteration.translationFailureReason, None + + return True, None, response_iteration + + def build_translations_path(self) -> str: + return f"{self.URL_PATH_ROOT}/{self.URL_SEGMENT_NAME_TRANSLATIONS}" + + def build_translation_path(self, + translation_id: str) -> str: + if translation_id is None: + raise ValueError + translations_path = self.build_translations_path() + return f"{translations_path}/{translation_id}" + + def build_iterations_path(self, + translation_id: str) -> str: + if translation_id is None: + raise ValueError + translation_path = self.build_translation_path(translation_id) + 
return f"{translation_path}/{self.URL_SEGMENT_NAME_ITERATIONS}" + + def build_iteration_path(self, + translation_id: str, + iteration_id: str) -> str: + if translation_id is None or iteration_id is None: + raise ValueError + iterations_path = self.build_iterations_path(translation_id) + return f"{iterations_path}/{iteration_id}" + + def build_host(self) -> str: + return f"{self.region}.api.cognitive.microsoft.com" + + def build_url(self, + segments: str) -> Url: + if segments is None: + raise ValueError + host = self.build_host() + return urllib3.util.parse_url(f"https://{host}/{segments}?api-version={self.api_version}") + + def build_translations_url(self) -> Url: + path = self.build_translations_path() + return self.build_url(path) + + def build_translation_url(self, + translation_id: str) -> Url: + if translation_id is None: + raise ValueError + path = self.build_translation_path(translation_id) + return self.build_url(path) + + def build_iterations_url(self, + translation_id: str) -> Url: + if translation_id is None: + raise ValueError + path = self.build_iterations_path() + return self.build_url(path) + + def build_iteration_url(self, + translation_id: str, + iteration_id: str) -> Url: + if translation_id is None or iteration_id is None: + raise ValueError + path = self.build_iteration_path(translation_id, iteration_id) + return self.build_url(path) + + def build_request_header(self) -> dict: + return { + "Ocp-Apim-Subscription-Key": self.sub_key + } + + # https://learn.microsoft.com/en-us/rest/api/aiservices/videotranslation/operation-operations/get-operation?view=rest-aiservices-videotranslation-2024-05-20-preview&tabs=HTTP + def request_get_operation(self, + operation_location: Url, + printUrl: bool) -> tuple[bool, str, OperationDefinition]: + if operation_location is None: + raise ValueError + + headers = self.build_request_header() + + if printUrl: + print(f"Requesting http GET: {operation_location}") + response = self.http.request("GET", 
# https://learn.microsoft.com/en-us/rest/api/aiservices/videotranslation/translation-operations/get-translation?view=rest-aiservices-videotranslation-2024-05-20-preview&tabs=HTTP
def request_get_translation(self,
                            translation_id: str) -> tuple[bool, str, TranslationDefinition]:
    """Fetch a single translation by ID with an HTTP GET.

    Returns a ``(success, error, translation)`` tuple:
      * HTTP 200 -> ``(True, None, <TranslationDefinition built from the JSON body>)``
      * HTTP 404 -> ``(True, None, None)``; a missing translation is not reported as a failure
      * any other status -> ``(False, response.reason, None)``

    Raises:
        ValueError: if ``translation_id`` is None.
    """
    if translation_id is None:
        raise ValueError

    request_url = self.build_translation_url(translation_id)
    request_headers = self.build_request_header()

    print(f"Requesting http GET: {request_url}")
    http_response = self.http.request("GET", request_url.url, headers = request_headers)

    status_code = http_response.status
    # NotFound = 404,
    if status_code == 404:
        return True, None, None
    # Anything other than OK = 200 is surfaced with the HTTP reason phrase.
    if status_code != 200:
        return False, http_response.reason, None

    translation = dict_to_dataclass(
        data = http_response.json(),
        dataclass_type = TranslationDefinition)
    return True, None, translation
# https://learn.microsoft.com/en-us/rest/api/aiservices/videotranslation/iteration-operations/list-iteration?view=rest-aiservices-videotranslation-2024-05-20-preview&tabs=HTTP
def request_list_iterations(self,
                            translation_id: str = None) -> tuple[bool, str, PagedIterationDefinition]:
    """List the iterations that belong to one translation.

    Args:
        translation_id: ID of the translation whose iterations are listed.
            It carries a default only so the parameter is an additive change
            to the former zero-argument signature; a value is required.

    Returns:
        ``(True, None, <PagedIterationDefinition built from the JSON body>)``
        on HTTP 200, otherwise ``(False, <response body decoded as UTF-8>, None)``.

    Raises:
        ValueError: if ``translation_id`` is None.
    """
    if translation_id is None:
        raise ValueError

    # The iterations collection is nested under a translation, so the path
    # cannot be built without the translation ID.
    url = self.build_url(self.build_iterations_path(translation_id))
    headers = self.build_request_header()

    print(f"Requesting http GET: {url}")
    response = self.http.request("GET", url.url, headers = headers)

    # OK = 200,
    if response.status != 200:
        error = response.data.decode('utf-8')
        return False, error, None
    response_iterations = dict_to_dataclass(
        data = response.json(),
        dataclass_type = PagedIterationDefinition)
    return True, None, response_iterations
# https://learn.microsoft.com/en-us/rest/api/aiservices/videotranslation/iteration-operations/create-iteration?view=rest-aiservices-videotranslation-2024-05-20-preview&tabs=HTTP
def request_create_iteration(
    self,
    translation_id: str,
    iteration_id: str,
    webvtt_file_kind: WebvttFileKind = None,
    webvtt_file_url: Url = None,
    speaker_count: int = None,
    subtitle_max_char_count_per_segment: int = None,
    export_subtitle_in_video: bool = None,
    iteration_description: str = None,
    operation_id: str = None,
) -> tuple[bool, str, IterationDefinition, Url]:
    """Send the create-iteration PUT request for an existing translation.

    Args:
        translation_id: ID of the translation the iteration belongs to.
        iteration_id: ID to give the new iteration.
        webvtt_file_kind: Kind of the WebVTT file to attach; only used
            together with ``webvtt_file_url``.
        webvtt_file_url: URL of the WebVTT file to attach; only used
            together with ``webvtt_file_kind``.
        speaker_count: Optional value for the ``speakerCount`` input field.
        subtitle_max_char_count_per_segment: Optional value for the
            ``subtitleMaxCharCountPerSegment`` input field.
        export_subtitle_in_video: Optional value for the
            ``exportSubtitleInVideo`` input field.
        iteration_description: Optional description stored on the iteration.
        operation_id: Value of the ``Operation-Id`` request header; a random
            UUID4 string is generated when omitted.

    Returns:
        ``(True, None, iteration, operation_location_url)`` on HTTP 200/201,
        where ``iteration`` is built from the JSON body and the URL is parsed
        from the ``Operation-Location`` response header; otherwise
        ``(False, <response body decoded as UTF-8>, None, None)``.

    Raises:
        ValueError: if ``translation_id`` or ``iteration_id`` is None.
    """
    if translation_id is None or iteration_id is None:
        raise ValueError

    iteration_input_body = IterationInputDefinition(
        speakerCount = speaker_count,
        exportSubtitleInVideo = export_subtitle_in_video,
        subtitleMaxCharCountPerSegment = subtitle_max_char_count_per_segment,
    )

    # A WebVTT reference needs both its kind and its URL; checking the kind
    # alone would send a webvttFile entry whose url is null.
    if webvtt_file_kind is not None and webvtt_file_url is not None:
        iteration_input_body.webvttFile = WebvttFileDefinition(
            kind = webvtt_file_kind,
            url = webvtt_file_url,
        )

    iteration_create_body = IterationDefinition(
        input = iteration_input_body,
        description = iteration_description,
    )

    encoded_iteration_create_body = orjson.dumps(dataclasses.asdict(iteration_create_body))
    url = self.build_iteration_url(translation_id, iteration_id)
    if operation_id is None:
        # The header must carry a string, so fall back to a fresh UUID.
        operation_id = str(uuid.uuid4())
    headers = self.build_request_header()
    headers["Operation-Id"] = operation_id

    print(f"Requesting http PUT: {url}")
    response = self.http.request("PUT", url.url, headers = headers, body=encoded_iteration_create_body)

    # OK = 200,
    # Created = 201,
    if response.status not in (200, 201):
        error = response.data.decode('utf-8')
        return False, error, None, None
    response_iteration = dict_to_dataclass(
        data = response.json(),
        dataclass_type = IterationDefinition)
    # The service reports where to poll the long-running operation in this header.
    operation_location = response.headers[HTTP_HEADERS_OPERATION_LOCATION]
    operation_location_url = urllib3.util.parse_url(operation_location)
    return True, None, response_iteration, operation_location_url
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

# Data contract classes for the video translation client. The client builds
# request bodies from them (via dataclasses.asdict) and rebuilds them from
# response JSON (via dict_to_dataclass), so field names are spelled exactly
# like the JSON keys (camelCase).
#
# Every class is declared with kw_only=True: all fields are keyword-only in
# __init__, which is what allows a subclass to add required fields after the
# defaulted fields it inherits.

import locale
from datetime import datetime
from dataclasses import dataclass
from urllib3.util import Url
from typing import Optional

from microsoft_video_translation_client.video_translation_enum import *

@dataclass(kw_only=True)
class OperationDefinition():
    """An operation as returned by the get-operation request: its ID and status."""
    id: str
    status: OperationStatus

@dataclass(kw_only=True)
class TranslationInputBaseDefinition():
    """Optional input settings shared by translation and iteration inputs."""
    speakerCount: Optional[int] = None
    exportSubtitleInVideo: Optional[bool] = None
    subtitleMaxCharCountPerSegment: Optional[int] = None

@dataclass(kw_only=True)
class WebvttFileDefinition():
    """Reference to a WebVTT file: where it is (url) and what it contains (kind)."""
    url: Url
    kind: WebvttFileKind

@dataclass(kw_only=True)
class TranslationInputDefinition(TranslationInputBaseDefinition):
    """Input of a translation: the source video, the locale pair and the voice kind."""
    # This is optional because the moment after translation created, API has not downloaded video file to server side yet.
    videoFileUrl: Optional[str] = None
    # NOTE(review): `locale` is the stdlib module imported above, not a type.
    # The readme sample passes locale strings such as "zh-CN" - consider
    # annotating these two fields as str.
    sourceLocale: locale
    targetLocale: locale
    voiceKind: VoiceKind

@dataclass(kw_only=True)
class StatelessResourceBaseDefinition():
    """Common descriptive fields of a resource; all optional."""
    id: Optional[str] = None
    displayName: Optional[str] = None
    description: Optional[str] = None
    createdDateTime: Optional[datetime] = None

@dataclass(kw_only=True)
class StatefulResourceBaseDefinition(StatelessResourceBaseDefinition):
    """Resource base that additionally carries a status and a last-action timestamp."""
    status: Optional[OneApiState] = None
    lastActionDateTime: Optional[datetime] = None

@dataclass(kw_only=True)
class IterationInputDefinition(TranslationInputBaseDefinition):
    """Input of an iteration: the shared settings plus an optional WebVTT file."""
    webvttFile: Optional[WebvttFileDefinition] = None

@dataclass(kw_only=True)
class IterationResultDefinition():
    """URLs of the files an iteration produced; each one is optional."""
    translatedVideoFileUrl: Optional[Url] = None
    sourceLocaleSubtitleWebvttFileUrl: Optional[Url] = None
    targetLocaleSubtitleWebvttFileUrl: Optional[Url] = None
    metadataJsonWebvttFileUrl: Optional[Url] = None

@dataclass(kw_only=True)
class IterationDefinition(StatefulResourceBaseDefinition):
    """An iteration resource: required input, optional result and failure reason."""
    input: IterationInputDefinition
    result: Optional[IterationResultDefinition] = None
    iterationFailureReason: Optional[str] = None

@dataclass(kw_only=True)
class TranslationDefinition(StatefulResourceBaseDefinition):
    """A translation resource: required input, its latest iterations and failure reason."""
    input: TranslationInputDefinition
    latestIteration: Optional[IterationDefinition] = None
    latestSucceededIteration: Optional[IterationDefinition] = None
    translationFailureReason: Optional[str] = None

@dataclass(kw_only=True)
class PagedTranslationDefinition():
    """One page of translations plus an optional link to the next page."""
    value: list[TranslationDefinition]
    nextLink: Optional[Url] = None

@dataclass(kw_only=True)
class PagedIterationDefinition():
    """One page of iterations plus an optional link to the next page."""
    value: list[IterationDefinition]
    nextLink: Optional[Url] = None
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

# Enumerations used by the video translation data contracts. Every enum also
# derives from str, so a member compares equal to its plain string value
# (for example OperationStatus.Succeeded == 'Succeeded').

from enum import Enum

class VoiceKind(str, Enum):
    """Voice kinds; the type of TranslationInputDefinition.voiceKind."""
    PlatformVoice = 'PlatformVoice'
    PersonalVoice = 'PersonalVoice'

class Region(str, Enum):
    """Region names; only eastus is declared here."""
    eastus = 'eastus'

class OneApiState(str, Enum):
    """Resource states; the type of StatefulResourceBaseDefinition.status."""
    NotStarted = 'NotStarted'
    Running = 'Running'
    Succeeded = 'Succeeded'
    Failed = 'Failed'

class OperationStatus(str, Enum):
    """Operation states; the type of OperationDefinition.status.

    Has the same members as OneApiState plus Canceled.
    """
    NotStarted = 'NotStarted'
    Running = 'Running'
    Succeeded = 'Succeeded'
    Failed = 'Failed'
    Canceled = 'Canceled'

class WebvttFileKind(str, Enum):
    """Kinds of WebVTT file; the type of WebvttFileDefinition.kind."""
    SourceLocaleSubtitle = 'SourceLocaleSubtitle'
    TargetLocaleSubtitle = 'TargetLocaleSubtitle'
    MetadataJson = 'MetadataJson'
def _convert_field_value(value: Any, field_type: Any) -> Any:
    """Convert one raw JSON value according to its dataclass field annotation."""
    # Imported locally so the module's import block does not have to change.
    import types
    from typing import Union, get_args, get_origin

    if value is None:
        # JSON null stays None instead of being expanded as a mapping.
        return None
    if is_dataclass(field_type):
        # Only a mapping can be expanded into a dataclass; keep anything else.
        return dict_to_dataclass(value, field_type) if isinstance(value, dict) else value

    origin = get_origin(field_type)
    type_args = get_args(field_type)
    if origin is Union or origin is getattr(types, "UnionType", None):
        # Optional[X] / X | None: use the first member that converts the value.
        for candidate in type_args:
            converted = _convert_field_value(value, candidate)
            if converted is not value:
                return converted
        return value
    if origin is list and type_args and isinstance(value, list):
        # list[X]: convert every element with the element annotation.
        return [_convert_field_value(item, type_args[0]) for item in value]
    return value


def dict_to_dataclass(data: dict, dataclass_type: Type[Any]) -> Any:
    """Build a ``dataclass_type`` instance from a plain dictionary.

    Keys of ``data`` that are not fields of ``dataclass_type`` are ignored.
    Nested values are converted recursively when the field annotation is a
    dataclass, an ``Optional``/union containing one, or a ``list`` of one;
    all other values, including ``None``, are passed through unchanged.

    Args:
        data: Mapping of field names to raw values, e.g. a decoded JSON object.
        dataclass_type: The dataclass to instantiate.

    Returns:
        An instance of ``dataclass_type``.

    Raises:
        ValueError: if ``dataclass_type`` is not a dataclass.
        TypeError: raised by the dataclass constructor when a required field
            is missing from ``data``.
    """
    if not is_dataclass(dataclass_type):
        raise ValueError(f"{dataclass_type} is not a dataclass")

    # Map each declared field name to its annotation.
    field_types = {field.name: field.type for field in fields(dataclass_type)}
    converted_data = {
        key: _convert_field_value(value, field_types[key])
        for key, value in data.items()
        if key in field_types
    }

    return dataclass_type(**converted_data)
+ +# Conda support: + conda create -n VideoTranslation_ClientSampleCode python=3.11.10 + conda activate VideoTranslation_ClientSampleCode + +# File Description +| Files | Description | +| --- | --- | +| [main.py](main.py) | client tool main definition | +| [video_translation_client.py](microsoft_video_translation_client/video_translation_client.py) | video translation client definition | +| [video_translation_dataclass.py](microsoft_video_translation_client/video_translation_dataclass.py) | video translation data contract definition | +| [video_translation_enum.py](microsoft_video_translation_client/video_translation_enum.py) | video translation enum definition | +| [video_translation_const.py](microsoft_video_translation_client/video_translation_const.py) | video translation constant definition | +| [video_translation_util.py](microsoft_video_translation_client/video_translation_util.py) | video translation utility function definition | + +# Usage for command line tool: +## Usage +Run main.py with a command in the pattern below: + python main.py --api_version 2024-05-20-preview --region eastus --sub_key [YourSpeechresourceKey] [SubCommands] [args...]
+ +## Global parameters +| Argument name | Description | +| --- | --- | +| region | region of the speech resource | +| sub_key | speech resource key | +| api_version | API version, supported version: 2024-05-20-preview | + +## Sub commands definition +| SubCommand | Description | +| --- | --- | +| create_translation_and_iteration_and_wait_until_terminated | Create translation and run first iteration for the video file from source locale to target locale, and wait until iteration terminated | +| create_iteration_with_webvtt_and_wait_until_terminated | Run iteration on an existing translation with webvtt, and wait until iteration terminated | +| request_create_translation_api | Request create translation API | +| request_get_operation_api | Request get operation by ID API | +| request_get_translation_api | Request get translation by ID API | +| request_list_translations_api | Request list translations API | +| request_delete_translation_api | Request delete translation API | +| request_create_iteration_api | Request create iteration API | +| request_get_iteration_api | Request get iteration API | + +## HTTP client library +Video translation client is defined as class VideoTranslationClient in file [video_translation_client.py](microsoft_video_translation_client/video_translation_client.py) +### Function definitions: +| Function | Description | +| --- | --- | +| create_translate_and_run_first_iteration_until_terminated | Create translation and run first iteration for the video file from source locale to target locale, and wait until iteration terminated | +| run_iteration_with_webvtt_until_terminated | Run iteration on an existing translation with webvtt, and wait until iteration terminated | +| create_translation_until_terminated | Create translation and wait until terminated | +| create_iteration_until_terminated | Create iteration and wait until terminated | +| request_operation_until_terminated | Query operation and wait until terminated | +|
request_create_translation | Request create translation PUT API | +| request_get_operation | Request query operation GET API | +| request_get_translation | Query get translation GET API | +| request_delete_translation | Delete translation DELETE API | +| request_create_iteration | Request create iteration PUT API | +| request_list_translations | Query list translations GET API | +| request_get_iteration | Query get iteration GET API | +| request_list_iterations | Query list iterations GET API | + +# Usage sample for client class: +``` + client = VideoTranslationClient( + region = "eastus", + sub_key = "[YourSpeechresourceKey]", + ) + success, error, translation, iteration = client.create_translate_and_run_first_iteration_until_terminated( + video_file_url = "https://xx.blob.core.windows.net/users/xx/xx.mp4?sv=xx", + source_locale = "zh-CN", + target_locale = "en-US", + voice_kind = "PlatformVoice", + speaker_count = 2, + subtitle_max_char_count_per_segment = 30, + export_subtitle_in_video = True, + ) + if not success: + return + print(colored("success", 'green')) +``` +Reference function handleCreateTranslationAndIterationAndWaitUntilTerminated in [main.py](main.py) + +