Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

功能如下 #144

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
750 changes: 10 additions & 740 deletions README.md

Large diffs are not rendered by default.

Binary file added WeixinBot.zip
Binary file not shown.
544 changes: 544 additions & 0 deletions autosub.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions googlekey.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"type": "service_account",
"project_id": "wechatbot-154909",
"private_key_id": "e53738e2d14ccd6a29de580163d1ff49cc08e0c5",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCgnnEvff7A8nHA\nNsBCQpqcOHKhblAZSEFMFZkOKjdjxzszpdZL4nGZDEJv/LnN96pQUwdzmdq/I3Nz\n98O9L1GjU2WdC8g4pHnMAz70xUWqf2yQDJkXMMbHxs+jvs19agkaFI0FIU0cjG1f\nmlupAYPbvELbEkvw8mcKBV4IcAGTmGb1I72uwk15obAYYH+VtEVefkNaWx4eHHK6\n/6qXsU47wTLM+9v2iDyOp6GEwCHdpZXg8ghd0SPE3KRo/7nPZm3iuj2cKizrGjLg\nojl4kpFyh/jMHxfpjDTBICAv5UMc2ZMLffPgpUh1QjZnsBtaolU5T0LUSylgrbG/\nLKbGPigtAgMBAAECggEAHJp6v6Fpd8T1OuzFKkegLwfiahsyCn6SmlESU7Jy8MLv\nKRB0bEkkP8QuJLFWCXb8jpdqsUyJ5xYSHmnWIt/FYxeb9e6+NlAcHJfLY12qPWW5\n2KFVtgqKq9Mc8SUuhRIYEtvsDSjUCax8YdUkU6GgcMZDBa5pdbxFW0R/bXM85KKs\nH82dXtylgt15/SNrYKFnCXOFmmZri8GT1We1qNGBJqcJbPA0v3qhTWWmW0E9Zhrp\nMO1AEBR285wGq2E2WFVq4sH9IQoGobXevg04Qr+BGyLAerZrYQWbCfFNcknZcQCX\nnR6FFagNLtoEYBCl0Fnll8WVaR2Sm1v9R4FdPctU4QKBgQDN8OgWfS+XUxYv7Cns\n6YQDhEouqdqHV/g4b8k0fHgi2XWGhMhnEs984JnCyfoSzeng3s3koz9P+GWEn0Cd\n92ud3AMqvaQ+XKbNlShaoyYKsa5mLPqbdxKIU/BdKLD+0t/Y+Uk6MNadLfRyL2lz\nh6nO6CK1fxd62udDATCx24J4iQKBgQDHqUaHZlj+PR2y300WrLKk+oBCRYEYOpF4\nuGnrNETnu99yBiRI35yRQ/UAYShEo/B0G2wV7LNon8NMG/XdJG1qqX7QS/lWbE8x\nh9dZbW/GopVGyH3eOPDdjFkqgM2/9coSNgtaHQRpoUwpB6sNqr+3CpWSS/If/gey\nPw5Mt9UBhQKBgHIldi6I4qkId4LGbpKO+AoO+CMXKDXeT4nQhABggSIn0BloXb+r\n0G+R+gfadY6YrWpjdFGnHj+QGYzBzEoCSpbrzKGTDYc3SYknhH+AcGR7CYQ+qAUl\noZMIm3C3Titf7IzR14G2ci4au6PSnatYZYl2Z06cDAKfdJFSBpB+b7mpAoGBAJGz\nbL2IKa82SELtA2NlmWQdTZWPBPr6WogfA2RZm2METbX212m6jJXRYqvpOqqEfAcu\nr7x8JBxYYftbeBrt507r65fzqTuxBEWf3L3sx6HLqWWjD38oTFVQgw0qO3s20URi\n3fvfqwWuaFaUuvXZQdhASBAfjcxc457mtvezOyTFAoGAJnw8jFhxTqCfZCi3YHqZ\nclBgQ/w8ahqo+V2wF4zwuhyFmqbjuw7N4zOdezXgaTZCM7SDpvlpp4Ygh7ubO+91\nZ71FoGnR78wh8XjEQ1ixSWzp42HQQrUCYgnhFnBOjJaNxjGTGlMMo+Oej6Mt08Ax\nX3s6PmAl5aYA3kiItAlvbms=\n-----END PRIVATE KEY-----\n",
"client_email": "[email protected]",
"client_id": "106655796840196313946",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/webot-361%40wechatbot-154909.iam.gserviceaccount.com"
}
45 changes: 45 additions & 0 deletions language_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Language codes used as the `conversation_language` setting of
# simsimi.SimSimi, which sends the value as the `lc` request parameter.
#
# NOTE(review): several values are not ISO 639-1 codes (e.g. "ch", "kh",
# "ph", "rs", "vn"); presumably they follow the SimSimi service's own code
# table -- confirm against the API documentation before "correcting" them.
LC_AFRIKAANS = "af"
LC_ARABIC = "ar"
LC_BULGARIAN = "bg"
LC_CATALAN = "ca"
LC_CHINESE_SIMPLIFIED = "ch"
LC_CZECH = "cs"
LC_CYMRAEG = "cy"
LC_DANSK = "da"
LC_DEUTSCH = "de"
LC_GREEK = "el"
LC_ENGLISH = "en"
LC_SPANISH = "es"
LC_EUSKARA = "eu"
LC_SUOMI = "fi"
LC_FRENCH = "fr"
LC_HEBREW = "he"
LC_HINDI = "hi"
LC_CROATIAN = "hr"
LC_HUNGARIAN = "hu"
LC_INDONESIAN = "id"
LC_ITALIANO = "it"
LC_JAPANESE = "ja"
LC_KHMER = "kh"
LC_KOREAN = "ko"
LC_LITHUANIAN = "lt"
LC_MALAYALAM = "ml"
LC_BAHASA_MELAYU = "ms"
LC_NORSK = "nb"
LC_NEDERLANDS = "nl"
LC_PUNJABI = "pa"
LC_FILIPINO = "ph"
LC_POLSKI = "pl"
LC_PORTUGUESE = "pt"
LC_ROMANIAN = "ro"
LC_SERBIAN = "rs"
LC_RUSSIAN = "ru"
LC_SLOVAK = "sk"
LC_SVENSKA = "sv"
LC_TAMIL = "ta"
LC_TELUGU = "te"
LC_THAI = "th"
LC_TURKISH = "tr"
LC_UKRAINIAN = "uk"
LC_TIENG_VIET = "vn"
LC_CHINESE_TRADITIONAL = "zh"
Binary file added language_codes.pyc
Binary file not shown.
60 changes: 60 additions & 0 deletions maintest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

import subprocess
import os
import codecs
import json
import re

# Lower-case file extensions recognised by file_format(). The number in each
# trailing comment is the category code file_format() returns for that list.
vedio_formats = ['mp4','avi','wmv','mov'] # 1 -> video container extensions
audio_formats = ['wav','flac','mp3','aiff'] # 2 -> audio file extensions

class fileNameError(ValueError):
    """Raised by file_upload() when a path has no usable name or extension."""


def file_upload(voice):
    """Transcribe a media file with autosub and return the text as one string.

    Args:
        voice: path of the audio/video file. It must end in
            ``<name>.<extension>``; a directory component is optional.

    Returns:
        The ``content`` fields of every entry in the generated JSON
        transcript, each preceded by a single space and concatenated in order
        (so a non-empty result starts with a space).

    Raises:
        fileNameError: if the path has no file name or no extension.
        Exception: propagated from file_format() (via autosubing()) when the
            extension is not a supported format.
    """
    # splitext only looks at the last path component, so dots in directory
    # names are not mistaken for the extension separator.
    base_pwd, dot_type = os.path.splitext(voice)
    file_type = dot_type[1:].lower()
    if not file_type or not os.path.basename(base_pwd):
        raise fileNameError('fileNameError')

    # Keep the caller's path untouched: only the extension is lower-cased for
    # the format check. Lower-casing the file name itself would point at a
    # different file on case-sensitive filesystems.
    # NOTE(review): assumes autosub writes its output next to the input as
    # "<input without extension>.json" -- confirm against autosub.py.
    transcripts_timed_pwd = base_pwd + '.json'
    autosubing(voice, transcripts_timed_pwd, file_type)

    # The context manager closes the transcript even if parsing fails.
    with open(transcripts_timed_pwd) as json_data:
        transcripts_timed = json.load(json_data)
    return ''.join(' ' + entry['content'] for entry in transcripts_timed)

def autosubing(file_pwd,transcripts_timed_pwd,file_type):
    """Run ``autosub.py`` on *file_pwd*, requesting JSON output.

    Args:
        file_pwd: path of the media file handed to autosub.
        transcripts_timed_pwd: expected path of the JSON transcript. It is
            not passed to autosub; it is kept so existing callers keep
            working.
        file_type: lower-case file extension, validated with file_format().

    Raises:
        Exception: propagated from file_format() when *file_type* is not a
            supported format.
    """
    # Video and audio inputs currently use the same command line, so the
    # format lookup only serves as validation (it raises for unknown types).
    file_format(file_type)

    # Pass an argument list and skip the shell: paths containing spaces or
    # shell metacharacters are handed to autosub verbatim instead of being
    # re-parsed (or executed) by a shell.
    subprocess.call(["python", "autosub.py", file_pwd, "-F", "json"])
    print("Autosubed")


def file_format(file_type):
    """Return the category code for a file extension.

    Args:
        file_type: file extension without the dot; matched as-is against the
            module-level extension lists.

    Returns:
        1 if the extension is listed in ``vedio_formats``, 2 if it is listed
        in ``audio_formats``.

    Raises:
        Exception: with message 'Format prohibited' for any other extension.
    """
    # Checked in order, so the video list wins if an extension were in both.
    for category, extensions in ((1, vedio_formats), (2, audio_formats)):
        if file_type in extensions:
            return category
    raise Exception('Format prohibited')

# dir1 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_2546547996039896197.mp3'
# dir2 = '/Users/n0where/Desktop/DFA_01.flac'
# dir3 = '/Users/n0where/GoogleDrive/ASQ/ASQ/transcripts/Chem101.mp4'
# dir4 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_1089270824656503909.mp3'
# dir5 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_8675834799709315495.mp3'
# print file_upload(dir5)
Binary file added maintest.pyc
Binary file not shown.
5 changes: 5 additions & 0 deletions response_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Result codes for SimSimi API replies. simsimi.SimSimi.getConversation
# compares the reply's `result` field against RESPONSE_OK and raises
# SimSimiException for anything else.
# NOTE(review): success is 100 here, not HTTP 200 -- presumably the service's
# own result code; confirm against the SimSimi API documentation.
RESPONSE_OK = 100
RESPONSE_BAD_REQUEST = 400
RESPONSE_UNAUTHORIZED = 401
RESPONSE_NOT_FOUND = 404
RESPONSE_500 = 500
Binary file added response_codes.pyc
Binary file not shown.
33 changes: 33 additions & 0 deletions simsimi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from language_codes import LC_ENGLISH
import urllib2, urllib, json
from response_codes import RESPONSE_OK

class SimSimiException(Exception):
    """Error raised when a SimSimi API reply does not report success."""

class SimSimi(object):
    """Small client for the SimSimi conversation HTTP API.

    Keyword arguments (all optional):
        conversation_request_url: endpoint to query; defaults to the sandbox
            URL ``http://sandbox.api.simsimi.com/request.p``.
        conversation_key: API key, sent as the ``key`` parameter
            (default ``''``).
        conversation_language: language code, sent as ``lc``
            (default ``LC_ENGLISH``).
        conversation_filter: filter value, sent as ``ft`` (default ``'0.0'``).

    Positional arguments are accepted for compatibility and ignored.
    """

    def __init__(self, *args, **kwargs):
        self.conversation_request_url = kwargs.get('conversation_request_url','http://sandbox.api.simsimi.com/request.p')
        self.conversation_key = kwargs.get('conversation_key','')
        self.conversation_language = kwargs.get('conversation_language', LC_ENGLISH)
        self.conversation_filter = kwargs.get('conversation_filter','0.0')

    def getConversation(self, text):
        """Send *text* to the API and return the decoded JSON reply.

        Args:
            text: the message to send; it is URL-encoded into the query
                string together with the key, language and filter settings.

        Returns:
            The reply parsed from JSON, when its ``result`` field equals
            ``RESPONSE_OK``.

        Raises:
            SimSimiException: when the reply's ``result`` is missing or is
                not ``RESPONSE_OK``; the message includes the reply's ``msg``
                field (``None`` if absent).
        """
        requestParam = {
            'key':self.conversation_key,
            'lc':self.conversation_language,
            'ft':self.conversation_filter,
            'text':text
        }

        requestUrl = "%s?%s" % (self.conversation_request_url, urllib.urlencode(requestParam))

        response = urllib2.urlopen(requestUrl)
        try:
            responseDict = json.loads(str(response.read()))
        finally:
            # Always release the connection, even if the body is not valid JSON.
            response.close()

        # .get() keeps a malformed reply (no 'result' / 'msg') on the
        # SimSimiException path instead of leaking a bare KeyError.
        if responseDict.get('result') != RESPONSE_OK:
            raise SimSimiException("SimSimiException occured: %s" % responseDict.get('msg'))

        return responseDict
Binary file added simsimi.pyc
Binary file not shown.
37 changes: 37 additions & 0 deletions simsimitest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#coding=utf-8
import simsimi
import language_codes
import response_codes
import re
from simsimi import SimSimiException

# NOTE(review): API keys are hard-coded in source control; consider loading
# them from configuration or the environment instead.
keys = [
    '20666778-3204-480a-b98b-0d705ad7c170',
    '45c0139e-7a59-4c06-9a83-7a9e4c8f6470',
    '59a9d8b2-e4d0-495a-8a9f-1168cbb1193f',
]
# Number of key rotations performed so far; foo() uses it modulo len(keys).
keys_sum = 0
# One client per API key, all using the simplified-Chinese language code.
simSimis = [
    simsimi.SimSimi(
        conversation_language=language_codes.LC_CHINESE_SIMPLIFIED,
        conversation_key=api_key)
    for api_key in keys
]
# Client currently in use; foo() swaps it when a key hits its limit.
simSimi = simSimis[0]

def foo():
    """Send one probe message and print the reply or a fallback text.

    On any exception the error is printed and mapped to a canned reply by
    looking for marker substrings in its text. When the text contains
    "Limit Exceeded" the module-level ``keys_sum`` counter is advanced and
    ``simSimi`` is switched to the next client in ``simSimis``.
    """
    global simSimi, keys_sum
    try:
        response = simSimi.getConversation(u'。。'.encode('utf-8'))
    except Exception as e:
        print(e)
        error_text = str(e)
        if "Not found" in error_text:
            response = {'response': "傻逼"}
        elif "Limit Exceeded" in error_text:
            # 1-based position of the key that will be used from now on.
            next_key_number = (keys_sum+1) % len(keys)+1
            response = {'response': "到达每日上限了,正在自动更换api_key,使用第%d个api_key"%(next_key_number)}
            keys_sum += 1
            simSimi = simSimis[keys_sum % len(keys)]
        else:
            response = {'response': "代码出现了未知的问题"}

    print(response['response'])

# Call foo() three times in a row whenever this module is executed or imported.
for i in range(3):
foo()
99 changes: 99 additions & 0 deletions transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Speech API sample application using the REST API for batch
processing."""

# [START import_libraries]
import argparse
import base64
import json

from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials
# [END import_libraries]


# [START authenticating]
# Discovery document URL template passed to discovery.build() as
# discoveryServiceUrl. The {api} and {apiVersion} placeholders are left
# unformatted here -- presumably expanded by the client library; confirm.
DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?'
'version={apiVersion}')


def get_speech_service():
    """Build an authorized client for the ``speech`` API, version ``v1beta1``.

    Application default credentials are used (provided through the
    GOOGLE_APPLICATION_CREDENTIALS environment variable), scoped to
    ``cloud-platform``.

    Returns:
        The service object created by ``discovery.build``.
    """
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    default_credentials = GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(scopes)

    # authorize() is applied to the transport that the client is built on.
    http = httplib2.Http()
    scoped_credentials.authorize(http)

    return discovery.build(
        'speech',
        'v1beta1',
        http=http,
        discoveryServiceUrl=DISCOVERY_URL,
    )
# [END authenticating]


def transcribe(speech_file, encoding='LINEAR16', sample_rate=16000,
               language_code='cmn-Hans-CN'):
    """Transcribe the given audio file with a synchronous recognize request.

    Args:
        speech_file: the name of the audio file.
        encoding: value sent as ``config.encoding``; the default 'LINEAR16'
            means raw 16-bit signed little-endian samples.
        sample_rate: value sent as ``config.sampleRate`` in Hz
            (default 16000).
        language_code: BCP-47 language tag sent as ``config.languageCode``.

    Returns:
        The response returned by executing the request. It is also printed.

    NOTE(review): the audio has to actually be in the declared encoding and
    sample rate; other formats (e.g. an .mp3 file) presumably need converting
    first -- confirm against the Speech API documentation.
    """
    # [START construct_request]
    with open(speech_file, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the JSON
        # request.
        speech_content = base64.b64encode(speech.read())

    service = get_speech_service()
    service_request = service.speech().syncrecognize(
        body={
            'config': {
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                'encoding': encoding,
                'sampleRate': sample_rate,
                # See http://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                'languageCode': language_code,
            },
            'audio': {
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    # Single-argument print call: same output on Python 2 and Python 3.
    print(response)
    return response
# print(json.dumps(response))
# [END send_request]

def get_content(speech_file):
    """Return the first transcript recognised in *speech_file*.

    Args:
        speech_file: the name of the audio file, passed to transcribe().

    Returns:
        The ``transcript`` of the first alternative of the first result, or
        the string 'Wrong rec' when the response has no such entry.
    """
    content = transcribe(speech_file)
    try:
        return content['results'][0]['alternatives'][0]['transcript']
    except (KeyError, IndexError):
        # Missing 'results', or an empty results/alternatives list.
        return 'Wrong rec'

# NOTE(review): this runs at import time and uses a hard-coded local path;
# the commented-out argparse entry point below looks like the intended
# replacement -- confirm before relying on importing this module.
print get_content('/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_2376641155930976295.mp3')
# [START run_application]
# if __name__ == '__main__':
# parser = argparse.ArgumentParser()
# parser.add_argument(
# 'speech_file', help='Full path of audio file to be recognized')
# args = parser.parse_args()
# main(args.speech_file)
# [END run_application]

Binary file added transcribe.pyc
Binary file not shown.
Loading