|
1 |
| -def main(): |
2 |
| - print("main") |
3 |
| - |
4 |
| -if __name__ == "__main__": |
| 1 | +import requests |
| 2 | +import time |
| 3 | +import os |
| 4 | +import json |
| 5 | + |
# AssemblyAI transcript endpoint.
endpoint = 'https://api.assemblyai.com/v2/transcript'

# Token and audio location are read from the environment; each is None when
# its variable is unset.
api_token = os.environ.get('ASSEMBLY_AI_TOKEN')
content_url = os.environ.get('CONTENT_URL')

# Headers sent with every API request made by this script.
headers = {
    'authorization': api_token,
    'content-type': 'application/json',
}

# Alias of ``endpoint`` so the URL literal is defined in exactly one place.
transcript_endpoint = endpoint
| 17 | + |
def request_transcript(timeout=30):
    """Submit the audio at ``content_url`` to AssemblyAI for transcription.

    Args:
        timeout: Seconds ``requests`` waits on the HTTP call before raising
            a timeout error. Without a timeout the call can block forever.

    Returns:
        The decoded JSON body of the API response.
    """
    transcript_request = {
        'audio_url': content_url,
        # Profanity Filtering
        # https://www.assemblyai.com/docs/core-transcription#profanity-filtering
        'filter_profanity': True,
        # Automatic Punctuation and Casing
        # https://www.assemblyai.com/docs/core-transcription#automatic-punctuation-and-casing
        'punctuate': True,
        # Automatic Language Detection
        # https://www.assemblyai.com/docs/core-transcription#automatic-language-detection
        'language_detection': True,
        # Detect Important Phrases and Words
        # https://www.assemblyai.com/docs/audio-intelligence#detect-important-phrases-and-words
        'auto_highlights': True,
        # Content Moderation
        # https://www.assemblyai.com/docs/audio-intelligence#content-moderation
        'content_safety': True,
        # Topic Detection (IAB Categories)
        # https://www.assemblyai.com/docs/audio-intelligence#topic-detection-iab-classification
        'iab_categories': True,
        # Sentiment Analysis
        # https://www.assemblyai.com/docs/audio-intelligence#sentiment-analysis
        'sentiment_analysis': True,
        # Summarization; alternatives to 'bullets' are 'gist', 'headline'
        # and 'paragraph'.
        # https://www.assemblyai.com/docs/audio-intelligence#summarization
        # NOTE(review): confirm against the API docs that 'summary_type'
        # takes effect without an accompanying summarization flag.
        'summary_type': 'bullets',
        # Automatic Chapters
        # https://www.assemblyai.com/docs/audio-intelligence#auto-chapters
        'auto_chapters': True,
        # Entity Detection
        # https://www.assemblyai.com/docs/audio-intelligence#entity-detection
        'entity_detection': True,
    }
    transcript_response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=headers,
        timeout=timeout,
    )
    return transcript_response.json()
| 41 | + |
def make_polling_endpoint(transcript_response):
    """Build the URL used to poll a submitted transcription job.

    Args:
        transcript_response: Decoded JSON of the submit request; it must
            contain the transcript ``'id'``.

    Returns:
        The polling URL for that transcript id. The URL is also printed.

    Raises:
        KeyError: If the response has no ``'id'``. The message includes the
            response's ``'error'`` field when present, otherwise the whole
            response, so the failure cause is visible.
    """
    try:
        transcript_id = transcript_response['id']
    except KeyError:
        reason = transcript_response.get('error', transcript_response)
        raise KeyError('transcript response has no id: ' + str(reason))

    polling_endpoint = 'https://api.assemblyai.com/v2/transcript/' + transcript_id
    print('Polling endpoint: ' + polling_endpoint)
    return polling_endpoint
| 47 | + |
def wait_for_completion(polling_endpoint, poll_interval=5, timeout=30):
    """Poll a transcript until it reports completion or failure.

    Args:
        polling_endpoint: URL that is polled with GET.
        poll_interval: Seconds to sleep between polls.
        timeout: Seconds allowed for each HTTP request, so a stalled
            connection cannot hang the loop.

    Returns:
        The decoded JSON response once its ``'status'`` is ``'completed'``,
        or ``None`` when the status is ``'error'`` (the error text is
        printed first).
    """
    while True:
        polling_response = requests.get(
            polling_endpoint, headers=headers, timeout=timeout
        ).json()
        status = polling_response['status']

        if status == 'completed':
            return polling_response

        if status == 'error':
            print('Error: ' + polling_response['error'])
            # Explicit so callers can see that failure is signalled by None.
            return None

        print('Status: ' + status)
        time.sleep(poll_interval)
| 62 | + |
def get_paragraphs(polling_endpoint, timeout=30):
    """Fetch the paragraphs of a transcript.

    Args:
        polling_endpoint: Transcript URL; ``'/paragraphs'`` is appended.
        timeout: Seconds allowed for the HTTP request.

    Returns:
        A new list holding the items of the response's ``'paragraphs'``
        field.
    """
    response = requests.get(
        polling_endpoint + '/paragraphs', headers=headers, timeout=timeout
    )
    return list(response.json()['paragraphs'])
| 70 | + |
def get_sentences(polling_endpoint, timeout=30):
    """Fetch the sentences of a transcript.

    Args:
        polling_endpoint: Transcript URL; ``'/sentences'`` is appended.
        timeout: Seconds allowed for the HTTP request.

    Returns:
        A new list holding the items of the response's ``'sentences'``
        field.
    """
    response = requests.get(
        polling_endpoint + '/sentences', headers=headers, timeout=timeout
    )
    return list(response.json()['sentences'])
| 78 | + |
def _write_json(path, payload):
    """Write ``payload`` to ``path`` as JSON indented by four spaces."""
    with open(path, 'w', encoding='utf-8') as outfile:
        json.dump(payload, outfile, indent=4)


def _write_text(path, text):
    """Write ``text`` to ``path`` as UTF-8."""
    with open(path, 'w', encoding='utf-8') as outfile:
        outfile.write(text)


def main():
    """Request a transcription and save every result to the working directory.

    Writes ``request.json`` (the submit response), then, once the job has
    completed, ``transcription.json``, ``paragraphs.json``,
    ``sentences.json``, ``srt.txt`` and ``vtt.txt``. If the transcription
    fails, only ``request.json`` is written.
    """
    # Save the response to the transcription request.
    transcript_response = request_transcript()
    _write_json('request.json', transcript_response)

    # Poll and wait.
    polling_endpoint = make_polling_endpoint(transcript_response)
    transcription = wait_for_completion(polling_endpoint)
    if transcription is None:
        # The job failed and the error was already printed; the remaining
        # endpoints have nothing to return for it.
        return

    _write_json('transcription.json', transcription)
    _write_json('paragraphs.json', get_paragraphs(polling_endpoint))
    _write_json('sentences.json', get_sentences(polling_endpoint))

    # Subtitle exports are plain text rather than JSON.
    for suffix, filename in (('/srt', 'srt.txt'), ('/vtt', 'vtt.txt')):
        response = requests.get(
            polling_endpoint + suffix, headers=headers, timeout=30
        )
        _write_text(filename, response.text)


if __name__ == '__main__':
    main()
|
0 commit comments