@@ -105,7 +105,7 @@ def get_transcript(video_id, language, video_info, verbose=True):
105
105
transcript = ''
106
106
current_chapter_index = 0
107
107
chapters = video_info ["chapters" ]
108
- logging .info (f"""Transcript_List Length: {
108
+ logging .info (f"""Transcript List Length: {
109
109
len (transcript_list )} , Chapter Length: { len (chapters )} """ )
110
110
111
111
for i , line in enumerate (transcript_list ):
@@ -125,8 +125,6 @@ def get_transcript(video_id, language, video_info, verbose=True):
125
125
buffer_time = 2
126
126
127
127
if start_time >= chapter_start_seconds - buffer_time :
128
- logging .info (
129
- f'\n \n ## { chapters [current_chapter_index ]["title" ]} \n ' )
130
128
current_chapter_index += 1
131
129
except Exception as e :
132
130
logging .error (
@@ -148,21 +146,19 @@ def get_transcript(video_id, language, video_info, verbose=True):
148
146
149
147
def process_and_save_transcript (video_id , video_info , language , generate_punctuated , output_dir , filename , verbose , punctuation_model ):
150
148
try :
149
+ logging .info ('Getting transcript...' )
151
150
raw_transcript = get_transcript (
152
151
video_id , language , video_info , verbose )
153
- logging .info ("Raw Transcript Length: %d" , len (raw_transcript ))
154
152
155
153
if generate_punctuated :
154
+ logging .info ('Generating punctuated transcript...' )
156
155
with_punctuation = add_punctuation (
157
156
raw_transcript , punctuation_model )
158
157
with_punctuation = remove_period_after_hashes (with_punctuation )
159
- logging .info ("Punctuation Char Length: %d" , len ( with_punctuation ) )
158
+ logging .info ('Capitalizing sentences...' )
160
159
sentences = nltk .sent_tokenize (with_punctuation )
161
- logging .info ("Sentences to process, (punctuated): %d" ,
162
- len (sentences ))
163
160
else :
164
161
sentences = nltk .sent_tokenize (raw_transcript )
165
- logging .info ("Sentences to process, (raw): %d" , len (sentences ))
166
162
167
163
# Capitalize sentences without batching
168
164
capitalized_sentences = capitalize_sentences (sentences )
@@ -171,13 +167,16 @@ def process_and_save_transcript(video_id, video_info, language, generate_punctua
171
167
capitalized_transcript = double_linesep .join (capitalized_sentences )
172
168
output_path = os .path .join (output_dir , f'{ filename } .md' )
173
169
170
+ logging .info (f'Saving transcript to { output_path } ...' )
174
171
with open (output_path , 'w' , encoding = 'utf-8' ) as f :
175
172
f .write (capitalized_transcript )
176
173
174
+ # set log level to info to print the output path
175
+ logging .getLogger ().setLevel (logging .INFO )
177
176
if generate_punctuated :
178
- logging .info (f'Punctuated transcript saved to { output_path } ' )
177
+ logging .info (f'Punctuated transcript saved to \' { output_path } \' ' )
179
178
else :
180
- logging .info (f'Raw transcript saved to { output_path } ' )
179
+ logging .info (f'Raw transcript saved to \' { output_path } \' ' )
181
180
182
181
except Exception as e :
183
182
logging .error (f'Error: { e } ' )
@@ -191,6 +190,7 @@ def getVideoInfo(video_id):
191
190
raise Exception (
192
191
"No API key found, please set the YOUTUBE_API_KEY environment variable. \n Example: export YOUTUBE_API_KEY=your_api_key"
193
192
)
193
+ logging .info ('Getting video info...' )
194
194
youtube = googleapiclient .discovery .build (
195
195
"youtube" , "v3" , developerKey = api_key )
196
196
request = youtube .videos ().list (part = "id,snippet" ,
@@ -242,6 +242,10 @@ def main():
242
242
243
243
args = parser .parse_args ()
244
244
245
+ # if verbose is false, set logging level to error
246
+ if not args .verbose :
247
+ logging .getLogger ().setLevel (logging .ERROR )
248
+
245
249
video_id = parse_youtube_url (args .url )
246
250
video_info = getVideoInfo (video_id )
247
251
filename = args .filename or clean_for_filename (
0 commit comments