Skip to content

Commit 846c77d

Browse files
committed
feat: better logging support
1 parent: 6398948 · commit: 846c77d

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

index.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ def get_transcript(video_id, language, video_info, verbose=True):
105105
transcript = ''
106106
current_chapter_index = 0
107107
chapters = video_info["chapters"]
108-
logging.info(f"""Transcript_List Length: {
108+
logging.info(f"""Transcript List Length: {
109109
len(transcript_list)}, Chapter Length: {len(chapters)}""")
110110

111111
for i, line in enumerate(transcript_list):
@@ -125,8 +125,6 @@ def get_transcript(video_id, language, video_info, verbose=True):
125125
buffer_time = 2
126126

127127
if start_time >= chapter_start_seconds - buffer_time:
128-
logging.info(
129-
f'\n\n## {chapters[current_chapter_index]["title"]}\n')
130128
current_chapter_index += 1
131129
except Exception as e:
132130
logging.error(
@@ -148,21 +146,19 @@ def get_transcript(video_id, language, video_info, verbose=True):
148146

149147
def process_and_save_transcript(video_id, video_info, language, generate_punctuated, output_dir, filename, verbose, punctuation_model):
150148
try:
149+
logging.info('Getting transcript...')
151150
raw_transcript = get_transcript(
152151
video_id, language, video_info, verbose)
153-
logging.info("Raw Transcript Length: %d", len(raw_transcript))
154152

155153
if generate_punctuated:
154+
logging.info('Generating punctuated transcript...')
156155
with_punctuation = add_punctuation(
157156
raw_transcript, punctuation_model)
158157
with_punctuation = remove_period_after_hashes(with_punctuation)
159-
logging.info("Punctuation Char Length: %d", len(with_punctuation))
158+
logging.info('Capitalizing sentences...')
160159
sentences = nltk.sent_tokenize(with_punctuation)
161-
logging.info("Sentences to process, (punctuated): %d",
162-
len(sentences))
163160
else:
164161
sentences = nltk.sent_tokenize(raw_transcript)
165-
logging.info("Sentences to process, (raw): %d", len(sentences))
166162

167163
# Capitalize sentences without batching
168164
capitalized_sentences = capitalize_sentences(sentences)
@@ -171,13 +167,16 @@ def process_and_save_transcript(video_id, video_info, language, generate_punctua
171167
capitalized_transcript = double_linesep.join(capitalized_sentences)
172168
output_path = os.path.join(output_dir, f'{filename}.md')
173169

170+
logging.info(f'Saving transcript to {output_path}...')
174171
with open(output_path, 'w', encoding='utf-8') as f:
175172
f.write(capitalized_transcript)
176173

174+
# set log level to info to print the output path
175+
logging.getLogger().setLevel(logging.INFO)
177176
if generate_punctuated:
178-
logging.info(f'Punctuated transcript saved to {output_path}')
177+
logging.info(f'Punctuated transcript saved to \'{output_path}\'')
179178
else:
180-
logging.info(f'Raw transcript saved to {output_path}')
179+
logging.info(f'Raw transcript saved to \'{output_path}\'')
181180

182181
except Exception as e:
183182
logging.error(f'Error: {e}')
@@ -191,6 +190,7 @@ def getVideoInfo(video_id):
191190
raise Exception(
192191
"No API key found, please set the YOUTUBE_API_KEY environment variable. \n Example: export YOUTUBE_API_KEY=your_api_key"
193192
)
193+
logging.info('Getting video info...')
194194
youtube = googleapiclient.discovery.build(
195195
"youtube", "v3", developerKey=api_key)
196196
request = youtube.videos().list(part="id,snippet",
@@ -242,6 +242,10 @@ def main():
242242

243243
args = parser.parse_args()
244244

245+
# if verbose is false, set logging level to error
246+
if not args.verbose:
247+
logging.getLogger().setLevel(logging.ERROR)
248+
245249
video_id = parse_youtube_url(args.url)
246250
video_info = getVideoInfo(video_id)
247251
filename = args.filename or clean_for_filename(

0 commit comments

Comments
 (0)