Includes full pagination support, so that you actually fetch all of the logs
# Usage:
# ./get_all_logs.py <log_group> <log_stream> <output_file_path>
# e.g.
# ./get_all_logs.py /syslog/myinstance my_log_stream output.log
#!/usr/bin/env python3
'Consumes all logs from a cloudwatch log stream'
import json
import sys
from datetime import datetime
from itertools import count
import boto3
client = boto3.client('logs')
logGroupName = sys.argv[1]
logStreamName = sys.argv[2]
output = sys.argv[3]
time_start = datetime.now()
total = 0
page_args = {}
with open(output, 'w', encoding='utf-8') as ostream:
for i in count():
response = client.get_log_events(
**{
'logGroupName': logGroupName,
'logStreamName': logStreamName,
'startFromHead': True,
},
**page_args
)
ostream.write('\n'.join([json.dumps(event) for event in response['events']]))
total += len(response['events'])
if response['nextForwardToken'] == page_args.get('nextToken', ''):
break
page_args = {'nextToken': response['nextForwardToken']}
print(json.dumps(
{'duration': str(datetime.now()-time_start), 'total': total, 'next_page': page_args}
))
print('- Finished')