The following Python script uses the AWS SDK for Python (Boto3) and the Elasticsearch client library (elasticsearch-py) to take a daily backup of all available indexes in Elasticsearch: it snapshots every index into a filesystem snapshot repository, archives the repository, and uploads the archive to an S3 bucket. Additionally, it removes backup files older than 7 days from the S3 bucket.
```python
import os
import tarfile
from datetime import datetime, timedelta, timezone

import boto3
from elasticsearch import Elasticsearch

# Elasticsearch connection settings
es_host = 'your_elasticsearch_host'
es_port = 9200

# Snapshot repository settings: "my_backup" must be a registered
# filesystem ("fs") repository, and repo_path must be its location,
# readable from the machine this script runs on
repo_name = 'my_backup'
repo_path = '/var/backups/elasticsearch'

# S3 bucket settings
bucket_name = 'your_s3_bucket_name'
backup_folder = 'elasticsearch-backups/'

# AWS credentials
aws_access_key = 'your_aws_access_key'
aws_secret_key = 'your_aws_secret_key'

# Establish Elasticsearch connection
es = Elasticsearch(f"http://{es_host}:{es_port}")

# Create S3 client
s3 = boto3.client('s3', aws_access_key_id=aws_access_key,
                  aws_secret_access_key=aws_secret_key)

# Name the backup after the current date and time
backup_name = datetime.now().strftime("%Y%m%d%H%M%S")

# Snapshot all indices in a single snapshot; wait_for_completion=True
# blocks until Elasticsearch has finished writing it to the repository
snapshot_body = {
    "indices": "_all",
    "ignore_unavailable": True,
    "include_global_state": False,
}
es.snapshot.create(repository=repo_name, snapshot=backup_name,
                   body=snapshot_body, wait_for_completion=True)

# Archive the repository directory and upload it to the S3 bucket
archive_path = f"/tmp/{backup_name}.tar.gz"
with tarfile.open(archive_path, "w:gz") as tar:
    tar.add(repo_path, arcname=backup_name)
s3.upload_file(archive_path, bucket_name, f"{backup_folder}{backup_name}.tar.gz")

# Delete the local archive
os.remove(archive_path)

# Remove backup files older than 7 days from the S3 bucket
seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=backup_folder)
for obj in response.get('Contents', []):
    if obj['LastModified'] < seven_days_ago:
        s3.delete_object(Bucket=bucket_name, Key=obj['Key'])
```
Make sure to replace the placeholders (`your_elasticsearch_host`, `your_s3_bucket_name`, `your_aws_access_key`, `your_aws_secret_key`) with your actual Elasticsearch host, S3 bucket name, and AWS credentials, and point `repo_path` at the location of your snapshot repository.
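Hardcoding AWS credentials works for a quick test but is risky in practice. Boto3 can also resolve credentials from its standard chain, so on a host with the usual environment variables or an attached IAM role configured you can drop the explicit keys entirely:
```python
import boto3

# With no keys passed, boto3 falls back to its standard credential chain:
# environment variables (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY),
# ~/.aws/credentials, or an attached IAM role.
s3 = boto3.client('s3')
```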
This script assumes that you have already registered a filesystem ("fs") snapshot repository called "my_backup" in Elasticsearch, and that the script runs on a machine that can read the repository's directory (see the registration sketch after the install command below). It also assumes that the required libraries (`boto3` and `elasticsearch`) are installed; the script above targets the 7.x client API. You can install them using pip:
```
pip install boto3 "elasticsearch>=7,<8"
```
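If the "my_backup" repository has not been registered yet, a one-time call along these lines creates it. This is a minimal sketch: the chosen directory must also be listed under `path.repo` in `elasticsearch.yml`, and `/var/backups/elasticsearch` is just an example location.
```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://your_elasticsearch_host:9200")

# Register a shared-filesystem snapshot repository named "my_backup".
# The location must appear under path.repo in elasticsearch.yml.
es.snapshot.create_repository(
    repository="my_backup",
    body={"type": "fs", "settings": {"location": "/var/backups/elasticsearch"}},
)
```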
Remember to schedule this script to run daily using a cron job or any other task scheduling mechanism.
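For example, a crontab entry like the following would run the backup every night at 2:00 AM (the interpreter, script, and log paths are placeholders for your own):
```
0 2 * * * /usr/bin/python3 /opt/scripts/es_backup.py >> /var/log/es_backup.log 2>&1
```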