OK, so I had some spare time to play with this and this is what I came up with. This is only useful if you have access to your own logfiles, and if you're running postfix mail server on Linux.
Its ugly, clumsy and kludged, but it's working for me. I'm hoping that others can hack this around to make it work on other platforms. And I'm sure someone can improve on my code. I'll make a wiki-page with the fruits of this thread.
This script generates a report log and optionally an SQL script to remove offending emails. Note that you might not necessarily want to remove all the emails, so you need to review the report first. If you're gung ho, I guess you could automate the script, but I certainly wouldn't advise it.
Code:
#!/bin/bash
# SugarCRM logfile Bounce parser v. 1.1
OUTPUTFILE=bad_emails_`date +%F`.txt
OUTPUTSQL=bad_emails_`date +%F`.sql
MAILLOG=mail.log
function text_report {
echo "Starting to Analyse logs at `date`" > $OUTPUTFILE
echo "Start Date = " `head -1 $MAILLOG | awk '{ print $1, $2, $3 }'` >> $OUTPUTFILE
echo "End Date = " `tail -1 $MAILLOG | awk '{ print $1, $2, $3 }'` >> $OUTPUTFILE
echo "Crunching " `wc -l $MAILLOG | awk '{ print $1 }'` " lines" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "======= Host not found ========" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "=> Domains associated with Host Not found" >> $OUTPUTFILE
grep "Host not found" $MAILLOG | sed -e 's/.*for name=//' | sed -e 's/ type=.*//' | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "=> Email addresses associated with Host Not found" >> $OUTPUTFILE
grep "Host not found" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "========= Connection Refused ===========" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "==> Domains with 'Connection Refused'" >> $OUTPUTFILE
grep "Connection refused" $MAILLOG | sed -e 's/.*connect to //' | sed -e 's/ Connection refused.*//' | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "==> Emails associated with 'Connection Refused'" >> $OUTPUTFILE
grep "Connection refused" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | grep -v "Connection refused" | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "====== Connection Timed Out ======== " >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "==> Domains with Connection Timed Out" >> $OUTPUTFILE
grep "Connection timed out" $MAILLOG | sed -e 's/.*connect to //' | sed -e 's/ Connection timed out.*//' | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo >> $OUTPUTFILE
echo "==> Emails associated with Connection Timed Out" >> $OUTPUTFILE
grep "Connection timed out" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | grep -v "Connection timed out" | sort | uniq -c >> $OUTPUTFILE
echo "==================" >> $OUTPUTFILE
echo "Your report has been generated in " $OUTPUTFILE
echo
}
function sql_init {
echo "-- Initialising file - " `date` > $OUTPUTSQL
}
function sql_emails {
echo "-- Emails for Host not found" >> $OUTPUTSQL
for email in `grep "Host not found" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | sort -u`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address = "'$email'";' >> $OUTPUTSQL
done
echo "-- Emails for Connection Refused" >> $OUTPUTSQL
for email in `grep "Connection refused" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | grep -v "Connection refused" | sort -u`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address = "'$email'";' >> $OUTPUTSQL
done
echo "-- Emails for Connection Timed Out" >> $OUTPUTSQL
for email in `grep "Connection timed out" $MAILLOG | sed -e 's/.*to=<//' | sed -e 's/>, relay=none.*//' | grep -v "Connection timed out" | sort -u`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address = "'$email'";' >> $OUTPUTSQL
done
}
function sql_domains {
# NOT ACTIVE YET
echo "-- Domains for Host not found" >> $OUTPUTSQL
for domain in `xxx`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address LIKE "%'$domain'";' >> $OUTPUTSQL
done
echo "-- Domains for Connection Refused" >> $OUTPUTSQL
for domain in `xxx`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address LIKE "%'$domain'";' >> $OUTPUTSQL
done
echo "-- Domains for Connection Timed Out" >> $OUTPUTSQL
for domain in `xxx`
do
echo 'UPDATE email_addresses set invalid_email=0, date_modified=now() WHERE email_address LIKE "%'$domain'";' >> $OUTPUTSQL
done
}
function sql_end {
echo "SQL report generated to $OUTPUTSQL."
}
# Main control structure
echo "----------------------------------------------------------"
echo "Analysing $MAILLOG and outputting to $OUTPUTFILE and/or $OUTPUTSQL"
echo "Log Start Date = " `head -1 $MAILLOG | awk '{ print $1, $2, $3 }'`
echo "Log End Date = " `tail -1 $MAILLOG | awk '{ print $1, $2, $3 }'`
echo "Crunching " `wc -l $MAILLOG | awk '{ print $1 }'` " lines"
echo "----------------------------------------------------------"
echo Choose an option
echo
echo " 1) Run Text Report only"
echo " 2) Run Text Report and generate SQL code"
echo " 3) Generate SQL code only"
read CONN
case $CONN in
"1") text_report;;
"2") text_report
sql_init
sql_emails
sql_end;;
"3") sql_init
sql_emails
sql_end;;
*) echo "Sorry, try again";;
esac And yes, a neat web interface would be much, much better!
Bookmarks