Email analyzer in python
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
//#include "stdafx.h"
//#include "SmartlyParser.h"
/// One set of distinct whitespace-separated words per sentence.
using setWords = std::vector<std::set<std::string>>;

/**
 * @brief Extractive text summarizer.
 *
 * Splits a text into '.'-delimited sentences, scores every sentence by its
 * word overlap with all other sentences plus a double-weighted overlap with
 * the title, and concatenates the best-scoring sentences until a character
 * budget is exhausted.
 */
class SmartAnalyzer
{
private:
    double sentence_intersection(std::set<std::string> const& a, std::set<std::string> const& b);
    std::vector<std::string> parseSentences(std::string const& text);
    setWords stringSets(std::string const& text);
    std::string format(std::string text, bool const& includeDot = false);
public:
    SmartAnalyzer() {}
    std::string getSummary(std::string title, std::string const& text, int const& limit);
};

/**
 * @brief Similarity of two word sets: shared words divided by the average set size.
 *
 * @param a First word set.
 * @param b Second word set.
 * @return A value in [0, 1]; 0 when either set is empty.
 */
double SmartAnalyzer::sentence_intersection(std::set<std::string> const& a, std::set<std::string> const& b)
{
    // Nothing can be shared with an empty set; returning early also avoids a
    // zero divisor (and the NaN score it would produce).
    if (a.empty() || b.empty()) {
        return 0.0;
    }
    std::vector<std::string> common;
    std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(common));
    // Divide by 2.0 so the average size is not truncated to an integer.
    const double averageSize = static_cast<double>(a.size() + b.size()) / 2.0;
    return static_cast<double>(common.size()) / averageSize;
}

/**
 * @brief Splits text into sentences on the '.' character.
 *
 * @param text Raw input text.
 * @return The fragments between dots, without the dots and without trimming.
 */
std::vector<std::string> SmartAnalyzer::parseSentences(std::string const& text)
{
    std::vector<std::string> output;
    std::istringstream iss(text);
    std::string token;
    while (std::getline(iss, token, '.')) {
        output.push_back(token);
    }
    return output;
}

/**
 * @brief Builds one set of whitespace-separated words per '.'-delimited sentence.
 *
 * @param text Text to split; callers pass it through format() first.
 * @return Word sets in sentence order.
 */
setWords SmartAnalyzer::stringSets(std::string const& text)
{
    setWords output;
    std::istringstream iss(text), current;
    std::string token;
    while (std::getline(iss, token, '.')) {
        // Reset the EOF/fail state left by the previous sentence before reuse.
        current.clear();
        current.str(token);
        output.emplace_back(std::istream_iterator<std::string>(current), std::istream_iterator<std::string>());
    }
    return output;
}

/**
 * @brief Normalizes text for word comparison.
 *
 * Removes ',', '!' and '"' (and '.' when requested) and lower-cases the rest.
 *
 * @param text       Text to normalize (taken by value and modified).
 * @param includeDot When true, '.' characters are removed as well.
 * @return The normalized text.
 */
std::string SmartAnalyzer::format(std::string text, bool const& includeDot)
{
    text.erase(std::remove_if(text.begin(), text.end(),
                              [includeDot](char c) {
                                  return c == ',' || c == '!' || c == '"' || (includeDot && c == '.');
                              }),
               text.end());
    // std::tolower is undefined for negative char values, so go through unsigned char.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return text;
}

/**
 * @brief Produces a summary of @p text made of its highest-scoring sentences.
 *
 * Each sentence is scored as the sum of its similarity to every other
 * sentence plus twice its similarity to the title. Sentences are appended in
 * descending score order (ties keep document order), each followed by '.',
 * until the next one would push the result past @p limit characters.
 * If not even the best sentence fits, that sentence is cut back at its commas
 * until it fits or no comma is left; in the latter case the result may exceed
 * the limit.
 *
 * @param title Title used to boost sentences that share its words.
 * @param text  Text to summarize.
 * @param limit Maximum summary length in characters; values <= 0 are treated as 0.
 * @return The summary, or an empty string when @p text contains no sentences.
 */
std::string SmartAnalyzer::getSummary(std::string title, std::string const& text, int const& limit)
{
    const std::vector<std::string> sentences = parseSentences(text);
    if (sentences.empty()) {
        return std::string();
    }

    setWords sentenceWords = stringSets(format(text));
    // format() can strip a trailing fragment that consists only of punctuation
    // (e.g. text ending in ".!"), leaving one set fewer than there are
    // sentences; pad with empty sets so both containers share indices.
    sentenceWords.resize(sentences.size());

    // A named stream is required: istream_iterator takes a non-const reference.
    std::istringstream titleStream(format(title, true));
    const std::set<std::string> titleWords((std::istream_iterator<std::string>(titleStream)),
                                           std::istream_iterator<std::string>());

    // (score, sentence index). A vector keeps sentences with equal scores,
    // which a map keyed by score would silently discard.
    typedef std::pair<double, std::size_t> Ranked;
    std::vector<Ranked> ranking;
    ranking.reserve(sentences.size());
    for (std::size_t i = 0; i < sentences.size(); ++i) {
        double score = 0.0;
        for (std::size_t j = 0; j < sentences.size(); ++j) {
            if (i == j) {
                // Compare with the title instead of with itself, at double weight.
                score += sentence_intersection(sentenceWords[i], titleWords) * 2;
            } else {
                score += sentence_intersection(sentenceWords[i], sentenceWords[j]);
            }
        }
        ranking.emplace_back(score, i);
    }
    std::stable_sort(ranking.begin(), ranking.end(),
                     [](Ranked const& lhs, Ranked const& rhs) { return lhs.first > rhs.first; });

    const std::size_t maxLength = limit > 0 ? static_cast<std::size_t>(limit) : 0;
    std::string output;
    for (Ranked const& entry : ranking) {
        const std::string& sentence = sentences[entry.second];
        // +1 accounts for the '.' appended after the sentence.
        if (output.size() + sentence.size() + 1 > maxLength) {
            break;
        }
        output += sentence;
        output += '.';
    }

    if (output.empty()) {
        // Even the best sentence is too long: drop trailing comma-separated
        // clauses until it fits or there is nothing left to cut.
        output = sentences[ranking.front().second];
        while (output.size() + 1 > maxLength) {
            const std::size_t comma = output.rfind(',');
            if (comma == std::string::npos) {
                break;
            }
            output.erase(comma);
        }
        output += '.';
    }
    return output;
}
/**
 * @brief Demo entry point: summarizes a text file and prints the result.
 *
 * Reads the whole file at a hard-coded path, summarizes it against a fixed
 * title with a budget of 5 * 36 characters and writes the summary to stdout.
 *
 * NOTE(review): `_tmain` is only mapped to `main`/`wmain` by MSVC's <tchar.h>,
 * which this file does not include directly — confirm the build provides it.
 *
 * @return 0 on success, 1 when the input file cannot be opened.
 */
int _tmain()
{
    std::ifstream ifs("F:\\Analyzer\\text.txt");
    if (!ifs) {
        // Without this check a missing file would be summarized as empty text.
        std::cerr << "Cannot open F:\\Analyzer\\text.txt\n";
        return 1;
    }
    std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
    std::string title = "Stack, overflow";
    SmartAnalyzer a;
    std::cout << a.getSummary(title, content, 5 * 36);
    // Waits for a character on stdin before exiting (presumably to keep the
    // console window open).
    getchar();
    return 0;
}
#include <string>
#include <algorithm>
#include <sstream>
#include <set>
#include <fstream>
#include <unordered_map>
#include <map>
//#include "stdafx.h"
//#include "SmartlyParser.h"
/// One set of distinct whitespace-separated words per sentence.
using setWords = std::vector<std::set<std::string>>;

/**
 * @brief Extractive text summarizer.
 *
 * Splits a text into '.'-delimited sentences, scores every sentence by its
 * word overlap with all other sentences plus a double-weighted overlap with
 * the title, and concatenates the best-scoring sentences until a character
 * budget is exhausted.
 */
class SmartAnalyzer
{
private:
    double sentence_intersection(std::set<std::string> const& a, std::set<std::string> const& b);
    std::vector<std::string> parseSentences(std::string const& text);
    setWords stringSets(std::string const& text);
    std::string format(std::string text, bool const& includeDot = false);
public:
    SmartAnalyzer() {}
    std::string getSummary(std::string title, std::string const& text, int const& limit);
};

/**
 * @brief Similarity of two word sets: shared words divided by the average set size.
 *
 * @param a First word set.
 * @param b Second word set.
 * @return A value in [0, 1]; 0 when either set is empty.
 */
double SmartAnalyzer::sentence_intersection(std::set<std::string> const& a, std::set<std::string> const& b)
{
    // Nothing can be shared with an empty set; returning early also avoids a
    // zero divisor (and the NaN score it would produce).
    if (a.empty() || b.empty()) {
        return 0.0;
    }
    std::vector<std::string> common;
    std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(common));
    // Divide by 2.0 so the average size is not truncated to an integer.
    const double averageSize = static_cast<double>(a.size() + b.size()) / 2.0;
    return static_cast<double>(common.size()) / averageSize;
}

/**
 * @brief Splits text into sentences on the '.' character.
 *
 * @param text Raw input text.
 * @return The fragments between dots, without the dots and without trimming.
 */
std::vector<std::string> SmartAnalyzer::parseSentences(std::string const& text)
{
    std::vector<std::string> output;
    std::istringstream iss(text);
    std::string token;
    while (std::getline(iss, token, '.')) {
        output.push_back(token);
    }
    return output;
}

/**
 * @brief Builds one set of whitespace-separated words per '.'-delimited sentence.
 *
 * @param text Text to split; callers pass it through format() first.
 * @return Word sets in sentence order.
 */
setWords SmartAnalyzer::stringSets(std::string const& text)
{
    setWords output;
    std::istringstream iss(text), current;
    std::string token;
    while (std::getline(iss, token, '.')) {
        // Reset the EOF/fail state left by the previous sentence before reuse.
        current.clear();
        current.str(token);
        output.emplace_back(std::istream_iterator<std::string>(current), std::istream_iterator<std::string>());
    }
    return output;
}

/**
 * @brief Normalizes text for word comparison.
 *
 * Removes ',', '!' and '"' (and '.' when requested) and lower-cases the rest.
 *
 * @param text       Text to normalize (taken by value and modified).
 * @param includeDot When true, '.' characters are removed as well.
 * @return The normalized text.
 */
std::string SmartAnalyzer::format(std::string text, bool const& includeDot)
{
    text.erase(std::remove_if(text.begin(), text.end(),
                              [includeDot](char c) {
                                  return c == ',' || c == '!' || c == '"' || (includeDot && c == '.');
                              }),
               text.end());
    // std::tolower is undefined for negative char values, so go through unsigned char.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return text;
}

/**
 * @brief Produces a summary of @p text made of its highest-scoring sentences.
 *
 * Each sentence is scored as the sum of its similarity to every other
 * sentence plus twice its similarity to the title. Sentences are appended in
 * descending score order (ties keep document order), each followed by '.',
 * until the next one would push the result past @p limit characters.
 * If not even the best sentence fits, that sentence is cut back at its commas
 * until it fits or no comma is left; in the latter case the result may exceed
 * the limit.
 *
 * @param title Title used to boost sentences that share its words.
 * @param text  Text to summarize.
 * @param limit Maximum summary length in characters; values <= 0 are treated as 0.
 * @return The summary, or an empty string when @p text contains no sentences.
 */
std::string SmartAnalyzer::getSummary(std::string title, std::string const& text, int const& limit)
{
    const std::vector<std::string> sentences = parseSentences(text);
    if (sentences.empty()) {
        return std::string();
    }

    setWords sentenceWords = stringSets(format(text));
    // format() can strip a trailing fragment that consists only of punctuation
    // (e.g. text ending in ".!"), leaving one set fewer than there are
    // sentences; pad with empty sets so both containers share indices.
    sentenceWords.resize(sentences.size());

    // A named stream is required: istream_iterator takes a non-const reference.
    std::istringstream titleStream(format(title, true));
    const std::set<std::string> titleWords((std::istream_iterator<std::string>(titleStream)),
                                           std::istream_iterator<std::string>());

    // (score, sentence index). A vector keeps sentences with equal scores,
    // which a map keyed by score would silently discard.
    typedef std::pair<double, std::size_t> Ranked;
    std::vector<Ranked> ranking;
    ranking.reserve(sentences.size());
    for (std::size_t i = 0; i < sentences.size(); ++i) {
        double score = 0.0;
        for (std::size_t j = 0; j < sentences.size(); ++j) {
            if (i == j) {
                // Compare with the title instead of with itself, at double weight.
                score += sentence_intersection(sentenceWords[i], titleWords) * 2;
            } else {
                score += sentence_intersection(sentenceWords[i], sentenceWords[j]);
            }
        }
        ranking.emplace_back(score, i);
    }
    std::stable_sort(ranking.begin(), ranking.end(),
                     [](Ranked const& lhs, Ranked const& rhs) { return lhs.first > rhs.first; });

    const std::size_t maxLength = limit > 0 ? static_cast<std::size_t>(limit) : 0;
    std::string output;
    for (Ranked const& entry : ranking) {
        const std::string& sentence = sentences[entry.second];
        // +1 accounts for the '.' appended after the sentence.
        if (output.size() + sentence.size() + 1 > maxLength) {
            break;
        }
        output += sentence;
        output += '.';
    }

    if (output.empty()) {
        // Even the best sentence is too long: drop trailing comma-separated
        // clauses until it fits or there is nothing left to cut.
        output = sentences[ranking.front().second];
        while (output.size() + 1 > maxLength) {
            const std::size_t comma = output.rfind(',');
            if (comma == std::string::npos) {
                break;
            }
            output.erase(comma);
        }
        output += '.';
    }
    return output;
}
/**
 * @brief Demo entry point: summarizes a text file and prints the result.
 *
 * Reads the whole file at a hard-coded path, summarizes it against a fixed
 * title with a budget of 5 * 36 characters and writes the summary to stdout.
 *
 * NOTE(review): `_tmain` is only mapped to `main`/`wmain` by MSVC's <tchar.h>,
 * which this file does not include directly — confirm the build provides it.
 *
 * @return 0 on success, 1 when the input file cannot be opened.
 */
int _tmain()
{
    std::ifstream ifs("F:\\Analyzer\\text.txt");
    if (!ifs) {
        // Without this check a missing file would be summarized as empty text.
        std::cerr << "Cannot open F:\\Analyzer\\text.txt\n";
        return 1;
    }
    std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
    std::string title = "Stack, overflow";
    SmartAnalyzer a;
    std::cout << a.getSummary(title, content, 5 * 36);
    // Waits for a character on stdin before exiting (presumably to keep the
    // console window open).
    getchar();
    return 0;
}
Comments
Post a Comment