-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.cpp
42 lines (34 loc) · 1.4 KB
/
test.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*******************************************************************************
* Copyright (c) 2013 István Endredy.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser Public License v3
* which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/
*
* JusText was originally written in Python by Jan Pomikalek:
* https://code.google.com/p/justext/
* Its quality is very good, so I have rewritten it in C++. :)
*
* This is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
******************************************************************************/
#include "justext.h"
#include "tools.h"
#include <stdio.h>
#include <string.h>
int main(int argc, char* argv[])
{
std::string url = "http://www.autoblog.com/2015/01/16/german-gp-f1-moving-hockenheim/";
std::string html = file2string("test/sample.html");
std::string encoding = detectCodePage(html);
//create jusText object, with stopwords file
Justext j("stoplists/English.txt");
//if you want a debug html output as well
j.setDebug(true);
//call justext algorithm
std::string result = j.getContent(html, encoding, url);
printf("result: %s\n", result.c_str());
return 0;
}