From f1309a1be3824fd12a528e4aed4de12a17ca49b3 Mon Sep 17 00:00:00 2001
From: David Belicza <87.bdavid@gmail.com>
Date: Sun, 20 Sep 2020 16:55:12 +0200
Subject: [PATCH 1/5] Prepare the package for refactoring
---
.github/workflows/tests.yml | 39 +
.gitignore | 4 +-
.travis.yml | 11 -
LICENSE | 21 +
composer.json | 15 +-
composer.lock | 1409 -----------------
phpunit.xml | 27 -
readme.md | 105 +-
src/TextRankFacade.php | 7 -
src/Tool/Graph.php | 15 -
src/Tool/Parser.php | 14 -
src/Tool/Score.php | 14 -
src/Tool/StopWords/English.php | 12 -
src/Tool/StopWords/French.php | 12 -
src/Tool/StopWords/German.php | 15 +-
src/Tool/StopWords/Italian.php | 5 -
src/Tool/StopWords/Norwegian.php | 12 -
src/Tool/StopWords/Russian.php | 13 -
src/Tool/StopWords/Spanish.php | 12 -
src/Tool/StopWords/StopWordsAbstract.php | 12 -
src/Tool/Summarize.php | 14 -
src/Tool/Text.php | 14 -
tests/{ => functional}/TextRankFacadeTest.php | 11 +-
tests/phpunit.xml | 34 +
{res => tests/resource}/sample1.txt | 0
25 files changed, 144 insertions(+), 1703 deletions(-)
create mode 100644 .github/workflows/tests.yml
delete mode 100644 .travis.yml
create mode 100644 LICENSE
delete mode 100644 composer.lock
delete mode 100644 phpunit.xml
rename tests/{ => functional}/TextRankFacadeTest.php (93%)
create mode 100644 tests/phpunit.xml
rename {res => tests/resource}/sample1.txt (100%)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..f3f4b41
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,39 @@
+name: tests
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ run:
+ runs-on: ${{ matrix.operating-system }}
+ strategy:
+ matrix:
+ operating-system: [ubuntu-latest]
+ php-versions: ['7.4']
+ name: PHP ${{ matrix.php-versions }} Test on ${{ matrix.operating-system }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+
+ - name: Setup PHP
+ uses: shivammathur/setup-php@v2
+ with:
+ php-version: ${{ matrix.php-versions }}
+ coverage: xdebug
+
+ - name: Validate composer files
+ run: composer validate
+
+ - name: Install dependencies
+ if: steps.composer-cache.outputs.cache-hit != 'true'
+ run: composer install --prefer-dist --no-progress --no-suggest
+
+ - name: Run test suite
+ run: composer test
+
+ - name: Publish Analysis
+ uses: codecov/codecov-action@v1.0.13
+ with:
+ name: Code Analysis
+ file: ./var/code-coverage/clover/coverage.xml
diff --git a/.gitignore b/.gitignore
index ab27d1e..fa374aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
/.idea
-/vendor
\ No newline at end of file
+/composer.lock
+/vendor
+/var
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 0895ccb..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-language: php
-php:
- - '7.1'
- - '7.2'
-
-dist: trusty
-sudo: required
-group: edge
-
-before_script:
- - composer install
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..29afed1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 PHP-Science
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/composer.json b/composer.json
index 4d07d2e..3dc3015 100644
--- a/composer.json
+++ b/composer.json
@@ -10,19 +10,24 @@
}
],
"require": {
- "php": "7.*",
+ "php": "7.4.*",
"ext-ctype": "*",
- "ext-mbstring": "*"
+ "ext-mbstring": "*",
+ "php-science/pagerank": "1.*"
},
"require-dev": {
- "phpunit/phpunit": "^5.4"
+ "phpunit/phpunit": "^9"
},
"autoload": {
"psr-4": {
- "PhpScience\\TextRank\\": ["src/", "tests/"]
+ "PhpScience\\TextRank\\": [
+ "src/",
+ "tests/unit/",
+ "tests/functional/"
+ ]
}
},
"scripts": {
- "test": "phpunit --colors='always' $(pwd)/tests"
+ "test": "vendor/bin/phpunit -c $(pwd)/tests/phpunit.xml --colors='always' --do-not-cache-result"
}
}
diff --git a/composer.lock b/composer.lock
deleted file mode 100644
index 65ae386..0000000
--- a/composer.lock
+++ /dev/null
@@ -1,1409 +0,0 @@
-{
- "_readme": [
- "This file locks the dependencies of your project to a known state",
- "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
- "This file is @generated automatically"
- ],
- "content-hash": "97ff0c108502ab602ba79bf3a251e208",
- "packages": [],
- "packages-dev": [
- {
- "name": "doctrine/instantiator",
- "version": "1.1.0",
- "source": {
- "type": "git",
- "url": "https://github.com/doctrine/instantiator.git",
- "reference": "185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/doctrine/instantiator/zipball/185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda",
- "reference": "185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda",
- "shasum": ""
- },
- "require": {
- "php": "^7.1"
- },
- "require-dev": {
- "athletic/athletic": "~0.1.8",
- "ext-pdo": "*",
- "ext-phar": "*",
- "phpunit/phpunit": "^6.2.3",
- "squizlabs/php_codesniffer": "^3.0.2"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.2.x-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "Doctrine\\Instantiator\\": "src/Doctrine/Instantiator/"
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Marco Pivetta",
- "email": "ocramius@gmail.com",
- "homepage": "http://ocramius.github.com/"
- }
- ],
- "description": "A small, lightweight utility to instantiate objects in PHP without invoking their constructors",
- "homepage": "https://github.com/doctrine/instantiator",
- "keywords": [
- "constructor",
- "instantiate"
- ],
- "time": "2017-07-22T11:58:36+00:00"
- },
- {
- "name": "myclabs/deep-copy",
- "version": "1.8.1",
- "source": {
- "type": "git",
- "url": "https://github.com/myclabs/DeepCopy.git",
- "reference": "3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8",
- "reference": "3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8",
- "shasum": ""
- },
- "require": {
- "php": "^7.1"
- },
- "replace": {
- "myclabs/deep-copy": "self.version"
- },
- "require-dev": {
- "doctrine/collections": "^1.0",
- "doctrine/common": "^2.6",
- "phpunit/phpunit": "^7.1"
- },
- "type": "library",
- "autoload": {
- "psr-4": {
- "DeepCopy\\": "src/DeepCopy/"
- },
- "files": [
- "src/DeepCopy/deep_copy.php"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "description": "Create deep copies (clones) of your objects",
- "keywords": [
- "clone",
- "copy",
- "duplicate",
- "object",
- "object graph"
- ],
- "time": "2018-06-11T23:09:50+00:00"
- },
- {
- "name": "phpdocumentor/reflection-common",
- "version": "1.0.1",
- "source": {
- "type": "git",
- "url": "https://github.com/phpDocumentor/ReflectionCommon.git",
- "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/phpDocumentor/ReflectionCommon/zipball/21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6",
- "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6",
- "shasum": ""
- },
- "require": {
- "php": ">=5.5"
- },
- "require-dev": {
- "phpunit/phpunit": "^4.6"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0.x-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "phpDocumentor\\Reflection\\": [
- "src"
- ]
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Jaap van Otterdijk",
- "email": "opensource@ijaap.nl"
- }
- ],
- "description": "Common reflection classes used by phpdocumentor to reflect the code structure",
- "homepage": "http://www.phpdoc.org",
- "keywords": [
- "FQSEN",
- "phpDocumentor",
- "phpdoc",
- "reflection",
- "static analysis"
- ],
- "time": "2017-09-11T18:02:19+00:00"
- },
- {
- "name": "phpdocumentor/reflection-docblock",
- "version": "4.3.0",
- "source": {
- "type": "git",
- "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git",
- "reference": "94fd0001232e47129dd3504189fa1c7225010d08"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/94fd0001232e47129dd3504189fa1c7225010d08",
- "reference": "94fd0001232e47129dd3504189fa1c7225010d08",
- "shasum": ""
- },
- "require": {
- "php": "^7.0",
- "phpdocumentor/reflection-common": "^1.0.0",
- "phpdocumentor/type-resolver": "^0.4.0",
- "webmozart/assert": "^1.0"
- },
- "require-dev": {
- "doctrine/instantiator": "~1.0.5",
- "mockery/mockery": "^1.0",
- "phpunit/phpunit": "^6.4"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "4.x-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "phpDocumentor\\Reflection\\": [
- "src/"
- ]
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Mike van Riel",
- "email": "me@mikevanriel.com"
- }
- ],
- "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.",
- "time": "2017-11-30T07:14:17+00:00"
- },
- {
- "name": "phpdocumentor/type-resolver",
- "version": "0.4.0",
- "source": {
- "type": "git",
- "url": "https://github.com/phpDocumentor/TypeResolver.git",
- "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/phpDocumentor/TypeResolver/zipball/9c977708995954784726e25d0cd1dddf4e65b0f7",
- "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7",
- "shasum": ""
- },
- "require": {
- "php": "^5.5 || ^7.0",
- "phpdocumentor/reflection-common": "^1.0"
- },
- "require-dev": {
- "mockery/mockery": "^0.9.4",
- "phpunit/phpunit": "^5.2||^4.8.24"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0.x-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "phpDocumentor\\Reflection\\": [
- "src/"
- ]
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Mike van Riel",
- "email": "me@mikevanriel.com"
- }
- ],
- "time": "2017-07-14T14:27:02+00:00"
- },
- {
- "name": "phpspec/prophecy",
- "version": "1.8.0",
- "source": {
- "type": "git",
- "url": "https://github.com/phpspec/prophecy.git",
- "reference": "4ba436b55987b4bf311cb7c6ba82aa528aac0a06"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/phpspec/prophecy/zipball/4ba436b55987b4bf311cb7c6ba82aa528aac0a06",
- "reference": "4ba436b55987b4bf311cb7c6ba82aa528aac0a06",
- "shasum": ""
- },
- "require": {
- "doctrine/instantiator": "^1.0.2",
- "php": "^5.3|^7.0",
- "phpdocumentor/reflection-docblock": "^2.0|^3.0.2|^4.0",
- "sebastian/comparator": "^1.1|^2.0|^3.0",
- "sebastian/recursion-context": "^1.0|^2.0|^3.0"
- },
- "require-dev": {
- "phpspec/phpspec": "^2.5|^3.2",
- "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5 || ^7.1"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.8.x-dev"
- }
- },
- "autoload": {
- "psr-0": {
- "Prophecy\\": "src/"
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Konstantin Kudryashov",
- "email": "ever.zet@gmail.com",
- "homepage": "http://everzet.com"
- },
- {
- "name": "Marcello Duarte",
- "email": "marcello.duarte@gmail.com"
- }
- ],
- "description": "Highly opinionated mocking framework for PHP 5.3+",
- "homepage": "https://github.com/phpspec/prophecy",
- "keywords": [
- "Double",
- "Dummy",
- "fake",
- "mock",
- "spy",
- "stub"
- ],
- "time": "2018-08-05T17:53:17+00:00"
- },
- {
- "name": "phpunit/php-code-coverage",
- "version": "4.0.8",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/php-code-coverage.git",
- "reference": "ef7b2f56815df854e66ceaee8ebe9393ae36a40d"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/ef7b2f56815df854e66ceaee8ebe9393ae36a40d",
- "reference": "ef7b2f56815df854e66ceaee8ebe9393ae36a40d",
- "shasum": ""
- },
- "require": {
- "ext-dom": "*",
- "ext-xmlwriter": "*",
- "php": "^5.6 || ^7.0",
- "phpunit/php-file-iterator": "^1.3",
- "phpunit/php-text-template": "^1.2",
- "phpunit/php-token-stream": "^1.4.2 || ^2.0",
- "sebastian/code-unit-reverse-lookup": "^1.0",
- "sebastian/environment": "^1.3.2 || ^2.0",
- "sebastian/version": "^1.0 || ^2.0"
- },
- "require-dev": {
- "ext-xdebug": "^2.1.4",
- "phpunit/phpunit": "^5.7"
- },
- "suggest": {
- "ext-xdebug": "^2.5.1"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "4.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sb@sebastian-bergmann.de",
- "role": "lead"
- }
- ],
- "description": "Library that provides collection, processing, and rendering functionality for PHP code coverage information.",
- "homepage": "https://github.com/sebastianbergmann/php-code-coverage",
- "keywords": [
- "coverage",
- "testing",
- "xunit"
- ],
- "time": "2017-04-02T07:44:40+00:00"
- },
- {
- "name": "phpunit/php-file-iterator",
- "version": "1.4.5",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/php-file-iterator.git",
- "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/730b01bc3e867237eaac355e06a36b85dd93a8b4",
- "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.4.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sb@sebastian-bergmann.de",
- "role": "lead"
- }
- ],
- "description": "FilterIterator implementation that filters files based on a list of suffixes.",
- "homepage": "https://github.com/sebastianbergmann/php-file-iterator/",
- "keywords": [
- "filesystem",
- "iterator"
- ],
- "time": "2017-11-27T13:52:08+00:00"
- },
- {
- "name": "phpunit/php-text-template",
- "version": "1.2.1",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/php-text-template.git",
- "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/php-text-template/zipball/31f8b717e51d9a2afca6c9f046f5d69fc27c8686",
- "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3"
- },
- "type": "library",
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de",
- "role": "lead"
- }
- ],
- "description": "Simple template engine.",
- "homepage": "https://github.com/sebastianbergmann/php-text-template/",
- "keywords": [
- "template"
- ],
- "time": "2015-06-21T13:50:34+00:00"
- },
- {
- "name": "phpunit/php-timer",
- "version": "1.0.9",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/php-timer.git",
- "reference": "3dcf38ca72b158baf0bc245e9184d3fdffa9c46f"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/3dcf38ca72b158baf0bc245e9184d3fdffa9c46f",
- "reference": "3dcf38ca72b158baf0bc245e9184d3fdffa9c46f",
- "shasum": ""
- },
- "require": {
- "php": "^5.3.3 || ^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sb@sebastian-bergmann.de",
- "role": "lead"
- }
- ],
- "description": "Utility class for timing",
- "homepage": "https://github.com/sebastianbergmann/php-timer/",
- "keywords": [
- "timer"
- ],
- "time": "2017-02-26T11:10:40+00:00"
- },
- {
- "name": "phpunit/php-token-stream",
- "version": "2.0.2",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/php-token-stream.git",
- "reference": "791198a2c6254db10131eecfe8c06670700904db"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/php-token-stream/zipball/791198a2c6254db10131eecfe8c06670700904db",
- "reference": "791198a2c6254db10131eecfe8c06670700904db",
- "shasum": ""
- },
- "require": {
- "ext-tokenizer": "*",
- "php": "^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^6.2.4"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Wrapper around PHP's tokenizer extension.",
- "homepage": "https://github.com/sebastianbergmann/php-token-stream/",
- "keywords": [
- "tokenizer"
- ],
- "time": "2017-11-27T05:48:46+00:00"
- },
- {
- "name": "phpunit/phpunit",
- "version": "5.7.27",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/phpunit.git",
- "reference": "b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c",
- "reference": "b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c",
- "shasum": ""
- },
- "require": {
- "ext-dom": "*",
- "ext-json": "*",
- "ext-libxml": "*",
- "ext-mbstring": "*",
- "ext-xml": "*",
- "myclabs/deep-copy": "~1.3",
- "php": "^5.6 || ^7.0",
- "phpspec/prophecy": "^1.6.2",
- "phpunit/php-code-coverage": "^4.0.4",
- "phpunit/php-file-iterator": "~1.4",
- "phpunit/php-text-template": "~1.2",
- "phpunit/php-timer": "^1.0.6",
- "phpunit/phpunit-mock-objects": "^3.2",
- "sebastian/comparator": "^1.2.4",
- "sebastian/diff": "^1.4.3",
- "sebastian/environment": "^1.3.4 || ^2.0",
- "sebastian/exporter": "~2.0",
- "sebastian/global-state": "^1.1",
- "sebastian/object-enumerator": "~2.0",
- "sebastian/resource-operations": "~1.0",
- "sebastian/version": "^1.0.6|^2.0.1",
- "symfony/yaml": "~2.1|~3.0|~4.0"
- },
- "conflict": {
- "phpdocumentor/reflection-docblock": "3.0.2"
- },
- "require-dev": {
- "ext-pdo": "*"
- },
- "suggest": {
- "ext-xdebug": "*",
- "phpunit/php-invoker": "~1.1"
- },
- "bin": [
- "phpunit"
- ],
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "5.7.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de",
- "role": "lead"
- }
- ],
- "description": "The PHP Unit Testing framework.",
- "homepage": "https://phpunit.de/",
- "keywords": [
- "phpunit",
- "testing",
- "xunit"
- ],
- "time": "2018-02-01T05:50:59+00:00"
- },
- {
- "name": "phpunit/phpunit-mock-objects",
- "version": "3.4.4",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git",
- "reference": "a23b761686d50a560cc56233b9ecf49597cc9118"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/a23b761686d50a560cc56233b9ecf49597cc9118",
- "reference": "a23b761686d50a560cc56233b9ecf49597cc9118",
- "shasum": ""
- },
- "require": {
- "doctrine/instantiator": "^1.0.2",
- "php": "^5.6 || ^7.0",
- "phpunit/php-text-template": "^1.2",
- "sebastian/exporter": "^1.2 || ^2.0"
- },
- "conflict": {
- "phpunit/phpunit": "<5.4.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^5.4"
- },
- "suggest": {
- "ext-soap": "*"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "3.2.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sb@sebastian-bergmann.de",
- "role": "lead"
- }
- ],
- "description": "Mock Object library for PHPUnit",
- "homepage": "https://github.com/sebastianbergmann/phpunit-mock-objects/",
- "keywords": [
- "mock",
- "xunit"
- ],
- "time": "2017-06-30T09:13:00+00:00"
- },
- {
- "name": "sebastian/code-unit-reverse-lookup",
- "version": "1.0.1",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git",
- "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/4419fcdb5eabb9caa61a27c7a1db532a6b55dd18",
- "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18",
- "shasum": ""
- },
- "require": {
- "php": "^5.6 || ^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^5.7 || ^6.0"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Looks up which function or method a line of code belongs to",
- "homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/",
- "time": "2017-03-04T06:30:41+00:00"
- },
- {
- "name": "sebastian/comparator",
- "version": "1.2.4",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/comparator.git",
- "reference": "2b7424b55f5047b47ac6e5ccb20b2aea4011d9be"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/2b7424b55f5047b47ac6e5ccb20b2aea4011d9be",
- "reference": "2b7424b55f5047b47ac6e5ccb20b2aea4011d9be",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3",
- "sebastian/diff": "~1.2",
- "sebastian/exporter": "~1.2 || ~2.0"
- },
- "require-dev": {
- "phpunit/phpunit": "~4.4"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.2.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Jeff Welch",
- "email": "whatthejeff@gmail.com"
- },
- {
- "name": "Volker Dusch",
- "email": "github@wallbash.com"
- },
- {
- "name": "Bernhard Schussek",
- "email": "bschussek@2bepublished.at"
- },
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Provides the functionality to compare PHP values for equality",
- "homepage": "http://www.github.com/sebastianbergmann/comparator",
- "keywords": [
- "comparator",
- "compare",
- "equality"
- ],
- "time": "2017-01-29T09:50:25+00:00"
- },
- {
- "name": "sebastian/diff",
- "version": "1.4.3",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/diff.git",
- "reference": "7f066a26a962dbe58ddea9f72a4e82874a3975a4"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/7f066a26a962dbe58ddea9f72a4e82874a3975a4",
- "reference": "7f066a26a962dbe58ddea9f72a4e82874a3975a4",
- "shasum": ""
- },
- "require": {
- "php": "^5.3.3 || ^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.4-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Kore Nordmann",
- "email": "mail@kore-nordmann.de"
- },
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Diff implementation",
- "homepage": "https://github.com/sebastianbergmann/diff",
- "keywords": [
- "diff"
- ],
- "time": "2017-05-22T07:24:03+00:00"
- },
- {
- "name": "sebastian/environment",
- "version": "2.0.0",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/environment.git",
- "reference": "5795ffe5dc5b02460c3e34222fee8cbe245d8fac"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/5795ffe5dc5b02460c3e34222fee8cbe245d8fac",
- "reference": "5795ffe5dc5b02460c3e34222fee8cbe245d8fac",
- "shasum": ""
- },
- "require": {
- "php": "^5.6 || ^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^5.0"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Provides functionality to handle HHVM/PHP environments",
- "homepage": "http://www.github.com/sebastianbergmann/environment",
- "keywords": [
- "Xdebug",
- "environment",
- "hhvm"
- ],
- "time": "2016-11-26T07:53:53+00:00"
- },
- {
- "name": "sebastian/exporter",
- "version": "2.0.0",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/exporter.git",
- "reference": "ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4",
- "reference": "ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3",
- "sebastian/recursion-context": "~2.0"
- },
- "require-dev": {
- "ext-mbstring": "*",
- "phpunit/phpunit": "~4.4"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Jeff Welch",
- "email": "whatthejeff@gmail.com"
- },
- {
- "name": "Volker Dusch",
- "email": "github@wallbash.com"
- },
- {
- "name": "Bernhard Schussek",
- "email": "bschussek@2bepublished.at"
- },
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- },
- {
- "name": "Adam Harvey",
- "email": "aharvey@php.net"
- }
- ],
- "description": "Provides the functionality to export PHP variables for visualization",
- "homepage": "http://www.github.com/sebastianbergmann/exporter",
- "keywords": [
- "export",
- "exporter"
- ],
- "time": "2016-11-19T08:54:04+00:00"
- },
- {
- "name": "sebastian/global-state",
- "version": "1.1.1",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/global-state.git",
- "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/global-state/zipball/bc37d50fea7d017d3d340f230811c9f1d7280af4",
- "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3"
- },
- "require-dev": {
- "phpunit/phpunit": "~4.2"
- },
- "suggest": {
- "ext-uopz": "*"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Snapshotting of global state",
- "homepage": "http://www.github.com/sebastianbergmann/global-state",
- "keywords": [
- "global state"
- ],
- "time": "2015-10-12T03:26:01+00:00"
- },
- {
- "name": "sebastian/object-enumerator",
- "version": "2.0.1",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/object-enumerator.git",
- "reference": "1311872ac850040a79c3c058bea3e22d0f09cbb7"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/1311872ac850040a79c3c058bea3e22d0f09cbb7",
- "reference": "1311872ac850040a79c3c058bea3e22d0f09cbb7",
- "shasum": ""
- },
- "require": {
- "php": ">=5.6",
- "sebastian/recursion-context": "~2.0"
- },
- "require-dev": {
- "phpunit/phpunit": "~5"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Traverses array structures and object graphs to enumerate all referenced objects",
- "homepage": "https://github.com/sebastianbergmann/object-enumerator/",
- "time": "2017-02-18T15:18:39+00:00"
- },
- {
- "name": "sebastian/recursion-context",
- "version": "2.0.0",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/recursion-context.git",
- "reference": "2c3ba150cbec723aa057506e73a8d33bdb286c9a"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/2c3ba150cbec723aa057506e73a8d33bdb286c9a",
- "reference": "2c3ba150cbec723aa057506e73a8d33bdb286c9a",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3"
- },
- "require-dev": {
- "phpunit/phpunit": "~4.4"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Jeff Welch",
- "email": "whatthejeff@gmail.com"
- },
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- },
- {
- "name": "Adam Harvey",
- "email": "aharvey@php.net"
- }
- ],
- "description": "Provides functionality to recursively process PHP variables",
- "homepage": "http://www.github.com/sebastianbergmann/recursion-context",
- "time": "2016-11-19T07:33:16+00:00"
- },
- {
- "name": "sebastian/resource-operations",
- "version": "1.0.0",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/resource-operations.git",
- "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/ce990bb21759f94aeafd30209e8cfcdfa8bc3f52",
- "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52",
- "shasum": ""
- },
- "require": {
- "php": ">=5.6.0"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de"
- }
- ],
- "description": "Provides a list of PHP built-in functions that operate on resources",
- "homepage": "https://www.github.com/sebastianbergmann/resource-operations",
- "time": "2015-07-28T20:34:47+00:00"
- },
- {
- "name": "sebastian/version",
- "version": "2.0.1",
- "source": {
- "type": "git",
- "url": "https://github.com/sebastianbergmann/version.git",
- "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/99732be0ddb3361e16ad77b68ba41efc8e979019",
- "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019",
- "shasum": ""
- },
- "require": {
- "php": ">=5.6"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "2.0.x-dev"
- }
- },
- "autoload": {
- "classmap": [
- "src/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "BSD-3-Clause"
- ],
- "authors": [
- {
- "name": "Sebastian Bergmann",
- "email": "sebastian@phpunit.de",
- "role": "lead"
- }
- ],
- "description": "Library that helps with managing the version number of Git-hosted PHP projects",
- "homepage": "https://github.com/sebastianbergmann/version",
- "time": "2016-10-03T07:35:21+00:00"
- },
- {
- "name": "symfony/polyfill-ctype",
- "version": "v1.9.0",
- "source": {
- "type": "git",
- "url": "https://github.com/symfony/polyfill-ctype.git",
- "reference": "e3d826245268269cd66f8326bd8bc066687b4a19"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/e3d826245268269cd66f8326bd8bc066687b4a19",
- "reference": "e3d826245268269cd66f8326bd8bc066687b4a19",
- "shasum": ""
- },
- "require": {
- "php": ">=5.3.3"
- },
- "suggest": {
- "ext-ctype": "For best performance"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.9-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "Symfony\\Polyfill\\Ctype\\": ""
- },
- "files": [
- "bootstrap.php"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Symfony Community",
- "homepage": "https://symfony.com/contributors"
- },
- {
- "name": "Gert de Pagter",
- "email": "BackEndTea@gmail.com"
- }
- ],
- "description": "Symfony polyfill for ctype functions",
- "homepage": "https://symfony.com",
- "keywords": [
- "compatibility",
- "ctype",
- "polyfill",
- "portable"
- ],
- "time": "2018-08-06T14:22:27+00:00"
- },
- {
- "name": "symfony/yaml",
- "version": "v4.1.3",
- "source": {
- "type": "git",
- "url": "https://github.com/symfony/yaml.git",
- "reference": "46bc69aa91fc4ab78a96ce67873a6b0c148fd48c"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/symfony/yaml/zipball/46bc69aa91fc4ab78a96ce67873a6b0c148fd48c",
- "reference": "46bc69aa91fc4ab78a96ce67873a6b0c148fd48c",
- "shasum": ""
- },
- "require": {
- "php": "^7.1.3",
- "symfony/polyfill-ctype": "~1.8"
- },
- "conflict": {
- "symfony/console": "<3.4"
- },
- "require-dev": {
- "symfony/console": "~3.4|~4.0"
- },
- "suggest": {
- "symfony/console": "For validating YAML files using the lint command"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "4.1-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "Symfony\\Component\\Yaml\\": ""
- },
- "exclude-from-classmap": [
- "/Tests/"
- ]
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Fabien Potencier",
- "email": "fabien@symfony.com"
- },
- {
- "name": "Symfony Community",
- "homepage": "https://symfony.com/contributors"
- }
- ],
- "description": "Symfony Yaml Component",
- "homepage": "https://symfony.com",
- "time": "2018-07-26T11:24:31+00:00"
- },
- {
- "name": "webmozart/assert",
- "version": "1.3.0",
- "source": {
- "type": "git",
- "url": "https://github.com/webmozart/assert.git",
- "reference": "0df1908962e7a3071564e857d86874dad1ef204a"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/webmozart/assert/zipball/0df1908962e7a3071564e857d86874dad1ef204a",
- "reference": "0df1908962e7a3071564e857d86874dad1ef204a",
- "shasum": ""
- },
- "require": {
- "php": "^5.3.3 || ^7.0"
- },
- "require-dev": {
- "phpunit/phpunit": "^4.6",
- "sebastian/version": "^1.0.1"
- },
- "type": "library",
- "extra": {
- "branch-alias": {
- "dev-master": "1.3-dev"
- }
- },
- "autoload": {
- "psr-4": {
- "Webmozart\\Assert\\": "src/"
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Bernhard Schussek",
- "email": "bschussek@gmail.com"
- }
- ],
- "description": "Assertions to validate method input/output with nice error messages.",
- "keywords": [
- "assert",
- "check",
- "validate"
- ],
- "time": "2018-01-29T19:49:41+00:00"
- }
- ],
- "aliases": [],
- "minimum-stability": "stable",
- "stability-flags": [],
- "prefer-stable": false,
- "prefer-lowest": false,
- "platform": {
- "php": "7.*",
- "ext-ctype": "*",
- "ext-mbstring": "*"
- },
- "platform-dev": []
-}
diff --git a/phpunit.xml b/phpunit.xml
deleted file mode 100644
index 8c26e40..0000000
--- a/phpunit.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-
-
-
-
-
-
- ./tests/
-
-
-
-
-
- src
-
-
-
\ No newline at end of file
diff --git a/readme.md b/readme.md
index fa8fa15..2206659 100644
--- a/readme.md
+++ b/readme.md
@@ -1,73 +1,32 @@
-# PHP.Science - TextRank
-
-[![Build Status](https://travis-ci.org/DavidBelicza/PHP-Science-TextRank.svg?branch=master)](https://travis-ci.org/DavidBelicza/PHP-Science-TextRank)
-[![Latest Stable Version](https://poser.pugx.org/php-science/textrank/v/stable.svg)](https://packagist.org/packages/php-science/textrank)
-[![License](https://img.shields.io/badge/license-MIT-33CCFF.svg)](https://opensource.org/licenses/MIT)
-[![composer.lock](https://poser.pugx.org/php-science/textrank/composerlock)](https://packagist.org/packages/php-science/textrank)
-
-This source code is an implementation of the TextRank algorithm (Automatic summarization) on PHP7 strict mode. It can summarize a text, article for example to a short paragraph. Before it would start the summarizing it removes the junk words what are defined in the Stopwords namespace. It is possible to extend it with another languages.
-
-
-
-## Authors, Contributors
-
-Name | GitHub user
---- | ---
-David Belicza | @DavidBelicza
-Riccardo Marton | @riccardomarton
-Syndesi | @Syndesi
-vincentsch | @vincentsch
-Andrew Welch | @khalwat
-Andrey Astashov | @mvcaaa
-Leo Toneff | @bragle
-
-## TextRank or Automatic summarization
-> Automatic summarization is the process of reducing a text document with a computer program in order to create a summary that retains the most important points of the original document. Technologies that can make a coherent summary take into account variables such as length, writing style and syntax. Automatic data summarization is part of machine learning and data mining. The main idea of summarization is to find a representative subset of the data, which contains the information of the entire set. Summarization technologies are used in a large number of sectors in industry today. - Wikipedia
-
-The algorithm of this implementation is:
-* Find sentences,
-* Remove stopwords,
-* Create integer values by find and count the matching words,
-* Change the integer values by the related words' integer values,
-* Normalize values to create scores,
-* Order by scores
-
-## Install
-```
-composer require php-science/textrank
-```
-
-## Test
-```
-cd project-folder
-composer test
-```
-or
-```
-cd project-folder
-phpunit --colors='always' $(pwd)/tests
-```
-
-## Examples
-```php
-
-use PhpScience\TextRank\Tool\StopWords\English;
-
-// String contains a long text, see the /res/sample1.txt file.
-$text = "Lorem ipsum...";
-
-$api = new TextRankFacade();
-// English implementation for stopwords/junk words:
-$stopWords = new English();
-$api->setStopWords($stopWords);
-
-// Array of the most important keywords:
-$result = $api->getOnlyKeyWords($text);
-
-// Array of the sentences from the most important part of the text:
-$result = $api->getHighlights($text);
-
-// Array of the most important sentences from the text:
-$result = $api->summarizeTextBasic($text);
-```
-More examples: https://github.com/DoveID/PHP-Science-TextRank/blob/master/tests/TextRankFacadeTest.php
+
+PHP.Science TextRank
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This source code is an OOP implementation of the TextRank algorithm.
+
The minimum required PHP version is 7.4.
+
+
+
+
+## About
+
+v 2.0.0 WIP
diff --git a/src/TextRankFacade.php b/src/TextRankFacade.php
index 46758d9..008d22a 100644
--- a/src/TextRankFacade.php
+++ b/src/TextRankFacade.php
@@ -1,11 +1,4 @@
- */
declare(strict_types=1);
diff --git a/src/Tool/Graph.php b/src/Tool/Graph.php
index 738cde0..06cdbe9 100644
--- a/src/Tool/Graph.php
+++ b/src/Tool/Graph.php
@@ -1,24 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool;
-/**
- * Class Graph
- *
- * This graph store the sentences and their words with the indexes. This graph
- * is the full map of the whole text.
- *
- * @package PhpScience\TextRank\Tool
- */
class Graph
{
/**
diff --git a/src/Tool/Parser.php b/src/Tool/Parser.php
index fdd1112..76829be 100644
--- a/src/Tool/Parser.php
+++ b/src/Tool/Parser.php
@@ -1,11 +1,4 @@
- */
declare(strict_types=1);
@@ -13,13 +6,6 @@
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
-/**
- * Class Parser
- *
- * This class purpose to parse a real text to sentences and array.
- *
- * @package PhpScience\TextRank\Tool
- */
class Parser
{
/**
diff --git a/src/Tool/Score.php b/src/Tool/Score.php
index 26c790a..f28b2c3 100644
--- a/src/Tool/Score.php
+++ b/src/Tool/Score.php
@@ -1,23 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool;
-/**
- * Class Score
- *
- * It handles words and assigns weighted numbers to them.
- *
- * @package PhpScience\TextRank\Tool
- */
class Score
{
/**
diff --git a/src/Tool/StopWords/English.php b/src/Tool/StopWords/English.php
index 09a0828..4b08b8b 100644
--- a/src/Tool/StopWords/English.php
+++ b/src/Tool/StopWords/English.php
@@ -1,21 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class English
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class English extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/French.php b/src/Tool/StopWords/French.php
index 0e67ad7..9941bdd 100644
--- a/src/Tool/StopWords/French.php
+++ b/src/Tool/StopWords/French.php
@@ -1,21 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class French
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class French extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/German.php b/src/Tool/StopWords/German.php
index 6faf7b0..24130ea 100644
--- a/src/Tool/StopWords/German.php
+++ b/src/Tool/StopWords/German.php
@@ -1,18 +1,9 @@
- */
+
declare(strict_types=1);
+
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class German
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
+
class German extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/Italian.php b/src/Tool/StopWords/Italian.php
index 6aa3093..67ca0e9 100644
--- a/src/Tool/StopWords/Italian.php
+++ b/src/Tool/StopWords/Italian.php
@@ -4,11 +4,6 @@
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class Italian
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class Italian extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/Norwegian.php b/src/Tool/StopWords/Norwegian.php
index 6d1af40..b84ac91 100644
--- a/src/Tool/StopWords/Norwegian.php
+++ b/src/Tool/StopWords/Norwegian.php
@@ -1,21 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class Norwegian
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class Norwegian extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/Russian.php b/src/Tool/StopWords/Russian.php
index 82edf0f..a9ad80f 100644
--- a/src/Tool/StopWords/Russian.php
+++ b/src/Tool/StopWords/Russian.php
@@ -1,22 +1,9 @@
- * @author Andrey Astashov (Russian StopWords)
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class Russian
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class Russian extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/Spanish.php b/src/Tool/StopWords/Spanish.php
index f7cdffd..325a2c7 100644
--- a/src/Tool/StopWords/Spanish.php
+++ b/src/Tool/StopWords/Spanish.php
@@ -1,21 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class Spanish
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
class Spanish extends StopWordsAbstract
{
/**
diff --git a/src/Tool/StopWords/StopWordsAbstract.php b/src/Tool/StopWords/StopWordsAbstract.php
index 33b2128..87d46a5 100644
--- a/src/Tool/StopWords/StopWordsAbstract.php
+++ b/src/Tool/StopWords/StopWordsAbstract.php
@@ -1,21 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool\StopWords;
-/**
- * Class StopWordsAbstract
- *
- * @package PhpScience\TextRank\Tool\StopWords
- */
abstract class StopWordsAbstract
{
/**
diff --git a/src/Tool/Summarize.php b/src/Tool/Summarize.php
index f63c0d0..fd4fed4 100644
--- a/src/Tool/Summarize.php
+++ b/src/Tool/Summarize.php
@@ -1,23 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool;
-/**
- * Class Summarize
- *
- * This is for summarize the text from parsed data.
- *
- * @package PhpScience\TextRank\Tool
- */
class Summarize
{
/**
diff --git a/src/Tool/Text.php b/src/Tool/Text.php
index 037cacf..458fc04 100644
--- a/src/Tool/Text.php
+++ b/src/Tool/Text.php
@@ -1,23 +1,9 @@
- */
declare(strict_types=1);
namespace PhpScience\TextRank\Tool;
-/**
- * Class Text
- *
- * This class is for store the parsed texts.
- *
- * @package PhpScience\TextRank\Tool
- */
class Text
{
/**
diff --git a/tests/TextRankFacadeTest.php b/tests/functional/TextRankFacadeTest.php
similarity index 93%
rename from tests/TextRankFacadeTest.php
rename to tests/functional/TextRankFacadeTest.php
index d1ef4e5..41151ac 100644
--- a/tests/TextRankFacadeTest.php
+++ b/tests/functional/TextRankFacadeTest.php
@@ -1,11 +1,4 @@
- */
declare(strict_types=1);
@@ -19,11 +12,11 @@ class TextRankFacadeTest extends \PHPUnit\Framework\TestCase
{
protected $sampleText1;
- public function setUp()
+ public function setUp(): void
{
parent::setUp();
- $path = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'res'
+ $path = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'resource'
. DIRECTORY_SEPARATOR . 'sample1.txt';
$file = fopen($path, 'r');
diff --git a/tests/phpunit.xml b/tests/phpunit.xml
new file mode 100644
index 0000000..dca7fc5
--- /dev/null
+++ b/tests/phpunit.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+ ../tests/unit
+
+
+ ../tests/functional
+
+
+
+
+
+
+ ../src
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/res/sample1.txt b/tests/resource/sample1.txt
similarity index 100%
rename from res/sample1.txt
rename to tests/resource/sample1.txt
From 40e40d97a3817a612a0fc17e5ace82c2264d390f Mon Sep 17 00:00:00 2001
From: David Belicza <87.bdavid@gmail.com>
Date: Tue, 22 Sep 2020 19:19:12 +0200
Subject: [PATCH 2/5] Reimplementing the parsing/ranking logic
---
.gitignore | 1 +
src/Builder/PageRankDataSourceBuilder.php | 47 +++++++++++
src/Builder/TextBuilder.php | 47 +++++++++++
src/Builder/TextBuilderInterface.php | 12 +++
src/Builder/TextRankOutputBuilder.php | 10 +++
.../TextRankOutputBuilderInterface.php | 10 +++
src/Data/Text.php | 31 +++++++
src/Data/Text/Sentence.php | 30 +++++++
src/Data/Text/SentenceInterface.php | 22 +++++
src/Data/Text/TokenMap.php | 25 ++++++
src/Data/Text/TokenMapInterface.php | 17 ++++
src/Data/TextInterface.php | 18 +++++
src/Data/TextRankOutput.php | 10 +++
src/Data/TextRankOutputInterface.php | 10 +++
src/Facade/TextRank.php | 50 ++++++++++++
src/Service/Parser.php | 53 ++++++++++++
src/Strategy/PageRankStrategy.php | 81 +++++++++++++++++++
.../RankingAlgorithmStrategyInterface.php | 13 +++
18 files changed, 487 insertions(+)
create mode 100644 src/Builder/PageRankDataSourceBuilder.php
create mode 100644 src/Builder/TextBuilder.php
create mode 100644 src/Builder/TextBuilderInterface.php
create mode 100644 src/Builder/TextRankOutputBuilder.php
create mode 100644 src/Builder/TextRankOutputBuilderInterface.php
create mode 100644 src/Data/Text.php
create mode 100644 src/Data/Text/Sentence.php
create mode 100644 src/Data/Text/SentenceInterface.php
create mode 100644 src/Data/Text/TokenMap.php
create mode 100644 src/Data/Text/TokenMapInterface.php
create mode 100644 src/Data/TextInterface.php
create mode 100644 src/Data/TextRankOutput.php
create mode 100644 src/Data/TextRankOutputInterface.php
create mode 100644 src/Facade/TextRank.php
create mode 100644 src/Service/Parser.php
create mode 100644 src/Strategy/PageRankStrategy.php
create mode 100644 src/Strategy/RankingAlgorithmStrategyInterface.php
diff --git a/.gitignore b/.gitignore
index fa374aa..af0e4c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
/.idea
+/.tmp
/composer.lock
/vendor
/var
diff --git a/src/Builder/PageRankDataSourceBuilder.php b/src/Builder/PageRankDataSourceBuilder.php
new file mode 100644
index 0000000..fd08c28
--- /dev/null
+++ b/src/Builder/PageRankDataSourceBuilder.php
@@ -0,0 +1,47 @@
+getSentences() as $sentence) {
+ foreach ($sentence->getVector() as $index => $tokenId) {
+ if (!isset($dataSource[$tokenId])) {
+ $dataSource[$tokenId] = [
+ self::ID => $tokenId,
+ self::LEFT => [],
+ self::RIGHT => []
+ ];
+ }
+
+ if ($sentence->isIndexExists($index - 1)) {
+ $previousTokenId = $sentence->getTokenId($index - 1);
+ if ($text->getTokenMap()->isExists($previousTokenId)) {
+ $dataSource[$tokenId][self::LEFT][] = $previousTokenId;
+ }
+ }
+
+ if ($sentence->isIndexExists($index + 1)) {
+ $nextTokenId = $sentence->getTokenId($index + 1);
+ if ($text->getTokenMap()->isExists($nextTokenId)) {
+ $dataSource[$tokenId][self::RIGHT][] = $nextTokenId;
+ }
+ }
+ }
+ }
+
+ return $dataSource;
+ }
+}
diff --git a/src/Builder/TextBuilder.php b/src/Builder/TextBuilder.php
new file mode 100644
index 0000000..78dae18
--- /dev/null
+++ b/src/Builder/TextBuilder.php
@@ -0,0 +1,47 @@
+ $sentenceTokenList) {
+ $sentenceVector = [];
+ foreach ($sentenceTokenList as $token) {
+ if (!isset($tokens[$token])) {
+ $tokens[$token] = $i;
+ $tokenId = $i;
+ $i++;
+ } else {
+ $tokenId = $tokens[$token];
+ }
+
+ $sentenceVector[] = $tokenId;
+ }
+
+ $sentence = new Sentence();
+ $sentence->setVector($sentenceVector);
+ $sentences[] = $sentence;
+ }
+
+ $tokenMap = new TokenMap();
+ $tokenMap->setTokenMap(array_flip($tokens));
+
+ return new Text(
+ $tokenMap,
+ $sentences
+ );
+ }
+}
diff --git a/src/Builder/TextBuilderInterface.php b/src/Builder/TextBuilderInterface.php
new file mode 100644
index 0000000..1f7d47f
--- /dev/null
+++ b/src/Builder/TextBuilderInterface.php
@@ -0,0 +1,12 @@
+tokenMap = $tokenMap;
+ $this->sentences = $sentences;
+ }
+
+ public function getTokenMap(): TokenMapInterface
+ {
+ return $this->tokenMap;
+ }
+
+ public function getSentences(): array
+ {
+ return $this->sentences;
+ }
+}
diff --git a/src/Data/Text/Sentence.php b/src/Data/Text/Sentence.php
new file mode 100644
index 0000000..6acd5e8
--- /dev/null
+++ b/src/Data/Text/Sentence.php
@@ -0,0 +1,30 @@
+vector = $vector;
+ }
+
+ public function getVector(): array
+ {
+ return $this->vector;
+ }
+
+ public function isIndexExists(int $index): bool
+ {
+ return isset($this->vector[$index]);
+ }
+
+ public function getTokenId(int $index): int
+ {
+ return $this->vector[$index];
+ }
+}
diff --git a/src/Data/Text/SentenceInterface.php b/src/Data/Text/SentenceInterface.php
new file mode 100644
index 0000000..59c3d48
--- /dev/null
+++ b/src/Data/Text/SentenceInterface.php
@@ -0,0 +1,22 @@
+tokenMap = $tokenMap;
+ }
+
+ public function isExists(int $tokenId): bool
+ {
+ return isset($this->tokenMap[$tokenId]);
+ }
+
+ public function getToken(int $tokenId): string
+ {
+ return $this->tokenMap[$tokenId];
+ }
+}
diff --git a/src/Data/Text/TokenMapInterface.php b/src/Data/Text/TokenMapInterface.php
new file mode 100644
index 0000000..bf54db0
--- /dev/null
+++ b/src/Data/Text/TokenMapInterface.php
@@ -0,0 +1,17 @@
+parse($rawText);
+
+ $pageRankStrategy = new PageRankStrategy(
+ new PageRankDataSourceBuilder()
+ );
+
+ $nodeCollection = $pageRankStrategy->rank($text);
+
+ echo PHP_EOL;
+
+ $i = 0;
+ $nodes = [];
+
+ foreach ($nodeCollection->getNodes() as $node) {
+ $nodes[] = $node;
+ $i++;
+
+ if ($i === $maxKeywords) {
+ break;
+ }
+
+ /*echo $text->getTokenMap()->getToken($node->getId());
+ echo ' - ';
+ echo $node->getRank();
+ echo PHP_EOL;*/
+ }
+
+ return $nodes;
+ }
+}
diff --git a/src/Service/Parser.php b/src/Service/Parser.php
new file mode 100644
index 0000000..35e0913
--- /dev/null
+++ b/src/Service/Parser.php
@@ -0,0 +1,53 @@
+textBuilder = $textBuilder;
+ }
+
+    /**
+     * Parses raw text into a Text object built from sentences and tokens.
+     *
+     * The text is split into sentence entries on runs of newlines, and on
+     * ".", "?" or "!" followed by whitespace when no closing parenthesis
+     * follows before an opening one. PREG_SPLIT_DELIM_CAPTURE is set, so the
+     * matched delimiters are kept as separate entries of the sentence list.
+     * Every entry is then split into tokens on whitespace and on leading or
+     * trailing punctuation; each token is trimmed and lower-cased.
+     *
+     * @param string $rawText Text to parse.
+     *
+     * @return TextInterface Result of the text builder for the sentence list
+     *                       and the per-sentence token lists.
+     */
+    public function parse(string $rawText): TextInterface
+    {
+        // Without the "u" modifier PCRE works on single bytes, so \p{P} can
+        // match the trailing byte of a multi-byte UTF-8 letter (for example
+        // "á" or "л") and cut the character in half. Unicode mode is only
+        // enabled for valid UTF-8 because preg_split() fails on malformed
+        // input in that mode; other input keeps the byte-wise behaviour.
+        $modifier = mb_check_encoding($rawText, 'UTF-8') ? 'u' : '';
+
+        $sentences = preg_split(
+            '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/' . $modifier,
+            $rawText,
+            -1,
+            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
+        );
+
+        $textMap = [];
+
+        foreach ($sentences as $sentenceIndex => $sentence) {
+            $tokens = preg_split(
+                '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/' . $modifier,
+                $sentence,
+                -1,
+                PREG_SPLIT_NO_EMPTY
+            );
+
+            foreach ($tokens as $tokenIndex => $token) {
+                $tokens[$tokenIndex] = mb_strtolower(trim($token));
+            }
+
+            //@todo stopwords
+
+            $textMap[$sentenceIndex] = $tokens;
+        }
+
+        return $this->textBuilder->build(
+            $sentences,
+            $textMap
+        );
+    }
+}
diff --git a/src/Strategy/PageRankStrategy.php b/src/Strategy/PageRankStrategy.php
new file mode 100644
index 0000000..a831142
--- /dev/null
+++ b/src/Strategy/PageRankStrategy.php
@@ -0,0 +1,81 @@
+pageRankDataSourceBuilder = $pageRankDataSourceBuilder;
+ }
+
+ public function rank(TextInterface $text): NodeCollectionInterface
+ {
+ $dataSource = $this->pageRankDataSourceBuilder->build($text);
+ $strategy = $this->createPageRankStrategy($dataSource);
+ $ranking = $this->createRanking($strategy);
+ $pageRankAlgorithm = $this->createPageRankAlgorithm($ranking, $strategy);
+ $maxIteration = 100;
+
+ return $pageRankAlgorithm->run($maxIteration);
+ }
+
+ private function createPageRankAlgorithm(
+ RankingInterface $ranking,
+ NodeDataSourceStrategyInterface $strategy
+ ): PageRankAlgorithmInterface {
+
+ $normalizer = new Normalizer();
+
+ return new PageRankAlgorithm(
+ $ranking,
+ $strategy,
+ $normalizer
+ );
+ }
+
+ private function createPageRankStrategy(
+ array $dataSource
+ ): NodeDataSourceStrategyInterface {
+
+ $nodeBuilder = new NodeBuilder();
+ $nodeCollectionBuilder = new NodeCollectionBuilder();
+
+ return new MemorySourceStrategy(
+ $nodeBuilder,
+ $nodeCollectionBuilder,
+ $dataSource
+ );
+ }
+
+ private function createRanking(
+ NodeDataSourceStrategyInterface $strategy
+ ): RankingInterface {
+
+ $rankComparator = new RankComparator();
+
+ return new Ranking(
+ $rankComparator,
+ $strategy
+ );
+ }
+}
diff --git a/src/Strategy/RankingAlgorithmStrategyInterface.php b/src/Strategy/RankingAlgorithmStrategyInterface.php
new file mode 100644
index 0000000..3387864
--- /dev/null
+++ b/src/Strategy/RankingAlgorithmStrategyInterface.php
@@ -0,0 +1,13 @@
+
Date: Wed, 23 Sep 2020 21:53:37 +0200
Subject: [PATCH 3/5] Refactoring facade class
---
src/Builder/TextBuilder.php | 10 +-
src/Builder/TextRankOutputBuilder.php | 52 ++-
.../TextRankOutputBuilderInterface.php | 10 +-
src/Data/StopWordCollection.php | 329 ++++++++++++++++++
src/Data/Text/Sentence.php | 24 +-
src/Data/Text/SentenceInterface.php | 8 +
src/Data/TextRankOutput.php | 23 +-
src/Data/TextRankOutput/OutputValue.php | 42 +++
.../TextRankOutput/OutputValueInterface.php | 20 ++
src/Data/TextRankOutputInterface.php | 21 ++
src/Facade/TextRank.php | 62 ++--
src/Service/GetTopNodes.php | 30 ++
src/Service/GetTopNodesInterface.php | 22 ++
src/Service/Parser.php | 15 +-
src/Service/SentenceWeighting.php | 45 +++
src/Service/SentenceWeightingInterface.php | 19 +
src/Service/StopWordFilter.php | 23 ++
src/Strategy/PageRankStrategy.php | 2 +-
tests/resource/sample1.txt | 1 +
19 files changed, 713 insertions(+), 45 deletions(-)
create mode 100644 src/Data/StopWordCollection.php
create mode 100644 src/Data/TextRankOutput/OutputValue.php
create mode 100644 src/Data/TextRankOutput/OutputValueInterface.php
create mode 100644 src/Service/GetTopNodes.php
create mode 100644 src/Service/GetTopNodesInterface.php
create mode 100644 src/Service/SentenceWeighting.php
create mode 100644 src/Service/SentenceWeightingInterface.php
create mode 100644 src/Service/StopWordFilter.php
diff --git a/src/Builder/TextBuilder.php b/src/Builder/TextBuilder.php
index 78dae18..fdccf6e 100644
--- a/src/Builder/TextBuilder.php
+++ b/src/Builder/TextBuilder.php
@@ -20,6 +20,7 @@ public function build(array $originalSentences, array $textMap): TextInterface
foreach ($textMap as $sentenceIndex => $sentenceTokenList) {
$sentenceVector = [];
foreach ($sentenceTokenList as $token) {
+ $token = (string)$token;
if (!isset($tokens[$token])) {
$tokens[$token] = $i;
$tokenId = $i;
@@ -32,12 +33,19 @@ public function build(array $originalSentences, array $textMap): TextInterface
}
$sentence = new Sentence();
+ $sentence->setId($sentenceIndex);
$sentence->setVector($sentenceVector);
+ $sentence->setOriginalValue($originalSentences[$sentenceIndex]);
$sentences[] = $sentence;
}
$tokenMap = new TokenMap();
- $tokenMap->setTokenMap(array_flip($tokens));
+ $tokenMap->setTokenMap(
+ array_map(
+ 'strval',
+ array_flip($tokens)
+ )
+ );
return new Text(
$tokenMap,
diff --git a/src/Builder/TextRankOutputBuilder.php b/src/Builder/TextRankOutputBuilder.php
index b4b8590..2155724 100644
--- a/src/Builder/TextRankOutputBuilder.php
+++ b/src/Builder/TextRankOutputBuilder.php
@@ -4,7 +4,57 @@
namespace PhpScience\TextRank\Builder;
-class TextRankOutputBuilder
+use PhpScience\PageRank\Data\NodeCollectionInterface;
+use PhpScience\TextRank\Data\TextInterface;
+use PhpScience\TextRank\Data\TextRankOutput;
+use PhpScience\TextRank\Data\TextRankOutput\OutputValue;
+use PhpScience\TextRank\Data\TextRankOutputInterface;
+use PhpScience\TextRank\Service\GetTopNodesInterface;
+use PhpScience\TextRank\Service\SentenceWeightingInterface;
+
+class TextRankOutputBuilder implements TextRankOutputBuilderInterface
{
+ private GetTopNodesInterface $getTopNodes;
+ private SentenceWeightingInterface $sentenceWeighting;
+
+ public function __construct(
+ GetTopNodesInterface $getTopNodes,
+ SentenceWeightingInterface $sentenceWeighting
+ ) {
+ $this->getTopNodes = $getTopNodes;
+ $this->sentenceWeighting = $sentenceWeighting;
+ }
+
+ public function build(
+ TextInterface $text,
+ NodeCollectionInterface $nodeCollection,
+ int $maxKeywords
+ ): TextRankOutputInterface {
+ $nodes = $this->getTopNodes->execute($nodeCollection, $maxKeywords);
+ $words = [];
+
+ foreach ($nodes as $node) {
+ $token = $text
+ ->getTokenMap()
+ ->getToken($node->getId());
+ $word = new OutputValue();
+ $word->setId($node->getId());
+ $word->setValue($token);
+ $word->setRank($node->getRank());
+
+ $words[] = $word;
+ }
+
+ $textRankOutput = new TextRankOutput();
+ $textRankOutput->setKeyWords(array_slice($words, 0, $maxKeywords));
+
+ $sentences = $this->sentenceWeighting->weight($text, $words);
+
+
+
+ $textRankOutput->setSentences($sentences);
+
+ return $textRankOutput;
+ }
}
diff --git a/src/Builder/TextRankOutputBuilderInterface.php b/src/Builder/TextRankOutputBuilderInterface.php
index ad5b33f..4c42fc6 100644
--- a/src/Builder/TextRankOutputBuilderInterface.php
+++ b/src/Builder/TextRankOutputBuilderInterface.php
@@ -4,7 +4,15 @@
namespace PhpScience\TextRank\Builder;
+use PhpScience\PageRank\Data\NodeCollectionInterface;
+use PhpScience\TextRank\Data\TextInterface;
+use PhpScience\TextRank\Data\TextRankOutputInterface;
+
interface TextRankOutputBuilderInterface
{
- public function build();
+ public function build(
+ TextInterface $text,
+ NodeCollectionInterface $nodeCollection,
+ int $maxKeywords
+ ): TextRankOutputInterface;
}
diff --git a/src/Data/StopWordCollection.php b/src/Data/StopWordCollection.php
new file mode 100644
index 0000000..106bb24
--- /dev/null
+++ b/src/Data/StopWordCollection.php
@@ -0,0 +1,329 @@
+id = $id;
+ }
+
+ public function getId(): int
+ {
+ return $this->id;
+ }
+
+ public function setOriginalValue(string $originalValue): void
+ {
+ $this->originalValue = $originalValue;
+ }
+
+ public function getOriginalValue(): string
+ {
+ return $this->originalValue;
+ }
public function setVector(array $vector): void
{
diff --git a/src/Data/Text/SentenceInterface.php b/src/Data/Text/SentenceInterface.php
index 59c3d48..d8a5b17 100644
--- a/src/Data/Text/SentenceInterface.php
+++ b/src/Data/Text/SentenceInterface.php
@@ -6,6 +6,14 @@
interface SentenceInterface
{
+ public function setId(int $id): void;
+
+ public function getId(): int;
+
+ public function setOriginalValue(string $originalValue): void;
+
+ public function getOriginalValue(): string;
+
/**
* @param int[] $vector
*/
diff --git a/src/Data/TextRankOutput.php b/src/Data/TextRankOutput.php
index b4dcec2..ce47e08 100644
--- a/src/Data/TextRankOutput.php
+++ b/src/Data/TextRankOutput.php
@@ -4,7 +4,28 @@
namespace PhpScience\TextRank\Data;
-class TextRankOutput
+/**
+ * Mutable holder for the keyword list and the sentence list of a result.
+ *
+ * Each getter returns null until the matching setter has been called.
+ */
+class TextRankOutput implements TextRankOutputInterface
{
+    // Explicit null defaults: a typed property without a default starts out
+    // uninitialized, and reading it through a getter before the setter ran
+    // would throw an Error instead of returning the nullable value.
+    private ?array $keyWords = null;
+    private ?array $sentences = null;
+    public function setKeyWords(array $keywords): void
+    {
+        $this->keyWords = $keywords;
+    }
+
+    public function getKeyWords(): ?array
+    {
+        return $this->keyWords;
+    }
+
+    public function setSentences(array $sentences): void
+    {
+        $this->sentences = $sentences;
+    }
+
+    public function getSentences(): ?array
+    {
+        return $this->sentences;
+    }
}
diff --git a/src/Data/TextRankOutput/OutputValue.php b/src/Data/TextRankOutput/OutputValue.php
new file mode 100644
index 0000000..bd12f81
--- /dev/null
+++ b/src/Data/TextRankOutput/OutputValue.php
@@ -0,0 +1,42 @@
+id = $id;
+ }
+
+ public function getId(): int
+ {
+ return $this->id;
+ }
+
+ public function setValue(string $value): void
+ {
+ $this->value = $value;
+ }
+
+ public function getValue(): string
+ {
+ return $this->value;
+ }
+
+ public function setRank(float $rank): void
+ {
+ $this->rank = $rank;
+ }
+
+ public function getRank(): float
+ {
+ return $this->rank;
+ }
+}
diff --git a/src/Data/TextRankOutput/OutputValueInterface.php b/src/Data/TextRankOutput/OutputValueInterface.php
new file mode 100644
index 0000000..e3458d3
--- /dev/null
+++ b/src/Data/TextRankOutput/OutputValueInterface.php
@@ -0,0 +1,20 @@
+parse($rawText);
-
- $pageRankStrategy = new PageRankStrategy(
- new PageRankDataSourceBuilder()
- );
-
- $nodeCollection = $pageRankStrategy->rank($text);
-
- echo PHP_EOL;
-
- $i = 0;
- $nodes = [];
-
- foreach ($nodeCollection->getNodes() as $node) {
- $nodes[] = $node;
- $i++;
+ private Parser $parser;
+ private RankingAlgorithmStrategyInterface $rankingAlgorithmStrategy;
+ private TextRankOutputBuilderInterface $textRankOutputBuilder;
+
+ public function __construct(
+ Parser $parser,
+ RankingAlgorithmStrategyInterface $rankingAlgorithmStrategy,
+ TextRankOutputBuilderInterface $textRankOutputBuilder
+ ) {
+ $this->parser = $parser;
+ $this->rankingAlgorithmStrategy = $rankingAlgorithmStrategy;
+ $this->textRankOutputBuilder = $textRankOutputBuilder;
+ }
- if ($i === $maxKeywords) {
- break;
- }
+ public function getKeywords(
+ string $rawText,
+ int $maxKeywords
+ ): TextRankOutputInterface {
- /*echo $text->getTokenMap()->getToken($node->getId());
- echo ' - ';
- echo $node->getRank();
- echo PHP_EOL;*/
- }
+ $text = $this->parser->parse($rawText);
+ $nodeCollection = $this->rankingAlgorithmStrategy->rank($text);
- return $nodes;
+ return $this->textRankOutputBuilder->build(
+ $text,
+ $nodeCollection,
+ $maxKeywords
+ );
}
}
diff --git a/src/Service/GetTopNodes.php b/src/Service/GetTopNodes.php
new file mode 100644
index 0000000..1b201d2
--- /dev/null
+++ b/src/Service/GetTopNodes.php
@@ -0,0 +1,30 @@
+getNodes());
+ $size = count($nodes);
+
+ for ($i = 0; $i < $size; $i++) {
+ for ($j = 0; $j < $size; $j++) {
+ if ($nodes[$i]->getRank() > $nodes[$j]->getRank()) {
+ $tmp = $nodes[$i];
+ $nodes[$i] = $nodes[$j];
+ $nodes[$j] = $tmp;
+ }
+ }
+ }
+
+ return $nodes;
+ }
+}
diff --git a/src/Service/GetTopNodesInterface.php b/src/Service/GetTopNodesInterface.php
new file mode 100644
index 0000000..6e257bc
--- /dev/null
+++ b/src/Service/GetTopNodesInterface.php
@@ -0,0 +1,22 @@
+textBuilder = $textBuilder;
+ $this->stopWordFilter = $stopWordFilter;
}
public function parse(string $rawText): TextInterface
@@ -37,10 +40,14 @@ public function parse(string $rawText): TextInterface
);
foreach ($tokens as $tokenIndex => $token) {
- $tokens[$tokenIndex] = mb_strtolower(trim($token));
- }
+ $token = mb_strtolower(trim($token));
- //@todo stopwords
+ if ($this->stopWordFilter->isStopWord($token)) {
+ unset($tokens[$tokenIndex]);
+ } else {
+ $tokens[$tokenIndex] = mb_strtolower(trim($token));
+ }
+ }
$textMap[$sentenceIndex] = $tokens;
}
diff --git a/src/Service/SentenceWeighting.php b/src/Service/SentenceWeighting.php
new file mode 100644
index 0000000..69b4056
--- /dev/null
+++ b/src/Service/SentenceWeighting.php
@@ -0,0 +1,45 @@
+getId()] = $keyword->getRank();
+ }
+
+ $sentenceOutputList = [];
+
+ foreach ($text->getSentences() as $sentence) {
+ $vector = $sentence->getVector();
+ $score = .0;
+
+ foreach ($vector as $tokenId) {
+ if (isset($keywordRankMap[$tokenId])) {
+ $score += $keywordRankMap[$tokenId];
+ }
+ }
+
+ $score = $score / count($vector);
+ $sentenceOutput = new OutputValue();
+ $sentenceOutput->setId($sentence->getId());
+ $sentenceOutput->setRank($score);
+ $sentenceOutput->setValue($sentence->getOriginalValue());
+
+ $sentenceOutputList[] = $sentenceOutput;
+ }
+
+ return $sentenceOutputList;
+ }
+}
diff --git a/src/Service/SentenceWeightingInterface.php b/src/Service/SentenceWeightingInterface.php
new file mode 100644
index 0000000..7bd29c5
--- /dev/null
+++ b/src/Service/SentenceWeightingInterface.php
@@ -0,0 +1,19 @@
+stopWordCollection = $stopWordCollection;
+ }
+
+ public function isStopWord(string $word): bool
+ {
+ return array_search($word, $this->stopWordCollection->words) !== false;
+ }
+}
diff --git a/src/Strategy/PageRankStrategy.php b/src/Strategy/PageRankStrategy.php
index a831142..20bab66 100644
--- a/src/Strategy/PageRankStrategy.php
+++ b/src/Strategy/PageRankStrategy.php
@@ -34,7 +34,7 @@ public function rank(TextInterface $text): NodeCollectionInterface
$strategy = $this->createPageRankStrategy($dataSource);
$ranking = $this->createRanking($strategy);
$pageRankAlgorithm = $this->createPageRankAlgorithm($ranking, $strategy);
- $maxIteration = 100;
+ $maxIteration = 1000;
return $pageRankAlgorithm->run($maxIteration);
}
diff --git a/tests/resource/sample1.txt b/tests/resource/sample1.txt
index 849e2a3..bab87cf 100644
--- a/tests/resource/sample1.txt
+++ b/tests/resource/sample1.txt
@@ -1 +1,2 @@
Over the past fortnight we asked you to nominate your top extensions for the GNOME desktop. And you did just that. Having now sifted through the hundreds of entries, we’re ready to reveal your favourite GNOME Shell extensions. GNOME 3 (which is more commonly used with the GNOME Shell) has an extension framework that lets developers (and users) extend, build on, and shape how the desktop looks, acts and functions. Dash to Dock takes the GNOME Dash — this is the ‘favourites bar’ that appears on the left-hand side of the screen in the Activities overlay — and transforms it into a desktop dock. And just like Plank, Docky or AWN you can add app launchers, rearrange them, and use them to minimise, restore and switch between app windows. Dash to Dock has many of the common “Dock” features you’d expect, including autohide and intellihide, a fixed-width mode, adjustable icon size, and custom themes. My biggest pet peeve with GNOME Shell is its legacy app tray that hides in the bottom left of the screen. All extraneous non-system applets, indicators and tray icons hide down here. This makes it a little harder to use applications that rely on a system tray presence, like Skype, Franz, Telegram, and Dropbox. TopIcons Plus is the quick way to put GNOME system tray icons back where they belong: on show and in reach. The extension moves legacy tray icons from the bottom left of Gnome Shell to the right-hand side of the top panel. A well-stocked settings panel lets you adjust icon opacity, color, padding, size and tray position. Dive into the settings to adjust the sizing, styling and positioning of icons. Like the popular daily stimulant of choice, the Caffeine GNOME extension keeps your computer awake. It couldn’t be simpler to use: just click the empty mug icon. An empty cup means you’re using normal auto suspend rules – e.g., a screensaver – while a freshly brewed cup of coffee means auto suspend and screensaver are turned off. 
The Caffeine GNOME extension supports GNOME Shell 3.4 or later. Familiar with applications like Guake and Tilda? If so, you’ll instantly see the appeal of the (superbly named) Drop Down Terminal GNOME extension. When installed just tap the key above the tab key (though it can be changed to almost any key you wish) to get instant access to the command line. Want to speed up using workspaces? This simple tool lets you do just that. Once installed you can quickly switch between workspaces by scrolling over the top panel - no need to enter the Activities Overlay!
+t
\ No newline at end of file
From 8c776bb432dbc1824a903040e2ece5bfa19148a9 Mon Sep 17 00:00:00 2001
From: David Belicza <87.bdavid@gmail.com>
Date: Sat, 26 Sep 2020 15:32:03 +0200
Subject: [PATCH 4/5] Weighting sentences, added stopword csv reader, added
request object
---
.gitignore | 1 +
resource/stop-word/english.csv | 318 ++++++++
resource/stop-word/french.csv | 689 +++++++++++++++++
resource/stop-word/german.csv | 598 +++++++++++++++
resource/stop-word/italian.csv | 660 ++++++++++++++++
resource/stop-word/norwegian.csv | 221 ++++++
resource/stop-word/russian.csv | 559 ++++++++++++++
resource/stop-word/spanish.csv | 721 ++++++++++++++++++
src/Builder/AlgorithmOutputBuilder.php | 64 ++
...hp => AlgorithmOutputBuilderInterface.php} | 38 +-
src/Builder/PageRankDataSourceBuilder.php | 94 +--
src/Builder/StopWordCollectionBuilder.php | 31 +
.../StopWordCollectionBuilderInterface.php | 12 +
src/Builder/TextBuilder.php | 110 +--
src/Builder/TextBuilderInterface.php | 24 +-
src/Builder/TextRankOutputBuilder.php | 60 --
...TextRankOutput.php => AlgorithmOutput.php} | 62 +-
...rface.php => AlgorithmOutputInterface.php} | 60 +-
src/Data/AlgorithmRequest.php | 76 ++
src/Data/AlgorithmRequestInterface.php | 58 ++
.../OutputValue.php => RankDataObject.php} | 84 +-
...erface.php => RankDataObjectInterface.php} | 40 +-
src/Data/StopWordCollection.php | 349 +--------
src/Data/StopWordCollectionInterface.php | 10 +
src/Data/Text.php | 62 +-
src/Data/Text/Sentence.php | 104 +--
src/Data/Text/SentenceInterface.php | 60 +-
src/Data/Text/TokenMap.php | 50 +-
src/Data/Text/TokenMapInterface.php | 34 +-
src/Data/TextInterface.php | 36 +-
src/Exception/IoException.php | 10 +
src/Exception/TextRankException.php | 12 +
src/Facade/TextRank.php | 100 ++-
src/Factory/GeneralFactory.php | 59 ++
src/Factory/GeneralFactoryInterface.php | 21 +
src/Service/CsvReader.php | 33 +
src/Service/GetTopNodes.php | 30 -
src/Service/GetTopNodesInterface.php | 22 -
src/Service/Parser.php | 125 +--
src/Service/ParserInterface.php | 12 +
src/Service/ReaderInterface.php | 12 +
src/Service/SentenceWeighting.php | 112 +--
src/Service/SentenceWeightingInterface.php | 42 +-
src/Service/SortRankDataList.php | 28 +
src/Service/SortRankDataListInterface.php | 18 +
src/Service/StopWordFilter.php | 23 -
src/Strategy/PageRankStrategy.php | 163 ++--
.../RankingAlgorithmStrategyInterface.php | 29 +-
src/TextRankFacade.php | 36 +-
49 files changed, 4986 insertions(+), 1186 deletions(-)
create mode 100644 resource/stop-word/english.csv
create mode 100644 resource/stop-word/french.csv
create mode 100644 resource/stop-word/german.csv
create mode 100644 resource/stop-word/italian.csv
create mode 100644 resource/stop-word/norwegian.csv
create mode 100644 resource/stop-word/russian.csv
create mode 100644 resource/stop-word/spanish.csv
create mode 100644 src/Builder/AlgorithmOutputBuilder.php
rename src/Builder/{TextRankOutputBuilderInterface.php => AlgorithmOutputBuilderInterface.php} (57%)
create mode 100644 src/Builder/StopWordCollectionBuilder.php
create mode 100644 src/Builder/StopWordCollectionBuilderInterface.php
delete mode 100644 src/Builder/TextRankOutputBuilder.php
rename src/Data/{TextRankOutput.php => AlgorithmOutput.php} (85%)
rename src/Data/{TextRankOutputInterface.php => AlgorithmOutputInterface.php} (52%)
create mode 100644 src/Data/AlgorithmRequest.php
create mode 100644 src/Data/AlgorithmRequestInterface.php
rename src/Data/{TextRankOutput/OutputValue.php => RankDataObject.php} (80%)
rename src/Data/{TextRankOutput/OutputValueInterface.php => RankDataObjectInterface.php} (74%)
create mode 100644 src/Data/StopWordCollectionInterface.php
create mode 100644 src/Exception/IoException.php
create mode 100644 src/Exception/TextRankException.php
create mode 100644 src/Factory/GeneralFactory.php
create mode 100644 src/Factory/GeneralFactoryInterface.php
create mode 100644 src/Service/CsvReader.php
delete mode 100644 src/Service/GetTopNodes.php
delete mode 100644 src/Service/GetTopNodesInterface.php
create mode 100644 src/Service/ParserInterface.php
create mode 100644 src/Service/ReaderInterface.php
create mode 100644 src/Service/SortRankDataList.php
create mode 100644 src/Service/SortRankDataListInterface.php
delete mode 100644 src/Service/StopWordFilter.php
diff --git a/.gitignore b/.gitignore
index af0e4c2..fcab08f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
/.idea
/.tmp
+/tests/.phpunit.result.cache
/composer.lock
/vendor
/var
diff --git a/resource/stop-word/english.csv b/resource/stop-word/english.csv
new file mode 100644
index 0000000..a338b15
--- /dev/null
+++ b/resource/stop-word/english.csv
@@ -0,0 +1,318 @@
+a
+about
+above
+above
+across
+after
+afterwards
+again
+against
+all
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+amoungst
+amount
+an
+and
+another
+any
+anyhow
+anyone
+anything
+anyway
+anywhere
+are
+around
+as
+at
+back
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+behind
+being
+below
+beside
+besides
+between
+beyond
+bill
+both
+bottom
+but
+by
+call
+can
+cannot
+cant
+co
+con
+could
+couldnt
+cry
+de
+describe
+detail
+do
+done
+down
+due
+during
+each
+eg
+eight
+either
+eleven
+else
+elsewhere
+empty
+enough
+etc
+even
+ever
+every
+everyone
+everything
+everywhere
+except
+few
+fifteen
+fifty
+fill
+find
+fire
+first
+five
+for
+former
+formerly
+forty
+found
+four
+from
+front
+full
+further
+get
+give
+go
+had
+has
+hasnt
+have
+he
+hence
+her
+here
+hereafter
+hereby
+herein
+hereupon
+hers
+herself
+him
+himself
+his
+how
+however
+hundred
+ie
+if
+in
+inc
+indeed
+interest
+into
+is
+it
+its
+itself
+keep
+last
+latter
+latterly
+least
+less
+ltd
+made
+many
+may
+me
+meanwhile
+might
+mill
+mine
+more
+moreover
+most
+mostly
+move
+much
+must
+my
+myself
+name
+namely
+neither
+never
+nevertheless
+next
+nine
+no
+nobody
+none
+noone
+nor
+not
+nothing
+now
+nowhere
+of
+off
+often
+on
+once
+one
+only
+onto
+or
+other
+others
+otherwise
+our
+ours
+ourselves
+out
+over
+own
+part
+per
+perhaps
+please
+put
+rather
+re
+same
+see
+seem
+seemed
+seeming
+seems
+serious
+several
+she
+should
+show
+side
+since
+sincere
+six
+sixty
+so
+some
+somehow
+someone
+something
+sometime
+sometimes
+somewhere
+still
+such
+system
+take
+ten
+than
+that
+the
+their
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+thereupon
+these
+they
+thick
+thin
+third
+this
+those
+though
+three
+through
+throughout
+thru
+thus
+to
+together
+too
+top
+toward
+towards
+twelve
+twenty
+two
+un
+under
+until
+up
+upon
+us
+very
+via
+was
+we
+well
+were
+what
+whatever
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+whereupon
+wherever
+whether
+which
+while
+whither
+who
+whoever
+whole
+whom
+whose
+why
+will
+with
+within
+without
+would
+yet
+you
+your
+yours
+yourself
+yourselves
diff --git a/resource/stop-word/french.csv b/resource/stop-word/french.csv
new file mode 100644
index 0000000..3909d03
--- /dev/null
+++ b/resource/stop-word/french.csv
@@ -0,0 +1,689 @@
+a
+abord
+absolument
+afin
+ah
+ai
+aie
+aient
+aies
+ailleurs
+ainsi
+ait
+allaient
+allo
+allons
+allô
+alors
+anterieur
+anterieure
+anterieures
+apres
+après
+as
+assez
+attendu
+au
+aucun
+aucune
+aucuns
+aujourd
+aujourd'hui
+aupres
+auquel
+aura
+aurai
+auraient
+aurais
+aurait
+auras
+aurez
+auriez
+aurions
+aurons
+auront
+aussi
+autre
+autrefois
+autrement
+autres
+autrui
+aux
+auxquelles
+auxquels
+avaient
+avais
+avait
+avant
+avec
+avez
+aviez
+avions
+avoir
+avons
+ayant
+ayez
+ayons
+b
+bah
+bas
+basee
+bat
+beau
+beaucoup
+bien
+bigre
+bon
+boum
+bravo
+brrr
+c
+car
+ce
+ceci
+cela
+celle
+celle-ci
+celle-là
+celles
+celles-ci
+celles-là
+celui
+celui-ci
+celui-là
+celà
+cent
+cependant
+certain
+certaine
+certaines
+certains
+certes
+ces
+cet
+cette
+ceux
+ceux-ci
+ceux-là
+chacun
+chacune
+chaque
+cher
+chers
+chez
+chiche
+chut
+chère
+chères
+ci
+cinq
+cinquantaine
+cinquante
+cinquantième
+cinquième
+clac
+clic
+combien
+comme
+comment
+comparable
+comparables
+compris
+concernant
+contre
+couic
+crac
+d
+da
+dans
+de
+debout
+dedans
+dehors
+deja
+delà
+depuis
+dernier
+derniere
+derriere
+derrière
+des
+desormais
+desquelles
+desquels
+dessous
+dessus
+deux
+deuxième
+deuxièmement
+devant
+devers
+devra
+devrait
+different
+differentes
+differents
+différent
+différente
+différentes
+différents
+dire
+directe
+directement
+dit
+dite
+dits
+divers
+diverse
+diverses
+dix
+dix-huit
+dix-neuf
+dix-sept
+dixième
+doit
+doivent
+donc
+dont
+dos
+douze
+douzième
+dring
+droite
+du
+duquel
+durant
+dès
+début
+désormais
+e
+effet
+egale
+egalement
+egales
+eh
+elle
+elle-même
+elles
+elles-mêmes
+en
+encore
+enfin
+entre
+envers
+environ
+es
+essai
+est
+et
+etant
+etc
+etre
+eu
+eue
+eues
+euh
+eurent
+eus
+eusse
+eussent
+eusses
+eussiez
+eussions
+eut
+eux
+eux-mêmes
+exactement
+excepté
+extenso
+exterieur
+eûmes
+eût
+eûtes
+f
+fais
+faisaient
+faisant
+fait
+faites
+façon
+feront
+fi
+flac
+floc
+fois
+font
+force
+furent
+fus
+fusse
+fussent
+fusses
+fussiez
+fussions
+fut
+fûmes
+fût
+fûtes
+g
+gens
+h
+ha
+haut
+hein
+hem
+hep
+hi
+ho
+holà
+hop
+hormis
+hors
+hou
+houp
+hue
+hui
+huit
+huitième
+hum
+hurrah
+hé
+hélas
+i
+ici
+il
+ils
+importe
+j
+je
+jusqu
+jusque
+juste
+k
+l
+la
+laisser
+laquelle
+las
+le
+lequel
+les
+lesquelles
+lesquels
+leur
+leurs
+longtemps
+lors
+lorsque
+lui
+lui-meme
+lui-même
+là
+lès
+m
+ma
+maint
+maintenant
+mais
+malgre
+malgré
+maximale
+me
+meme
+memes
+merci
+mes
+mien
+mienne
+miennes
+miens
+mille
+mince
+mine
+minimale
+moi
+moi-meme
+moi-même
+moindres
+moins
+mon
+mot
+moyennant
+multiple
+multiples
+même
+mêmes
+n
+na
+naturel
+naturelle
+naturelles
+ne
+neanmoins
+necessaire
+necessairement
+neuf
+neuvième
+ni
+nombreuses
+nombreux
+nommés
+non
+nos
+notamment
+notre
+nous
+nous-mêmes
+nouveau
+nouveaux
+nul
+néanmoins
+nôtre
+nôtres
+o
+oh
+ohé
+ollé
+olé
+on
+ont
+onze
+onzième
+ore
+ou
+ouf
+ouias
+oust
+ouste
+outre
+ouvert
+ouverte
+ouverts
+o|
+où
+p
+paf
+pan
+par
+parce
+parfois
+parle
+parlent
+parler
+parmi
+parole
+parseme
+partant
+particulier
+particulière
+particulièrement
+pas
+passé
+pendant
+pense
+permet
+personne
+personnes
+peu
+peut
+peuvent
+peux
+pff
+pfft
+pfut
+pif
+pire
+pièce
+plein
+plouf
+plupart
+plus
+plusieurs
+plutôt
+possessif
+possessifs
+possible
+possibles
+pouah
+pour
+pourquoi
+pourrais
+pourrait
+pouvait
+prealable
+precisement
+premier
+première
+premièrement
+pres
+probable
+probante
+procedant
+proche
+près
+psitt
+pu
+puis
+puisque
+pur
+pure
+q
+qu
+quand
+quant
+quant-à-soi
+quanta
+quarante
+quatorze
+quatre
+quatre-vingt
+quatrième
+quatrièmement
+que
+quel
+quelconque
+quelle
+quelles
+quelqu'un
+quelque
+quelques
+quels
+qui
+quiconque
+quinze
+quoi
+quoique
+r
+rare
+rarement
+rares
+relative
+relativement
+remarquable
+rend
+rendre
+restant
+reste
+restent
+restrictif
+retour
+revoici
+revoilà
+rien
+s
+sa
+sacrebleu
+sait
+sans
+sapristi
+sauf
+se
+sein
+seize
+selon
+semblable
+semblaient
+semble
+semblent
+sent
+sept
+septième
+sera
+serai
+seraient
+serais
+serait
+seras
+serez
+seriez
+serions
+serons
+seront
+ses
+seul
+seule
+seulement
+si
+sien
+sienne
+siennes
+siens
+sinon
+six
+sixième
+soi
+soi-même
+soient
+sois
+soit
+soixante
+sommes
+son
+sont
+sous
+souvent
+soyez
+soyons
+specifique
+specifiques
+speculatif
+stop
+strictement
+subtiles
+suffisant
+suffisante
+suffit
+suis
+suit
+suivant
+suivante
+suivantes
+suivants
+suivre
+sujet
+superpose
+sur
+surtout
+t
+ta
+tac
+tandis
+tant
+tardive
+te
+tel
+telle
+tellement
+telles
+tels
+tenant
+tend
+tenir
+tente
+tes
+tic
+tien
+tienne
+tiennes
+tiens
+toc
+toi
+toi-même
+ton
+touchant
+toujours
+tous
+tout
+toute
+toutefois
+toutes
+treize
+trente
+tres
+trois
+troisième
+troisièmement
+trop
+très
+tsoin
+tsouin
+tu
+té
+u
+un
+une
+unes
+uniformement
+unique
+uniques
+uns
+v
+va
+vais
+valeur
+vas
+vers
+via
+vif
+vifs
+vingt
+vivat
+vive
+vives
+vlan
+voici
+voie
+voient
+voilà
+vont
+vos
+votre
+vous
+vous-mêmes
+vu
+vé
+vôtre
+vôtres
+w
+x
+y
+z
+zut
+à
+â
+ça
+ès
+étaient
+étais
+était
+étant
+état
+étiez
+étions
+été
+étée
+étées
+étés
+êtes
+être
+ô
diff --git a/resource/stop-word/german.csv b/resource/stop-word/german.csv
new file mode 100644
index 0000000..fce38c4
--- /dev/null
+++ b/resource/stop-word/german.csv
@@ -0,0 +1,598 @@
+ab
+aber
+alle
+allein
+allem
+allen
+aller
+allerdings
+allerlei
+alles
+allmählich
+allzu
+als
+alsbald
+also
+am
+an
+and
+ander
+andere
+anderem
+anderen
+anderer
+andererseits
+anderes
+anderm
+andern
+andernfalls
+anders
+anstatt
+auch
+auf
+aus
+ausgenommen
+ausser
+ausserdem
+außer
+außerdem
+außerhalb
+bald
+bei
+beide
+beiden
+beiderlei
+beides
+beim
+beinahe
+bereits
+besonders
+besser
+beträchtlich
+bevor
+bezüglich
+bin
+bis
+bisher
+bislang
+bist
+bloß
+bsp.
+bzw
+ca
+ca.
+content
+da
+dabei
+dadurch
+dafür
+dagegen
+daher
+dahin
+damals
+damit
+danach
+daneben
+dann
+daran
+darauf
+daraus
+darin
+darum
+darunter
+darüber
+darüberhinaus
+das
+dass
+dasselbe
+davon
+davor
+dazu
+daß
+dein
+deine
+deinem
+deinen
+deiner
+deines
+dem
+demnach
+demselben
+den
+denen
+denn
+dennoch
+denselben
+der
+derart
+derartig
+derem
+deren
+derer
+derjenige
+derjenigen
+derselbe
+derselben
+derzeit
+des
+deshalb
+desselben
+dessen
+desto
+deswegen
+dich
+die
+diejenige
+dies
+diese
+dieselbe
+dieselben
+diesem
+diesen
+dieser
+dieses
+diesseits
+dir
+direkt
+direkte
+direkten
+direkter
+doch
+dort
+dorther
+dorthin
+drauf
+drin
+drunter
+drüber
+du
+dunklen
+durch
+durchaus
+eben
+ebenfalls
+ebenso
+eher
+eigenen
+eigenes
+eigentlich
+ein
+eine
+einem
+einen
+einer
+einerseits
+eines
+einfach
+einführen
+einführte
+einführten
+eingesetzt
+einig
+einige
+einigem
+einigen
+einiger
+einigermaßen
+einiges
+einmal
+eins
+einseitig
+einseitige
+einseitigen
+einseitiger
+einst
+einstmals
+einzig
+entsprechend
+entweder
+er
+erst
+es
+etc
+etliche
+etwa
+etwas
+euch
+euer
+eure
+eurem
+euren
+eurer
+eures
+falls
+fast
+ferner
+folgende
+folgenden
+folgender
+folgendes
+folglich
+fuer
+für
+gab
+ganze
+ganzem
+ganzen
+ganzer
+ganzes
+gar
+gegen
+gemäss
+ggf
+gleich
+gleichwohl
+gleichzeitig
+glücklicherweise
+gänzlich
+hab
+habe
+haben
+haette
+hast
+hat
+hatte
+hatten
+hattest
+hattet
+heraus
+herein
+hier
+hier
+hinter
+hiermit
+hiesige
+hin
+hinein
+hinten
+hinter
+hinterher
+http
+hätt
+hätte
+hätten
+höchstens
+ich
+igitt
+ihm
+ihn
+ihnen
+ihr
+ihre
+ihrem
+ihren
+ihrer
+ihres
+im
+immer
+immerhin
+in
+indem
+indessen
+infolge
+innen
+innerhalb
+ins
+insofern
+inzwischen
+irgend
+irgendeine
+irgendwas
+irgendwen
+irgendwer
+irgendwie
+irgendwo
+ist
+ja
+je
+jed
+jede
+jedem
+jeden
+jedenfalls
+jeder
+jederlei
+jedes
+jedoch
+jemand
+jene
+jenem
+jenen
+jener
+jenes
+jenseits
+jetzt
+jährig
+jährige
+jährigen
+jähriges
+kam
+kann
+kannst
+kaum
+kein
+keine
+keinem
+keinen
+keiner
+keinerlei
+keines
+keineswegs
+klar
+klare
+klaren
+klares
+klein
+kleinen
+kleiner
+kleines
+koennen
+koennt
+koennte
+koennten
+komme
+kommen
+kommt
+konkret
+konkrete
+konkreten
+konkreter
+konkretes
+können
+könnt
+künftig
+leider
+machen
+man
+manche
+manchem
+manchen
+mancher
+mancherorts
+manches
+manchmal
+mehr
+mehrere
+mein
+meine
+meinem
+meinen
+meiner
+meines
+mich
+mir
+mit
+mithin
+muessen
+muesst
+muesste
+muss
+musst
+musste
+mussten
+muß
+mußt
+müssen
+müsste
+müssten
+müßt
+müßte
+nach
+nachdem
+nachher
+nachhinein
+nahm
+natürlich
+neben
+nebenan
+nehmen
+nein
+nicht
+nichts
+nie
+niemals
+niemand
+nirgends
+nirgendwo
+noch
+nun
+nur
+nächste
+nämlich
+nötigenfalls
+ob
+oben
+oberhalb
+obgleich
+obschon
+obwohl
+oder
+oft
+per
+plötzlich
+schließlich
+schon
+sehr
+sehrwohl
+seid
+sein
+seine
+seinem
+seinen
+seiner
+seines
+seit
+seitdem
+seither
+selber
+selbst
+sich
+sicher
+sicherlich
+sie
+sind
+so
+sobald
+sodass
+sodaß
+soeben
+sofern
+sofort
+sogar
+solange
+solch
+solche
+solchem
+solchen
+solcher
+solches
+soll
+sollen
+sollst
+sollt
+sollte
+sollten
+solltest
+somit
+sondern
+sonst
+sonstwo
+sooft
+soviel
+soweit
+sowie
+sowohl
+tatsächlich
+tatsächlichen
+tatsächlicher
+tatsächliches
+trotzdem
+ueber
+um
+umso
+unbedingt
+und
+unmöglich
+unmögliche
+unmöglichen
+unmöglicher
+uns
+unser
+unser
+unsere
+unsere
+unserem
+unseren
+unserer
+unseres
+unter
+usw
+viel
+viele
+vielen
+vieler
+vieles
+vielleicht
+vielmals
+vom
+von
+vor
+voran
+vorher
+vorüber
+völlig
+wann
+war
+waren
+warst
+warum
+was
+weder
+weil
+weiter
+weitere
+weiterem
+weiteren
+weiterer
+weiteres
+weiterhin
+weiß
+welche
+welchem
+welchen
+welcher
+welches
+wem
+wen
+wenig
+wenige
+weniger
+wenigstens
+wenn
+wenngleich
+wer
+werde
+werden
+werdet
+weshalb
+wessen
+wichtig
+wie
+wieder
+wieso
+wieviel
+wiewohl
+will
+willst
+wir
+wird
+wirklich
+wirst
+wo
+wodurch
+wogegen
+woher
+wohin
+wohingegen
+wohl
+wohlweislich
+womit
+woraufhin
+woraus
+worin
+wurde
+wurden
+während
+währenddessen
+wär
+wäre
+wären
+würde
+würden
+z.B.
+zB
+zahlreich
+zeitweise
+zu
+zudem
+zuerst
+zufolge
+zugleich
+zuletzt
+zum
+zumal
+zur
+zurück
+zusammen
+zuviel
+zwar
+zwischen
+ähnlich
+übel
+über
+überall
+überallhin
+überdies
+übermorgen
+übrig
+übrigens
diff --git a/resource/stop-word/italian.csv b/resource/stop-word/italian.csv
new file mode 100644
index 0000000..bdedf2e
--- /dev/null
+++ b/resource/stop-word/italian.csv
@@ -0,0 +1,660 @@
+a
+abbastanza
+abbia
+abbiamo
+abbiano
+abbiate
+accidenti
+ad
+adesso
+affinche
+agl
+agli
+ahime
+ahimã¨
+ahimè
+ai
+al
+alcuna
+alcuni
+alcuno
+all
+alla
+alle
+allo
+allora
+altre
+altri
+altrimenti
+altro
+altrove
+altrui
+anche
+ancora
+anni
+anno
+ansa
+anticipo
+assai
+attesa
+attraverso
+avanti
+avemmo
+avendo
+avente
+aver
+avere
+averlo
+avesse
+avessero
+avessi
+avessimo
+aveste
+avesti
+avete
+aveva
+avevamo
+avevano
+avevate
+avevi
+avevo
+avrai
+avranno
+avrebbe
+avrebbero
+avrei
+avremmo
+avremo
+avreste
+avresti
+avrete
+avrà
+avrò
+avuta
+avute
+avuti
+avuto
+basta
+ben
+bene
+benissimo
+berlusconi
+brava
+bravo
+buono
+c
+casa
+caso
+cento
+certa
+certe
+certi
+certo
+che
+chi
+chicchessia
+chiunque
+ci
+ciascuna
+ciascuno
+cima
+cinque
+cio
+cioe
+cioã¨
+cioè
+circa
+citta
+città
+cittã
+ciã²
+ciò
+co
+codesta
+codesti
+codesto
+cogli
+coi
+col
+colei
+coll
+coloro
+colui
+come
+cominci
+comprare
+comunque
+con
+concernente
+conciliarsi
+conclusione
+consecutivi
+consecutivo
+consiglio
+contro
+cortesia
+cos
+cosa
+cosi
+cosã¬
+così
+cui
+d
+da
+dagl
+dagli
+dai
+dal
+dall
+dalla
+dalle
+dallo
+dappertutto
+davanti
+degl
+degli
+dei
+del
+dell
+della
+delle
+dello
+dentro
+detto
+deve
+devo
+di
+dice
+dietro
+dire
+dirimpetto
+diventa
+diventare
+diventato
+dopo
+doppio
+dov
+dove
+dovra
+dovrà
+dovrã
+dovunque
+due
+dunque
+durante
+e
+ebbe
+ebbero
+ebbi
+ecc
+ecco
+ed
+effettivamente
+egli
+ella
+entrambi
+eppure
+era
+erano
+eravamo
+eravate
+eri
+ero
+esempio
+esse
+essendo
+esser
+essere
+essi
+ex
+fa
+faccia
+facciamo
+facciano
+facciate
+faccio
+facemmo
+facendo
+facesse
+facessero
+facessi
+facessimo
+faceste
+facesti
+faceva
+facevamo
+facevano
+facevate
+facevi
+facevo
+fai
+fanno
+farai
+faranno
+fare
+farebbe
+farebbero
+farei
+faremmo
+faremo
+fareste
+faresti
+farete
+farà
+farò
+fatto
+favore
+fece
+fecero
+feci
+fin
+finalmente
+finche
+fine
+fino
+forse
+forza
+fosse
+fossero
+fossi
+fossimo
+foste
+fosti
+fra
+frattempo
+fu
+fui
+fummo
+fuori
+furono
+futuro
+generale
+gente
+gia
+giacche
+giorni
+giorno
+giu
+già
+giã
+gli
+gliela
+gliele
+glieli
+glielo
+gliene
+governo
+grande
+grazie
+gruppo
+ha
+haha
+hai
+hanno
+ho
+i
+ie
+ieri
+il
+improvviso
+in
+inc
+indietro
+infatti
+inoltre
+insieme
+intanto
+intorno
+invece
+io
+l
+la
+lasciato
+lato
+lavoro
+le
+lei
+li
+lo
+lontano
+loro
+lui
+lungo
+luogo
+là
+lã
+ma
+macche
+magari
+maggior
+mai
+male
+malgrado
+malissimo
+mancanza
+marche
+me
+medesimo
+mediante
+meglio
+meno
+mentre
+mesi
+mezzo
+mi
+mia
+mie
+miei
+mila
+miliardi
+milioni
+minimi
+ministro
+mio
+modo
+molta
+molti
+moltissimo
+molto
+momento
+mondo
+mosto
+nazionale
+ne
+negl
+negli
+nei
+nel
+nell
+nella
+nelle
+nello
+nemmeno
+neppure
+nessun
+nessuna
+nessuno
+niente
+no
+noi
+nome
+non
+nondimeno
+nonostante
+nonsia
+nostra
+nostre
+nostri
+nostro
+novanta
+nove
+nulla
+nuovi
+nuovo
+o
+od
+oggi
+ogni
+ognuna
+ognuno
+oltre
+oppure
+ora
+ore
+osi
+ossia
+ottanta
+otto
+paese
+parecchi
+parecchie
+parecchio
+parte
+partendo
+peccato
+peggio
+per
+perche
+perchã¨
+perchè
+perché
+percio
+perciã²
+perciò
+perfino
+pero
+persino
+persone
+perã²
+però
+piedi
+pieno
+piglia
+piu
+piuttosto
+piã¹
+più
+po
+pochissimo
+poco
+poi
+poiche
+possa
+possedere
+posteriore
+posto
+potrebbe
+preferibilmente
+presa
+press
+prima
+primo
+principalmente
+probabilmente
+promesso
+proprio
+puo
+pure
+purtroppo
+puã²
+può
+qua
+qualche
+qualcosa
+qualcuna
+qualcuno
+quale
+quali
+qualunque
+quando
+quanta
+quante
+quanti
+quanto
+quantunque
+quarto
+quasi
+quattro
+quel
+quella
+quelle
+quelli
+quello
+quest
+questa
+queste
+questi
+questo
+qui
+quindi
+quinto
+realmente
+recente
+recentemente
+registrazione
+relativo
+riecco
+rispetto
+salvo
+sara
+sarai
+saranno
+sarebbe
+sarebbero
+sarei
+saremmo
+saremo
+sareste
+saresti
+sarete
+sarà
+sarã
+sarò
+scola
+scopo
+scorso
+se
+secondo
+seguente
+seguito
+sei
+sembra
+sembrare
+sembrato
+sembrava
+sembri
+sempre
+senza
+sette
+si
+sia
+siamo
+siano
+siate
+siete
+sig
+solito
+solo
+soltanto
+sono
+sopra
+soprattutto
+sotto
+spesso
+srl
+sta
+stai
+stando
+stanno
+starai
+staranno
+starebbe
+starebbero
+starei
+staremmo
+staremo
+stareste
+staresti
+starete
+starà
+starò
+stata
+state
+stati
+stato
+stava
+stavamo
+stavano
+stavate
+stavi
+stavo
+stemmo
+stessa
+stesse
+stessero
+stessi
+stessimo
+stesso
+steste
+stesti
+stette
+stettero
+stetti
+stia
+stiamo
+stiano
+stiate
+sto
+su
+sua
+subito
+successivamente
+successivo
+sue
+sugl
+sugli
+sui
+sul
+sull
+sulla
+sulle
+sullo
+suo
+suoi
+tale
+tali
+talvolta
+tanto
+te
+tempo
+terzo
+th
+ti
+titolo
+torino
+tra
+tranne
+tre
+trenta
+triplo
+troppo
+trovato
+tu
+tua
+tue
+tuo
+tuoi
+tutta
+tuttavia
+tutte
+tutti
+tutto
+uguali
+ulteriore
+ultimo
+un
+una
+uno
+uomo
+va
+vai
+vale
+vari
+varia
+varie
+vario
+verso
+vi
+via
+vicino
+visto
+vita
+voi
+volta
+volte
+vostra
+vostre
+vostri
+vostro
+ã¨
+è
diff --git a/resource/stop-word/norwegian.csv b/resource/stop-word/norwegian.csv
new file mode 100644
index 0000000..e93528f
--- /dev/null
+++ b/resource/stop-word/norwegian.csv
@@ -0,0 +1,221 @@
+alle
+andre
+arbeid
+at
+av
+bare
+begge
+ble
+blei
+bli
+blir
+blitt
+bort
+bra
+bruke
+både
+båe
+da
+de
+deg
+dei
+deim
+deira
+deires
+dem
+den
+denne
+der
+dere
+deres
+det
+dette
+di
+din
+disse
+ditt
+du
+dykk
+dykkar
+då
+eg
+ein
+eit
+eitt
+eller
+elles
+en
+ene
+eneste
+enhver
+enn
+er
+et
+ett
+etter
+folk
+for
+fordi
+forsøke
+fra
+få
+før
+fûr
+først
+gjorde
+gjøre
+god
+gå
+ha
+hadde
+han
+hans
+har
+hennar
+henne
+hennes
+her
+hjå
+ho
+hoe
+honom
+hoss
+hossen
+hun
+hva
+hvem
+hver
+hvilke
+hvilken
+hvis
+hvor
+hvordan
+hvorfor
+i
+ikke
+ikkje
+ingen
+ingi
+inkje
+inn
+innen
+inni
+ja
+jeg
+kan
+kom
+korleis
+korso
+kun
+kunne
+kva
+kvar
+kvarhelst
+kven
+kvi
+kvifor
+lage
+lang
+lik
+like
+makt
+man
+mange
+me
+med
+medan
+meg
+meget
+mellom
+men
+mens
+mer
+mest
+mi
+min
+mine
+mitt
+mot
+mye
+mykje
+må
+måte
+navn
+ned
+nei
+no
+noe
+noen
+noka
+noko
+nokon
+nokor
+nokre
+ny
+nå
+når
+og
+også
+om
+opp
+oss
+over
+part
+punkt
+på
+rett
+riktig
+samme
+sant
+seg
+selv
+si
+sia
+sidan
+siden
+sin
+sine
+sist
+sitt
+sjøl
+skal
+skulle
+slik
+slutt
+so
+som
+somme
+somt
+start
+stille
+så
+sånn
+tid
+til
+tilbake
+tilstand
+um
+under
+upp
+ut
+uten
+var
+vart
+varte
+ved
+verdi
+vere
+verte
+vi
+vil
+ville
+vite
+vore
+vors
+vort
+vår
+være
+vært
+vöre
+vört
+å
diff --git a/resource/stop-word/russian.csv b/resource/stop-word/russian.csv
new file mode 100644
index 0000000..38c4f2a
--- /dev/null
+++ b/resource/stop-word/russian.csv
@@ -0,0 +1,559 @@
+c
+а
+алло
+без
+белый
+близко
+более
+больше
+большой
+будем
+будет
+будете
+будешь
+будто
+буду
+будут
+будь
+бы
+бывает
+бывь
+был
+была
+были
+было
+быть
+в
+важная
+важное
+важные
+важный
+вам
+вами
+вас
+ваш
+ваша
+ваше
+ваши
+вверх
+вдали
+вдруг
+ведь
+везде
+вернуться
+весь
+вечер
+взгляд
+взять
+вид
+видел
+видеть
+вместе
+вне
+вниз
+внизу
+во
+вода
+война
+вокруг
+вон
+вообще
+вопрос
+восемнадцатый
+восемнадцать
+восемь
+восьмой
+вот
+впрочем
+времени
+время
+все
+все еще
+всегда
+всего
+всем
+всеми
+всему
+всех
+всею
+всю
+всюду
+вся
+всё
+второй
+вы
+выйти
+г
+где
+главный
+глаз
+говорил
+говорит
+говорить
+год
+года
+году
+голова
+голос
+город
+да
+давать
+давно
+даже
+далекий
+далеко
+дальше
+даром
+дать
+два
+двадцатый
+двадцать
+две
+двенадцатый
+двенадцать
+дверь
+двух
+девятнадцатый
+девятнадцать
+девятый
+девять
+действительно
+дел
+делал
+делать
+делаю
+дело
+день
+деньги
+десятый
+десять
+для
+до
+довольно
+долго
+должен
+должно
+должный
+дом
+дорога
+друг
+другая
+другие
+других
+друго
+другое
+другой
+думать
+душа
+е
+его
+ее
+ей
+ему
+если
+есть
+еще
+ещё
+ею
+её
+ж
+ждать
+же
+жена
+женщина
+жизнь
+жить
+за
+занят
+занята
+занято
+заняты
+затем
+зато
+зачем
+здесь
+земля
+знать
+значит
+значить
+и
+иди
+идти
+из
+или
+им
+имеет
+имел
+именно
+иметь
+ими
+имя
+иногда
+их
+к
+каждая
+каждое
+каждые
+каждый
+кажется
+казаться
+как
+какая
+какой
+кем
+книга
+когда
+кого
+ком
+комната
+кому
+конец
+конечно
+которая
+которого
+которой
+которые
+который
+которых
+кроме
+кругом
+кто
+куда
+лежать
+лет
+ли
+лицо
+лишь
+лучше
+любить
+люди
+м
+маленький
+мало
+мать
+машина
+между
+меля
+менее
+меньше
+меня
+место
+миллионов
+мимо
+минута
+мир
+мира
+мне
+много
+многочисленная
+многочисленное
+многочисленные
+многочисленный
+мной
+мною
+мог
+могу
+могут
+мож
+может
+может быть
+можно
+можхо
+мои
+мой
+мор
+москва
+мочь
+моя
+моё
+мы
+на
+наверху
+над
+надо
+назад
+наиболее
+найти
+наконец
+нам
+нами
+народ
+нас
+начала
+начать
+наш
+наша
+наше
+наши
+не
+него
+недавно
+недалеко
+нее
+ней
+некоторый
+нельзя
+нем
+немного
+нему
+непрерывно
+нередко
+несколько
+нет
+нею
+неё
+ни
+нибудь
+ниже
+низко
+никакой
+никогда
+никто
+никуда
+ним
+ними
+них
+ничего
+ничто
+но
+новый
+нога
+ночь
+ну
+нужно
+нужный
+нх
+о
+об
+оба
+обычно
+один
+одиннадцатый
+одиннадцать
+однажды
+однако
+одного
+одной
+оказаться
+окно
+около
+он
+она
+они
+оно
+опять
+особенно
+остаться
+от
+ответить
+отец
+откуда
+отовсюду
+отсюда
+очень
+первый
+перед
+писать
+плечо
+по
+под
+подойди
+подумать
+пожалуйста
+позже
+пойти
+пока
+пол
+получить
+помнить
+понимать
+понять
+пор
+пора
+после
+последний
+посмотреть
+посреди
+потом
+потому
+почему
+почти
+правда
+прекрасно
+при
+про
+просто
+против
+процентов
+путь
+пятнадцатый
+пятнадцать
+пятый
+пять
+работа
+работать
+раз
+разве
+рано
+раньше
+ребенок
+решить
+россия
+рука
+русский
+ряд
+рядом
+с
+с кем
+сам
+сама
+сами
+самим
+самими
+самих
+само
+самого
+самой
+самом
+самому
+саму
+самый
+свет
+свое
+своего
+своей
+свои
+своих
+свой
+свою
+сделать
+сеаой
+себе
+себя
+сегодня
+седьмой
+сейчас
+семнадцатый
+семнадцать
+семь
+сидеть
+сила
+сих
+сказал
+сказала
+сказать
+сколько
+слишком
+слово
+случай
+смотреть
+сначала
+снова
+со
+собой
+собою
+советский
+совсем
+спасибо
+спросить
+сразу
+стал
+старый
+стать
+стол
+сторона
+стоять
+страна
+суть
+считать
+т
+та
+так
+такая
+также
+таки
+такие
+такое
+такой
+там
+твои
+твой
+твоя
+твоё
+те
+тебе
+тебя
+тем
+теми
+теперь
+тех
+то
+тобой
+тобою
+товарищ
+тогда
+того
+тоже
+только
+том
+тому
+тот
+тою
+третий
+три
+тринадцатый
+тринадцать
+ту
+туда
+тут
+ты
+тысяч
+у
+увидеть
+уж
+уже
+улица
+уметь
+утро
+хороший
+хорошо
+хотел бы
+хотеть
+хоть
+хотя
+хочешь
+час
+часто
+часть
+чаще
+чего
+человек
+чем
+чему
+через
+четвертый
+четыре
+четырнадцатый
+четырнадцать
+что
+чтоб
+чтобы
+чуть
+шестнадцатый
+шестнадцать
+шестой
+шесть
+эта
+эти
+этим
+этими
+этих
+это
+этого
+этой
+этом
+этому
+этот
+эту
+я
+являюсь
diff --git a/resource/stop-word/spanish.csv b/resource/stop-word/spanish.csv
new file mode 100644
index 0000000..3b44c2c
--- /dev/null
+++ b/resource/stop-word/spanish.csv
@@ -0,0 +1,721 @@
+a
+actualmente
+acuerdo
+adelante
+ademas
+además
+adrede
+afirmó
+agregó
+ahi
+ahora
+ahí
+al
+algo
+alguna
+algunas
+alguno
+algunos
+algún
+alli
+allí
+alrededor
+ambos
+ampleamos
+antano
+antaño
+ante
+anterior
+antes
+apenas
+aproximadamente
+aquel
+aquella
+aquellas
+aquello
+aquellos
+aqui
+aquél
+aquélla
+aquéllas
+aquéllos
+aquí
+arriba
+arribaabajo
+aseguró
+asi
+así
+atras
+aun
+aunque
+ayer
+añadió
+aún
+b
+bajo
+bastante
+bien
+breve
+buen
+buena
+buenas
+bueno
+buenos
+c
+cada
+casi
+cerca
+cierta
+ciertas
+cierto
+ciertos
+cinco
+claro
+comentó
+como
+con
+conmigo
+conocer
+conseguimos
+conseguir
+considera
+consideró
+consigo
+consigue
+consiguen
+consigues
+contigo
+contra
+cosas
+creo
+cual
+cuales
+cualquier
+cuando
+cuanta
+cuantas
+cuanto
+cuantos
+cuatro
+cuenta
+cuál
+cuáles
+cuándo
+cuánta
+cuántas
+cuánto
+cuántos
+cómo
+d
+da
+dado
+dan
+dar
+de
+debajo
+debe
+deben
+debido
+decir
+dejó
+del
+delante
+demasiado
+demás
+dentro
+deprisa
+desde
+despacio
+despues
+después
+detras
+detrás
+dia
+dias
+dice
+dicen
+dicho
+dieron
+diferente
+diferentes
+dijeron
+dijo
+dio
+donde
+dos
+durante
+día
+días
+dónde
+e
+ejemplo
+el
+ella
+ellas
+ello
+ellos
+embargo
+empleais
+emplean
+emplear
+empleas
+empleo
+en
+encima
+encuentra
+enfrente
+enseguida
+entonces
+entre
+era
+erais
+eramos
+eran
+eras
+eres
+es
+esa
+esas
+ese
+eso
+esos
+esta
+estaba
+estabais
+estaban
+estabas
+estad
+estada
+estadas
+estado
+estados
+estais
+estamos
+estan
+estando
+estar
+estaremos
+estará
+estarán
+estarás
+estaré
+estaréis
+estaría
+estaríais
+estaríamos
+estarían
+estarías
+estas
+este
+estemos
+esto
+estos
+estoy
+estuve
+estuviera
+estuvierais
+estuvieran
+estuvieras
+estuvieron
+estuviese
+estuvieseis
+estuviesen
+estuvieses
+estuvimos
+estuviste
+estuvisteis
+estuviéramos
+estuviésemos
+estuvo
+está
+estábamos
+estáis
+están
+estás
+esté
+estéis
+estén
+estés
+ex
+excepto
+existe
+existen
+explicó
+expresó
+f
+fin
+final
+fue
+fuera
+fuerais
+fueran
+fueras
+fueron
+fuese
+fueseis
+fuesen
+fueses
+fui
+fuimos
+fuiste
+fuisteis
+fuéramos
+fuésemos
+g
+general
+gran
+grandes
+gueno
+h
+ha
+haber
+habia
+habida
+habidas
+habido
+habidos
+habiendo
+habla
+hablan
+habremos
+habrá
+habrán
+habrás
+habré
+habréis
+habría
+habríais
+habríamos
+habrían
+habrías
+habéis
+había
+habíais
+habíamos
+habían
+habías
+hace
+haceis
+hacemos
+hacen
+hacer
+hacerlo
+haces
+hacia
+haciendo
+hago
+han
+has
+hasta
+hay
+haya
+hayamos
+hayan
+hayas
+hayáis
+he
+hecho
+hemos
+hicieron
+hizo
+horas
+hoy
+hube
+hubiera
+hubierais
+hubieran
+hubieras
+hubieron
+hubiese
+hubieseis
+hubiesen
+hubieses
+hubimos
+hubiste
+hubisteis
+hubiéramos
+hubiésemos
+hubo
+i
+igual
+incluso
+indicó
+informo
+informó
+intenta
+intentais
+intentamos
+intentan
+intentar
+intentas
+intento
+ir
+j
+junto
+k
+l
+la
+lado
+largo
+las
+le
+lejos
+les
+llegó
+lleva
+llevar
+lo
+los
+luego
+lugar
+m
+mal
+manera
+manifestó
+mas
+mayor
+me
+mediante
+medio
+mejor
+mencionó
+menos
+menudo
+mi
+mia
+mias
+mientras
+mio
+mios
+mis
+misma
+mismas
+mismo
+mismos
+modo
+momento
+mucha
+muchas
+mucho
+muchos
+muy
+más
+mí
+mía
+mías
+mío
+míos
+n
+nada
+nadie
+ni
+ninguna
+ningunas
+ninguno
+ningunos
+ningún
+no
+nos
+nosotras
+nosotros
+nuestra
+nuestras
+nuestro
+nuestros
+nueva
+nuevas
+nuevo
+nuevos
+nunca
+o
+ocho
+os
+otra
+otras
+otro
+otros
+p
+pais
+para
+parece
+parte
+partir
+pasada
+pasado
+paìs
+peor
+pero
+pesar
+poca
+pocas
+poco
+pocos
+podeis
+podemos
+poder
+podria
+podriais
+podriamos
+podrian
+podrias
+podrá
+podrán
+podría
+podrían
+poner
+por
+por qué
+porque
+posible
+primer
+primera
+primero
+primeros
+principalmente
+pronto
+propia
+propias
+propio
+propios
+proximo
+próximo
+próximos
+pudo
+pueda
+puede
+pueden
+puedo
+pues
+q
+qeu
+que
+quedó
+queremos
+quien
+quienes
+quiere
+quiza
+quizas
+quizá
+quizás
+quién
+quiénes
+qué
+r
+raras
+realizado
+realizar
+realizó
+repente
+respecto
+s
+sabe
+sabeis
+sabemos
+saben
+saber
+sabes
+sal
+salvo
+se
+sea
+seamos
+sean
+seas
+segun
+segunda
+segundo
+según
+seis
+ser
+sera
+seremos
+será
+serán
+serás
+seré
+seréis
+sería
+seríais
+seríamos
+serían
+serías
+seáis
+señaló
+si
+sido
+siempre
+siendo
+siete
+sigue
+siguiente
+sin
+sino
+sobre
+sois
+sola
+solamente
+solas
+solo
+solos
+somos
+son
+soy
+soyos
+su
+supuesto
+sus
+suya
+suyas
+suyo
+suyos
+sé
+sí
+sólo
+t
+tal
+tambien
+también
+tampoco
+tan
+tanto
+tarde
+te
+temprano
+tendremos
+tendrá
+tendrán
+tendrás
+tendré
+tendréis
+tendría
+tendríais
+tendríamos
+tendrían
+tendrías
+tened
+teneis
+tenemos
+tener
+tenga
+tengamos
+tengan
+tengas
+tengo
+tengáis
+tenida
+tenidas
+tenido
+tenidos
+teniendo
+tenéis
+tenía
+teníais
+teníamos
+tenían
+tenías
+tercera
+ti
+tiempo
+tiene
+tienen
+tienes
+toda
+todas
+todavia
+todavía
+todo
+todos
+total
+trabaja
+trabajais
+trabajamos
+trabajan
+trabajar
+trabajas
+trabajo
+tras
+trata
+través
+tres
+tu
+tus
+tuve
+tuviera
+tuvierais
+tuvieran
+tuvieras
+tuvieron
+tuviese
+tuvieseis
+tuviesen
+tuvieses
+tuvimos
+tuviste
+tuvisteis
+tuviéramos
+tuviésemos
+tuvo
+tuya
+tuyas
+tuyo
+tuyos
+tú
+u
+ultimo
+un
+una
+unas
+uno
+unos
+usa
+usais
+usamos
+usan
+usar
+usas
+uso
+usted
+ustedes
+v
+va
+vais
+valor
+vamos
+van
+varias
+varios
+vaya
+veces
+ver
+verdad
+verdadera
+verdadero
+vez
+vosotras
+vosotros
+voy
+vuestra
+vuestras
+vuestro
+vuestros
+w
+x
+y
+ya
+yo
+z
+él
+éramos
+ésa
+ésas
+ése
+ésos
+ésta
+éstas
+éste
+éstos
+última
+últimas
+último
+últimos
diff --git a/src/Builder/AlgorithmOutputBuilder.php b/src/Builder/AlgorithmOutputBuilder.php
new file mode 100644
index 0000000..ecf11dd
--- /dev/null
+++ b/src/Builder/AlgorithmOutputBuilder.php
@@ -0,0 +1,64 @@
+sortRankDataList = $getTopNodes;
+ }
+
+ /**
+ * Assembles the algorithm output from ranked nodes and weighted sentences.
+ *
+ * Keywords come from createWordList(), which is limited by $maxKeywords.
+ * The sentence list is passed through the injected sorter and stored as is;
+ * $maxSentences is accepted but not read anywhere in this method body.
+ *
+ * @param TextInterface $text Parsed text, used to resolve node ids to tokens.
+ * @param NodeCollectionInterface $nodeCollection Ranked nodes.
+ * @param array $sentences Sentence rank objects to sort.
+ * @param int $maxKeywords Upper bound passed to createWordList().
+ * @param int $maxSentences Not used in this method body.
+ *
+ * @return AlgorithmOutputInterface Output carrying the keywords and the sorted sentences.
+ */
+ public function build(
+ TextInterface $text,
+ NodeCollectionInterface $nodeCollection,
+ array $sentences,
+ int $maxKeywords,
+ int $maxSentences
+ ): AlgorithmOutputInterface {
+ $words = $this->createWordList($text, $nodeCollection, $maxKeywords);
+ $sentences = $this->sortRankDataList->sort($sentences);
+
+ $textRankOutput = new AlgorithmOutput();
+ $textRankOutput->setKeyWords($words);
+ $textRankOutput->setSentences($sentences);
+
+ return $textRankOutput;
+ }
+
+    /**
+     * Builds the keyword list from the sorted nodes.
+     *
+     * At most $maxKeywords entries are produced. When the collection holds
+     * fewer nodes than requested, the list simply ends with the last node
+     * instead of reading past the end of the sorted array.
+     *
+     * @param TextInterface           $text           Parsed text whose token map resolves node ids.
+     * @param NodeCollectionInterface $nodeCollection Ranked nodes.
+     * @param int                     $maxKeywords    Upper bound on the number of keywords.
+     *
+     * @return array List of RankDataObject instances (id, token value, rank).
+     */
+    private function createWordList(
+        TextInterface $text,
+        NodeCollectionInterface $nodeCollection,
+        int $maxKeywords
+    ): array {
+        $nodes = $this
+            ->sortRankDataList
+            ->sort(array_values($nodeCollection->getNodes()));
+        $words = [];
+
+        // isset() stops the loop when there are fewer nodes than $maxKeywords.
+        for ($i = 0; $i < $maxKeywords && isset($nodes[$i]); $i++) {
+            $nodeId = $nodes[$i]->getId();
+            $token = $text->getTokenMap()->getToken($nodeId);
+            $word = new RankDataObject();
+            $word->setId($nodeId);
+            $word->setValue($token);
+            $word->setRank($nodes[$i]->getRank());
+
+            $words[] = $word;
+        }
+
+        return $words;
+    }
+}
diff --git a/src/Builder/TextRankOutputBuilderInterface.php b/src/Builder/AlgorithmOutputBuilderInterface.php
similarity index 57%
rename from src/Builder/TextRankOutputBuilderInterface.php
rename to src/Builder/AlgorithmOutputBuilderInterface.php
index 4c42fc6..b556aee 100644
--- a/src/Builder/TextRankOutputBuilderInterface.php
+++ b/src/Builder/AlgorithmOutputBuilderInterface.php
@@ -1,18 +1,20 @@
-getSentences() as $sentence) {
- foreach ($sentence->getVector() as $index => $tokenId) {
- if (!isset($dataSource[$tokenId])) {
- $dataSource[$tokenId] = [
- self::ID => $tokenId,
- self::LEFT => [],
- self::RIGHT => []
- ];
- }
-
- if ($sentence->isIndexExists($index - 1)) {
- $previousTokenId = $sentence->getTokenId($index - 1);
- if ($text->getTokenMap()->isExists($previousTokenId)) {
- $dataSource[$tokenId][self::LEFT][] = $previousTokenId;
- }
- }
-
- if ($sentence->isIndexExists($index + 1)) {
- $nextTokenId = $sentence->getTokenId($index + 1);
- if ($text->getTokenMap()->isExists($nextTokenId)) {
- $dataSource[$tokenId][self::RIGHT][] = $nextTokenId;
- }
- }
- }
- }
-
- return $dataSource;
- }
-}
+getSentences() as $sentence) {
+ foreach ($sentence->getVector() as $index => $tokenId) {
+ if (!isset($dataSource[$tokenId])) {
+ $dataSource[$tokenId] = [
+ self::ID => $tokenId,
+ self::LEFT => [],
+ self::RIGHT => []
+ ];
+ }
+
+ if ($sentence->isIndexExists($index - 1)) {
+ $previousTokenId = $sentence->getTokenId($index - 1);
+ if ($text->getTokenMap()->isExists($previousTokenId)) {
+ $dataSource[$tokenId][self::LEFT][] = $previousTokenId;
+ }
+ }
+
+ if ($sentence->isIndexExists($index + 1)) {
+ $nextTokenId = $sentence->getTokenId($index + 1);
+ if ($text->getTokenMap()->isExists($nextTokenId)) {
+ $dataSource[$tokenId][self::RIGHT][] = $nextTokenId;
+ }
+ }
+ }
+ }
+
+ return $dataSource;
+ }
+}
diff --git a/src/Builder/StopWordCollectionBuilder.php b/src/Builder/StopWordCollectionBuilder.php
new file mode 100644
index 0000000..dd78e46
--- /dev/null
+++ b/src/Builder/StopWordCollectionBuilder.php
@@ -0,0 +1,31 @@
+reader = $reader;
+ }
+
+    /**
+     * Builds a stop-word collection from the first column of each row the
+     * reader yields for the given path.
+     *
+     * Rows whose first column is missing, null or an empty string are
+     * skipped. fgetcsv() reports a blank line as a row holding a single null,
+     * and such a value is not a word.
+     *
+     * @param string $path Path handed to the reader.
+     *
+     * @return StopWordCollectionInterface Collection wrapping the words that were read.
+     */
+    public function build(string $path): StopWordCollectionInterface
+    {
+        $words = [];
+
+        foreach ($this->reader->read($path) as $row) {
+            $word = current($row);
+
+            // current() gives false for an empty row and null for a blank CSV line.
+            if (false === $word || null === $word || '' === $word) {
+                continue;
+            }
+
+            $words[] = $word;
+        }
+
+        return new StopWordCollection($words);
+    }
+}
diff --git a/src/Builder/StopWordCollectionBuilderInterface.php b/src/Builder/StopWordCollectionBuilderInterface.php
new file mode 100644
index 0000000..0bbb820
--- /dev/null
+++ b/src/Builder/StopWordCollectionBuilderInterface.php
@@ -0,0 +1,12 @@
+ $sentenceTokenList) {
- $sentenceVector = [];
- foreach ($sentenceTokenList as $token) {
- $token = (string)$token;
- if (!isset($tokens[$token])) {
- $tokens[$token] = $i;
- $tokenId = $i;
- $i++;
- } else {
- $tokenId = $tokens[$token];
- }
-
- $sentenceVector[] = $tokenId;
- }
-
- $sentence = new Sentence();
- $sentence->setId($sentenceIndex);
- $sentence->setVector($sentenceVector);
- $sentence->setOriginalValue($originalSentences[$sentenceIndex]);
- $sentences[] = $sentence;
- }
-
- $tokenMap = new TokenMap();
- $tokenMap->setTokenMap(
- array_map(
- 'strval',
- array_flip($tokens)
- )
- );
-
- return new Text(
- $tokenMap,
- $sentences
- );
- }
-}
+ $sentenceTokenList) {
+ $sentenceVector = [];
+ foreach ($sentenceTokenList as $token) {
+ $token = (string)$token;
+ if (!isset($tokens[$token])) {
+ $tokens[$token] = $i;
+ $tokenId = $i;
+ $i++;
+ } else {
+ $tokenId = $tokens[$token];
+ }
+
+ $sentenceVector[] = $tokenId;
+ }
+
+ $sentence = new Sentence();
+ $sentence->setId($sentenceIndex);
+ $sentence->setVector($sentenceVector);
+ $sentence->setOriginalValue($originalSentences[$sentenceIndex]);
+ $sentences[] = $sentence;
+ }
+
+ $tokenMap = new TokenMap();
+ $tokenMap->setTokenMap(
+ array_map(
+ 'strval',
+ array_flip($tokens)
+ )
+ );
+
+ return new Text(
+ $tokenMap,
+ $sentences
+ );
+ }
+}
diff --git a/src/Builder/TextBuilderInterface.php b/src/Builder/TextBuilderInterface.php
index 1f7d47f..3bf18b3 100644
--- a/src/Builder/TextBuilderInterface.php
+++ b/src/Builder/TextBuilderInterface.php
@@ -1,12 +1,12 @@
-getTopNodes = $getTopNodes;
- $this->sentenceWeighting = $sentenceWeighting;
- }
-
- public function build(
- TextInterface $text,
- NodeCollectionInterface $nodeCollection,
- int $maxKeywords
- ): TextRankOutputInterface {
- $nodes = $this->getTopNodes->execute($nodeCollection, $maxKeywords);
- $words = [];
-
- foreach ($nodes as $node) {
- $token = $text
- ->getTokenMap()
- ->getToken($node->getId());
- $word = new OutputValue();
- $word->setId($node->getId());
- $word->setValue($token);
- $word->setRank($node->getRank());
-
- $words[] = $word;
- }
-
- $textRankOutput = new TextRankOutput();
- $textRankOutput->setKeyWords(array_slice($words, 0, $maxKeywords));
-
- $sentences = $this->sentenceWeighting->weight($text, $words);
-
-
-
- $textRankOutput->setSentences($sentences);
-
-
- return $textRankOutput;
- }
-}
diff --git a/src/Data/TextRankOutput.php b/src/Data/AlgorithmOutput.php
similarity index 85%
rename from src/Data/TextRankOutput.php
rename to src/Data/AlgorithmOutput.php
index ce47e08..105ba6f 100644
--- a/src/Data/TextRankOutput.php
+++ b/src/Data/AlgorithmOutput.php
@@ -1,31 +1,31 @@
-keyWords = $keywords;
- }
-
- public function getKeyWords(): ?array
- {
- return $this->keyWords;
- }
-
- public function setSentences(array $sentences): void
- {
- $this->sentences = $sentences;
- }
-
- public function getSentences(): ?array
- {
- return $this->sentences;
- }
-}
+keyWords = $keywords;
+ }
+
+ /**
+ * Returns the stored keyword list; the declared return type allows null.
+ */
+ public function getKeyWords(): ?array
+ {
+ return $this->keyWords;
+ }
+
+ /**
+ * Stores the sentence list, replacing any previous value.
+ */
+ public function setSentences(array $sentences): void
+ {
+ $this->sentences = $sentences;
+ }
+
+ /**
+ * Returns the stored sentence list; the declared return type allows null.
+ */
+ public function getSentences(): ?array
+ {
+ return $this->sentences;
+ }
+}
diff --git a/src/Data/TextRankOutputInterface.php b/src/Data/AlgorithmOutputInterface.php
similarity index 52%
rename from src/Data/TextRankOutputInterface.php
rename to src/Data/AlgorithmOutputInterface.php
index 6a5fe1c..0a10c9e 100644
--- a/src/Data/TextRankOutputInterface.php
+++ b/src/Data/AlgorithmOutputInterface.php
@@ -1,31 +1,29 @@
-stopWordCsvPath = $stopWordCsvPath;
+ $this->maxKeywords = $maxKeywords;
+ $this->maxKeySentences = $maxKeySentences;
+ $this->pageRankPowerIteration = $pageRankPowerIteration;
+ }
+
+ /**
+ * Returns the path of the stop-word CSV file configured for this request.
+ */
+ public function getStopWordCsvPath(): string
+ {
+ return $this->stopWordCsvPath;
+ }
+
+ /**
+ * Replaces the stop-word CSV path; the path is stored without any check.
+ */
+ public function setStopWordCsvPath(string $stopWordCsvPath): void
+ {
+ $this->stopWordCsvPath = $stopWordCsvPath;
+ }
+
+ /**
+ * Returns the raw text to analyse.
+ *
+ * NOTE(review): the constructor assignments visible in this file do not
+ * set rawText, so presumably setRawText() must be called first - confirm.
+ */
+ public function getRawText(): string
+ {
+ return $this->rawText;
+ }
+
+ /**
+ * Stores the raw text to analyse.
+ */
+ public function setRawText(string $rawText): void
+ {
+ $this->rawText = $rawText;
+ }
+
+ /**
+ * Returns the configured upper bound on the number of keywords.
+ */
+ public function getMaxKeywords(): int
+ {
+ return $this->maxKeywords;
+ }
+
+ /**
+ * Replaces the upper bound on the number of keywords; no range check is done here.
+ */
+ public function setMaxKeywords(int $maxKeywords): void
+ {
+ $this->maxKeywords = $maxKeywords;
+ }
+
+ /**
+ * Returns the configured upper bound on the number of key sentences.
+ */
+ public function getMaxKeySentences(): int
+ {
+ return $this->maxKeySentences;
+ }
+
+ /**
+ * Replaces the upper bound on the number of key sentences; no range check is done here.
+ */
+ public function setMaxKeySentences(int $maxKeySentences): void
+ {
+ $this->maxKeySentences = $maxKeySentences;
+ }
+
+ /**
+ * Returns the iteration count that is handed to the ranking algorithm.
+ */
+ public function getPageRankPowerIteration(): int
+ {
+ return $this->pageRankPowerIteration;
+ }
+
+ /**
+ * Replaces the iteration count that is handed to the ranking algorithm.
+ */
+ public function setPageRankPowerIteration(int $pageRankPowerIteration): void
+ {
+ $this->pageRankPowerIteration = $pageRankPowerIteration;
+ }
+}
diff --git a/src/Data/AlgorithmRequestInterface.php b/src/Data/AlgorithmRequestInterface.php
new file mode 100644
index 0000000..a6e0aa1
--- /dev/null
+++ b/src/Data/AlgorithmRequestInterface.php
@@ -0,0 +1,58 @@
+id = $id;
- }
-
- public function getId(): int
- {
- return $this->id;
- }
-
- public function setValue(string $value): void
- {
- $this->value = $value;
- }
-
- public function getValue(): string
- {
- return $this->value;
- }
-
- public function setRank(float $rank): void
- {
- $this->rank = $rank;
- }
-
- public function getRank(): float
- {
- return $this->rank;
- }
-}
+id = $id;
+ }
+
+ /**
+ * Returns the identifier stored on this rank object.
+ */
+ public function getId(): int
+ {
+ return $this->id;
+ }
+
+ /**
+ * Stores the textual value carried by this rank object.
+ */
+ public function setValue(string $value): void
+ {
+ $this->value = $value;
+ }
+
+ /**
+ * Returns the textual value carried by this rank object.
+ */
+ public function getValue(): string
+ {
+ return $this->value;
+ }
+
+ /**
+ * Stores the rank (score) of this object.
+ */
+ public function setRank(float $rank): void
+ {
+ $this->rank = $rank;
+ }
+
+ /**
+ * Returns the rank (score) of this object.
+ */
+ public function getRank(): float
+ {
+ return $this->rank;
+ }
+}
diff --git a/src/Data/TextRankOutput/OutputValueInterface.php b/src/Data/RankDataObjectInterface.php
similarity index 74%
rename from src/Data/TextRankOutput/OutputValueInterface.php
rename to src/Data/RankDataObjectInterface.php
index e3458d3..88f0d5d 100644
--- a/src/Data/TextRankOutput/OutputValueInterface.php
+++ b/src/Data/RankDataObjectInterface.php
@@ -1,20 +1,20 @@
-words = $words;
+ }
+
+    /**
+     * Tells whether the given word is one of the stored stop words.
+     *
+     * The lookup is strict. A loose comparison would treat numeric strings as
+     * numbers (so "1e1" would match "10") and would match an empty string
+     * against a null entry.
+     *
+     * @param string $word Word to look up, compared exactly as given.
+     *
+     * @return bool True when an identical string is stored in the collection.
+     */
+    public function isExist(string $word): bool
+    {
+        return array_search($word, $this->words, true) !== false;
+    }
+}
diff --git a/src/Data/StopWordCollectionInterface.php b/src/Data/StopWordCollectionInterface.php
new file mode 100644
index 0000000..595e568
--- /dev/null
+++ b/src/Data/StopWordCollectionInterface.php
@@ -0,0 +1,10 @@
+tokenMap = $tokenMap;
- $this->sentences = $sentences;
- }
-
- public function getTokenMap(): TokenMapInterface
- {
- return $this->tokenMap;
- }
-
- public function getSentences(): array
- {
- return $this->sentences;
- }
-}
+tokenMap = $tokenMap;
+ $this->sentences = $sentences;
+ }
+
+ /**
+ * Returns the token map this text was constructed with.
+ */
+ public function getTokenMap(): TokenMapInterface
+ {
+ return $this->tokenMap;
+ }
+
+ /**
+ * Returns the sentence list this text was constructed with.
+ */
+ public function getSentences(): array
+ {
+ return $this->sentences;
+ }
+}
diff --git a/src/Data/Text/Sentence.php b/src/Data/Text/Sentence.php
index 69d2385..ee947a4 100644
--- a/src/Data/Text/Sentence.php
+++ b/src/Data/Text/Sentence.php
@@ -1,52 +1,52 @@
-id = $id;
- }
-
- public function getId(): int
- {
- return $this->id;
- }
-
- public function setOriginalValue(string $originalValue): void
- {
- $this->originalValue = $originalValue;
- }
-
- public function getOriginalValue(): string
- {
- return $this->originalValue;
- }
-
- public function setVector(array $vector): void
- {
- $this->vector = $vector;
- }
-
- public function getVector(): array
- {
- return $this->vector;
- }
-
- public function isIndexExists(int $index): bool
- {
- return isset($this->vector[$index]);
- }
-
- public function getTokenId(int $index): int
- {
- return $this->vector[$index];
- }
-}
+id = $id;
+ }
+
+ /**
+ * Returns the identifier of this sentence.
+ */
+ public function getId(): int
+ {
+ return $this->id;
+ }
+
+ /**
+ * Stores the original (unprocessed) text of this sentence.
+ */
+ public function setOriginalValue(string $originalValue): void
+ {
+ $this->originalValue = $originalValue;
+ }
+
+ /**
+ * Returns the original (unprocessed) text of this sentence.
+ */
+ public function getOriginalValue(): string
+ {
+ return $this->originalValue;
+ }
+
+ /**
+ * Stores the vector of this sentence; the other methods read token ids from it by index.
+ */
+ public function setVector(array $vector): void
+ {
+ $this->vector = $vector;
+ }
+
+ /**
+ * Returns the stored vector of this sentence.
+ */
+ public function getVector(): array
+ {
+ return $this->vector;
+ }
+
+ /**
+ * Tells whether the vector has an entry at the given index.
+ *
+ * Uses isset(), so an index holding null also counts as missing.
+ */
+ public function isIndexExists(int $index): bool
+ {
+ return isset($this->vector[$index]);
+ }
+
+ /**
+ * Returns the vector entry at the given index.
+ *
+ * No bounds check is done here; isIndexExists() can be used beforehand.
+ */
+ public function getTokenId(int $index): int
+ {
+ return $this->vector[$index];
+ }
+}
diff --git a/src/Data/Text/SentenceInterface.php b/src/Data/Text/SentenceInterface.php
index d8a5b17..c239ffe 100644
--- a/src/Data/Text/SentenceInterface.php
+++ b/src/Data/Text/SentenceInterface.php
@@ -1,30 +1,30 @@
-tokenMap = $tokenMap;
- }
-
- public function isExists(int $tokenId): bool
- {
- return isset($this->tokenMap[$tokenId]);
- }
-
- public function getToken(int $tokenId): string
- {
- return $this->tokenMap[$tokenId];
- }
-}
+tokenMap = $tokenMap;
+ }
+
+ /**
+ * Tells whether the map has an entry for the given token id.
+ *
+ * Uses isset(), so an id mapped to null also counts as missing.
+ */
+ public function isExists(int $tokenId): bool
+ {
+ return isset($this->tokenMap[$tokenId]);
+ }
+
+ /**
+ * Returns the token stored for the given id.
+ *
+ * No existence check is done here; isExists() can be used beforehand.
+ */
+ public function getToken(int $tokenId): string
+ {
+ return $this->tokenMap[$tokenId];
+ }
+}
diff --git a/src/Data/Text/TokenMapInterface.php b/src/Data/Text/TokenMapInterface.php
index bf54db0..d966eb5 100644
--- a/src/Data/Text/TokenMapInterface.php
+++ b/src/Data/Text/TokenMapInterface.php
@@ -1,17 +1,17 @@
-parser = $parser;
- $this->rankingAlgorithmStrategy = $rankingAlgorithmStrategy;
- $this->textRankOutputBuilder = $textRankOutputBuilder;
- }
-
- public function getKeywords(
- string $rawText,
- int $maxKeywords
- ): TextRankOutputInterface {
-
- $text = $this->parser->parse($rawText);
- $nodeCollection = $this->rankingAlgorithmStrategy->rank($text);
-
- return $this->textRankOutputBuilder->build(
- $text,
- $nodeCollection,
- $maxKeywords
- );
- }
-}
+parser = $generalFactory->createParser();
+ $this->pageRankAlgorithm = $generalFactory->createAlgorithmStrategy();
+ $this->algorithmOutputBuilder = $generalFactory->createAlgorithmBuilder();
+ $this->sentenceWeighting = $generalFactory->createSentenceWeighting();
+ }
+
+ /**
+ * Runs the whole pipeline for one request.
+ *
+ * Steps, in order: parse the raw text with the request's stop-word file,
+ * rank the parsed text with the requested iteration count, weight the
+ * sentences from the ranked nodes, then build the output with the
+ * request's keyword and sentence limits.
+ *
+ * @param AlgorithmRequestInterface $algorithmRequest Text and settings to process.
+ *
+ * @return AlgorithmOutputInterface Result produced by the output builder.
+ */
+ public function rank(
+ AlgorithmRequestInterface $algorithmRequest
+ ): AlgorithmOutputInterface {
+
+ $text = $this->parser->parse(
+ $algorithmRequest->getRawText(),
+ $algorithmRequest->getStopWordCsvPath()
+ );
+
+ $nodeCollection = $this->pageRankAlgorithm->rank(
+ $text,
+ $algorithmRequest->getPageRankPowerIteration()
+ );
+
+ $sentences = $this->sentenceWeighting->weight(
+ $text,
+ $nodeCollection
+ );
+
+ return $this->algorithmOutputBuilder->build(
+ $text,
+ $nodeCollection,
+ $sentences,
+ $algorithmRequest->getMaxKeywords(),
+ $algorithmRequest->getMaxKeySentences()
+ );
+ }
+}
diff --git a/src/Factory/GeneralFactory.php b/src/Factory/GeneralFactory.php
new file mode 100644
index 0000000..5394225
--- /dev/null
+++ b/src/Factory/GeneralFactory.php
@@ -0,0 +1,59 @@
+getResource($path);
+
+ while (false !== ($row = fgetcsv($resource))) {
+ yield array_values($row);
+ }
+
+ fclose($resource);
+ }
+
+    /**
+     * Opens the file at $path for reading.
+     *
+     * @param string $path File to open.
+     *
+     * @return resource Open read handle.
+     *
+     * @throws IoException When fopen() reports failure.
+     */
+    private function getResource(string $path)
+    {
+        $handle = fopen($path, 'r');
+
+        if (false !== $handle) {
+            return $handle;
+        }
+
+        throw new IoException(sprintf('Can\'t read file [%s]', $path));
+    }
+}
diff --git a/src/Service/GetTopNodes.php b/src/Service/GetTopNodes.php
deleted file mode 100644
index 1b201d2..0000000
--- a/src/Service/GetTopNodes.php
+++ /dev/null
@@ -1,30 +0,0 @@
-getNodes());
- $size = count($nodes);
-
- for ($i = 0; $i < $size; $i++) {
- for ($j = 0; $j < $size; $j++) {
- if ($nodes[$i]->getRank() > $nodes[$j]->getRank()) {
- $tmp = $nodes[$i];
- $nodes[$i] = $nodes[$j];
- $nodes[$j] = $tmp;
- }
- }
- }
-
- return $nodes;
- }
-}
diff --git a/src/Service/GetTopNodesInterface.php b/src/Service/GetTopNodesInterface.php
deleted file mode 100644
index 6e257bc..0000000
--- a/src/Service/GetTopNodesInterface.php
+++ /dev/null
@@ -1,22 +0,0 @@
-textBuilder = $textBuilder;
- $this->stopWordFilter = $stopWordFilter;
- }
-
- public function parse(string $rawText): TextInterface
- {
- $sentences = preg_split(
- '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/',
- $rawText,
- -1,
- PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
- );
-
- $textMap = [];
-
- foreach ($sentences as $sentenceIndex => $sentence) {
- $tokens = preg_split(
- '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/',
- $sentence,
- -1,
- PREG_SPLIT_NO_EMPTY
- );
-
- foreach ($tokens as $tokenIndex => $token) {
- $token = mb_strtolower(trim($token));
-
- if ($this->stopWordFilter->isStopWord($token)) {
- unset($tokens[$tokenIndex]);
- } else {
- $tokens[$tokenIndex] = mb_strtolower(trim($token));
- }
- }
-
- $textMap[$sentenceIndex] = $tokens;
- }
-
- return $this->textBuilder->build(
- $sentences,
- $textMap
- );
- }
-}
+textBuilder = $textBuilder;
+ $this->stopWordCollectionBuilder = $stopWordCollectionBuilder;
+ }
+
+    /**
+     * Splits raw text into sentences and lower-cased tokens, drops stop
+     * words, and hands the result to the text builder.
+     *
+     * The stop-word collection is rebuilt from $stopWordsPath on every call.
+     * The sentence split captures its delimiters (PREG_SPLIT_DELIM_CAPTURE),
+     * so delimiters appear as entries of their own in the sentence list.
+     *
+     * @param string $rawText       Text to analyse.
+     * @param string $stopWordsPath Path handed to the stop-word collection builder.
+     *
+     * @return TextInterface Result of the text builder for the sentence list
+     *                       and the per-sentence token lists.
+     */
+    public function parse(string $rawText, string $stopWordsPath): TextInterface
+    {
+        $stopWordCollection = $this
+            ->stopWordCollectionBuilder
+            ->build($stopWordsPath);
+
+        $sentences = preg_split(
+            '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/',
+            $rawText,
+            -1,
+            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
+        );
+
+        $tokenPattern = '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/';
+        $textMap = [];
+
+        foreach ($sentences as $sentenceIndex => $sentence) {
+            // The "u" modifier makes \p{P} and \s operate on UTF-8 characters.
+            // Without it the subject is matched byte by byte, and a trailing
+            // byte of a multi-byte letter can be taken for punctuation.
+            $tokens = preg_split(
+                $tokenPattern . 'u',
+                $sentence,
+                -1,
+                PREG_SPLIT_NO_EMPTY
+            );
+
+            if (false === $tokens) {
+                // UTF-8 mode rejects malformed input; fall back to the
+                // byte-wise split so such text is still tokenized.
+                $tokens = preg_split(
+                    $tokenPattern,
+                    $sentence,
+                    -1,
+                    PREG_SPLIT_NO_EMPTY
+                );
+            }
+
+            foreach ($tokens as $tokenIndex => $token) {
+                $token = mb_strtolower(trim($token));
+
+                if ($stopWordCollection->isExist($token)) {
+                    unset($tokens[$tokenIndex]);
+                } else {
+                    // Already trimmed and lower-cased above.
+                    $tokens[$tokenIndex] = $token;
+                }
+            }
+
+            $textMap[$sentenceIndex] = $tokens;
+        }
+
+        return $this->textBuilder->build(
+            $sentences,
+            $textMap
+        );
+    }
+}
diff --git a/src/Service/ParserInterface.php b/src/Service/ParserInterface.php
new file mode 100644
index 0000000..9361546
--- /dev/null
+++ b/src/Service/ParserInterface.php
@@ -0,0 +1,12 @@
+getId()] = $keyword->getRank();
- }
-
- $sentenceOutputList = [];
-
- foreach ($text->getSentences() as $sentence) {
- $vector = $sentence->getVector();
- $score = .0;
-
- foreach ($vector as $tokenId) {
- if (isset($keywordRankMap[$tokenId])) {
- $score += $keywordRankMap[$tokenId];
- }
- }
-
- $score = $score / count($vector);
- $sentenceOutput = new OutputValue();
- $sentenceOutput->setId($sentence->getId());
- $sentenceOutput->setRank($score);
- $sentenceOutput->setValue($sentence->getOriginalValue());
-
- $sentenceOutputList[] = $sentenceOutput;
- }
-
- return $sentenceOutputList;
- }
-}
+createRankMap($nodeCollection);
+ $sentenceOutputList = [];
+
+ foreach ($text->getSentences() as $sentence) {
+ $vector = $sentence->getVector();
+ $weight = .0;
+
+ foreach ($vector as $tokenId) {
+ $weight += $rankMap[$tokenId];
+ }
+
+ $weight = $weight / max(1, count($vector));
+
+ $sentenceOutputList[] = $this
+ ->createSentence(
+ $sentence->getId(),
+ $weight,
+ $sentence->getOriginalValue()
+ );
+ }
+
+ return $sentenceOutputList;
+ }
+
+    /**
+     * Wraps one sentence and its weight in a rank data object.
+     *
+     * @param int    $id            Sentence identifier.
+     * @param float  $rank          Weight computed for the sentence.
+     * @param string $originalValue Original sentence text.
+     *
+     * @return RankDataObjectInterface Object carrying the three values.
+     */
+    private function createSentence(
+        int $id,
+        float $rank,
+        string $originalValue
+    ): RankDataObjectInterface {
+        $rankData = new RankDataObject();
+
+        $rankData->setId($id);
+        $rankData->setRank($rank);
+        $rankData->setValue($originalValue);
+
+        return $rankData;
+    }
+
+    /**
+     * Indexes node ranks by node id.
+     *
+     * @param NodeCollectionInterface $nodeCollection Ranked nodes.
+     *
+     * @return array Map of node id to rank; a later node with the same id
+     *               overwrites an earlier one.
+     */
+    private function createRankMap(
+        NodeCollectionInterface $nodeCollection
+    ): array {
+        $ranksById = [];
+
+        foreach ($nodeCollection->getNodes() as $rankedNode) {
+            $nodeId = $rankedNode->getId();
+            $ranksById[$nodeId] = $rankedNode->getRank();
+        }
+
+        return $ranksById;
+    }
+}
diff --git a/src/Service/SentenceWeightingInterface.php b/src/Service/SentenceWeightingInterface.php
index 7bd29c5..c41a669 100644
--- a/src/Service/SentenceWeightingInterface.php
+++ b/src/Service/SentenceWeightingInterface.php
@@ -1,19 +1,23 @@
-getRank() > $rankList[$j]->getRank()) {
+ $tmp = $rankList[$i];
+ $rankList[$i] = $rankList[$j];
+ $rankList[$j] = $tmp;
+ }
+ }
+ }
+
+ return $rankList;
+ }
+}
diff --git a/src/Service/SortRankDataListInterface.php b/src/Service/SortRankDataListInterface.php
new file mode 100644
index 0000000..929c056
--- /dev/null
+++ b/src/Service/SortRankDataListInterface.php
@@ -0,0 +1,18 @@
+stopWordCollection = $stopWordCollection;
- }
-
- public function isStopWord(string $word): bool
- {
- return array_search($word, $this->stopWordCollection->words) !== false;
- }
-}
diff --git a/src/Strategy/PageRankStrategy.php b/src/Strategy/PageRankStrategy.php
index 20bab66..89ba665 100644
--- a/src/Strategy/PageRankStrategy.php
+++ b/src/Strategy/PageRankStrategy.php
@@ -1,81 +1,82 @@
-pageRankDataSourceBuilder = $pageRankDataSourceBuilder;
- }
-
- public function rank(TextInterface $text): NodeCollectionInterface
- {
- $dataSource = $this->pageRankDataSourceBuilder->build($text);
- $strategy = $this->createPageRankStrategy($dataSource);
- $ranking = $this->createRanking($strategy);
- $pageRankAlgorithm = $this->createPageRankAlgorithm($ranking, $strategy);
- $maxIteration = 1000;
-
- return $pageRankAlgorithm->run($maxIteration);
- }
-
- private function createPageRankAlgorithm(
- RankingInterface $ranking,
- NodeDataSourceStrategyInterface $strategy
- ): PageRankAlgorithmInterface {
-
- $normalizer = new Normalizer();
-
- return new PageRankAlgorithm(
- $ranking,
- $strategy,
- $normalizer
- );
- }
-
- private function createPageRankStrategy(
- array $dataSource
- ): NodeDataSourceStrategyInterface {
-
- $nodeBuilder = new NodeBuilder();
- $nodeCollectionBuilder = new NodeCollectionBuilder();
-
- return new MemorySourceStrategy(
- $nodeBuilder,
- $nodeCollectionBuilder,
- $dataSource
- );
- }
-
- private function createRanking(
- NodeDataSourceStrategyInterface $strategy
- ): RankingInterface {
-
- $rankComparator = new RankComparator();
-
- return new Ranking(
- $rankComparator,
- $strategy
- );
- }
-}
+pageRankDataSourceBuilder = $pageRankDataSourceBuilder;
+ }
+
+    /**
+     * Ranks the tokens of the given text.
+     *
+     * Builds the data source from the text, wires the node source strategy,
+     * the ranking and the algorithm together, then runs the algorithm with
+     * the given iteration count.
+     *
+     * @param TextInterface $text      Parsed text to rank.
+     * @param int           $iteration Value passed to the algorithm's run().
+     *
+     * @return NodeCollectionInterface Result of the algorithm run.
+     */
+    public function rank(
+        TextInterface $text,
+        int $iteration
+    ): NodeCollectionInterface {
+        $strategy = $this->createPageRankStrategy(
+            $this->pageRankDataSourceBuilder->build($text)
+        );
+
+        return $this
+            ->createPageRankAlgorithm($this->createRanking($strategy), $strategy)
+            ->run($iteration);
+    }
+
+    /**
+     * Creates the algorithm instance from a ranking, a node source strategy
+     * and a fresh normalizer.
+     *
+     * @param RankingInterface                $ranking  Ranking to use.
+     * @param NodeDataSourceStrategyInterface $strategy Node source strategy to use.
+     *
+     * @return PageRankAlgorithmInterface Newly created algorithm.
+     */
+    private function createPageRankAlgorithm(
+        RankingInterface $ranking,
+        NodeDataSourceStrategyInterface $strategy
+    ): PageRankAlgorithmInterface {
+        return new PageRankAlgorithm($ranking, $strategy, new Normalizer());
+    }
+
+    /**
+     * Creates the in-memory node source strategy for the given data source.
+     *
+     * @param array $dataSource Data handed to the strategy unchanged.
+     *
+     * @return NodeDataSourceStrategyInterface Newly created strategy with
+     *                                         fresh node and collection builders.
+     */
+    private function createPageRankStrategy(
+        array $dataSource
+    ): NodeDataSourceStrategyInterface {
+        return new MemorySourceStrategy(
+            new NodeBuilder(),
+            new NodeCollectionBuilder(),
+            $dataSource
+        );
+    }
+
+    /**
+     * Creates the ranking for the given node source strategy.
+     *
+     * @param NodeDataSourceStrategyInterface $strategy Node source strategy to rank over.
+     *
+     * @return RankingInterface Newly created ranking with a fresh comparator.
+     */
+    private function createRanking(
+        NodeDataSourceStrategyInterface $strategy
+    ): RankingInterface {
+        return new Ranking(new RankComparator(), $strategy);
+    }
+}
diff --git a/src/Strategy/RankingAlgorithmStrategyInterface.php b/src/Strategy/RankingAlgorithmStrategyInterface.php
index 3387864..e771d72 100644
--- a/src/Strategy/RankingAlgorithmStrategyInterface.php
+++ b/src/Strategy/RankingAlgorithmStrategyInterface.php
@@ -1,13 +1,16 @@
-
- * $stopWords = new English();
- *
- * $textRank = new TextRankFacade();
- * $textRank->setStopWords($stopWords);
- *
- * $sentences = $textRank->summarizeTextFreely(
- * $rawText,
- * 5,
- * 2,
- * Summarize::GET_ALL_IMPORTANT
- * );
- *
- *
- * @package PhpScience\TextRank
+ * @deprecated Use PhpScience\TextRank\Facade\TextRank instead.
*/
class TextRankFacade
{
/**
- * Stop Words
- *
- * Stop Words to ignore because of dummy words. These words will not be Key
- * Words. A, like, no yes, one, two, I, you for example.
- *
- * @see \PhpScience\TextRank\Tool\StopWords\English
- *
* @var StopWordsAbstract
*/
- protected $stopWords;
+ protected StopWordsAbstract $stopWords;
/**
- * Set Stop Words.
- *
- * @param StopWordsAbstract $stopWords Stop Words to ignore because of
- * dummy words.
+ * @param StopWordsAbstract $stopWords
*/
public function setStopWords(StopWordsAbstract $stopWords)
{
From e58ff6b0aea54510c502a08e3f1afea215ee9ad8 Mon Sep 17 00:00:00 2001
From: David Belicza <87.bdavid@gmail.com>
Date: Sat, 26 Sep 2020 19:45:01 +0200
Subject: [PATCH 5/5] Old code has been removed, Sorting algorithm has been
changed, Workflow refactored, Smaller bugfixes
---
src/Builder/AlgorithmOutputBuilder.php | 6 +-
src/Data/AlgorithmRequest.php | 21 +-
src/Data/AlgorithmRequestInterface.php | 10 +
src/Facade/TextRank.php | 3 +-
src/Service/Parser.php | 21 +-
src/Service/ParserInterface.php | 6 +-
src/Service/SortRankDataList.php | 29 +-
src/TextRankFacade.php | 235 --------
src/Tool/Graph.php | 83 ---
src/Tool/Parser.php | 213 -------
src/Tool/Score.php | 176 ------
src/Tool/StopWords/English.php | 334 ----------
src/Tool/StopWords/French.php | 706 ----------------------
src/Tool/StopWords/German.php | 616 -------------------
src/Tool/StopWords/Italian.php | 676 ---------------------
src/Tool/StopWords/Norwegian.php | 238 --------
src/Tool/StopWords/Russian.php | 575 ------------------
src/Tool/StopWords/Spanish.php | 738 -----------------------
src/Tool/StopWords/StopWordsAbstract.php | 27 -
src/Tool/Summarize.php | 224 -------
src/Tool/Text.php | 99 ---
tests/functional/TextRankFacadeTest.php | 151 -----
22 files changed, 74 insertions(+), 5113 deletions(-)
delete mode 100644 src/TextRankFacade.php
delete mode 100644 src/Tool/Graph.php
delete mode 100644 src/Tool/Parser.php
delete mode 100644 src/Tool/Score.php
delete mode 100644 src/Tool/StopWords/English.php
delete mode 100644 src/Tool/StopWords/French.php
delete mode 100644 src/Tool/StopWords/German.php
delete mode 100644 src/Tool/StopWords/Italian.php
delete mode 100644 src/Tool/StopWords/Norwegian.php
delete mode 100644 src/Tool/StopWords/Russian.php
delete mode 100644 src/Tool/StopWords/Spanish.php
delete mode 100644 src/Tool/StopWords/StopWordsAbstract.php
delete mode 100644 src/Tool/Summarize.php
delete mode 100644 src/Tool/Text.php
delete mode 100644 tests/functional/TextRankFacadeTest.php
diff --git a/src/Builder/AlgorithmOutputBuilder.php b/src/Builder/AlgorithmOutputBuilder.php
index ecf11dd..a4f2b25 100644
--- a/src/Builder/AlgorithmOutputBuilder.php
+++ b/src/Builder/AlgorithmOutputBuilder.php
@@ -29,7 +29,11 @@ public function build(
int $maxSentences
): AlgorithmOutputInterface {
$words = $this->createWordList($text, $nodeCollection, $maxKeywords);
- $sentences = $this->sortRankDataList->sort($sentences);
+ $sentences = array_slice(
+ $this->sortRankDataList->sort($sentences),
+ 0,
+ $maxSentences
+ );
$textRankOutput = new AlgorithmOutput();
$textRankOutput->setKeyWords($words);
diff --git a/src/Data/AlgorithmRequest.php b/src/Data/AlgorithmRequest.php
index 74e62dc..dc525ee 100644
--- a/src/Data/AlgorithmRequest.php
+++ b/src/Data/AlgorithmRequest.php
@@ -8,17 +8,20 @@ class AlgorithmRequest implements AlgorithmRequestInterface
{
private string $stopWordCsvPath;
private string $rawText;
+ private int $minKeywordLength;
private int $maxKeywords;
private int $maxKeySentences;
private int $pageRankPowerIteration;
public function __construct(
string $stopWordCsvPath = __DIR__ . '/../resource/stop-word/english.csv',
+ int $minKeywordLength = 3,
int $maxKeywords = 10,
int $maxKeySentences = 5,
int $pageRankPowerIteration = 10
) {
$this->stopWordCsvPath = $stopWordCsvPath;
+ $this->minKeywordLength = $minKeywordLength;
$this->maxKeywords = $maxKeywords;
$this->maxKeySentences = $maxKeySentences;
$this->pageRankPowerIteration = $pageRankPowerIteration;
@@ -34,6 +37,16 @@ public function setStopWordCsvPath(string $stopWordCsvPath): void
$this->stopWordCsvPath = $stopWordCsvPath;
}
+ public function getMinKeywordLength(): int
+ {
+ return $this->minKeywordLength;
+ }
+
+ public function setMinKeywordLength(int $minKeywordLength): void
+ {
+ $this->minKeywordLength = $minKeywordLength;
+ }
+
public function getRawText(): string
{
return $this->rawText;
@@ -64,13 +77,13 @@ public function setMaxKeySentences(int $maxKeySentences): void
$this->maxKeySentences = $maxKeySentences;
}
- public function getPageRankPowerIteration(): int
+ public function setPageRankPowerIteration(int $pageRankPowerIteration): void
{
- return $this->pageRankPowerIteration;
+ $this->pageRankPowerIteration = $pageRankPowerIteration;
}
- public function setPageRankPowerIteration(int $pageRankPowerIteration): void
+ public function getPageRankPowerIteration(): int
{
- $this->pageRankPowerIteration = $pageRankPowerIteration;
+ return $this->pageRankPowerIteration;
}
}
diff --git a/src/Data/AlgorithmRequestInterface.php b/src/Data/AlgorithmRequestInterface.php
index a6e0aa1..3da182a 100644
--- a/src/Data/AlgorithmRequestInterface.php
+++ b/src/Data/AlgorithmRequestInterface.php
@@ -16,6 +16,16 @@ public function getStopWordCsvPath(): string;
*/
public function setStopWordCsvPath(string $stopWordCsvPath): void;
+ /**
+ * @return int
+ */
+ public function getMinKeywordLength(): int;
+
+ /**
+ * @param int $minKeywordLength
+ */
+ public function setMinKeywordLength(int $minKeywordLength): void;
+
/**
* @return string
*/
diff --git a/src/Facade/TextRank.php b/src/Facade/TextRank.php
index 14f536f..f81a511 100644
--- a/src/Facade/TextRank.php
+++ b/src/Facade/TextRank.php
@@ -34,7 +34,8 @@ public function rank(
$text = $this->parser->parse(
$algorithmRequest->getRawText(),
- $algorithmRequest->getStopWordCsvPath()
+ $algorithmRequest->getStopWordCsvPath(),
+ $algorithmRequest->getMinKeywordLength()
);
$nodeCollection = $this->pageRankAlgorithm->rank(
diff --git a/src/Service/Parser.php b/src/Service/Parser.php
index 36c0321..59d51b5 100644
--- a/src/Service/Parser.php
+++ b/src/Service/Parser.php
@@ -21,8 +21,11 @@ public function __construct(
$this->stopWordCollectionBuilder = $stopWordCollectionBuilder;
}
- public function parse(string $rawText, string $stopWordsPath): TextInterface
- {
+ public function parse(
+ string $rawText,
+ string $stopWordsPath,
+ int $minimumTokenLength
+ ): TextInterface {
$stopWordCollection = $this
->stopWordCollectionBuilder
->build($stopWordsPath);
@@ -34,6 +37,12 @@ public function parse(string $rawText, string $stopWordsPath): TextInterface
PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
);
+ foreach ($sentences as $sentenceIndex => $sentence) {
+ if (1 === strlen(trim($sentence))) {
+ unset($sentences[$sentenceIndex]);
+ }
+ }
+
$textMap = [];
foreach ($sentences as $sentenceIndex => $sentence) {
@@ -47,10 +56,14 @@ public function parse(string $rawText, string $stopWordsPath): TextInterface
foreach ($tokens as $tokenIndex => $token) {
$token = mb_strtolower(trim($token));
- if ($stopWordCollection->isExist($token)) {
+ if (
+ ctype_punct($token)
+ || mb_strlen($token) < $minimumTokenLength
+ || $stopWordCollection->isExist($token)
+ ) {
unset($tokens[$tokenIndex]);
} else {
- $tokens[$tokenIndex] = mb_strtolower(trim($token));
+ $tokens[$tokenIndex] = $token;
}
}
diff --git a/src/Service/ParserInterface.php b/src/Service/ParserInterface.php
index 9361546..b432975 100644
--- a/src/Service/ParserInterface.php
+++ b/src/Service/ParserInterface.php
@@ -8,5 +8,9 @@
interface ParserInterface
{
- public function parse(string $rawText, string $stopWordsPath): TextInterface;
+ public function parse(
+ string $rawText,
+ string $stopWordsPath,
+ int $minimumTokenLength
+ ): TextInterface;
}
diff --git a/src/Service/SortRankDataList.php b/src/Service/SortRankDataList.php
index db557cc..2273597 100644
--- a/src/Service/SortRankDataList.php
+++ b/src/Service/SortRankDataList.php
@@ -11,18 +11,25 @@ class SortRankDataList implements SortRankDataListInterface
*/
public function sort(array $rankList): array
{
- $size = count($rankList);
-
- for ($i = 0; $i < $size; $i++) {
- for ($j = 0; $j < $size; $j++) {
- if ($rankList[$i]->getRank() > $rankList[$j]->getRank()) {
- $tmp = $rankList[$i];
- $rankList[$i] = $rankList[$j];
- $rankList[$j] = $tmp;
- }
- }
+ $rankIndex = $this->getIndexedRank($rankList);
+ arsort($rankIndex);
+ $rankCollection = [];
+
+ foreach ($rankIndex as $index => $rank) {
+ $rankCollection[] = $rankList[$index];
+ }
+
+ return $rankCollection;
+ }
+
+ private function getIndexedRank(array $rankList): array
+ {
+ $rankIndex = [];
+
+ foreach ($rankList as $index => $rankObject) {
+ $rankIndex[$index] = $rankObject->getRank();
}
- return $rankList;
+ return $rankIndex;
}
}
diff --git a/src/TextRankFacade.php b/src/TextRankFacade.php
deleted file mode 100644
index 20ddd34..0000000
--- a/src/TextRankFacade.php
+++ /dev/null
@@ -1,235 +0,0 @@
-stopWords = $stopWords;
- }
-
- /**
- * Only Keywords
- *
- * It retrieves the possible keywords with their scores from a text.
- *
- * @param string $rawText A single raw text.
- *
- * @return array Array from Keywords. Key is the parsed word, value is the
- * word score.
- */
- public function getOnlyKeyWords(string $rawText): array
- {
- $parser = new Parser();
- $parser->setMinimumWordLength(3);
- $parser->setRawText($rawText);
-
- if ($this->stopWords) {
- $parser->setStopWords($this->stopWords);
- }
-
- $text = $parser->parse();
-
- $graph = new Graph();
- $graph->createGraph($text);
-
- $score = new Score();
-
- return $score->calculate(
- $graph, $text
- );
- }
-
- /**
- * Highlighted Texts
- *
- * It finds the most important sentences from a text by the most important
- * keywords and these keywords also found by automatically. It retrieves
- * the most important sentences what are 20 percent of the full text.
- *
- * @param string $rawText A single raw text.
- *
- * @return array An array from sentences.
- */
- public function getHighlights(string $rawText): array
- {
- $parser = new Parser();
- $parser->setMinimumWordLength(3);
- $parser->setRawText($rawText);
-
- if ($this->stopWords) {
- $parser->setStopWords($this->stopWords);
- }
-
- $text = $parser->parse();
- $maximumSentences = (int) (count($text->getSentences()) * 0.2);
-
- $graph = new Graph();
- $graph->createGraph($text);
-
- $score = new Score();
- $scores = $score->calculate($graph, $text);
-
- $summarize = new Summarize();
-
- return $summarize->getSummarize(
- $scores,
- $graph,
- $text,
- 12,
- $maximumSentences,
- Summarize::GET_ALL_IMPORTANT
- );
- }
-
- /**
- * Compounds a Summarized Text
- *
- * It finds the three most important sentences from a text by the most
- * important keywords and these keywords also found by automatically. It
- * retrieves these important sentences.
- *
- * @param string $rawText A single raw text.
- *
- * @return array An array from sentences.
- */
- public function summarizeTextCompound(string $rawText): array
- {
- $parser = new Parser();
- $parser->setMinimumWordLength(3);
- $parser->setRawText($rawText);
-
- if ($this->stopWords) {
- $parser->setStopWords($this->stopWords);
- }
-
- $text = $parser->parse();
-
- $graph = new Graph();
- $graph->createGraph($text);
-
- $score = new Score();
- $scores = $score->calculate($graph, $text);
-
- $summarize = new Summarize();
-
- return $summarize->getSummarize(
- $scores,
- $graph,
- $text,
- 10,
- 3,
- Summarize::GET_ALL_IMPORTANT
- );
- }
-
- /**
- * Summarized Text
- *
- * It finds the most important sentence from a text by the most important
- * keywords and these keywords also found by automatically. It retrieves
- * the most important sentence and its following sentences.
- *
- * @param string $rawText A single raw text.
- *
- * @return array An array from sentences.
- */
- public function summarizeTextBasic(string $rawText): array
- {
- $parser = new Parser();
- $parser->setMinimumWordLength(3);
- $parser->setRawText($rawText);
-
- if ($this->stopWords) {
- $parser->setStopWords($this->stopWords);
- }
-
- $text = $parser->parse();
-
- $graph = new Graph();
- $graph->createGraph($text);
-
- $score = new Score();
- $scores = $score->calculate($graph, $text);
-
- $summarize = new Summarize();
-
- return $summarize->getSummarize(
- $scores,
- $graph,
- $text,
- 10,
- 3,
- Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS
- );
- }
-
- /**
- * Freely Summarized Text.
- *
- * It retrieves the most important sentences from a text by the most important
- * keywords and these keywords also found by automatically.
- *
- * @param string $rawText A single raw text.
- * @param int $analyzedKeyWords Maximum number of the most important
- * Key Words to analyze the text.
- * @param int $expectedSentences How many sentence should be retrieved.
- * @param int $summarizeType Highlights from the text or a part of
- * the text.
- *
- * @return array An array from sentences.
- */
- public function summarizeTextFreely(
- string $rawText,
- int $analyzedKeyWords,
- int $expectedSentences,
- int $summarizeType
- ): array {
- $parser = new Parser();
- $parser->setMinimumWordLength(3);
- $parser->setRawText($rawText);
-
- if ($this->stopWords) {
- $parser->setStopWords($this->stopWords);
- }
-
- $text = $parser->parse();
-
- $graph = new Graph();
- $graph->createGraph($text);
-
- $score = new Score();
- $scores = $score->calculate($graph, $text);
-
- $summarize = new Summarize();
-
- return $summarize->getSummarize(
- $scores,
- $graph,
- $text,
- $analyzedKeyWords,
- $expectedSentences,
- $summarizeType
- );
- }
-}
diff --git a/src/Tool/Graph.php b/src/Tool/Graph.php
deleted file mode 100644
index 06cdbe9..0000000
--- a/src/Tool/Graph.php
+++ /dev/null
@@ -1,83 +0,0 @@
-getWordMatrix();
-
- foreach ($wordMatrix as $sentenceIdx => $words) {
- $idxArray = array_keys($words);
-
- foreach ($idxArray as $idxKey => $idxValue) {
- $connections = [];
-
- if (isset($idxArray[$idxKey - 1])) {
- $connections[] = $idxArray[$idxKey - 1];
- }
-
- if (isset($idxArray[$idxKey + 1])) {
- $connections[] = $idxArray[$idxKey + 1];
- }
-
- $this->graph[$words[$idxValue]][$sentenceIdx][$idxValue] = $connections;
- }
- }
- }
-
- /**
- * Graph.
- *
- * It retrieves the graph. Key is the word, value is an array with the
- * sentence IDs.
- *
- *
- * array(
- * 'apple' => array( // word
- * 2 => array( // ID of the sentence
- * 52 => array( // ID of the word in the sentence
- * 51, 53 // IDs of the closest words to the apple word
- * ),
- * 10 => array( // IDs of the closest words to the apple word
- * 9, 11 // IDs of the closest words to the apple word
- * ),
- * 5 => array(6)
- * ),
- * 6 => array(
- * 9 => array(8, 10)
- * ),
- * ),
- * 'orange' => array(
- * 1 => array(
- * 30 => array(29, 31)
- * )
- * )
- * );
- *
- *
- * @return array
- */
- public function getGraph(): array
- {
- return $this->graph;
- }
-}
diff --git a/src/Tool/Parser.php b/src/Tool/Parser.php
deleted file mode 100644
index 76829be..0000000
--- a/src/Tool/Parser.php
+++ /dev/null
@@ -1,213 +0,0 @@
-minimumWordLength = $wordLength;
- }
-
- /**
- * It sets the raw text.
- *
- * @param string $rawText
- */
- public function setRawText(string $rawText)
- {
- $this->rawText = $rawText;
- }
-
- /**
- * Set Stop Words.
- *
- * It sets the stop words to remove them from the found keywords.
- *
- * @param StopWordsAbstract $words Stop Words to ignore. These words will
- * not be keywords.
- */
- public function setStopWords(StopWordsAbstract $words)
- {
- $this->stopWords = $words;
- }
-
- /**
- * It retrieves the punctuations.
- *
- * @return array Array from punctuations where key is the index to link to
- * the sentence and value is the punctuation.
- */
- public function getMarks(): array
- {
- return $this->marks;
- }
-
- /**
- * Parse.
- *
- * It parses the text from the property and retrieves in Text object
- * prepared to scoring and to searching.
- *
- * @return Text Parsed text prepared to scoring.
- */
- public function parse(): Text
- {
- $matrix = [];
- $sentences = $this->getSentences();
-
- foreach ($sentences as $sentenceIdx => $sentence) {
- $matrix[$sentenceIdx] = $this->getWords($sentence);
- }
-
- $text = new Text();
- $text->setSentences($sentences);
- $text->setWordMatrix($matrix);
- $text->setMarks($this->marks);
-
- return $text;
- }
-
- /**
- * Sentences.
- *
- * It retrieves the sentences in array without junk data.
- *
- * @return array Array from sentences.
- */
- protected function getSentences(): array
- {
- $sentences = $sentences = preg_split(
- '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/',
- $this->rawText,
- -1,
- PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
- );
-
- return array_values(
- array_filter(
- array_map(
- [$this, 'cleanSentence'],
- $sentences
- )
- )
- );
- }
-
- /**
- * Possible Keywords.
- *
- * It retrieves an array of possible keywords without junk characters,
- * spaces and stop words.
- *
- * @param string $subText It should be a sentence.
- *
- * @return array The array of the possible keywords.
- */
- protected function getWords(string $subText): array
- {
- $words = preg_split(
- '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/',
- $subText,
- -1,
- PREG_SPLIT_NO_EMPTY
- );
-
- $words = array_values(
- array_filter(
- array_map(
- [$this, 'cleanWord'],
- $words
- )
- )
- );
-
- if ($this->stopWords) {
- return array_filter($words, function($word) {
- return !ctype_punct($word)
- && strlen($word) > $this->minimumWordLength
- && !$this->stopWords->exist($word);
- });
- } else {
- return array_filter($words, function($word) {
- return !ctype_punct($word)
- && strlen($word) > $this->minimumWordLength;
- });
- }
- }
-
- /**
- * Clean Sentence.
- *
- * It clean the sentence. If it is a punctuation it will be stored in the
- * property $marks.
- *
- * @param string $sentence A sentence as a string.
- *
- * @return string It is empty string when it's punctuation. Otherwise it's
- * the trimmed sentence itself.
- */
- protected function cleanSentence(string $sentence): string
- {
- if (strlen(trim($sentence)) == 1) {
- $this->marks[] = trim($sentence);
- return '';
-
- } else {
- return trim($sentence);
- }
- }
-
- /**
- * Clean Word.
- *
- * It removes the junk spaces from the word and retrieves it.
- *
- * @param string $word
- *
- * @return string Cleaned word.
- */
- protected function cleanWord(string $word): string
- {
- return mb_strtolower(trim($word));
- }
-}
diff --git a/src/Tool/Score.php b/src/Tool/Score.php
deleted file mode 100644
index f28b2c3..0000000
--- a/src/Tool/Score.php
+++ /dev/null
@@ -1,176 +0,0 @@
-getGraph();
- $wordMatrix = $text->getWordMatrix();
- $wordConnections = $this->calculateConnectionNumbers($graphData);
- $scores = $this->calculateScores(
- $graphData,
- $wordMatrix,
- $wordConnections
- );
-
- return $this->normalizeAndSortScores($scores);
- }
-
- /**
- * Connection Numbers.
- *
- * It calculates the number of connections for each word and retrieves it
- * in array where key is the word and value is the number of connections.
- *
- * @param array $graphData Graph data from a Graph type object.
- *
- * @return array Key is the word and value is the number of the connected
- * words.
- */
- protected function calculateConnectionNumbers(array &$graphData): array
- {
- $wordConnections = [];
-
- foreach ($graphData as $wordKey => $sentences) {
- $connectionCount = 0;
-
- foreach ($sentences as $sentenceIdx => $wordInstances) {
- foreach ($wordInstances as $connections) {
- $connectionCount += count($connections);
- }
- }
-
- $wordConnections[$wordKey] = $connectionCount;
- }
-
- return $wordConnections;
- }
-
- /**
- * Calculate Scores.
- *
- * It calculates the score of the words and retrieves it in array where key
- * is the word and value is the score. The score depends on the number of
- * the connections and the closest word's connection numbers.
- *
- * @param array $graphData Graph data from a Graph type object.
- * @param array $wordMatrix Multidimensional array from integer keys
- * and string values.
- * @param array $wordConnections Key is the word and value is the number of
- * the connected words.
- *
- * @return array Scores where key is the word and value is the score.
- */
- protected function calculateScores(
- array &$graphData,
- array &$wordMatrix,
- array &$wordConnections
- ): array {
- $scores = [];
-
- foreach ($graphData as $wordKey => $sentences) {
- $value = 0;
-
- foreach ($sentences as $sentenceIdx => $wordInstances) {
- foreach ($wordInstances as $connections) {
- foreach ($connections as $wordIdx) {
- $word = $wordMatrix[$sentenceIdx][$wordIdx];
- $value += $wordConnections[$word];
- }
- }
- }
-
- $scores[$wordKey] = $value;
-
- if ($value > $this->maximumValue) {
- $this->maximumValue = $value;
- }
-
- if ($value < $this->minimumValue || $this->minimumValue == 0) {
- $this->minimumValue = $value;
- }
- }
-
- return $scores;
- }
-
- /**
- * Normalize and Sort Scores.
- *
- * It recalculates the scores by normalize the score numbers to between 0
- * and 1.
- *
- * @param array $scores Keywords with scores. Score is the key.
- *
- * @return array Keywords with normalized and ordered scores.
- */
- protected function normalizeAndSortScores(array &$scores): array
- {
- foreach ($scores as $key => $value) {
- $v = $this->normalize(
- $value,
- $this->minimumValue,
- $this->maximumValue
- );
-
- $scores[$key] = $v;
- }
-
- arsort($scores);
-
- return $scores;
- }
-
- /**
- * It normalizes a number.
- *
- * @param int $value Current weight.
- * @param int $min Minimum weight.
- * @param int $max Maximum weight.
- *
- * @return float|int Normalized weight aka score.
- */
- protected function normalize(int $value, int $min, int $max): float
- {
- $divisor = $max - $min;
-
- if ($divisor == 0) {
- return 0.0;
- }
-
- $normalized = ($value - $min) / $divisor;
-
- return $normalized;
- }
-}
diff --git a/src/Tool/StopWords/English.php b/src/Tool/StopWords/English.php
deleted file mode 100644
index 4b08b8b..0000000
--- a/src/Tool/StopWords/English.php
+++ /dev/null
@@ -1,334 +0,0 @@
-words) !== false;
- }
-}
diff --git a/src/Tool/Summarize.php b/src/Tool/Summarize.php
deleted file mode 100644
index fd4fed4..0000000
--- a/src/Tool/Summarize.php
+++ /dev/null
@@ -1,224 +0,0 @@
-getGraph();
- $sentences = $text->getSentences();
- $marks = $text->getMarks();
- $this->findAndWeightSentences($scores, $graphData, $keyWordLimit);
-
- if ($type == Summarize::GET_ALL_IMPORTANT) {
- return $this->getAllImportant($sentences, $marks, $sentenceLimit);
-
- } else if ($type == Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS) {
- return $this->getFirstImportantAndFollowings(
- $sentences,
- $marks,
- $sentenceLimit
- );
- }
-
- return [];
- }
-
- /**
- * Find and Weight Sentences.
- *
- * It finds the most important sentences and stores them into the property.
- *
- * @param array $scores Keywords with scores. Score is the key.
- * @param array $graphData Graph data from a Graph type object.
- * @param int $keyWordLimit How many keyword should be used to find the
- * important sentences.
- */
- protected function findAndWeightSentences(
- array &$scores,
- array &$graphData,
- int $keyWordLimit
- ) {
- $i = 0;
-
- foreach ($scores as $word => $score) {
- if ($i >= $keyWordLimit) {
- break;
- }
-
- $i++;
- $wordMap = $graphData[$word];
-
- foreach ($wordMap as $key => $value) {
- $this->updateSentenceWeight($key);
- }
- }
-
- arsort($this->sentenceWeight);
- }
-
- /**
- * Important Sentences.
- *
- * It retrieves the important sentences.
- *
- * @param array $sentences Sentences, ordered by weights.
- * @param array $marks Array of punctuations. Key is the reference
- * to the sentence, value is the punctuation.
- * @param int $sentenceLimit How many sentence should be retrieved.
- *
- * @return array An array from sentences what are the most important
- * sentences.
- */
- protected function getAllImportant(
- array &$sentences,
- array &$marks,
- int $sentenceLimit
- ): array {
-
- $summary = [];
- $i = 0;
-
- foreach ($this->sentenceWeight as $sentenceIdx => $weight) {
- if ($i >= $sentenceLimit) {
- break;
- }
-
- $i++;
- $summary[$sentenceIdx] = $sentences[$sentenceIdx]
- . $this->getMark($marks, $sentenceIdx);
- }
-
- ksort($summary);
-
- return $summary;
- }
-
- /**
- * Most Important Sentence and Next.
- *
- * It retrieves the first most important sentence and its following
- * sentences.
- *
- * @param array $sentences Sentences, ordered by weights.
- * @param array $marks Array of punctuations. Key is the reference
- * to the sentence, value is the punctuation.
- * @param int $sentenceLimit How many sentence should be retrieved.
- *
- * @return array An array from sentences what contains the most important
- * sentence and its following sentences.
- */
- protected function getFirstImportantAndFollowings(
- array &$sentences,
- array &$marks,
- int $sentenceLimit
- ): array {
-
- $summary = [];
- $startIdx = 0;
-
- foreach ($this->sentenceWeight as $sentenceIdx => $weight) {
- $summary[$sentenceIdx] = $sentences[$sentenceIdx] .
- $this->getMark($marks, $sentenceIdx);
-
- $startIdx = $sentenceIdx;
- break;
- }
-
- $i = 0;
-
- foreach ($sentences as $sentenceIdx => $sentence) {
- if ($sentenceIdx <= $startIdx) {
- continue;
- } else if ($i >= $sentenceLimit - 1) {
- break;
- }
-
- $i++;
- $summary[$sentenceIdx] = $sentences[$sentenceIdx] .
- $this->getMark($marks, $sentenceIdx);
- }
-
- return $summary;
- }
-
- /**
- * Update Sentence Weight.
- *
- * It updates the sentence weight what is stored in the property.
- *
- * @param int $sentenceIdx Index of the sentence.
- */
- protected function updateSentenceWeight(int $sentenceIdx)
- {
- if (isset($this->sentenceWeight[$sentenceIdx])) {
- $this->sentenceWeight[$sentenceIdx] = $this->sentenceWeight[$sentenceIdx] + 1;
- } else {
- $this->sentenceWeight[$sentenceIdx] = 1;
- }
- }
-
- /**
- * Punctuations.
- *
- * It retrieves the punctuation of the sentence.
- *
- * @param array $marks The punctuation. Key is the reference to the
- * sentence, value is the punctuation.
- * @param int $idx Key of the punctuation.
- *
- * @return string The punctuation of the sentence.
- */
- protected function getMark(array &$marks, int $idx)
- {
- return isset($marks[$idx]) ? $marks[$idx] : '';
- }
-}
diff --git a/src/Tool/Text.php b/src/Tool/Text.php
deleted file mode 100644
index 458fc04..0000000
--- a/src/Tool/Text.php
+++ /dev/null
@@ -1,99 +0,0 @@
-wordMatrix = $wordMatrix;
- }
-
- /**
- * It sets the sentences.
- *
- * @param array $sentences Array's key should be an int and value should be
- * string.
- */
- public function setSentences(array $sentences)
- {
- $this->sentences = $sentences;
- }
-
- /**
- * It set the punctuations to the property.
- *
- * @param array $marks Array's key should be an int and value should be
- * string.
- */
- public function setMarks(array $marks)
- {
- $this->marks = $marks;
- }
-
- /**
- * It retrieves the words in sentence groups.
- *
- * @return array Multidimensional array from words of the text. Key is
- * index of the sentence, value is an array from words
- * where key is the index of the word and value is the word.
- */
- public function getWordMatrix(): array
- {
- return $this->wordMatrix;
- }
-
- /**
- * It retrieves the sentences.
- *
- * @return array Array from sentences where key is the index and value is
- * the sentence.
- */
- public function getSentences(): array
- {
- return $this->sentences;
- }
-
- /**
- * It retrieves the punctuations.
- *
- * @return array Array from punctuations where key is the index to link to
- * the sentence and value is the punctuation.
- */
- public function getMarks(): array
- {
- return $this->marks;
- }
-}
diff --git a/tests/functional/TextRankFacadeTest.php b/tests/functional/TextRankFacadeTest.php
deleted file mode 100644
index 41151ac..0000000
--- a/tests/functional/TextRankFacadeTest.php
+++ /dev/null
@@ -1,151 +0,0 @@
-sampleText1 = fread($file, filesize($path));
-
- fclose($file);
- }
-
- public function testGetOnlyKeyWords()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->getOnlyKeyWords($this->sampleText1);
-
- $this->assertTrue(count($result) > 0);
- $this->assertTrue(array_values($result)[0] == 1);
- }
-
- public function testGetHighlights()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->getHighlights($this->sampleText1);
-
- $this->assertTrue(count($result) > 0);
- }
-
- public function testSummarizeTextCompound()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->summarizeTextCompound($this->sampleText1);
-
- $this->assertTrue(count($result) > 0);
- }
-
- public function testSummarizeTextBasic()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->summarizeTextBasic($this->sampleText1);
-
- $this->assertTrue(count($result) > 0);
- }
-
- public function testSummarizeTextFreely()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->summarizeTextFreely(
- $this->sampleText1,
- 5,
- 2,
- Summarize::GET_ALL_IMPORTANT
- );
-
- $this->assertTrue(count($result) == 2);
-
- $result = $api->summarizeTextFreely(
- $this->sampleText1,
- 10,
- 1,
- Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS
- );
-
- $this->assertTrue(count($result) == 1);
-
- // Stop words.
- $result = $api->summarizeTextFreely(
- 'one two. one two. three four.',
- 2,
- 10,
- Summarize::GET_ALL_IMPORTANT
- );
-
- $this->assertTrue(count($result) == 0);
-
- // Less sentences then expected.
- $result = $api->summarizeTextFreely(
- 'lorem ipsum. lorem holy ipsum. sit dolor amet.',
- 2,
- 10,
- Summarize::GET_ALL_IMPORTANT
- );
-
- $this->assertTrue(count($result) == 2);
- }
-
- public function testSmallText()
- {
- $api = new TextRankFacade();
- $stopWords = new English();
- $api->setStopWords($stopWords);
-
- $result = $api->getOnlyKeyWords('lorem ipsum sit');
-
- $this->assertEquals(2, count($result));
-
- $result = $api->getOnlyKeyWords('sit');
-
- $this->assertEquals(0, count($result));
-
- $result = $api->getOnlyKeyWords('');
-
- $this->assertEquals(0, count($result));
- }
-
- public function testSmallTextRu()
- {
- $api = new TextRankFacade();
- $stopWords = new Russian();
- $api->setStopWords($stopWords);
- $result = $api->getOnlyKeyWords('между холодными ладонями');
- $this->assertCount(2, $result);
-
- $result = $api->getOnlyKeyWords('конец');
- $this->assertCount(0, $result);
-
- $result = $api->getOnlyKeyWords('');
- $this->assertCount(0, $result);
- }
-}