Skip to content

Commit

Permalink
Merge pull request #429 from MarcusBarnes/issue-428
Browse files Browse the repository at this point in the history
Work on #428.
  • Loading branch information
MarcusBarnes authored Jul 10, 2017
2 parents 709037b + 225b12b commit 830b54d
Showing 1 changed file with 11 additions and 67 deletions.
78 changes: 11 additions & 67 deletions src/filegetters/CsvBooks.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ public function __construct($settings)
$this->input_directory = $this->settings['input_directory'];
$this->file_name_field = $this->settings['file_name_field'];
$this->fetcher = new \mik\fetchers\Csv($settings);

// Iterate over inputDirectories to create $potentialObjFiles array.
$potentialObjFiles = $this->getMasterFiles($this->input_directory, $this->allowed_file_extensions_for_OBJ);
$this->OBJFilePaths = $this->determineObjItems($potentialObjFiles);
$this->OBJFilePaths = $this->getMasterFiles($this->input_directory, $this->allowed_file_extensions_for_OBJ);
}

/**
Expand All @@ -46,12 +43,9 @@ public function getChildren($record_key)
{
$page_paths = array();
$book_input_path = $this->getBookSourcePath($record_key);
foreach ($this->OBJFilePaths as $paths) {
foreach ($paths as $path) {
// If there's a match, we expect it to start at position 0.
if (strpos($path, $book_input_path) === 0) {
$page_paths[] = $path;
}
foreach ($this->OBJFilePaths as $path) {
if (strpos($path, $book_input_path) === 0) {
$page_paths[] = $path;
}
}
return $page_paths;
Expand All @@ -62,7 +56,7 @@ public function getChildren($record_key)
*
* @param string $inputDirectory
* The input directory as defined in the configuration.
* @param array $allowedFileTypes
* @param array $allowedFileTypes
* The list of file types (e.g. extensions) to look for.
*
* @return array
Expand Down Expand Up @@ -92,77 +86,27 @@ private function getMasterFiles($inputDirectory, $allowedFileTypes)
$potentialObjFiles = array_merge($potentialObjFiles, $potentialFilesArray);
$potentialObjFiles = array_unique($potentialObjFiles);
}

return $potentialObjFiles;
}

/**
 * Groups file paths under keys extracted from each path by a regex.
 *
 * The pattern that looked for a /yyyy-mm-dd/ directory segment is disabled;
 * the active pattern '%.%' matches any single non-newline character, so each
 * key is the first such character found in a path. Every path containing a
 * key (compared case-insensitively) is listed under that key.
 *
 * @todo: how does this relate to books?
 *
 * @param array $arrayOfFilesToPreserve
 *    The file paths to group.
 *
 * @return array
 *    An associative array whose keys are the extracted identifiers and
 *    whose values are lists of the paths that contain the identifier.
 */
private function determineObjItems($arrayOfFilesToPreserve)
{
    // Disabled date pattern, kept for reference:
    // '%[/\\\\][0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9][/\\\\]%'
    $pattern = '%.%';

    // Pass 1: collect the first regex match of every path that has one.
    $identifiers = array();
    foreach ($arrayOfFilesToPreserve as $candidate) {
        preg_match($pattern, $candidate, $found);
        if (!empty($found)) {
            $identifiers[] = $found[0];
        }
    }

    // Pass 2: for each distinct identifier, gather the paths containing it.
    $grouped = array();
    foreach (array_unique($identifiers) as $identifier) {
        $members = array();
        foreach ($arrayOfFilesToPreserve as $candidate) {
            // stristr() is case-insensitive; its return value is used for
            // truthiness only, exactly as a plain substring test would be.
            if (stristr($candidate, $identifier)) {
                $members[] = $candidate;
            }
        }

        if (empty($members)) {
            continue;
        }
        $grouped[$identifier] = $members;
    }

    return $grouped;
}

/**
 * Return the absolute path to the directory holding a book's page files.
 *
 * Reads the book's directory name from the record (the field named by
 * $this->file_name_field) and returns the directory of the first entry in
 * $this->OBJFilePaths whose path contains that name as a complete
 * directory segment.
 *
 * @param $record_key
 *    The key passed to the fetcher's getItemInfo() to look up the record.
 *
 * @return string|null
 *    The directory portion of the first matching file path, or null if
 *    no file path contains the book's directory.
 */
public function getBookSourcePath($record_key)
{
    // Get the name of the book's directory from the record.
    $item_info = $this->fetcher->getItemInfo($record_key);
    $book_directory = $item_info->{$this->file_name_field};

    // Quote only the variable part of the pattern and tell preg_quote()
    // which delimiter is in use. Quoting the whole pattern would escape the
    // '#' delimiters themselves (PHP >= 7.3) and double-escape any manually
    // escaped hyphen, so no path would ever match. Quoting the separators
    // also keeps the pattern valid where DIRECTORY_SEPARATOR is a backslash.
    $directory_regex = '#'
        . preg_quote(DIRECTORY_SEPARATOR . $book_directory . DIRECTORY_SEPARATOR, '#')
        . '#';

    foreach ($this->OBJFilePaths as $path) {
        if (preg_match($directory_regex, $path)) {
            return pathinfo($path, PATHINFO_DIRNAME);
        }
    }

    // No file was found beneath a directory with this name.
    return null;
}

}

0 comments on commit 830b54d

Please sign in to comment.