// Hook each FindInFiles extractor into the "OnSearchGetFileContent" event
// registered under "search"; one handler per supported document family.
AddEventHandler(
    "search",
    "OnSearchGetFileContent",
    ["FindInFiles", "OnSearchGetFileContent_doc"]
);
AddEventHandler(
    "search",
    "OnSearchGetFileContent",
    ["FindInFiles", "OnSearchGetFileContent_docx"]
);
AddEventHandler(
    "search",
    "OnSearchGetFileContent",
    ["FindInFiles", "OnSearchGetFileContent_pdf"]
);
AddEventHandler(
    "search",
    "OnSearchGetFileContent",
    ["FindInFiles", "OnSearchGetFileContent_xls"]
);
/**
 * Content extractors for office documents, used as "OnSearchGetFileContent"
 * handlers.
 *
 * Every public OnSearchGetFileContent_* method takes an absolute file path and
 * returns either false (the file is not handled by that extractor, or could
 * not be read) or an array with the keys TITLE, CONTENT and PROPERTIES.
 */
class FindInFiles
{
    /**
     * Extracts text from a legacy Word ".doc" file.
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Result array, or false when the file is not a readable .doc.
     */
    public static function OnSearchGetFileContent_doc($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('doc'))) {
            return false;
        }

        self::trace($absolute_path);

        return self::processDocFile($absolute_path);
    }

    /**
     * Extracts text from a Word ".docx" file.
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Result array, or false when the file is not a .docx.
     */
    public static function OnSearchGetFileContent_docx($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('docx'))) {
            return false;
        }

        self::trace($absolute_path);

        return self::processDocxFile($absolute_path);
    }

    /**
     * Extracts text from a ".pdf" file.
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Result array, or false when the file is not a .pdf.
     */
    public static function OnSearchGetFileContent_pdf($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('pdf'))) {
            return false;
        }

        self::trace($absolute_path);

        return self::processPdfFile($absolute_path);
    }

    /**
     * Extracts text from an ".xls" or ".xlsx" spreadsheet.
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Result array, or false when the file is not a spreadsheet.
     */
    public static function OnSearchGetFileContent_xls($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('xls', 'xlsx'))) {
            return false;
        }

        self::trace($absolute_path);

        return self::processXlsFile($absolute_path);
    }

    /**
     * Tells whether the path is an existing regular file with one of the given extensions.
     *
     * The comparison uses the real file extension (the part after the last dot)
     * and ignores letter case, so "REPORT.PDF" matches "pdf" while a file that
     * is merely named "mypdf" does not.
     *
     * @param mixed    $absolute_path Path to test.
     * @param string[] $extensions    Lower-case extensions without the leading dot.
     * @return bool
     */
    private static function hasExtension($absolute_path, array $extensions)
    {
        if (!is_string($absolute_path) || !is_file($absolute_path)) {
            return false;
        }

        $extension = strtolower(pathinfo($absolute_path, PATHINFO_EXTENSION));

        return in_array($extension, $extensions, true);
    }

    /**
     * Passes the path to wicd() when that function is available.
     *
     * NOTE(review): wicd() is not defined in this file — presumably a project
     * debug/trace helper; confirm. The guard keeps indexing working when it is absent.
     *
     * @param string $absolute_path Path of the file about to be processed.
     * @return void
     */
    private static function trace($absolute_path)
    {
        if (function_exists('wicd')) {
            wicd($absolute_path, __DIR__);
        }
    }

    /**
     * Builds the array returned by every extractor.
     *
     * @param string $absolute_path Path whose base name becomes the TITLE.
     * @param string $text          Extracted text stored as CONTENT.
     * @return array
     */
    private static function buildResult($absolute_path, $text)
    {
        return array(
            "TITLE" => basename($absolute_path),
            "CONTENT" => $text,
            "PROPERTIES" => array(),
        );
    }

    /**
     * Reads the text of a binary .doc file.
     *
     * The first 0xA00 bytes are treated as a header; the four bytes at
     * 0x21C..0x21F are combined into a little-endian length (with a correction
     * of -1 on the lowest byte and -8 on the second byte), and that many bytes
     * following the header are decoded as UTF-16LE text.
     *
     * @param string $absolute_path Path of the .doc file.
     * @return array|false Result array, or false if the file cannot be opened
     *                     or is too short to contain the length field.
     */
    private static function processDocFile($absolute_path)
    {
        if (!is_file($absolute_path)) {
            return false;
        }

        // Binary mode: the content must not be subject to newline translation.
        $fh = fopen($absolute_path, 'rb');
        if ($fh === false) {
            return false;
        }

        $headers = fread($fh, 0xA00);
        // A read shorter than 0x220 bytes cannot contain the length field.
        if ($headers === false || strlen($headers) < 0x220) {
            fclose($fh);
            return false;
        }

        $textLength = (ord($headers[0x21C]) - 1)
            + ((ord($headers[0x21D]) - 8) * 256)
            + (ord($headers[0x21E]) * 256 * 256)
            + (ord($headers[0x21F]) * 256 * 256 * 256);

        // Never request more than the file still holds; a corrupt length field
        // could otherwise ask for gigabytes.
        $stat = fstat($fh);
        if ($stat !== false) {
            $textLength = min($textLength, $stat['size'] - ftell($fh));
        }

        $raw = '';
        // fread() rejects a non-positive length, so skip the read in that case.
        if ($textLength > 0) {
            $chunk = fread($fh, $textLength);
            if ($chunk !== false) {
                $raw = $chunk;
            }
        }
        fclose($fh);

        $extracted_plaintext = mb_convert_encoding($raw, 'UTF-8', 'UTF-16LE');

        // Keep only letters, digits and whitespace. The text is deliberately not
        // passed through nl2br(): the inserted "<br />" would lose its brackets
        // here and leave a stray "br" word at every line break.
        $text = preg_replace('/[^\p{L}\p{N}\s]+/u', '', $extracted_plaintext);
        if ($text === null) {
            // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8).
            $text = '';
        }

        return self::buildResult($absolute_path, $text);
    }

    /**
     * Reads the text of a .docx file through PhpWord.
     *
     * Only the direct child elements of each section are visited; each element
     * contributes its text followed by a newline so that neighbouring
     * paragraphs do not run together.
     *
     * @param string $absolute_path Path of the .docx file.
     * @return array Result array.
     */
    private static function processDocxFile($absolute_path)
    {
        $phpWord = \PhpOffice\PhpWord\IOFactory::load($absolute_path);

        $text = '';
        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                $text .= self::elementText($element) . "\n";
            }
        }

        return self::buildResult($absolute_path, $text);
    }

    /**
     * Returns the text of one document element, or '' if it exposes none.
     *
     * @param mixed $element Element taken from a section.
     * @return string
     */
    private static function elementText($element)
    {
        if (!is_object($element) || !method_exists($element, 'getText')) {
            return '';
        }

        $value = $element->getText();

        // NOTE(review): some PhpWord elements (e.g. titles) appear to return a
        // nested element instead of a string from getText() — confirm against
        // the installed version. Unwrap one level so concatenation cannot fail.
        if (is_object($value) && method_exists($value, 'getText')) {
            $value = $value->getText();
        }

        return is_scalar($value) ? (string) $value : '';
    }

    /**
     * Reads the text of a PDF file through Smalot PdfParser.
     *
     * @param string $absolute_path Path of the .pdf file.
     * @return array Result array.
     */
    private static function processPdfFile($absolute_path)
    {
        $parser = new \Smalot\PdfParser\Parser();
        // Parse the file that is being indexed, not a fixed path.
        $pdf = $parser->parseFile($absolute_path);

        return self::buildResult($absolute_path, $pdf->getText());
    }

    /**
     * Reads the cell values of the active worksheet of a spreadsheet.
     *
     * Cells are separated by tabs and rows by newlines.
     *
     * NOTE(review): IOFactory is assumed to be PhpSpreadsheet's
     * (\PhpOffice\PhpSpreadsheet\IOFactory), based on the getActiveSheet() /
     * getRowIterator() / getCellIterator() calls — confirm.
     *
     * @param string $absolute_path Path of the .xls/.xlsx file.
     * @return array Result array.
     */
    private static function processXlsFile($absolute_path)
    {
        $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($absolute_path);
        $worksheet = $spreadsheet->getActiveSheet();

        // Start from an empty string so an empty sheet still yields valid CONTENT.
        $text = '';
        foreach ($worksheet->getRowIterator() as $row) {
            foreach ($row->getCellIterator() as $cell) {
                // Append (not assign) so every cell ends up in the index.
                $text .= $cell->getValue() . "\t";
            }
            $text .= "\n";
        }

        return self::buildResult($absolute_path, $text);
    }
}
Соответственно, нужно установить используемые библиотеки (PhpOffice\PhpWord, Smalot\PdfParser и библиотеку с классом IOFactory для XLS/XLSX).
|