Может, кому-нибудь пригодится.
Код |
---|
// Register one content extractor per supported document format for the
// "search" module's OnSearchGetFileContent event. Each handler returns false
// for files it does not recognise.
AddEventHandler("search", "OnSearchGetFileContent", array("FindInFiles", "OnSearchGetFileContent_doc"));
AddEventHandler("search", "OnSearchGetFileContent", array("FindInFiles", "OnSearchGetFileContent_docx"));
AddEventHandler("search", "OnSearchGetFileContent", array("FindInFiles", "OnSearchGetFileContent_pdf"));
AddEventHandler("search", "OnSearchGetFileContent", array("FindInFiles", "OnSearchGetFileContent_xls"));

/**
 * Extracts plain text from office documents so their content can be indexed.
 *
 * Every public handler receives an absolute file path and returns either
 * false (the file is not of the handled type) or an array with the keys
 * "TITLE" (file base name), "CONTENT" (extracted text) and "PROPERTIES"
 * (always an empty array here).
 *
 * NOTE(review): wicd() is not defined in this snippet; presumably a
 * project-level debug/logging helper — confirm it is available where this
 * code is installed.
 */
class FindInFiles
{
    /**
     * Handler for legacy binary Word documents (*.doc).
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Extracted data, or false when the file is not a .doc file.
     */
    public static function OnSearchGetFileContent_doc($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('doc'))) {
            return false;
        }

        wicd($absolute_path, __DIR__);

        return self::processDocFile($absolute_path);
    }

    /**
     * Handler for Office Open XML Word documents (*.docx).
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Extracted data, or false when the file is not a .docx file.
     */
    public static function OnSearchGetFileContent_docx($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('docx'))) {
            return false;
        }

        wicd($absolute_path, __DIR__);

        return self::processDocxFile($absolute_path);
    }

    /**
     * Handler for PDF documents (*.pdf).
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Extracted data, or false when the file is not a .pdf file.
     */
    public static function OnSearchGetFileContent_pdf($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('pdf'))) {
            return false;
        }

        // Called only for matching files, like the sibling handlers; the
        // extra unconditional call that used to run for every file is gone.
        wicd($absolute_path, __DIR__);

        return self::processPdfFile($absolute_path);
    }

    /**
     * Handler for Excel workbooks (*.xls and *.xlsx).
     *
     * @param string $absolute_path Absolute path of the file being indexed.
     * @return array|false Extracted data, or false when the file is not a spreadsheet.
     */
    public static function OnSearchGetFileContent_xls($absolute_path)
    {
        if (!self::hasExtension($absolute_path, array('xls', 'xlsx'))) {
            return false;
        }

        wicd($absolute_path, __DIR__);

        return self::processXlsFile($absolute_path);
    }

    /**
     * Tells whether the path is an existing regular file with one of the
     * given extensions. The comparison is case-insensitive, so "REPORT.PDF"
     * is accepted as well, and a real ".ext" suffix is required.
     *
     * @param string   $absolute_path Path to inspect.
     * @param string[] $extensions    Lower-case extensions without the dot.
     * @return bool
     */
    private static function hasExtension($absolute_path, array $extensions)
    {
        // is_file() is already false for paths that do not exist.
        if (!is_file($absolute_path)) {
            return false;
        }

        $extension = strtolower(pathinfo($absolute_path, PATHINFO_EXTENSION));

        return in_array($extension, $extensions, true);
    }

    /**
     * Builds the result array shared by all extractors.
     *
     * @param string $absolute_path Path of the processed file.
     * @param string $text          Extracted text.
     * @return array
     */
    private static function buildResult($absolute_path, $text)
    {
        return array(
            "TITLE" => basename($absolute_path),
            "CONTENT" => $text,
            "PROPERTIES" => array(),
        );
    }

    /**
     * Reads text from a binary .doc file.
     *
     * The text length is decoded from four header bytes at offsets
     * 0x21C-0x21F, and that many bytes following the first 0xA00 bytes are
     * read and converted from UTF-16LE to UTF-8. Everything except letters,
     * digits and whitespace is then stripped.
     *
     * @param string $absolute_path Path of the .doc file.
     * @return array|false Extracted data, or false when the file cannot be opened.
     */
    private static function processDocFile($absolute_path)
    {
        // Binary mode: the file is not text and must not be translated.
        $handle = fopen($absolute_path, 'rb');
        if ($handle === false) {
            return false;
        }

        $raw = '';
        try {
            $headers = fread($handle, 0xA00);

            // The length bytes live at 0x21C..0x21F; a shorter read means
            // there is nothing this heuristic can decode.
            if ($headers !== false && strlen($headers) >= 0x220) {
                $textLength = (ord($headers[0x21C]) - 1)
                    + ((ord($headers[0x21D]) - 8) * 256)
                    + (ord($headers[0x21E]) * 256 * 256)
                    + (ord($headers[0x21F]) * 256 * 256 * 256);

                // fread() rejects a length below 1, so only read when the
                // decoded length is usable.
                if ($textLength > 0) {
                    $chunk = fread($handle, $textLength);
                    if ($chunk !== false) {
                        $raw = $chunk;
                    }
                }
            }
        } finally {
            // Always release the handle, including on early exits.
            fclose($handle);
        }

        $decoded = mb_convert_encoding($raw, 'UTF-8', 'UTF-16LE');

        // No nl2br() here: the "<br />" it inserts would be reduced to a
        // literal "br" by the punctuation stripping below.
        $text = preg_replace('/[^\p{L}\p{N}\s]+/u', '', $decoded);
        if ($text === null) {
            // preg_replace() yields null on failure (e.g. malformed UTF-8).
            $text = '';
        }

        return self::buildResult($absolute_path, $text);
    }

    /**
     * Reads text from a .docx file with PhpWord, walking every element of
     * every section.
     *
     * @param string $absolute_path Path of the .docx file.
     * @return array Extracted data.
     */
    private static function processDocxFile($absolute_path)
    {
        $phpWord = \PhpOffice\PhpWord\IOFactory::load($absolute_path);

        $text = '';
        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                if (method_exists($element, 'getText')) {
                    $text .= $element->getText();
                }
                // Separate elements so adjacent paragraphs do not fuse into
                // one word in the index.
                $text .= "\n";
            }
        }

        return self::buildResult($absolute_path, $text);
    }

    /**
     * Reads text from a PDF file with Smalot PdfParser.
     *
     * @param string $absolute_path Path of the .pdf file.
     * @return array Extracted data.
     */
    private static function processPdfFile($absolute_path)
    {
        $parser = new \Smalot\PdfParser\Parser();

        // Parse the file being indexed, not a fixed "/file.pdf" in the
        // document root.
        $pdf = $parser->parseFile($absolute_path);

        return self::buildResult($absolute_path, $pdf->getText());
    }

    /**
     * Reads cell values from the active sheet of a workbook. Cells are
     * separated by tabs and rows by newlines.
     *
     * NOTE(review): the loader is assumed to be PhpSpreadsheet's IOFactory,
     * based on the worksheet/row/cell iterator API used — confirm.
     *
     * @param string $absolute_path Path of the .xls/.xlsx file.
     * @return array Extracted data.
     */
    private static function processXlsFile($absolute_path)
    {
        $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($absolute_path);
        $worksheet = $spreadsheet->getActiveSheet();

        // Start from an empty string so an empty sheet still yields valid content.
        $text = '';
        foreach ($worksheet->getRowIterator() as $row) {
            foreach ($row->getCellIterator() as $cell) {
                // Append: every cell must reach the index, not only the last one.
                $text .= $cell->getValue() . "\t";
            }
            $text .= "\n";
        }

        return self::buildResult($absolute_path, $text);
    }
}

соответственно нужно установить используемые библиотеки |