/**
 * Return the URL of an image found into a HTML content.
 * The HTML is parsed with DOMDocument and the value of the "src" attribute of the
 * requested <img> tag is returned unchanged (nothing is downloaded or rewritten).
 * To get image from an external URL to download first, see getAllImages()
 *
 * @param	string	$htmlContent	HTML content
 * @param	int		$imageNumber	The position of image. 1 by default = first image found
 * @return	string					URL of image or '' if not found (empty content, invalid position, no such image)
 */
function getImageFromHtmlContent($htmlContent, $imageNumber = 1)
{
	$htmlContent = (string) $htmlContent;
	$imageNumber = (int) $imageNumber;

	// Nothing to search or invalid position (positions are 1-based).
	// This guard is required before loading: DOMDocument::loadHTML() rejects an empty
	// string (warning with PHP 7, ValueError with PHP 8).
	if ($imageNumber < 1 || trim($htmlContent) === '') {
		return '';
	}

	$dom = new DOMDocument();

	// Collect parse errors internally instead of raising one PHP warning per problem found
	// in the markup. The previous mode is saved so it can be restored for the caller.
	$previousUseInternalErrors = libxml_use_internal_errors(true);
	if (LIBXML_VERSION < 20900) {
		// Avoid load of external entities (security problem).
		// Required only if LIBXML_VERSION < 20900
		// @phan-suppress-next-line PhanDeprecatedFunctionInternal
		libxml_disable_entity_loader(true);
	}

	// Load HTML content into object
	$loaded = $dom->loadHTML($htmlContent);

	// Free the collected parse errors (avoid to fill memory) and restore the previous error mode
	libxml_clear_errors();
	libxml_use_internal_errors($previousUseInternalErrors);

	if (!$loaded) {
		return '';
	}

	// Load all img tags
	$images = $dom->getElementsByTagName('img');

	// Check if the requested position exists
	if ($imageNumber <= $images->length) {
		// Get the matching image (index - 1 because $imageNumber is 1-based)
		$img = $images->item($imageNumber - 1);
		if ($img instanceof DOMElement) {
			return $img->getAttribute('src');
		}
	}

	return '';
}
result dolKeepOnlyPhpCode=".$result."\n"; $this->assertEquals('', $result, 'dolKeepOnlyPhpCode did extract the correct string'); } + + /** + * testGetImageFromHtmlContent + * + * @return void + */ + public function testGetImageFromHtmlContent() + { + // Example of usage + $htmlContent = '
Some text before.

Some text in between.

Some text after.
'; + + $firstImage = getImageFromHtmlContent($htmlContent, 1); + print __METHOD__." result firstImage=".$firstImage."\n"; + $this->assertEquals('image1.jpg', $firstImage, ' failed to get firstimage'); + + $secondImage = getImageFromHtmlContent($htmlContent, 2); + print __METHOD__." result secondImage=".$secondImage."\n"; + $this->assertEquals('/mydir/image2.jpg', $secondImage, ' failed to get second image'); + } }