1: <?php
2: /**
3: * This file is part of the FastFeed package.
4: *
5: * (c) Daniel González <daniel@desarrolla2.com>
6: *
7: * For the full copyright and license information, please view the LICENSE
8: * file that was distributed with this source code.
9: */
10: namespace FastFeed\Processor;
11:
12: use FastFeed\Exception\InvalidArgumentException;
13: use FastFeed\Item;
14:
/**
 * SanitizerProcessor
 *
 * Runs the intro and content of every feed item through HTMLPurifier so that
 * malicious HTML is removed before the items are handed on.
 */
class SanitizerProcessor implements ProcessorInterface
{
    /**
     * Purifier instance configured in the constructor and reused for every item.
     *
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * Build the purifier and point its serializer cache at a writable directory.
     *
     * @param string|null $cacheDirectory Directory HTMLPurifier may use for its
     *                                    serializer cache; any falsy value
     *                                    selects the system temporary directory.
     *
     * @throws \FastFeed\Exception\InvalidArgumentException When the chosen path
     *                                                      is not writable or is
     *                                                      not a directory.
     */
    public function __construct($cacheDirectory = null)
    {
        if (!$cacheDirectory) {
            $temporaryDirectory = sys_get_temp_dir();
            $resolvedDirectory = realpath($temporaryDirectory);
            // realpath() returns false when the path cannot be resolved; keep the
            // unresolved path so the error message below still names a location.
            $cacheDirectory = false !== $resolvedDirectory ? $resolvedDirectory : $temporaryDirectory;
        }

        if (!is_writable($cacheDirectory)) {
            throw new InvalidArgumentException($cacheDirectory . ' is not writable');
        }

        // Checked after writability so that every path rejected before this check
        // existed still reports the original "is not writable" message.
        if (!is_dir($cacheDirectory)) {
            throw new InvalidArgumentException($cacheDirectory . ' is not a directory');
        }

        // Per the original author's note, the bootstrap class must be loaded so
        // that the constants HTMLPurifier relies on are defined.
        new \HTMLPurifier_Bootstrap();
        $config = \HTMLPurifier_Config::createDefault();
        $config->set('Cache.SerializerPath', $cacheDirectory);
        $this->purifier = new \HTMLPurifier($config);
    }

    /**
     * Execute processor
     *
     * Every element is passed through doClean(); array keys are preserved.
     *
     * @param Item[] $items
     *
     * @return Item[]
     */
    public function process(array $items)
    {
        foreach ($items as $key => $item) {
            $items[$key] = $this->doClean($item);
        }

        return $items;
    }

    /**
     * Purify the intro and content of a single item.
     *
     * The item is modified through its setters and the same instance is returned.
     *
     * @param Item $item
     *
     * @return Item
     */
    protected function doClean(Item $item)
    {
        $item->setIntro(
            $this->purifier->purify($item->getIntro())
        );
        $item->setContent(
            $this->purifier->purify($item->getContent())
        );

        return $item;
    }
}
80: