1. sfDomCssSelector.class.php
  2. /** * sfDomCssSelector allows to navigate a DOM with CSS selector. * * Based on getElementsBySelector version 0.4 - Simon Willison, March 25th 2003 * http://simon.incutio.com/archive/2003/03/25/getElementsBySelector * * Some methods based on the jquery library * * @package symfony * @subpackage util * @author Fabien Potencier * @version SVN: $Id: sfDomCssSelector.class.php 26872 2010-01-19 10:50:05Z fabien $ */
  3. class sfDomCssSelector implements Countable, Iterator
  4. {
  5. public $nodes = array();
  6. private $count;
  7. public function __construct($nodes)
  8. {
  9. if (!is_array($nodes))
  10. {
  11. $nodes = array($nodes);
  12. }
  13. $this->nodes = $nodes;
  14. }
  15. public function getNodes()
  16. {
  17. return $this->nodes;
  18. }
  19. public function getNode()
  20. {
  21. return $this->nodes ? $this->nodes[0] : null;
  22. }
  23. public function getValue()
  24. {
  25. return $this->nodes[0]->nodeValue;
  26. }
  27. public function getValues()
  28. {
  29. $values = array();
  30. foreach ($this->nodes as $node)
  31. {
  32. $values[] = $node->nodeValue;
  33. }
  34. return $values;
  35. }
  36. public function matchSingle($selector)
  37. {
  38. $nodes = $this->getElements($selector);
  39. return $nodes ? new sfDomCssSelector($nodes[0]) : new sfDomCssSelector(array());
  40. }
  41. public function matchAll($selector)
  42. {
  43. $nodes = $this->getElements($selector);
  44. return $nodes ? new sfDomCssSelector($nodes) : new sfDomCssSelector(array());
  45. }
  46. protected function getElements($selector)
  47. {
  48. $nodes = array();
  49. foreach ($this->nodes as $node)
  50. {
  51. $result_nodes = $this->getElementsForNode($selector, $node);
  52. if ($result_nodes)
  53. {
  54. $nodes = array_merge($nodes, $result_nodes);
  55. }
  56. }
  57. foreach ($nodes as $node)
  58. {
  59. $node->removeAttribute('sf_matched');
  60. }
  61. return $nodes;
  62. }
  63. protected function getElementsForNode($selector, $root_node)
  64. {
  65. $all_nodes = array();
  66. foreach ($this->tokenize_selectors($selector) as $selector)
  67. {
  68. $nodes = array($root_node);
  69. foreach ($this->tokenize($selector) as $token)
  70. {
  71. $combinator = $token['combinator'];
  72. $selector = $token['selector'];
  73. $token = trim($token['name']);
  74. $pos = strpos($token, '#');
  75. if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
  76. {
  77. // Token is an ID selector
  78. $tagName = substr($token, 0, $pos);
  79. $id = substr($token, $pos + 1);
  80. $xpath = new DomXPath($root_node);
  81. $element = $xpath->query(sprintf("//*[@id = '%s']", $id))->item(0);
  82. if (!$element || ($tagName && strtolower($element->nodeName) != $tagName))
  83. {
  84. // tag with that ID not found
  85. return array();
  86. }
  87. // Set nodes to contain just this element
  88. $nodes = array($element);
  89. $nodes = $this->matchMultipleCustomSelectors($nodes, $selector);
  90. continue; // Skip to next token
  91. }
  92. $pos = strpos($token, '.');
  93. if (false !== $pos && preg_match('/^[A-Za-z0-9\*]*$/', substr($token, 0, $pos)))
  94. {
  95. // Token contains a class selector
  96. $tagName = substr($token, 0, $pos);
  97. if (!$tagName)
  98. {
  99. $tagName = '*';
  100. }
  101. $className = substr($token, $pos + 1);
  102. // Get elements matching tag, filter them for class selector
  103. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  104. $nodes = array();
  105. foreach ($founds as $found)
  106. {
  107. if (preg_match('/\b'.$className.'\b/', $found->getAttribute('class')))
  108. {
  109. $nodes[] = $found;
  110. }
  111. }
  112. $nodes = $this->matchMultipleCustomSelectors($nodes, $selector);
  113. continue; // Skip to next token
  114. }
  115. // Code to deal with attribute selectors
  116. if (preg_match('/^(\w+|\*)(\[.+\])$/', $token, $matches))
  117. {
  118. $tagName = $matches[1] ? $matches[1] : '*';
  119. preg_match_all('/
  120. \[
  121. ([\w\-]+) # attribute
  122. ([=~\|\^\$\*]?) # modifier (optional)
  123. =? # equal (optional)
  124. (
  125. "([^"]*)" # quoted value (optional)
  126. |
  127. ([^\]]*) # non quoted value (optional)
  128. )
  129. \]
  130. /x', $matches[2], $matches, PREG_SET_ORDER);
  131. // Grab all of the tagName elements within current node
  132. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  133. $nodes = array();
  134. foreach ($founds as $found)
  135. {
  136. $ok = false;
  137. foreach ($matches as $match)
  138. {
  139. $attrName = $match[1];
  140. $attrOperator = $match[2];
  141. $attrValue = $match[4] ? $match[4] : $match[5];
  142. switch ($attrOperator)
  143. {
  144. case '=': // Equality
  145. $ok = $found->getAttribute($attrName) == $attrValue;
  146. break;
  147. case '~': // Match one of space seperated words
  148. $ok = preg_match('/\b'.preg_quote($attrValue, '/').'\b/', $found->getAttribute($attrName));
  149. break;
  150. case '|': // Match start with value followed by optional hyphen
  151. $ok = preg_match('/^'.preg_quote($attrValue, '/').'-?/', $found->getAttribute($attrName));
  152. break;
  153. case '^': // Match starts with value
  154. $ok = 0 === strpos($found->getAttribute($attrName), $attrValue);
  155. break;
  156. case '$': // Match ends with value
  157. $ok = $attrValue == substr($found->getAttribute($attrName), -strlen($attrValue));
  158. break;
  159. case '*': // Match ends with value
  160. $ok = false !== strpos($found->getAttribute($attrName), $attrValue);
  161. break;
  162. default :
  163. // Just test for existence of attribute
  164. $ok = $found->hasAttribute($attrName);
  165. }
  166. if (false == $ok)
  167. {
  168. break;
  169. }
  170. }
  171. if ($ok)
  172. {
  173. $nodes[] = $found;
  174. }
  175. }
  176. continue; // Skip to next token
  177. }
  178. // If we get here, token is JUST an element (not a class or ID selector)
  179. $nodes = $this->getElementsByTagName($nodes, $token, $combinator);
  180. $nodes = $this->matchMultipleCustomSelectors($nodes, $selector);
  181. }
  182. foreach ($nodes as $node)
  183. {
  184. if (!$node->getAttribute('sf_matched'))
  185. {
  186. $node->setAttribute('sf_matched', true);
  187. $all_nodes[] = $node;
  188. }
  189. }
  190. }
  191. return $all_nodes;
  192. }
  193. protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
  194. {
  195. $founds = array();
  196. foreach ($nodes as $node)
  197. {
  198. switch ($combinator)
  199. {
  200. case ' ':
  201. // Descendant selector
  202. foreach ($node->getElementsByTagName($tagName) as $element)
  203. {
  204. $founds[] = $element;
  205. }
  206. break;
  207. case '>':
  208. // Child selector
  209. foreach ($node->childNodes as $element)
  210. {
  211. if ($tagName == $element->nodeName)
  212. {
  213. $founds[] = $element;
  214. }
  215. }
  216. break;
  217. case '+':
  218. // Adjacent selector
  219. $element = $node->nextSibling;
  220. if ($element && '#text' == $element->nodeName)
  221. {
  222. $element = $element->nextSibling;
  223. }
  224. if ($element && $tagName == $element->nodeName)
  225. {
  226. $founds[] = $element;
  227. }
  228. break;
  229. default:
  230. throw new Exception(sprintf('Unrecognized combinator "%s".', $combinator));
  231. }
  232. }
  233. return $founds;
  234. }
  235. protected function tokenize_selectors($selector)
  236. {
  237. // split tokens by , except in an attribute selector
  238. $tokens = array();
  239. $quoted = false;
  240. $token = '';
  241. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  242. {
  243. if (',' == $selector[$i] && !$quoted)
  244. {
  245. $tokens[] = trim($token);
  246. $token = '';
  247. }
  248. else if ('"' == $selector[$i])
  249. {
  250. $token .= $selector[$i];
  251. $quoted = $quoted ? false : true;
  252. }
  253. else
  254. {
  255. $token .= $selector[$i];
  256. }
  257. }
  258. if ($token)
  259. {
  260. $tokens[] = trim($token);
  261. }
  262. return $tokens;
  263. }
  264. protected function tokenize($selector)
  265. {
  266. // split tokens by space except if space is in an attribute selector
  267. $tokens = array();
  268. $combinators = array(' ', '>', '+');
  269. $quoted = false;
  270. $token = array('combinator' => ' ', 'name' => '');
  271. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  272. {
  273. if (in_array($selector[$i], $combinators) && !$quoted)
  274. {
  275. // remove all whitespaces around the combinator
  276. $combinator = $selector[$i];
  277. while (in_array($selector[$i + 1], $combinators))
  278. {
  279. if (' ' != $selector[++$i])
  280. {
  281. $combinator = $selector[$i];
  282. }
  283. }
  284. $tokens[] = $token;
  285. $token = array('combinator' => $combinator, 'name' => '');
  286. }
  287. else if ('"' == $selector[$i])
  288. {
  289. $token['name'] .= $selector[$i];
  290. $quoted = $quoted ? false : true;
  291. }
  292. else
  293. {
  294. $token['name'] .= $selector[$i];
  295. }
  296. }
  297. if ($token['name'])
  298. {
  299. $tokens[] = $token;
  300. }
  301. foreach ($tokens as &$token)
  302. {
  303. list($token['name'], $token['selector']) = $this->tokenize_selector_name($token['name']);
  304. }
  305. return $tokens;
  306. }
  307. protected function tokenize_selector_name($token_name)
  308. {
  309. // split custom selector
  310. $quoted = false;
  311. $name = '';
  312. $selector = '';
  313. $in_selector = false;
  314. for ($i = 0, $max = strlen($token_name); $i < $max; $i++)
  315. {
  316. if ('"' == $token_name[$i])
  317. {
  318. $quoted = $quoted ? false : true;
  319. }
  320. if (!$quoted && ':' == $token_name[$i])
  321. {
  322. $in_selector = true;
  323. }
  324. if ($in_selector)
  325. {
  326. $selector .= $token_name[$i];
  327. }
  328. else
  329. {
  330. $name .= $token_name[$i];
  331. }
  332. }
  333. return array($name, $selector);
  334. }
  335. protected function matchMultipleCustomSelectors($nodes, $selector)
  336. {
  337. if (!$selector)
  338. {
  339. return $nodes;
  340. }
  341. foreach ($this->split_custom_selector($selector) as $selector) {
  342. $nodes = $this->matchCustomSelector($nodes, $selector);
  343. }
  344. return $nodes;
  345. }
  346. protected function matchCustomSelector($nodes, $selector)
  347. {
  348. if (!$selector)
  349. {
  350. return $nodes;
  351. }
  352. $selector = $this->tokenize_custom_selector($selector);
  353. $matchingNodes = array();
  354. for ($i = 0, $max = count($nodes); $i < $max; $i++)
  355. {
  356. switch ($selector['selector'])
  357. {
  358. case 'contains':
  359. if (false !== strpos($nodes[$i]->textContent, $selector['parameter']))
  360. {
  361. $matchingNodes[] = $nodes[$i];
  362. }
  363. break;
  364. case 'nth-child':
  365. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->firstChild, (integer) $selector['parameter']))
  366. {
  367. $matchingNodes[] = $nodes[$i];
  368. }
  369. break;
  370. case 'first-child':
  371. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->firstChild))
  372. {
  373. $matchingNodes[] = $nodes[$i];
  374. }
  375. break;
  376. case 'last-child':
  377. if ($nodes[$i] === $this->nth($nodes[$i]->parentNode->lastChild, 1, 'previousSibling'))
  378. {
  379. $matchingNodes[] = $nodes[$i];
  380. }
  381. break;
  382. case 'lt':
  383. if ($i < (integer) $selector['parameter'])
  384. {
  385. $matchingNodes[] = $nodes[$i];
  386. }
  387. break;
  388. case 'gt':
  389. if ($i > (integer) $selector['parameter'])
  390. {
  391. $matchingNodes[] = $nodes[$i];
  392. }
  393. break;
  394. case 'odd':
  395. if ($i % 2)
  396. {
  397. $matchingNodes[] = $nodes[$i];
  398. }
  399. break;
  400. case 'even':
  401. if (0 == $i % 2)
  402. {
  403. $matchingNodes[] = $nodes[$i];
  404. }
  405. break;
  406. case 'nth':
  407. case 'eq':
  408. if ($i == (integer) $selector['parameter'])
  409. {
  410. $matchingNodes[] = $nodes[$i];
  411. }
  412. break;
  413. case 'first':
  414. if ($i == 0)
  415. {
  416. $matchingNodes[] = $nodes[$i];
  417. }
  418. break;
  419. case 'last':
  420. if ($i == $max - 1)
  421. {
  422. $matchingNodes[] = $nodes[$i];
  423. }
  424. break;
  425. default:
  426. throw new Exception(sprintf('Unrecognized selector "%s".', $selector['selector']));
  427. }
  428. }
  429. return $matchingNodes;
  430. }
  431. protected function split_custom_selector($selectors)
  432. {
  433. if (!preg_match_all('/
  434. :
  435. (?:[a-zA-Z0-9\-]+)
  436. (?:
  437. \(
  438. (?:
  439. ("|\')(?:.*?)?\1
  440. |
  441. (?:.*?)
  442. )
  443. \)
  444. )?
  445. /x', $selectors, $matches, PREG_PATTERN_ORDER))
  446. {
  447. throw new Exception(sprintf('Unable to split custom selector "%s".', $selectors));
  448. }
  449. return $matches[0];
  450. }
  451. protected function tokenize_custom_selector($selector)
  452. {
  453. if (!preg_match('/
  454. ([a-zA-Z0-9\-]+)
  455. (?:
  456. \(
  457. (?:
  458. ("|\')(.*)?\2
  459. |
  460. (.*?)
  461. )
  462. \)
  463. )?
  464. /x', substr($selector, 1), $matches))
  465. {
  466. throw new Exception(sprintf('Unable to parse custom selector "%s".', $selector));
  467. }
  468. return array('selector' => $matches[1], 'parameter' => isset($matches[3]) ? ($matches[3] ? $matches[3] : $matches[4]) : '');
  469. }
  470. protected function nth($cur, $result = 1, $dir = 'nextSibling')
  471. {
  472. $num = 0;
  473. for (; $cur; $cur = $cur->$dir)
  474. {
  475. if (1 == $cur->nodeType)
  476. {
  477. ++$num;
  478. }
  479. if ($num == $result)
  480. {
  481. return $cur;
  482. }
  483. }
  484. }
  485. /**
  486. * Reset the array to the beginning (as required for the Iterator interface).
  487. */
  488. public function rewind()
  489. {
  490. reset($this->nodes);
  491. $this->count = count($this->nodes);
  492. }
  493. /**
  494. * Get the key associated with the current value (as required by the Iterator interface).
  495. *
  496. * @return string The key
  497. */
  498. public function key()
  499. {
  500. return key($this->nodes);
  501. }
  502. /**
  503. * Escapes and return the current value (as required by the Iterator interface).
  504. *
  505. * @return mixed The escaped value
  506. */
  507. public function current()
  508. {
  509. return current($this->nodes);
  510. }
  511. /**
  512. * Moves to the next element (as required by the Iterator interface).
  513. */
  514. public function next()
  515. {
  516. next($this->nodes);
  517. $this->count --;
  518. }
  519. /**
  520. * Returns true if the current element is valid (as required by the Iterator interface).
  521. *
  522. * The current element will not be valid if {@link next()} has fallen off the
  523. * end of the array or if there are no elements in the array and {@link
  524. * rewind()} was called.
  525. *
  526. * @return bool The validity of the current element; true if it is valid
  527. */
  528. public function valid()
  529. {
  530. return $this->count > 0;
  531. }
  532. /**
  533. * Returns the number of matching nodes (implements Countable).
  534. *
  535. * @param integer The number of matching nodes
  536. */
  537. public function count()
  538. {
  539. return count($this->nodes);
  540. }
  541. }

Debug toolbar