* @license PHP License
 * @package WB
 * @subpackage db
 */

WBClass::load('WBDatasource_Decorator'
    , 'WBDatasource_Decorator_Statistic'
    , 'WBClock'
    , 'WBDatasource_Callback');

/**
 * Simple datasource decorator: Statistic_BrowserCleaner
 *
 * Post-processes statistic records within a date range. Depending on the
 * item's "action" it either marks CLI sessions with the hint "ignore", or
 * runs browser detection on every matching session, marks crawlers with the
 * hint "spider" and stores browser name and version for all other
 * recognised browsers. Updates are collected and written in blocks.
 *
 * @version 0.1.1
 * @package WB
 * @subpackage db
 */
class WBDatasource_Decorator_Statistic_BrowserCleaner extends WBDatasource_Decorator implements WBDatasource_Callback
{
    /**
     * Decorator's Parameter List
     *
     * Parameters:
     * - mindateindex: key of the list item holding the inclusive lower bound of the date range
     * - maxdateindex: key of the list item holding the exclusive upper bound of the date range
     *
     * @var array
     */
    protected $parameter = [
        'mindateindex' => 'mindate',
        'maxdateindex' => 'maxdate',
    ];

    /**
     * Number of collected track ids that triggers a bulk update
     */
    private const SAVE_BLOCK_SIZE = 25;

    /**
     * Track id used by CLI sessions
     */
    private const TRACKID_CLI = 42;

    /**
     * @var WBInfo_Browser
     */
    private $browser;

    /**
     * @var WBDatasource_Table
     */
    private $table;

    /**
     * List of track ids to be marked as crawler
     * @var array
     */
    private $crawlerTrackId = [];

    /**
     * List of browser info to save
     *
     * Track ids grouped as [browser name][major version][minor version].
     *
     * @var array
     */
    private $browserInfo = [];

    /**
     * Current Progress
     *
     * NOTE(review): not read or written anywhere in this class.
     *
     * @var int
     */
    private $cnt = 0;

    /**
     * Current Date Range
     *
     * Index 0 is the lower bound (compared with "ge"), index 1 the upper
     * bound (compared with "lt").
     *
     * @var array
     */
    private $dateRange = [];

    /**
     * Setup
     *
     * Lazily creates the table object used for all reads and writes.
     *
     * @param array $item unused
     * @return void
     */
    public function onStart(&$item)
    {
        if (empty($this->table)) {
            $this->table = WBClass::create('WBDatasource_Table');
        }
    }

    /**
     * Actually Decorate List Item
     *
     * Reads the date range from the item and dispatches on the item's
     * "action" (case-insensitive): "clisession" or "normalsession". Items
     * without an action, or with any other action, are left alone.
     *
     * @param array $item
     * @return void
     */
    public function decorate(&$item)
    {
        // action is required
        if (empty($item['action'])) {
            return;
        }

        $this->dateRange[0] = $item[$this->parameter['mindateindex']];
        $this->dateRange[1] = $item[$this->parameter['maxdateindex']];

        // only records that still carry the default hint are touched
        $clause   = [];
        $clause[] = [
            'field' => 'hint',
            'value' => 'default',
        ];
        $this->injectDateRange2Clause($clause);

        switch (strtolower($item['action'])) {
            case 'clisession':
                $this->updateSession42($clause);
                break;
            case 'normalsession':
                $this->updateSessions($clause);
                break;
            default:
                break;
        }
    }

    /**
     * Update CLI Session Records
     *
     * Always ignore CLI sessions: sets hint "ignore" in session and page
     * views for the CLI track id.
     *
     * @param array $clause base clause (hint and date range)
     * @return void
     */
    private function updateSession42(array $clause): void
    {
        $save = [
            'hint' => 'ignore',
        ];

        $clause[] = [
            'field' => 'trackid',
            'value' => self::TRACKID_CLI,
        ];

        $this->table->save('sessionview', null, $save, $clause);
        $this->table->save('pageview', null, $save, $clause);
    }

    /**
     * Update Normal Sessions
     *
     * Detect browser and update session information. The actual detection
     * happens in onDatasourceGet(), which the table calls back while
     * loading; afterwards the remaining, not yet written blocks are flushed.
     *
     * @param array $clause base clause (hint and date range)
     * @return void
     */
    private function updateSessions(array $clause): void
    {
        if (empty($this->browser)) {
            $this->browser = WBClass::create('WBInfo_Browser');
        }

        $this->crawlerTrackId = [];

        $option = [
            'callback' => $this,
            'flush'    => true,
        ];
        // the result is not needed: all work is done in the callback
        $this->table->get('sessionview', null, null, $clause, $option);

        // write whatever is left below the block size
        $this->markAsCrawler(null, true);
        foreach ($this->browserInfo as $name => $majorList) {
            foreach ($majorList as $major => $minorList) {
                foreach ($minorList as $minor => $trackIds) {
                    if (empty($trackIds)) {
                        continue;
                    }
                    $this->saveBrowserInfo($name, $major, $minor);
                }
            }
        }
        $this->browserInfo = [];
    }

    /**
     * Inject Date Range in Clause
     *
     * Helper method to create clause: appends "created >= lower bound" and
     * "created < upper bound" using the current date range.
     *
     * @param array $clause clause list to extend
     * @return void
     */
    private function injectDateRange2Clause(array &$clause): void
    {
        $clause[] = [
            'field'    => 'created',
            'relation' => 'ge',
            'value'    => $this->dateRange[0],
        ];
        $clause[] = [
            'field'    => 'created',
            'relation' => 'lt',
            'value'    => $this->dateRange[1],
        ];
    }

    /**
     * Callback when receive whole entry
     *
     * Runs browser detection on the entry's user agent. Crawlers are queued
     * to be marked as spider; any browser whose type is not "unknown" is
     * queued by name, major and minor version. A queue is written as soon
     * as it holds SAVE_BLOCK_SIZE track ids.
     *
     * @param string $src name of source
     * @param string $key
     * @param array $data loaded data
     * @return bool|null true on success, null for CLI entries
     */
    public function onDatasourceGet($src, $key, &$data)
    {
        if (self::TRACKID_CLI == $data['trackid']) {
            // NOTE(review): this path returns null while every other path
            // returns true. How the caller reads the return value is not
            // visible here, so the original value is kept - confirm.
            return;
        }

        $this->browser->detect($data['useragent']);
        if ($this->browser->isCrawler()) {
            $this->markAsCrawler($data['trackid']);
            return true;
        }

        if ('unknown' == $this->browser->getType()) {
            return true;
        }

        $name  = $this->browser->getName();
        $major = $this->browser->getVersion(WBInfo_Browser::VER_TYPE_MAJOR);
        $minor = $this->browser->getVersion(WBInfo_Browser::VER_TYPE_MINOR);

        // missing levels of the nested list are created on the fly
        $this->browserInfo[$name][$major][$minor][] = $data['trackid'];

        // same threshold as markAsCrawler(): write once the block is full
        if (self::SAVE_BLOCK_SIZE <= count($this->browserInfo[$name][$major][$minor])) {
            $this->saveBrowserInfo($name, $major, $minor);
        }

        return true;
    }

    /**
     * Mark TrackId as Crawler
     *
     * Bulk storage for many records: track ids are collected and written
     * with hint "spider" to session and page views once SAVE_BLOCK_SIZE ids
     * are queued, or immediately when forced.
     *
     * @param string|null $trackId track id to queue, null to queue nothing
     * @param bool $force write the queue even if the block is not full
     * @return void
     */
    private function markAsCrawler($trackId = null, bool $force = false): void
    {
        if (!empty($trackId)) {
            $this->crawlerTrackId[] = $trackId;
        }

        if (empty($this->crawlerTrackId)) {
            return;
        }

        if (!$force && self::SAVE_BLOCK_SIZE > count($this->crawlerTrackId)) {
            return;
        }

        $save = [
            'hint' => 'spider',
        ];

        $clause   = [];
        $clause[] = [
            'field'    => 'trackid',
            'relation' => 'in',
            'value'    => $this->crawlerTrackId,
        ];
        $this->injectDateRange2Clause($clause);

        $this->table->save('sessionview', null, $save, $clause);
        $this->table->save('pageview', null, $save, $clause);

        // flush
        $this->crawlerTrackId = [];
    }

    /**
     * Store Browser Info
     *
     * Bulk storage for many records: writes browser name and version to all
     * session views queued for this browser and version, then empties the
     * queue.
     *
     * @param string $browser browser name
     * @param string $major major version
     * @param string $minor minor version
     * @return void
     */
    private function saveBrowserInfo($browser, $major, $minor): void
    {
        if (empty($this->browserInfo[$browser][$major][$minor])) {
            $this->browserInfo[$browser][$major][$minor] = [];
            return;
        }

        $save = [
            'browser'      => $browser,
            'majorversion' => $major,
            'minorversion' => $minor,
        ];

        $clause   = [];
        $clause[] = [
            'field'    => 'trackid',
            'relation' => 'in',
            'value'    => $this->browserInfo[$browser][$major][$minor],
        ];
        $this->injectDateRange2Clause($clause);

        $this->table->save('sessionview', null, $save, $clause);

        // flush
        $this->browserInfo[$browser][$major][$minor] = [];
    }
}