* @license PHP License
* @package WB
* @subpackage base
*/
// Ask the framework for WBCli before the subclass below is declared.
// NOTE(review): presumably this includes the class file that defines WBCli;
// confirm against WBClass::load().
WBClass::load( 'WBCli');
/**
 * Command Line Interface class: HTML Dumper
 *
 * Dump the whole site as static HTML. The dumper temporarily swaps the
 * site configuration for the "dumper" configuration, requests every page
 * (plus the JavaScript and CSS services) over HTTP and stores the
 * responses below the destination folder.
 *
 * @version 0.1.0
 * @package WB
 * @subpackage base
 */
class WBCli_HtmlDumper extends WBCli
{
    /**
     * config loader
     * @var WBConfig
     */
    protected $conf;

    /**
     * document root - web wise (config value "page/docroot")
     * @var string
     */
    protected $docRoot;

    /**
     * base URL of the HTML service: http:// + server + docroot + service path
     * @var string
     */
    protected $baseUrl;

    /**
     * search strings applied to every fetched HTML page
     * @var array
     */
    protected $search = array();

    /**
     * replacement strings, index-aligned with $search
     * @var array
     */
    protected $replace = array();

    /**
     * destination folder, relative to the base directory "wb/dir/base"
     * @var string
     */
    protected $des;

    /**
     * 2nd constructor
     *
     * Intentionally empty: configuration is loaded in execute().
     */
    protected function init()
    {
    }

    /**
     * execute programme
     *
     * Actually dump pages: load the configuration, wipe the cache and the
     * destination folder, install the dumper configuration, fetch all
     * assets and pages, restore the configuration and finally remove
     * group/other write permissions from the dump.
     *
     * @throws Exception anything thrown while browsing is rethrown after
     *                   the original configuration has been restored
     */
    protected function execute()
    {
        $this->conf = WBClass::create('WBConfig');
        $this->conf->load('config');

        $this->docRoot = $this->conf->get('page/docroot');
        $this->baseUrl = sprintf(
            'http://%s%s%s',
            $this->conf->get('page/server'),
            $this->docRoot,
            $this->conf->get('page/service/html')
        );

        $this->setDestination('var/tmp/html');

        // Quote the directory part only; the trailing glob must stay
        // outside the quotes so the shell still expands it.
        $base   = WBParam::get('wb/dir/base');
        $target = escapeshellarg($base . '/' . $this->des);
        exec(sprintf('rm -rf %s/*', escapeshellarg($base . '/var/cache')));
        exec(sprintf('rm -rf %s/*', $target));

        $this->prepareConfig();
        try {
            $this->browseOthers();
            $this->browseHtml();
        } catch (Exception $e) {
            // Never leave the dumper configuration installed when the dump
            // aborts, e.g. because a DirectoryIterator cannot open a folder.
            $this->restoreConfig();
            throw $e;
        }
        $this->restoreConfig();

        exec(sprintf('chmod -R go-w %s', $target));
    }

    /**
     * set destination folder
     *
     * Creates the folder through WBFile, remembers it and prints the
     * resulting location.
     *
     * @param string $des folder relative to the base directory
     */
    protected function setDestination($des)
    {
        $file = WBClass::create('WBFile');
        $file->mkdir($des);
        $this->des = $des;
        $this->pl('Destination: ' . WBParam::get('wb/dir/base') . '/' . $des);
    }

    /**
     * override "normal" config files
     *
     * Backs up config.php / config.xml (when present) to var/tmp and
     * replaces them with the dumper-config files from the "-default"
     * config folder.
     */
    protected function prepareConfig()
    {
        $config = WBParam::get('wb/dir/config', 'etc');
        $base   = WBParam::get('wb/dir/base');

        foreach (array('php', 'xml') as $ext) {
            $live = $base . '/' . $config . '/config.' . $ext;

            // backup config
            if (file_exists($live)) {
                copy($live, $base . '/var/tmp/config.' . $ext);
            }
            copy($base . '/' . $config . '-default/dumper-config.' . $ext, $live);
        }
    }

    /**
     * restore "normal" config files
     *
     * Removes the dumper configuration and moves the backups made by
     * prepareConfig() back into place, when there are any.
     */
    protected function restoreConfig()
    {
        $config = WBParam::get('wb/dir/config', 'etc');
        $base   = WBParam::get('wb/dir/base');

        foreach (array('php', 'xml') as $ext) {
            $live   = $base . '/' . $config . '/config.' . $ext;
            $backup = $base . '/var/tmp/config.' . $ext;

            // guard avoids a warning when the dumper config was never installed
            if (is_file($live)) {
                unlink($live);
            }
            // restore
            if (file_exists($backup)) {
                rename($backup, $live);
            }
        }
    }

    /**
     * surf to special files
     *
     * Create JavaScript and CSS files and prepare string replacement.
     * A failed download is reported and the matching file is not written.
     */
    protected function browseOthers()
    {
        // warm up cache
        $this->visit('/');

        $base = WBParam::get('wb/dir/base');
        $site = 'http://' . $this->conf->get('page/server') . $this->docRoot;

        // The name of the first regular file in the bulk cache folder is
        // passed to the JavaScript service as its "cs" parameter.
        $cs = '';
        $di = new DirectoryIterator($base . '/var/cache/javascript/bulk');
        /** @var $di DirectoryIterator */
        foreach ($di as $i) {
            if ($i->isDot() || $i->isDir()) {
                continue;
            }
            $cs = $i->getFilename();
            break;
        }

        // javascript
        $jsService = $this->conf->get('page/service/javascript');
        $data      = file_get_contents($site . $jsService . '?cs=' . $cs);
        if ($data === false) {
            $this->pl(sprintf('Failed to fetch JS %s', $jsService));
        } else {
            $this->pvl(sprintf(' JS %s (%d bytes)', $jsService, strlen($data)));
            file_put_contents($this->des . '/script.js', $data);
        }
        $this->prepareReplacements($cs);

        // css
        $cssService = $this->conf->get('page/service/css');
        $data       = file_get_contents($site . $cssService);
        if ($data === false) {
            $this->pl(sprintf('Failed to fetch CSS %s', $cssService));
        } else {
            $this->pvl(sprintf(' CSS %s (%d bytes)', $cssService, strlen($data)));
            file_put_contents($this->des . '/style.css', $data);
        }

        // copy img folder
        if (is_dir($base . '/htdoc/img')) {
            exec(sprintf(
                'svn export %s %s',
                escapeshellarg($base . '/htdoc/img'),
                escapeshellarg($this->des . '/img')
            ));
        }
    }

    /**
     * setup string replacements for HTML code
     *
     * Order matters: str_replace() applies the pairs in sequence, so the
     * JavaScript URL carrying the "cs" parameter has to be registered
     * before the generic JavaScript placeholder.
     *
     * @param string $cs value of the "cs" parameter used in script URLs
     */
    protected function prepareReplacements($cs = null)
    {
        $this->search[]  = '/CSSREPLACE/';
        $this->replace[] = '/style.css';

        if ($cs) {
            $this->search[]  = '/JAVASCRIPTREPLACE/?cs=' . $cs;
            $this->replace[] = '/script.js';
        }

        $this->search[]  = '/JAVASCRIPTREPLACE/';
        $this->replace[] = '/js/';
    }

    /**
     * surf to all the pages
     *
     * Starts the recursive dump at the root of the page tree.
     */
    protected function browseHtml()
    {
        $this->browseDir();
    }

    /**
     * leech HTML recursively
     *
     * Dig down all folders having page files: the folder itself is
     * visited first, then every "*.xml" file in it is visited under its
     * name without the extension, and sub folders are processed
     * recursively. Entries starting with "." or "__" are ignored.
     *
     * @param string $dir folder below the page directory, with leading
     *                    and trailing slash
     */
    protected function browseDir($dir = '/')
    {
        $skip = array(
            '/robots.txt',
            '/sitemap.xml'
        );
        $base = WBParam::get('wb/dir/base')
            . '/'
            . WBParam::get('wb/dir/config', 'etc')
            . '-default/site/page';

        $di = new DirectoryIterator($base . $dir);
        /** @var $di DirectoryIterator */
        $this->visit($dir);

        foreach ($di as $i) {
            $filename = $i->getFilename();
            if ($i->isDot()
                || strncmp($filename, '.', 1) === 0
                || strncmp($filename, '__', 2) === 0
            ) {
                continue;
            }

            if ($i->isDir()) {
                $this->browseDir($dir . $filename . '/');
                // a folder is never a page file, even when named "*.xml"
                continue;
            }

            $parts = explode('.', $filename);
            $ext   = array_pop($parts);
            // no remaining parts means the file had no dot at all
            if ($ext !== 'xml' || count($parts) === 0) {
                continue;
            }

            $page = $dir . implode('.', $parts);
            if (in_array($page, $skip, true)) {
                continue;
            }
            $this->visit($page);
        }
    }

    /**
     * actually surf to page and store HTML
     *
     * Fetches the page from the base URL, applies the prepared string
     * replacements and stores the result as "<path>/index.html" below the
     * destination folder. A failed request is reported and nothing is
     * written for that page.
     *
     * @param string $path page path; a leading slash is added if missing
     */
    public function visit($path = '/')
    {
        if (empty($path)) {
            $path = '/';
        }
        if ($path[0] != '/') {
            $path = '/' . $path;
        }

        $url  = $this->baseUrl . $path;
        $html = file_get_contents($url);
        if ($html === false) {
            // do not store an empty page for a failed request
            $this->pl(sprintf('HTML: %s (failed to fetch %s)', $path, $url));
            return;
        }
        $html = str_replace($this->search, $this->replace, $html);

        $file = WBClass::create('WBFile');
        $file->touch($this->des . $path . '/index.html');
        file_put_contents($file->realpath(), $html);
        $this->pvl(sprintf('HTML: %s (%d bytes)', $path, strlen($html)));
    }
}
?>