* @license PHP License
 * @package WB
 * @subpackage base
 */

WBClass::load('WBFile');

/**
 * File operations for CSV files
 *
 * Reads and writes CSV rows through the file handle $this->fh (not declared
 * in this class; presumably provided by WBFile after open() - confirm there).
 * Rows can optionally be translated to/from associative arrays via a column
 * map, and column values can be converted to UTF-8 while reading.
 *
 * @version 1.0.3
 * @package WB
 * @subpackage base
 */
class WBFile_CSV extends WBFile
{
    /**
     * CSV format info
     *
     * 'del' is the field delimiter, 'enc' the field enclosure.
     *
     * @var array
     */
    private $format = array(
        'del' => ',',
        'enc' => '"'
    );

    /**
     * column map
     *
     * Maps associative key => zero-based column index. Empty means "no map".
     *
     * @var array
     */
    private $map = array();

    /**
     * number of columns per line
     *
     * Stored by forceColumns(); currently not evaluated by read().
     *
     * @var int
     */
    private $cols = -1;

    /**
     * String format source to convert, if any
     * @var string
     */
    private $iconvSrc = null;

    /**
     * Read row
     *
     * Get row from CSV file using delimiter and enclosure. Every column is
     * passed through onReadColumn() (optional UTF-8 conversion, then trim).
     * If a column map is set, the result is an associative array holding
     * only the mapped keys whose column index exists in the row.
     *
     * @see fgetcsv()
     * @return array row data; empty array if nothing could be read (e.g. EOF)
     */
    public function read()
    {
        $row = fgetcsv($this->fh, 0, $this->format['del'], $this->format['enc']);
        if (empty($row)) {
            return array();
        }

        // NOTE: the column count set via forceColumns() is not enforced;
        // the folding logic below has been disabled.
        /*
        if (0 < $this->cols) {
            // read more lines to fill multi line columns
            while (count($row) < $this->cols) {
                if (feof($this->fh)) {
                    break;
                }
                $line = fgetcsv($this->fh, 0, $this->format['del'], $this->format['enc']);
                $row[count($row) - 1] .= "\n" . array_shift($line);
                if (count($line)) {
                    $row = array_merge($row, $line);
                }
            }
        }
        */

        $row = array_map(array($this, 'onReadColumn'), $row);
        if (empty($this->map)) {
            return $row;
        }

        $data = array();
        foreach ($this->map as $k => $i) {
            if (isset($row[$i])) {
                $data[$k] = $row[$i];
            }
        }
        return $data;
    }

    /**
     * Map function for each column
     *
     * Converts the value to UTF-8 if a source encoding is configured and
     * strips surrounding whitespace.
     *
     * @param string $col raw column value
     * @return string
     */
    private function onReadColumn($col)
    {
        // fgetcsv() reports a blank line as array(null); normalise to string
        // so iconv()/trim() never receive null
        $col = (string) $col;
        if ($this->iconvSrc) {
            // iconv() returns false on failure, which ends up as '' below
            $col = iconv($this->iconvSrc, 'UTF-8', $col);
        }
        return trim((string) $col);
    }

    /**
     * Write row
     *
     * Put row to CSV file using delimiter and enclosure. Without a column
     * map the values of $data are written in their given order. With a map
     * each mapped key is written at its column index; missing keys and
     * unmapped column positions are written as empty strings.
     *
     * @see fputcsv()
     * @param array $data
     * @return int|false result of fputcsv()
     */
    public function write($data)
    {
        if (empty($this->map)) {
            $row = array_values($data);
        } else {
            $row = array();
            foreach ($this->map as $k => $i) {
                $row[$i] = isset($data[$k]) ? $data[$k] : '';
            }

            // fputcsv() writes values in insertion order and ignores keys,
            // so pad gaps and sort by column index to mirror read()
            $last = max(array_keys($row));
            if (is_int($last) && 0 <= $last) {
                $row += array_fill(0, $last + 1, '');
            }
            ksort($row);
        }

        return fputcsv($this->fh, $row, $this->format['del'], $this->format['enc']);
    }

    /**
     * Set map to support associative arrays
     *
     * Optional map allows to support associative arrays: keys are the
     * associative names, values the column indexes.
     *
     * @param array $map
     */
    public function setMap($map = array())
    {
        $this->map = $map;
    }

    /**
     * Force Number of Data Columns
     *
     * Intended to make sure that all rows contain a defined number of
     * columns, which is especially useful for multi-line content (folding
     * multi-line columns into single data rows).
     *
     * NOTE: the value is only stored; the code in read() that would apply
     * it is currently commented out.
     *
     * @see read()
     * @param int $columns optional number of columns
     */
    public function forceColumns($columns = -1)
    {
        $this->cols = $columns;
    }

    /**
     * Tell which format to use read/write CSV file
     *
     * Set delimiter and enclosure. Note that the default delimiter of this
     * method (';') differs from the initial format of the class (',').
     *
     * @param string $del
     * @param string $enc
     * @return WBFile_CSV this object, for chaining
     */
    public function setFormat($del = ';', $enc = '"')
    {
        $this->format['del'] = $del;
        $this->format['enc'] = $enc;
        return $this;
    }

    /**
     * Tell to convert strings to UTF-8
     *
     * @param string $source source encoding for iconv(), null to disable conversion
     */
    public function convert2UTF8($source = null)
    {
        $this->iconvSrc = $source;
    }

    /**
     * Try to detect CSV file's format
     *
     * Detect delimiter and enclosure of CSV file by trying the delimiters
     * tab, ';' and ',' combined with the enclosures ' and ". A combination
     * is accepted if two sample rows have at least two columns each and no
     * column of the first sample row starts or ends with the other
     * enclosure character. On success the format is stored in this object.
     * The file pointer is rewound to the start in any case.
     *
     * @return bool true if a format was detected
     * @throws WBException_Call if no file handle is available
     * @throws WBException_File if the file is too short or a row could not be read
     *                          (presumably what WBClass::create() builds - confirm)
     */
    public function guessFormat()
    {
        if (!$this->fh) {
            WBClass::load('WBException_Call');
            throw new WBException_Call('Call open() first', 1, __CLASS__);
        }

        $delimiter = array("\t", ';', ',');
        $enclosure = array("'", '"');

        $ok = false;
        foreach ($delimiter as $d) {
            foreach ($enclosure as $e) {
                fseek($this->fh, 0);

                // get two rows
                $row1 = fgetcsv($this->fh, 0, $d, $e);
                $row2 = array();

                // try to skip first rows, because they sometimes are just header
                for ($i = 0; $i < 9; ++$i) {
                    if (feof($this->fh)) {
                        break;
                    }
                    $row2 = $row1;
                    $row1 = fgetcsv($this->fh, 0, $d, $e);
                    if (!feof($this->fh)) {
                        $row2 = fgetcsv($this->fh, 0, $d, $e);
                    }
                }

                if (feof($this->fh)) {
                    fseek($this->fh, 0);
                    $ex = array(
                        'msg'   => 'CSV file to short to guess format - require at least 20 rows (including header)',
                        'code'  => 2,
                        'class' => __CLASS__
                    );
                    throw WBClass::create('WBException_File', $ex);
                }

                // failed to read one of the sample rows; count(false) would
                // raise a TypeError on PHP 8, so both rows are checked here
                if (false === $row1 || false === $row2) {
                    fseek($this->fh, 0);
                    $ex = array(
                        'msg'   => 'Guess format failed, unspecified error',
                        'code'  => 3,
                        'class' => __CLASS__
                    );
                    throw WBClass::create('WBException_File', $ex);
                }

                // require two columns
                if (2 > min(count($row1), count($row2))) {
                    continue;
                }

                // the "other" enclosure character
                $oe = '"';
                if ('"' == $e) {
                    $oe = "'";
                }

                // does enclosure work as expected?
                foreach ($row1 as $c) {
                    // enclosure does not seem to match, try next enclosure
                    if ($c != trim($c, $oe)) {
                        continue 2;
                    }
                }

                $ok = true;
                $this->format['enc'] = $e;
                $this->format['del'] = $d;
                break 2;
            }
        }

        fseek($this->fh, 0);
        return $ok;
    }

    /**
     * Try to Detect Encoding of File
     *
     * Trial and error algorithm. Skips the first rows, then reads the same
     * sample of rows once per candidate encoding (first unconverted, i.e.
     * assuming UTF-8, then a list of ISO-8859 and code page encodings) and
     * counts how often the given umlauts occur after conversion to UTF-8.
     * The first encoding with more than twice as many hits as there are
     * umlauts wins and stays configured. If none qualifies, conversion is
     * switched off. The file pointer is rewound to the start in any case.
     *
     * @param array $umlaut list of umlauts to detect
     * @return bool true if detection was successful
     * @throws WBException_Call if no file handle is available
     */
    public function guessEncoding($umlaut = array())
    {
        if (!$this->fh) {
            WBClass::load('WBException_Call');
            throw new WBException_Call('Call open() first', 1, __CLASS__);
        }

        if (empty($umlaut)) {
            // literals are in this source file's encoding (presumably UTF-8 - confirm)
            $umlaut = array('ß', 'ä', 'ü', 'ö');
        }

        // skip the leading rows (11 reads)
        for ($i = 0; $i <= 10; ++$i) {
            $this->read();
        }
        $pos = ftell($this->fh);

        // ISO-8859-12 is intentionally absent: that charset was never published
        $encoding = array(
            null, // UTF-8
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-11',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'CP850',
            'CP1250',
            'CP1251',
            'CP1252',
            'CP1253',
            'CP1254',
            'CP1255',
            'CP1256',
            'CP1257',
            'CP1258'
        );

        $threshold = 2 * count($umlaut);
        $detected  = false;
        foreach ($encoding as $candidate) {
            $this->convert2UTF8($candidate);
            $found = 0;

            // try to find some umlauts in the sample (21 reads)
            for ($i = 0; $i <= 20; ++$i) {
                $row  = $this->read();
                $line = implode('-', array_values($row));
                foreach ($umlaut as $u) {
                    if (false !== strpos($line, $u)) {
                        ++$found;
                    }
                }
            }

            if ($threshold < $found) {
                $detected = true;
                break;
            }

            // rewind to the start of the sample for the next candidate
            fseek($this->fh, $pos);
        }

        fseek($this->fh, 0);
        if ($detected) {
            return true;
        }

        // nothing matched: a few hits below the threshold must not leave
        // the last candidate active
        $this->convert2UTF8(null);
        return false;
    }
}