37 CFile(fname, rw, name)
53 is_data_transposed=value;
60 m_delimiter=delimiter;
68 m_num_to_skip=num_lines;
95 m_line_reader->
reset();
100 void CCSVFile::init()
102 is_data_transposed=
false;
107 m_line_tokenizer=NULL;
112 void CCSVFile::init_with_defaults()
114 is_data_transposed=
false;
132 void CCSVFile::skip_lines(int32_t num_lines)
134 for (int32_t i=0; i<num_lines; i++)
138 #define GET_VECTOR(read_func, sg_type) \ 139 void CCSVFile::get_vector(sg_type*& vector, int32_t& len) \ 141 if (!m_line_reader->has_next()) \ 144 int32_t num_feat=0; \ 146 get_matrix(vector, num_feat, num_vec); \ 177 #define GET_MATRIX(read_func, sg_type) \ 178 void CCSVFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \ 180 int32_t num_lines=0; \ 181 int32_t num_tokens=-1; \ 182 int32_t current_line_idx=0; \ 183 SGVector<char> line; \ 185 skip_lines(m_num_to_skip); \ 186 num_lines=get_stats(num_tokens); \ 190 matrix=SG_MALLOC(sg_type, num_lines*num_tokens); \ 191 skip_lines(m_num_to_skip); \ 192 while (m_line_reader->has_next()) \ 194 line=m_line_reader->read_line(); \ 195 m_parser->set_text(line); \ 197 for (int32_t i=0; i<num_tokens; i++) \ 199 if (!m_parser->has_next()) \ 202 if (!is_data_transposed) \ 203 matrix[i+current_line_idx*num_tokens]=m_parser->read_func(); \ 205 matrix[current_line_idx+i*num_tokens]=m_parser->read_func(); \ 207 current_line_idx++; \ 212 if (!is_data_transposed) \ 214 num_feat=num_tokens; \ 219 num_feat=num_lines; \ 220 num_vec=num_tokens; \ 238 #define GET_NDARRAY(read_func, sg_type) \ 239 void CCSVFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \ 253 #define GET_SPARSE_MATRIX(read_func, sg_type) \ 254 void CCSVFile::get_sparse_matrix( \ 255 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \ 273 #undef GET_SPARSE_MATRIX 275 #define SET_VECTOR(format, sg_type) \ 276 void CCSVFile::set_vector(const sg_type* vector, int32_t len) \ 280 if (!is_data_transposed) \ 282 for (int32_t i=0; i<len; i++) \ 283 fprintf(file, "%" format "\n", vector[i]); \ 288 for (i=0; i<len-1; i++) \ 289 fprintf(file, "%" format "%c", vector[i], m_delimiter); \ 290 fprintf(file, "%" format "\n", vector[i]); \ 310 #define SET_MATRIX(format, sg_type) \ 311 void CCSVFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \ 315 if (!is_data_transposed) \ 317 for (int32_t i=0; 
i<num_vec; i++) \ 320 for (j=0; j<num_feat-1; j++) \ 321 fprintf(file, "%" format "%c", matrix[j+i*num_feat], m_delimiter); \ 322 fprintf(file, "%" format "\n", matrix[j+i*num_feat]); \ 327 for (int32_t i=0; i<num_feat; i++) \ 330 for (j=0; j<num_vec-1; j++) \ 331 fprintf(file, "%" format "%c", matrix[i+j*num_vec], m_delimiter); \ 332 fprintf(file, "%" format "\n", matrix[i+j*num_vec]); \ 353 #define SET_SPARSE_MATRIX(format, sg_type) \ 354 void CCSVFile::set_sparse_matrix( \ 355 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 373 #undef SET_SPARSE_MATRIX 377 int32_t& max_string_len)
380 int32_t current_line_idx=0;
381 int32_t num_tokens=0;
387 skip_lines(m_num_to_skip);
391 strings[current_line_idx].
slen=line.
vlen;
392 strings[current_line_idx].
string=SG_MALLOC(
char, line.
vlen);
393 for (int32_t i=0; i<line.
vlen; i++)
394 strings[current_line_idx].
string[i]=line[i];
396 if (line.
vlen>max_string_len)
397 max_string_len=line.
vlen;
402 num_str=current_line_idx;
405 #define GET_STRING_LIST(sg_type) \ 406 void CCSVFile::get_string_list( \ 407 SGString<sg_type>*& strings, int32_t& num_str, \ 408 int32_t& max_string_len) \ 424 #undef GET_STRING_LIST 429 for (int32_t i=0; i<num_str; i++)
431 for (int32_t j=0; j<strings[i].
slen; j++)
432 fprintf(
file,
"%c", strings[i].
string[j]);
437 #define SET_STRING_LIST(sg_type) \ 438 void CCSVFile::set_string_list( \ 439 const SGString<sg_type>* strings, int32_t num_str) \ 455 #undef SET_STRING_LIST 460 char *last = s.
start;
463 if (*s.
start == delim)
void set_delimiter(char delimiter)
#define GET_MATRIX(read_func, sg_type)
void set_transpose(bool value)
Class v_array taken directly from JL's implementation
#define GET_NDARRAY(read_func, sg_type)
virtual index_t next_token_idx(index_t &start)
virtual void set_string_list(const SGString< uint8_t > *strings, int32_t num_str)
#define GET_SPARSE_MATRIX(read_func, sg_type)
#define SET_STRING_LIST(sg_type)
Class for buffered reading from an ASCII file
struct Substring, specified by start position and end position.
void set_lines_to_skip(int32_t num_lines)
static void tokenize(char delim, substring s, v_array< substring > &ret)
void push(const T &new_elem)
int32_t get_stats(int32_t &num_tokens)
virtual SGVector< char > read_line()
#define GET_STRING_LIST(sg_type)
#define SET_MATRIX(format, sg_type)
Class for reading from a string
A File access base class.
#define SET_VECTOR(format, sg_type)
#define GET_VECTOR(read_func, sg_type)
#define SET_SPARSE_MATRIX(format, sg_type)
void set_tokenizer(CTokenizer *tokenizer)
All classes and functions are contained in the shogun namespace
The class CDelimiterTokenizer is used to tokenize a SGVector<char> into tokens using custom chars as ...
SGVector< bool > delimiters
virtual void set_text(SGVector< char > txt)
virtual void get_string_list(SGString< uint8_t > *&strings, int32_t &num_str, int32_t &max_string_len)