/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: GPL 2.0
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License. You should have
* received a copy of the GPL license along with this program; if you
* did not, you can find it at http://www.gnu.org/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Coreseek.com code.
*
* Copyright (C) 2007-2008. All Rights Reserved.
*
* Author:
* Li monan
*
* ***** END LICENSE BLOCK ***** */
#include "csr_typedefs.h"
#include "mmthunk.h"
namespace css {
// Records the dictionary matches found at character position `idx`.
//
//   idx      - position inside the thunk buffer; m_max_length is raised to it
//              when it is larger than the current maximum.
//   rs_count - number of entries readable through `results`.
//   results  - match records; every entry contributes its `length` to the
//              slot's item list and its `value` to the slot's accumulated freq.
//
// The freshly filled item_info is stored at m_charinfos[idx]. The item comes
// from item_list.alloc() and is explicitly reset (freq = 0, items cleared)
// before it is filled.
void MMThunk::setItems(i4 idx, u2 rs_count, UnigramDict::result_pair_type* results)
{
  if(m_max_length < idx)
    m_max_length = idx;

  item_info* item = item_list.alloc();
  item->freq = 0;
  item->items.clear();

  for(u2 i = 0; i < rs_count; i++){
    const UnigramDict::result_pair_type& match = results[i];
    item->freq += match.value;
    item->items.push_back(match.length);
  }

  // The slot is addressed by idx directly (Tokenize() reads m_charinfos[base]
  // the same way); the previously computed wrapped index was never used.
  m_charinfos[idx] = item;
}
// Records the potential keyword matches found at character position `idx`.
//
//   idx      - position inside the thunk buffer; m_max_length is raised to it
//              when it is larger than the current maximum.
//   rs_count - number of entries readable through `results`.
//   results  - match records; every entry contributes its `length` to the
//              slot's item list and its `value` to the slot's accumulated freq.
//
// The filled item_info is stored at m_kwinfos[idx].
// NOTE(review): peekKwToken() looks slots up at
// (pos % CHUNK_BUFFER_SIZE) + base_offset, while this function stores at idx
// unchanged; the two only agree when that expression equals idx -- confirm.
void MMThunk::setKwItems(i4 idx, u2 rs_count, UnigramDict::result_pair_type* results)
{
  if(m_max_length < idx)
    m_max_length = idx;

  item_info* item = item_list.alloc();
  // Reset both fields before filling, exactly as setItems() does; without
  // clearing freq the sum below would start from whatever the item held.
  item->freq = 0;
  item->items.clear();

  for(u2 i = 0; i < rs_count; i++){
    const UnigramDict::result_pair_type& match = results[i];
    item->freq += match.value;
    item->items.push_back(match.length);
  }

  m_kwinfos[idx] = item;
}
// Reports the length of the first pending token without removing it.
//
//   length - out: tokens[0] when at least one token is queued, otherwise 0.
//
// Returns: always NULL.
u1* MMThunk::peekToken(u2& length)
{
  const bool has_token = tokens.size() != 0;
  if(has_token)
    length = tokens[0];
  else
    length = 0;
  return NULL;
}
// Removes the first pending token and returns its length.
// m_length is reduced by the removed length. When no token is queued nothing
// is modified and 0 is returned.
u2 MMThunk::popupToken()
{
  if(!tokens.size())
    return 0;

  const u2 front_len = tokens[0];
  m_length -= front_len;
  tokens.erase(tokens.begin());
  return front_len;
}
// Hands out the next recorded keyword candidate and advances the keyword
// cursor (m_kw_pos = position, m_kw_ipos = index inside that position's items).
//
//   pos    - out: position of the candidate; left untouched when nothing is
//            found.
//   length - out: length of the candidate, or 0 when nothing is found.
//
// Returns: always NULL; callers must rely on `length` to detect exhaustion.
u1* MMThunk::peekKwToken(u2& pos, u2& length)
{
  // Make sure the scan below visits the current cursor position at least once.
  if(m_max_length < m_kw_pos)
    m_max_length = m_kw_pos;

  while(m_kw_pos <= m_max_length) {
    u4 index = (m_kw_pos % CHUNK_BUFFER_SIZE ) + base_offset;
    item_info* info_kw = m_kwinfos[index];
    if(info_kw) {
      // Still unread candidates at this position?
      size_t cnt = info_kw->items.size();
      if(m_kw_ipos < cnt) {
        length = info_kw->items[m_kw_ipos];
        m_kw_ipos++;
        pos = m_kw_pos;
        return NULL;
      }
    }
    // Position exhausted (or empty): move on and restart the inner index.
    m_kw_pos++;
    m_kw_ipos = 0;
  }

  length = 0;
  return NULL;
}
// Stub: performs no work and always returns 0.
// An earlier implementation that popped the front of a `kwtokens` list was
// disabled; peekKwToken() advances its own cursor when it hands out a
// candidate.
u2 MMThunk::popupKwToken()
{
  const u2 nothing_popped = 0;
  return nothing_popped;
}
// Performs the actual segmentation over the buffered character infos.
// Starting at position `base`, candidate chunks of up to three parts (see the
// 1part/2part/3part comments below) are assembled from m_charinfos and handed
// to pushChunk(). m_queue.getToken() then supplies the length of the next
// token, which is recorded through pushToken() before `base` advances by that
// length. The loop stops once `base` exceeds m_max_length or
// m_queue.getToken() returns 0.
// Returns: always 0 (the function does not return a token count).
// NOTE(review): several lines below look truncated -- the text following a
// '<' appears to be missing. They are marked individually; restore them from
// the upstream file before building.
int MMThunk::Tokenize()
{
#if CHUNK_DEBUG
// Debug dump: print the items of every m_charinfos slot up to the first NULL slot.
for(u2 i = 0; m_charinfos[i]; i++){
// NOTE(review): the template argument of std::vector appears to be missing here.
std::vector::iterator it;
for(it = m_charinfos[i]->items.begin();
it < m_charinfos[i]->items.end();
it++)
printf("%d, ", *it);
printf("\n");
}
#endif
// apply rules
u2 base = 0;
while(base<=m_max_length){
Chunk chunk;
item_info* info_1st = m_charinfos[base];
// NOTE(review): loop condition is truncated; presumably it bounds i by
// info_1st->items.size() -- confirm. info_1st is dereferenced in the body
// without a visible NULL check.
for(size_t i = 0; iitems.size(); i++){
// First part: only the first candidate carries the slot's freq; the others
// are pushed with 0.
// NOTE(review): the second level below uses 1 instead of 0 for non-first
// candidates -- confirm the difference is intended.
if(i == 0)
chunk.pushToken(info_1st->items[i], info_1st->freq);
else
chunk.pushToken(info_1st->items[i],0);
//Chunk L1_chunk = chunk;
// Position where a second part would start.
u2 idx_2nd = info_1st->items[i] + base;
//check bound
item_info* info_2nd = NULL;
// NOTE(review): this line is truncated. Judging from the closing comments
// ("end for" / "end if") it presumably held a bound check on idx_2nd, the
// assignment of info_2nd, an `if(info_2nd)` and the header of the inner `j`
// loop over info_2nd->items -- confirm against upstream.
if(idx_2nditems.size(); j++) {
if(j == 0)
chunk.pushToken(info_2nd->items[j], info_2nd->freq);
else
chunk.pushToken(info_2nd->items[j],1);
// Position where a third part would start.
u2 idx_3rd = info_2nd->items[j] + idx_2nd;
// NOTE(review): this line is truncated. It presumably tested idx_3rd and
// declared idx_4th as the last element of m_charinfos[idx_3rd]->items --
// confirm against upstream.
if(idx_3rditems[m_charinfos[idx_3rd]->items.size()-1];
// The third part carries the slot's freq only when the slot has exactly one
// candidate; otherwise 1 is used.
if(m_charinfos[idx_3rd]->items.size() == 1)
chunk.pushToken(idx_4th, m_charinfos[idx_3rd]->freq );
else
chunk.pushToken(idx_4th, 1);
//push path.
pushChunk(chunk);
//pop 3part
chunk.popup();
}else{
//no 3part, push path
pushChunk(chunk);
}
//pop 2part
chunk.popup();
}//end for
}//end if
else{
//no 2part ,push path
pushChunk(chunk);
}
//pop 1part
chunk.popup();
}
//find the last phrase
//reset. rebase
// Ask the queue for the next token length; 0 ends the segmentation.
u2 tok_len = m_queue.getToken();
if(tok_len){
pushToken(tok_len, base); //tokens.push_back(tok_len);
}else
break;
// Start the next round with an empty queue and chunk, just past the emitted token.
m_queue.reset();
chunk.reset();
base += tok_len;
}//end while
return 0;
}
// Receives a candidate chunk `ck` from Tokenize(). With CHUNK_DEBUG enabled it
// starts by printing "Pushing: ".
// NOTE(review): this definition is damaged and cannot compile as written:
//  - the `#if CHUNK_DEBUG` below has no matching `#endif` in the visible text;
//  - the debug loop header is cut off after `i` and runs straight into code
//    that appears to belong to a different routine (it uses `info_kw` and
//    `aSize`, neither of which is declared here), whose own signature and
//    opening lines are missing;
//  - the remainder of pushChunk itself is not visible.
// Restore both routines from the upstream file.
void MMThunk::pushChunk(Chunk& ck)
{
#if CHUNK_DEBUG
printf("Pushing: ");
// NOTE(review): truncated line -- see the note above.
for(size_t i = 0; i::iterator it = info_kw->items.begin();
// NOTE(review): loop condition is truncated; presumably compares `it` with
// info_kw->items.end() -- confirm.
for(;ititems.end();it++) {
// Erase the first element equal to aSize, then stop scanning (the iterator
// is not used again after erase()).
if(*it == aSize) {
info_kw->items.erase(it); //find the same item.
break;
}
}
}
}
// Returns the thunk to its empty state:
//  - the first CHUNK_BUFFER_SIZE pointers of m_charinfos and m_kwinfos are
//    zeroed;
//  - item_list.free() is called (presumably recycling the item_info objects
//    handed out by alloc() -- confirm), the token list is cleared and m_queue
//    is reset;
//  - the keyword cursor and m_length go back to 0, and m_max_length to -1 so
//    that the next setItems()/setKwItems() call with a non-negative position
//    raises it.
void MMThunk::reset()
{
  const size_t table_bytes = sizeof(item_info*) * CHUNK_BUFFER_SIZE;
  memset(m_charinfos, 0, table_bytes);
  memset(m_kwinfos, 0, table_bytes);

  item_list.free();
  tokens.clear();
  m_queue.reset();

  m_max_length = -1;
  m_length = 0;
  m_kw_ipos = 0;
  m_kw_pos = 0;
}
}