您还没有提到要将哪些数据传递给已编码的函数。

# Document Frequency
# Sample positional-index entries, one per term.
#   word      - the indexed term
#   frequency - the frequency figure given for the term (its exact meaning is
#               not shown here; NOTE(review): confirm collection vs. document)
#   info      - postings list: one record per document, listed in ascending
#               document_id order, each with the positions where the term
#               occurs (ascending) and their count
document_frequecy_data_1 = {
    "word": "to",
    "frequency": 993427,
    "info": [
        {
            "document_id": 1,
            "occurrence_count": 6,
            "positions": [7, 18, 33, 72, 86, 231],
        },
        {
            "document_id": 2,
            "occurrence_count": 5,
            "positions": [1, 17, 74, 222, 255],
        },
        {
            "document_id": 4,
            "occurrence_count": 5,
            "positions": [8, 16, 190, 429, 433],
        },
        {
            "document_id": 5,
            "occurrence_count": 2,
            "positions": [363, 367],
        },
        {
            "document_id": 7,
            "occurrence_count": 3,
            "positions": [13, 23, 191],
        },
    ],
}

document_frequecy_data_2 = {
    "word": "be",
    "frequency": 178239,
    "info": [
        {
            "document_id": 1,
            "occurrence_count": 2,
            "positions": [17, 25],
        },
        {
            "document_id": 4,
            "occurrence_count": 5,
            "positions": [17, 191, 291, 430, 434],
        },
        {
            "document_id": 5,
            "occurrence_count": 3,
            "positions": [14, 19, 101],
        },
    ],
}
def pos_intersect(data_1: dict, data_2: dict, k: int = 1) -> list:
    """Find positions where two terms occur within *k* positions of each other.

    Both postings lists are walked in step, merge-style. For every document
    that contains both terms, each pair of positions whose absolute
    difference is at most ``k`` is reported.

    Args:
        data_1: Index entry for the first term. Must have an ``"info"`` list
            of postings, each with ``"document_id"`` and ``"positions"``.
            Postings must be sorted by ascending ``document_id`` and each
            ``positions`` list must be sorted ascending.
        data_2: Index entry for the second term, same layout as ``data_1``.
        k: Maximum allowed distance between the two positions (inclusive).

    Returns:
        A list of dicts with keys ``"document_id"``, ``"position_data_1"``
        and ``"position_data_2"``, ordered by document, then by the first
        term's position, then by the second term's position.
    """
    answer = []
    postings_1 = data_1["info"]
    postings_2 = data_2["info"]
    i = 0
    j = 0
    while i < len(postings_1) and j < len(postings_2):
        doc_id_1 = postings_1[i]["document_id"]
        doc_id_2 = postings_2[j]["document_id"]
        if doc_id_1 == doc_id_2:
            positions_1 = postings_1[i]["positions"]
            positions_2 = postings_2[j]["positions"]
            # Index of the first position in positions_2 that can still fall
            # inside the window of the current (and any later) pos_1. It only
            # moves forward because both position lists are ascending.
            window_start = 0
            for pos_1 in positions_1:
                while (
                    window_start < len(positions_2)
                    and positions_2[window_start] < pos_1 - k
                ):
                    window_start += 1
                for idx in range(window_start, len(positions_2)):
                    pos_2 = positions_2[idx]
                    if pos_2 > pos_1 + k:
                        # Everything after this is even further away.
                        break
                    answer.append(
                        {
                            "document_id": doc_id_1,
                            "position_data_1": pos_1,
                            "position_data_2": pos_2,
                        }
                    )
            i += 1
            j += 1
        elif doc_id_1 < doc_id_2:
            # Advance whichever list is behind so the document ids line up.
            i += 1
        else:
            j += 1
    return answer
# Report every place where the two sample terms occur within 4 positions of
# each other.
results = pos_intersect(document_frequecy_data_1, document_frequecy_data_2, 4)
print("Results :")
for res in results:
    # "Position 2" shows the second term's position (position_data_2).
    print(
        "Document id : ", res["document_id"],
        "Position 1: ", res["position_data_1"],
        "Position 2 : ", res["position_data_2"],
    )
点赞 评论本文链接: http://intersac.immuno-online.com/view-691101.html