You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
import os
import multiprocessing
import numpy as np
from tensorpack.dataflow import RNGDataFlow, PrefetchDataZMQ
import json
import csv
import re
import traceback
from tensorpack.dataflow.serialize import LMDBSerializer
if __name__ == '__main__':
    # Product-info JSONL file + bp_feature CSV file + LMDB output file.
    input_file = '/home/wuwei/datasets/ofa/tianchi_data/item_valid_info.jsonl'
    bp_feature_input_file = './testv1/item_valid_image_feature.csv'
    bp_feature_lmdb_file = './testv1/item_valid_image_feature.lmdb'

    # Collect every item_id from the JSONL metadata file; only the count
    # is used below, to size the dataflow.
    table_data = []
    with open(input_file, encoding='utf-8', mode='r') as f:
        for line in f:  # iterate lazily instead of readlines()
            record = json.loads(line.strip())
            table_data.append(record['item_id'])
    data_len = len(table_data)

    # Serialize the pre-extracted image-feature dataflow into an LMDB file.
    # NOTE(review): Conceptual_Caption is defined elsewhere in this file —
    # presumably an RNGDataFlow over the bp_feature CSV; confirm.
    ds = Conceptual_Caption(bp_feature_input_file, data_len)
    ds1 = PrefetchDataZMQ(ds)
    LMDBSerializer.save(ds1, bp_feature_lmdb_file)
2. What you observed:
I found that when the program executes the LMDBSerializer.save function, it gets stuck at "Flushing database ...". I was confused and did not know how to solve it. I tried Ctrl+Z to stop it, but the LMDB file may have been corrupted.
3. What you expected, if not obvious.
I expected the program to run to completion and produce a valid LMDB file.
4. Your environment:
Paste the output of this command: python -m tensorpack.tfutils.collect_env
python 3.6.2
torch 1.4.0
The text was updated successfully, but these errors were encountered:
1. What you did:
import os
import multiprocessing
import numpy as np
from tensorpack.dataflow import RNGDataFlow, PrefetchDataZMQ
import json
import csv
import re
import traceback
from tensorpack.dataflow.serialize import LMDBSerializer
# Column names of the tab-separated feature file produced by the
# bottom-up-attention feature extractor; used as csv.DictReader fieldnames.
FIELDNAMES = ['item_id', 'image_h', 'image_w', 'num_boxes', 'boxes', 'features', 'cls_prob', 'title']
import sys
import pandas as pd
import zlib
import base64
# The base64-encoded feature columns can be extremely long; lift the csv
# module's default field-size cap so DictReader does not raise on them.
csv.field_size_limit(sys.maxsize)
def read_json(file):
    """Load and return the JSON content of *file* (UTF-8)."""
    # Context manager closes the handle deterministically; the original
    # relied on the garbage collector to close the leaked file object.
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)
def write_json(file, data):
    """Serialize *data* to *file* as pretty-printed, non-ASCII-safe JSON.

    Returns None.
    """
    # Context manager guarantees the buffer is flushed and the handle
    # closed; the original never closed the file, risking truncated output.
    with open(file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
def _file_name(row):
return "%s/%s" % (row['folder'], (zlib.crc32(row['url'].encode('utf-8')) & 0xffffffff))
def decode_base64(data, altchars=b'+/'):
"""Decode base64, padding being optional.
"../bp_feature/convert_feature_all.py" 114L, 3573C 1,1 Top
# Count the rows of the tab-separated feature file.
count = 0
with open(infile, encoding='utf-8', mode='r') as tsv_in_file:
    # FIELDNAMES supplies the header; the file itself has none.
    reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
    for item in reader:
        count += 1  # was `cnt += 1`: NameError — `cnt` is never defined
if __name__ == '__main__':
    # Product-info JSONL file + bp_feature CSV file + LMDB output file.
    # `if name == 'main':` raised NameError; the dunder spelling is required.
    input_file = '/home/wuwei/datasets/ofa/tianchi_data/item_valid_info.jsonl'
    bp_feature_input_file = './testv1/item_valid_image_feature.csv'
    bp_feature_lmdb_file = './testv1/item_valid_image_feature.lmdb'
2. What you observed:
I found that when the program executes the LMDBSerializer.save function, it gets stuck at "Flushing database ...". I was confused and did not know how to solve it. I tried Ctrl+Z to stop it, but the LMDB file may have been corrupted.
3. What you expected, if not obvious.
I expected the program to run to completion and produce a valid LMDB file.
4. Your environment:
Paste the output of this command:
python -m tensorpack.tfutils.collect_env
python 3.6.2
torch 1.4.0
The text was updated successfully, but these errors were encountered: