show total directory size in listings
sizes are computed during `-e2ds` indexing, and new uploads
are counted, but a rescan is necessary after a move or delete
9001 committed Sep 15, 2024
1 parent 7d64879 commit 427597b
Showing 7 changed files with 101 additions and 33 deletions.
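For orientation, a minimal sketch of the per-directory size cache this commit introduces; the ds schema and the queries are lifted from the diff below, while the database path and the example folder name are made up for illustration:

import sqlite3

# hypothetical path; copyparty normally keeps the up2k db inside each volume's .hist folder
db = sqlite3.connect("/srv/media/.hist/up2k.db")

# one row per directory: rd = volume-relative path,
# sz = recursive size in bytes, nf = number of files underneath
db.execute("create table if not exists ds (rd text, sz int, nf int)")
db.execute("create index if not exists ds_rd on ds(rd)")

# the folder listing fetches one row per subdirectory:
hit = db.execute("select sz, nf from ds where rd=? limit 1", ("music/albums",)).fetchone()
if hit:
    total_bytes, num_files = hit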
1 change: 1 addition & 0 deletions README.md
@@ -1968,6 +1968,7 @@ below are some tweaks roughly ordered by usefulness:
* and also makes thumbnails load faster, regardless of e2d/e2t
* `--dedup` enables deduplication and thus avoids writing to the HDD if someone uploads a dupe
* `--safe-dedup 1` makes deduplication much faster during upload by skipping verification of file contents; safe if there is no other software editing/moving the files in the volumes
* `--no-dirsz` shows the size of folder inodes instead of the total size of the contents, giving about 30% faster folder listings
* `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304`
* `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger)
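A quick usage sketch of the new switches; the flag and volflag names come from this commit, but the example path and the -v SRC:DST:PERMS:c,FLAG volume syntax are assumptions in the spirit of the README's other examples, not taken from this diff:

# default after this commit: folder listings show recursive sizes, computed while indexing
python3 -m copyparty -v /srv/media::r -e2dsa

# opt out globally, or per-volume with the volflag
python3 -m copyparty -v /srv/media::r -e2dsa --no-dirsz
python3 -m copyparty -v /srv/media::r:c,nodirsz -e2dsa

# if the displayed sizes drift out of sync (e.g. after moves or deletes),
# rebuild the cache from scratch as the --re-dirsz help text suggests
python3 -m copyparty -v /srv/media::r -e2dsa --re-dirsz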
2 changes: 2 additions & 0 deletions copyparty/__main__.py
@@ -1362,6 +1362,8 @@ def add_db_general(ap, hcores):
ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)")
ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)")
ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)")
ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)")
ap2.add_argument("--re-dirsz", action="store_true", help="if the directory-sizes in the UI are bonkers, use this along with \033[33m-e2dsa\033[0m to rebuild the index from scratch")
ap2.add_argument("--no-dhash", action="store_true", help="disable rescan acceleration; do full database integrity check -- makes the db ~5%% smaller and bootup/rescans 3~10x slower")
ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)")
ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)")
1 change: 1 addition & 0 deletions copyparty/cfg.py
@@ -13,6 +13,7 @@ def vf_bmap() -> dict[str, str]:
"dav_rt": "davrt",
"ed": "dots",
"hardlink_only": "hardlinkonly",
"no_dirsz": "nodirsz",
"no_dupe": "nodupe",
"no_forget": "noforget",
"no_pipe": "nopipe",
14 changes: 12 additions & 2 deletions copyparty/httpcli.py
@@ -5214,13 +5214,23 @@ def tx_browser(self) -> bool:
fe["tags"] = tags

if icur:
for fe in dirs:
fe["tags"] = ODict()

lmte = list(mte)
if self.can_admin:
lmte.extend(("up_ip", ".up_at"))

if "nodirsz" not in vf:
tagset.add(".files")
vdir = "%s/" % (rd,) if rd else ""
q = "select sz, nf from ds where rd=? limit 1"
for fe in dirs:
hit = icur.execute(q, (vdir + fe["name"],)).fetchone()
if hit:
(fe["sz"], fe["tags"][".files"]) = hit

taglist = [k for k in lmte if k in tagset]
for fe in dirs:
fe["tags"] = ODict()
else:
taglist = list(tagset)

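Judging from the fields this hunk populates (fe["name"], fe["sz"], fe["tags"][".files"]), a folder entry in the directory-listing JSON now carries its recursive size and file count roughly as sketched here; the values are invented and all other keys are omitted:

{"name": "albums", "sz": 73400320, "tags": {".files": 512}}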
108 changes: 81 additions & 27 deletions copyparty/up2k.py
@@ -1204,6 +1204,10 @@ def _build_file_index(self, vol: VFS, all_vols: list[VFS]) -> tuple[bool, bool]:
# ~/.wine/dosdevices/z:/ and such
excl.extend(("/dev", "/proc", "/run", "/sys"))

if self.args.re_dirsz:
db.c.execute("delete from ds")
db.n += 1

rtop = absreal(top)
n_add = n_rm = 0
try:
@@ -1212,7 +1216,7 @@ def _build_file_index(self, vol: VFS, all_vols: list[VFS]) -> tuple[bool, bool]:
self.log(t % (vol.vpath, rtop), 6)
return True, False

n_add = self._build_dir(
n_add, _, _ = self._build_dir(
db,
top,
set(excl),
@@ -1286,17 +1290,18 @@ def _build_dir(
cst: os.stat_result,
dev: int,
xvol: bool,
) -> int:
) -> tuple[int, int, int]:
if xvol and not rcdir.startswith(top):
self.log("skip xvol: [{}] -> [{}]".format(cdir, rcdir), 6)
return 0
return 0, 0, 0

if rcdir in seen:
t = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}"
self.log(t.format(seen[-1], rcdir, cdir), 3)
return 0
return 0, 0, 0

ret = 0
# total-files-added, total-num-files, recursive-size
tfa = tnf = rsz = 0
seen = seen + [rcdir]
unreg: list[str] = []
files: list[tuple[int, int, str]] = []
@@ -1321,7 +1326,7 @@ def _build_dir(
partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
for iname, inf in gl:
if self.stop:
return -1
return -1, 0, 0

rp = rds + iname
abspath = cdirs + iname
@@ -1358,7 +1363,7 @@ def _build_dir(
continue
# self.log(" dir: {}".format(abspath))
try:
ret += self._build_dir(
i1, i2, i3 = self._build_dir(
db,
top,
excl,
@@ -1373,6 +1378,9 @@
dev,
xvol,
)
tfa += i1
tnf += i2
rsz += i3
except:
t = "failed to index subdir [{}]:\n{}"
self.log(t.format(abspath, min_ex()), c=1)
@@ -1391,6 +1399,7 @@
# placeholder for unfinished upload
continue

rsz += sz
files.append((sz, lmod, iname))
liname = iname.lower()
if (
@@ -1412,6 +1421,15 @@
):
cv = iname

if not self.args.no_dirsz:
tnf += len(files)
q = "select sz, nf from ds where rd=? limit 1"
db_sz, db_nf = db.c.execute(q, (rd,)).fetchone() or (-1, -1)
if rsz != db_sz or tnf != db_nf:
db.c.execute("delete from ds where rd=?", (rd,))
db.c.execute("insert into ds values (?,?,?)", (rd, rsz, tnf))
db.n += 1

# folder of 1000 files = ~1 MiB RAM best-case (tiny filenames);
# free up stuff we're done with before dhashing
gl = []
@@ -1435,7 +1453,7 @@
c = db.c.execute(sql, (drd, dhash))

if c.fetchone():
return ret
return tfa, tnf, rsz

if cv and rd:
# mojibake not supported (for performance / simplicity):
@@ -1452,7 +1470,7 @@
seen_files = set([x[2] for x in files]) # for dropcheck
for sz, lmod, fn in files:
if self.stop:
return -1
return -1, 0, 0

rp = rds + fn
abspath = cdirs + fn
@@ -1485,7 +1503,7 @@
)
self.log(t)
self.db_rm(db.c, rd, fn, 0)
ret += 1
tfa += 1
db.n += 1
in_db = []
else:
@@ -1510,7 +1528,7 @@
continue

if not hashes:
return -1
return -1, 0, 0

wark = up2k_wark_from_hashlist(self.salt, sz, hashes)

@@ -1521,7 +1539,7 @@
# skip upload hooks by not providing vflags
self.db_add(db.c, {}, rd, fn, lmod, sz, "", "", wark, "", "", ip, at)
db.n += 1
ret += 1
tfa += 1
td = time.time() - db.t
if db.n >= 4096 or td >= 60:
self.log("commit {} new files".format(db.n))
@@ -1534,33 +1552,38 @@
db.c.execute("insert into dh values (?,?)", (drd, dhash)) # type: ignore

if self.stop:
return -1
return -1, 0, 0

# drop shadowed folders
for sh_rd in unreg:
n = 0
q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0"
q = "select count(w) from up where (rd=? or rd like ?||'/%') and +at == 0"
for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
try:
n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
erd_erd = (sh_erd, sh_erd)
n = db.c.execute(q, erd_erd).fetchone()[0]
break
except:
pass

assert erd_erd # type: ignore # !rm

if n:
t = "forgetting {} shadowed autoindexed files in [{}] > [{}]"
self.log(t.format(n, top, sh_rd))
assert sh_erd # type: ignore # !rm

q = "delete from dh where (d = ? or d like ?||'%')"
db.c.execute(q, (sh_erd, sh_erd + "/"))
q = "delete from dh where (d = ? or d like ?||'/%')"
db.c.execute(q, erd_erd)

q = "delete from up where (rd=? or rd like ?||'/%') and +at == 0"
db.c.execute(q, erd_erd)
tfa += n

q = "delete from up where (rd=? or rd like ?||'%') and +at == 0"
db.c.execute(q, (sh_erd, sh_erd + "/"))
ret += n
q = "delete from ds where (rd=? or rd like ?||'/%')"
db.c.execute(q, erd_erd)

if n4g:
return ret
return tfa, tnf, rsz

# drop missing files
q = "select fn from up where rd = ?"
Expand All @@ -1578,7 +1601,7 @@ def _build_dir(
if n_rm:
self.log("forgot {} deleted files".format(n_rm))

return ret
return tfa, tnf, rsz

def _drop_lost(self, cur: "sqlite3.Cursor", top: str, excl: list[str]) -> int:
rm = []
@@ -1796,13 +1819,13 @@ def _verify_integrity(self, vol: VFS) -> int:
return 0

with self.mutex:
q = "update up set w=?, sz=?, mt=? where rd=? and fn=?"
for rd, fn, w, sz, mt in rewark:
q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1"
cur.execute(q, (w, sz, int(mt), rd, fn))

for _, _, w in f404:
q = "delete from up where w = ? limit 1"
cur.execute(q, (w,))
if f404:
q = "delete from up where rd=? and fn=? and +w=?"
cur.executemany(q, f404)

cur.connection.commit()

@@ -2478,6 +2501,7 @@ def _open_db(self, db_path: str) -> "sqlite3.Cursor":
self._add_xiu_tab(cur)
self._add_cv_tab(cur)
self._add_idx_up_vp(cur, db_path)
self._add_ds_tab(cur)

try:
nfiles = next(cur.execute("select count(w) from up"))[0]
@@ -2591,6 +2615,7 @@ def _create_db(
self._add_dhash_tab(cur)
self._add_xiu_tab(cur)
self._add_cv_tab(cur)
self._add_ds_tab(cur)
self.log("created DB at {}".format(db_path))
return cur

@@ -2684,6 +2709,22 @@ def _add_idx_up_vp(self, cur: "sqlite3.Cursor", db_path: str) -> None:
cur.connection.commit()
cur.execute("vacuum")

def _add_ds_tab(self, cur: "sqlite3.Cursor") -> None:
# v5d -> v5e
try:
cur.execute("select rd, sz from ds limit 1").fetchone()
return
except:
pass

for cmd in [
r"create table ds (rd text, sz int, nf int)",
r"create index ds_rd on ds(rd)",
]:
cur.execute(cmd)

cur.connection.commit()

def wake_rescanner(self):
with self.rescan_cond:
self.rescan_cond.notify_all()
@@ -3693,6 +3734,19 @@ def db_add(
except:
pass

if "nodirsz" not in vflags:
try:
q = "update ds set nf=nf+1, sz=sz+? where rd=?"
q2 = "insert into ds values(?,?,1)"
while True:
if not db.execute(q, (sz, rd)).rowcount:
db.execute(q2, (rd, sz))
if not rd:
break
rd = rd.rsplit("/", 1)[0] if "/" in rd else ""
except:
pass

def handle_rm(
self,
uname: str,
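The db_add hunk above keeps the ds cache current for new uploads by bumping the file's directory and every ancestor up to the volume root; a self-contained sketch of that walk, assuming a sqlite3 cursor cur on the up2k database and a volume-relative directory path rd:

def bump_dirsz(cur, rd, sz):
    # try to bump the existing row for each ancestor; insert a fresh one on miss
    q_upd = "update ds set nf=nf+1, sz=sz+? where rd=?"
    q_ins = "insert into ds values(?,?,1)"
    while True:
        if not cur.execute(q_upd, (sz, rd)).rowcount:
            cur.execute(q_ins, (rd, sz))
        if not rd:
            break  # "" is the volume root; nothing left to bump
        rd = rd.rsplit("/", 1)[0] if "/" in rd else ""

Moves and deletes never take this path, which is why the commit message notes that a rescan (-e2dsa, optionally with --re-dirsz) is needed to bring the numbers back in line.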
6 changes: 3 additions & 3 deletions copyparty/util.py
@@ -331,7 +331,7 @@ def _add_mimes() -> None:

DEF_EXP = "self.ip self.ua self.uname self.host cfg.name cfg.logout vf.scan vf.thsize hdr.cf_ipcountry srv.itime srv.htime"

DEF_MTE = "circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"
DEF_MTE = ".files,circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"

DEF_MTH = ".vq,.aq,vc,ac,fmt,res,.fps"

@@ -482,8 +482,8 @@ def _sqlite_ver() -> str:


try:
_b64_enc_tl = bytes.maketrans(b'+/', b'-_')
_b64_dec_tl = bytes.maketrans(b'-_', b'+/')
_b64_enc_tl = bytes.maketrans(b"+/", b"-_")
_b64_dec_tl = bytes.maketrans(b"-_", b"+/")

def ub64enc(bs: bytes) -> bytes:
x = binascii.b2a_base64(bs, newline=False)
2 changes: 1 addition & 1 deletion tests/util.py
@@ -122,7 +122,7 @@ class Cfg(Namespace):
def __init__(self, a=None, v=None, c=None, **ka0):
ka = {}

ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand re_dirsz smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
ka.update(**{k: False for k in ex.split()})

ex = "dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"
