Skip to content

Commit 44c70ab

Browse files
committed
Add ghetto-purge.py
Script to purge files and directories from a backend GlusterFS brick, for cases where `rm -rf` fails on the FUSE mount. Use it only when you want a directory tree gone; it is not a repair tool.
1 parent 2ddf91a commit 44c70ab

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed

glusterfs/ghetto-purge.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
#!/usr/bin/env python3
2+
#
3+
# SPDX-License-Identifier: GPL-3.0-only
4+
#
5+
# ghetto-purge.py v0.0.1
6+
#
7+
# Read files and directories from an input file, one per line, and attempt to
8+
# remove them from the GlusterFS brick along with their .glusterfs links. This
9+
# is to work around split brain issues when trying to remove directories from
10+
# the FUSE mount, for example:
11+
#
12+
# - Directory not empty
13+
# - No such file or directory
14+
#
15+
# These are caused by various types of GlusterFS split brain.
16+
#
17+
# Requires Python 3.6+.
18+
19+
import argparse
20+
import os
21+
from pathlib import Path
22+
23+
24+
def main():
    """Purge every entry named in the input file from the brick.

    Reads one brick-relative path per line from ``args.input_file`` and, for
    each entry that exists under ``args.brick_path``, removes it together
    with its .glusterfs link. Directories are emptied recursively before
    being removed themselves. Symlinks named in the input file are skipped.

    Entries that are blank, or that normalize to the brick root or to a
    location outside the brick ("/", ".", "a/..", "../x"), are skipped so
    that a stray line can never purge the whole brick.
    """
    # Strip trailing slash from brick path
    brick_path = args.brick_path.rstrip("/")

    # Read paths from input file (could be files or directories). The context
    # manager guarantees the file is closed even if processing fails later.
    with args.input_file as input_file:
        lines_to_analyze = input_file.readlines()

    for line in lines_to_analyze:
        # Note that we have to remove \n from the line
        entry = line.strip()

        # A blank line would otherwise resolve to the brick root itself.
        if not entry:
            continue

        # Refuse anything that collapses to the brick root or climbs out of
        # the brick once "." and ".." components are resolved.
        normalized = os.path.normpath(entry.lstrip("/"))
        if normalized in (".", "..") or normalized.startswith("../"):
            print(f"Skipping unsafe entry: {entry!r}")
            continue

        path_to_analyze = f"{brick_path}/{entry}"
        candidate = Path(path_to_analyze)

        # To be safe, check for a symlink first: we only want to process
        # entries named in the input file and not follow them all over the
        # file system. Path.exists() and Path.is_dir() follow symlinks (and
        # exists() returns False for a broken link), so this test must come
        # before them.
        if candidate.is_symlink():
            print(f"Skipping symlink: {path_to_analyze}")
            continue

        if not candidate.exists():
            if args.debug:
                print(f"Does not exist on this brick: {path_to_analyze}")
            continue

        if candidate.is_dir():
            if args.debug:
                print(f"Descend into: {path_to_analyze}")

            # This will recursively handle all child directories and files,
            # so by the time we get back here we can simply remove the
            # original directory safely.
            descend_into_directory(brick_path, path_to_analyze)

            if args.debug:
                print(f"Process directory: {path_to_analyze}")

            process_directory(brick_path, path_to_analyze)
        elif candidate.is_file():
            process_file(brick_path, path_to_analyze)
70+
71+
72+
def descend_into_directory(brick_path, path):
    """Recursively purge everything inside ``path`` (but not ``path`` itself).

    Child directories are emptied depth-first and then removed with
    process_directory(); regular files are removed with process_file().
    The caller is responsible for removing ``path`` afterwards.

    Symlinks are never followed or removed: they are reported and skipped,
    matching how main() treats symlinks named in the input file. Note that a
    skipped symlink stays in its parent directory, so the later removal of
    that parent will fail because it is not empty.

    Args:
        brick_path: Root of the brick, without a trailing slash.
        path: Directory on the brick whose contents should be purged.
    """
    if args.debug:
        print(f"Descended into: {path}")

    with os.scandir(path) as list_of_entries:
        for entry in list_of_entries:
            # DirEntry.is_dir() and is_file() follow symlinks by default,
            # which would let the walk leave the tree we were asked to purge.
            if entry.is_symlink():
                print(f"Skipping symlink: {entry.path}")
                continue

            if entry.is_dir(follow_symlinks=False):
                if args.debug:
                    print(f"Descend into: {entry.path}")

                descend_into_directory(brick_path, entry.path)

                # After we return from descending into and removing child
                # dirs we should be able to remove the original dir as we go
                # back up. This is the same logic as in main(), but here it
                # is for dirs we encounter as we walk, whereas in main() we
                # are iterating over entries named in the input file.
                process_directory(brick_path, entry.path)

            elif entry.is_file(follow_symlinks=False):
                process_file(brick_path, entry.path)
93+
94+
95+
def process_directory(brick_path, path):
    """Remove one (already empty) directory and its .glusterfs symlink.

    The directory's ``trusted.gfid`` xattr is read first, because it is
    needed to locate the matching entry under .glusterfs. With
    ``args.dry_run`` set nothing is deleted, but the same messages are
    printed with a "(DRY RUN) " prefix.

    Args:
        brick_path: Root of the brick, without a trailing slash.
        path: Directory on the brick to remove.

    Raises:
        OSError: If the xattr cannot be read or a removal fails.
    """
    if args.debug:
        print(f"Processing directory: {path}")

    # Hex string form of the raw GFID bytes stored in the xattr
    gfid_hex = os.getxattr(path, "trusted.gfid").hex()
    gfid_link = dot_glusterfs_path(brick_path, gfid_hex)

    prefix = "(DRY RUN) " if args.dry_run else ""
    really_delete = not args.dry_run

    if Path(path).exists():
        if really_delete:
            os.rmdir(path)

        print(f"{prefix}Removed directory: {path}")

    # Directories inside the .glusterfs directory should always be symlinks,
    # and we remove them unconditionally. is_symlink() is used instead of
    # exists() because the latter follows the link, which may be broken now.
    if Path(gfid_link).is_symlink():
        if really_delete:
            os.unlink(gfid_link)

        print(f"{prefix}Removed directory symlink: {gfid_link}")
119+
120+
121+
def process_file(brick_path, path):
    """Remove one file and its .glusterfs hardlink from the brick.

    The file's ``trusted.gfid`` xattr is read first, because it is needed to
    locate the matching entry under .glusterfs. With ``args.dry_run`` set
    nothing is deleted, but the same messages are printed with a
    "(DRY RUN) " prefix.

    Args:
        brick_path: Root of the brick, without a trailing slash.
        path: File on the brick to remove.

    Raises:
        OSError: If the xattr cannot be read or a removal fails.
    """
    if args.debug:
        print(f"Processing file: {path}")

    # Hex string form of the raw GFID bytes stored in the xattr
    gfid_hex = os.getxattr(path, "trusted.gfid").hex()
    gfid_link = dot_glusterfs_path(brick_path, gfid_hex)

    prefix = "(DRY RUN) " if args.dry_run else ""
    really_delete = not args.dry_run

    # The file itself first ...
    if Path(path).exists():
        if really_delete:
            os.unlink(path)

        print(f"{prefix}Removed file: {path}")

    # ... then its counterpart under .glusterfs
    if Path(gfid_link).exists():
        if really_delete:
            os.unlink(gfid_link)

        print(f"{prefix}Removed file hardlink: {gfid_link}")
142+
143+
144+
def dot_glusterfs_path(brick_path, gfid):
    """Return the path of the .glusterfs entry for a GFID.

    The result has the form
    ``<brick_path>/.glusterfs/<aa>/<bb>/<8>-<4>-<4>-<4>-<rest>``, where
    ``aa`` and ``bb`` are the first and second pairs of characters of
    ``gfid`` and the final component is ``gfid`` split into dash-separated
    groups.

    Args:
        brick_path: Root of the brick, without a trailing slash.
        gfid: GFID as a hex string without dashes.
    """
    # Two levels of fan-out directories, named after the leading characters
    first_level = gfid[:2]
    second_level = gfid[2:4]

    # The GFID itself, regrouped with dashes
    dashed_gfid = f"{gfid[:8]}-{gfid[8:12]}-{gfid[12:16]}-{gfid[16:20]}-{gfid[20:]}"

    return f"{brick_path}/.glusterfs/{first_level}/{second_level}/{dashed_gfid}"
147+
148+
149+
# Command-line interface. The parsed result is stored in the module-level
# name `args`, which main() and the helper functions read directly.
parser = argparse.ArgumentParser(
    description="Purge files and directories from GlusterFS backend brick (along with their .glusterfs links)."
)
parser.add_argument("-b", "--brick-path", required=True, help="Path to brick.")
parser.add_argument("-d", "--debug", action="store_true", help="Print debug messages.")
parser.add_argument(
    "-i",
    "--input-file",
    required=True,
    type=argparse.FileType("r"),  # argparse opens the file for reading
    help="Path to input file.",
)
parser.add_argument(
    "-n", "--dry-run", action="store_true", help="Don't actually delete anything."
)
args = parser.parse_args()


if __name__ == "__main__":
    main()
182+
183+
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4

0 commit comments

Comments
 (0)