diff mbox series

[3/9] tools/scripts: split a mbox N ways

Message ID 20180221141716.10908-4-dja@axtens.net
State Changes Requested
Headers show
Series Tools and fixes for parallel parsing | expand

Checks

Context Check Description
dja/snowpatch-0_1_0 success master/apply_patch Successfully applied
dja/snowpatch-snowpatch_job_snowpatch-patchwork success Test snowpatch/job/snowpatch-patchwork on branch master

Commit Message

Daniel Axtens Feb. 21, 2018, 2:17 p.m. UTC
To test parallel loading of mail, it's handy to be able to split
an existing mbox file into N mbox files in an alternating pattern
(e.g. 1 2 1 2 or 1 2 3 4 1 2 3 4 etc)

Introduce tools/scripts as a place to put things like this.

Signed-off-by: Daniel Axtens <dja@axtens.net>
---
 tools/scripts/split_mail.py | 76 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100755 tools/scripts/split_mail.py

Comments

Andrew Donnellan Feb. 22, 2018, 3:28 a.m. UTC | #1
On 22/02/18 01:17, Daniel Axtens wrote:
> To test parallel loading of mail, it's handy to be able to split
> an existing mbox file into N mbox files in an alternating pattern
> (e.g. 1 2 1 2 or 1 2 3 4 1 2 3 4 etc)
> 
> Introduce tools/scripts as a place to put things like this.
> 
> Signed-off-by: Daniel Axtens <dja@axtens.net>

A few comments below.

Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

> ---
>   tools/scripts/split_mail.py | 76 +++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 76 insertions(+)
>   create mode 100755 tools/scripts/split_mail.py
> 
> diff --git a/tools/scripts/split_mail.py b/tools/scripts/split_mail.py
> new file mode 100755
> index 000000000000..ce71fe16c362
> --- /dev/null
> +++ b/tools/scripts/split_mail.py
> @@ -0,0 +1,76 @@
> +#!/usr/bin/python3
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2018 Daniel Axtens <dja@axtens.net>
> +#
> +# This file is part of the Patchwork package.
> +#
> +# Patchwork is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# Patchwork is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +
> +import sys
> +import os
> +import mailbox
> +
> +usage = """Split a maildir or mbox into N mboxes
> +in an alternating pattern
> +
> +Usage: ./split_mail.py <input> <mbox prefix> <N>
> +
> + <input>: input mbox file or Maildir
> + <mbox prefix>: output mbox
> +    <mbox-prefix>-1... must not exist
> + <N> N-way split"""
> +
> +
> +in_name = sys.argv[1]
> +out_name = sys.argv[2]

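You might want to check len(sys.argv) before indexing into it, so that a missing argument prints the usage text instead of raising an IndexError.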

> +
> +try:
> +    n = int(sys.argv[3])
> +except:
> +    print("N must be an integer.")
> +    print(" ")
> +    print(usage)
> +    exit(1)
> +
> +if n < 2:
> +    print("N must be be at least 2")
> +    print(" ")
> +    print(usage)
> +    exit(1)
> +
> +if not os.path.exists(in_name):
> +    print("No input at ", in_name)
> +    print(" ")
> +    print(usage)
> +    exit(1)
> +
> +print("Opening", in_name)
> +if os.path.isdir(in_name):
> +    inmail = mailbox.Maildir(in_name)
> +else:
> +    inmail = mailbox.mbox(in_name)
> +
> +out=[]

pep8: spacing - this should be "out = []", with spaces around the "=".

> +for i in range(n):
> +    if os.path.exists(out_name+"-"+str(i+1)):
> +        print("mbox already exists at ", out_name+"-"+str(i+1))
> +        print(" ")
> +        print(usage)
> +        exit(1)
> +
> +    out += [mailbox.mbox(out_name+'-'+str(i+1))]
> +
> +print("Copying messages")
> +
> +for (i, msg) in enumerate(inmail):
> +    out[i % n].add(msg)
> +
> +print("Done")
>
diff mbox series

Patch

diff --git a/tools/scripts/split_mail.py b/tools/scripts/split_mail.py
new file mode 100755
index 000000000000..ce71fe16c362
--- /dev/null
+++ b/tools/scripts/split_mail.py
@@ -0,0 +1,76 @@ 
+#!/usr/bin/python3
+# Patchwork - automated patch tracking system
+# Copyright (C) 2018 Daniel Axtens <dja@axtens.net>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+import sys
+import os
+import mailbox
+
+usage = """Split a maildir or mbox into N mboxes
+in an alternating pattern
+
+Usage: ./split_mail.py <input> <mbox prefix> <N>
+
+ <input>: input mbox file or Maildir
+ <mbox prefix>: output mbox
+    <mbox-prefix>-1... must not exist
+ <N> N-way split"""
+
+
+in_name = sys.argv[1]
+out_name = sys.argv[2]
+
+try:
+    n = int(sys.argv[3])
+except:
+    print("N must be an integer.")
+    print(" ")
+    print(usage)
+    exit(1)
+
+if n < 2:
+    print("N must be be at least 2")
+    print(" ")
+    print(usage)
+    exit(1)
+
+if not os.path.exists(in_name):
+    print("No input at ", in_name)
+    print(" ")
+    print(usage)
+    exit(1)
+
+print("Opening", in_name)
+if os.path.isdir(in_name):
+    inmail = mailbox.Maildir(in_name)
+else:
+    inmail = mailbox.mbox(in_name)
+
+out=[]
+for i in range(n):
+    if os.path.exists(out_name+"-"+str(i+1)):
+        print("mbox already exists at ", out_name+"-"+str(i+1))
+        print(" ")
+        print(usage)
+        exit(1)
+
+    out += [mailbox.mbox(out_name+'-'+str(i+1))]
+
+print("Copying messages")
+
+for (i, msg) in enumerate(inmail):
+    out[i % n].add(msg)
+
+print("Done")