From 46bcec2ea8b32912ff9fe126650e7e6ad2a86b8c Mon Sep 17 00:00:00 2001 From: Pawan Paudel Date: Sun, 19 Jul 2020 09:19:52 +0545 Subject: [PATCH] fixed nested key issues and added live links checking --- .gitignore | 13 +- Pipfile | 5 +- Pipfile.lock | 263 +++++++++++++++++++++++++++---- build/lib/m3u_parser/__init__.py | 1 + m3uParser.py | 248 ----------------------------- m3u_parser/__init__.py | 1 + m3u_parser/helper.py | 60 +++++++ m3u_parser/m3u_parser.py | 230 +++++++++++++++++++++++++++ setup.py | 115 ++++++++++++++ 9 files changed, 652 insertions(+), 284 deletions(-) create mode 100644 build/lib/m3u_parser/__init__.py delete mode 100644 m3uParser.py create mode 100644 m3u_parser/__init__.py create mode 100644 m3u_parser/helper.py create mode 100644 m3u_parser/m3u_parser.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index de06dde..d4ce05b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,15 @@ venv .vscode .idea -__pycache__/* \ No newline at end of file +__pycache__/* + +# Compiled python modules. +*.pyc + +# Setuptools distribution folder. +/dist/ + +# Python egg metadata, regenerated from source files by setuptools. +/*.egg-info + +build/* \ No newline at end of file diff --git a/Pipfile b/Pipfile index 52d0e3a..a83a13a 100644 --- a/Pipfile +++ b/Pipfile @@ -9,6 +9,9 @@ pylint = "*" [packages] pycountry = "*" +requests = "*" +aiohttp = "*" +asyncio = "*" [requires] -python_version = "3.8.2" +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock index 328bcce..19264e8 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "65d5ad2315f4d312e985d2ad1a0a69d73bc8532aca426d2d60479409ca4b6cd8" + "sha256": "add8afe80f54c120f354a373c824ae5a9419c491aafa6ae8b08056cabf1d2c9e" }, "pipfile-spec": 6, "requires": { - "python_version": "3.0" + "python_version": "3.6" }, "sources": [ { @@ -16,21 +16,159 @@ ] }, "default": { + "aiohttp": { + "hashes": [ + "sha256:1e984191d1ec186881ffaed4581092ba04f7c61582a177b187d3a2f07ed9719e", + "sha256:259ab809ff0727d0e834ac5e8a283dc5e3e0ecc30c4d80b3cd17a4139ce1f326", + "sha256:2f4d1a4fdce595c947162333353d4a44952a724fba9ca3205a3df99a33d1307a", + "sha256:32e5f3b7e511aa850829fbe5aa32eb455e5534eaa4b1ce93231d00e2f76e5654", + "sha256:344c780466b73095a72c616fac5ea9c4665add7fc129f285fbdbca3cccf4612a", + "sha256:460bd4237d2dbecc3b5ed57e122992f60188afe46e7319116da5eb8a9dfedba4", + "sha256:4c6efd824d44ae697814a2a85604d8e992b875462c6655da161ff18fd4f29f17", + "sha256:50aaad128e6ac62e7bf7bd1f0c0a24bc968a0c0590a726d5a955af193544bcec", + "sha256:6206a135d072f88da3e71cc501c59d5abffa9d0bb43269a6dcd28d66bfafdbdd", + "sha256:65f31b622af739a802ca6fd1a3076fd0ae523f8485c52924a89561ba10c49b48", + "sha256:ae55bac364c405caa23a4f2d6cfecc6a0daada500274ffca4a9230e7129eac59", + "sha256:b778ce0c909a2653741cb4b1ac7015b5c130ab9c897611df43ae6a58523cb965" + ], + "index": "pypi", + "version": "==3.6.2" + }, + "async-timeout": { + "hashes": [ + "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f", + "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3" + ], + "version": "==3.0.1" + }, + "asyncio": { + "hashes": [ + "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41", + "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de", + "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c", + "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d" + ], + "index": "pypi", + "version": "==3.4.3" + }, + "attrs": { + 
"hashes": [ + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + ], + "version": "==19.3.0" + }, + "certifi": { + "hashes": [ + "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", + "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" + ], + "version": "==2020.6.20" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "idna": { + "hashes": [ + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + ], + "version": "==2.10" + }, + "idna-ssl": { + "hashes": [ + "sha256:a933e3bb13da54383f9e8f35dc4f9cb9eb9b3b78c6b36f311254d6d0d92c6c7c" + ], + "markers": "python_version < '3.7'", + "version": "==1.1.0" + }, + "multidict": { + "hashes": [ + "sha256:1ece5a3369835c20ed57adadc663400b5525904e53bae59ec854a5d36b39b21a", + "sha256:275ca32383bc5d1894b6975bb4ca6a7ff16ab76fa622967625baeebcf8079000", + "sha256:3750f2205b800aac4bb03b5ae48025a64e474d2c6cc79547988ba1d4122a09e2", + "sha256:4538273208e7294b2659b1602490f4ed3ab1c8cf9dbdd817e0e9db8e64be2507", + "sha256:5141c13374e6b25fe6bf092052ab55c0c03d21bd66c94a0e3ae371d3e4d865a5", + "sha256:51a4d210404ac61d32dada00a50ea7ba412e6ea945bbe992e4d7a595276d2ec7", + "sha256:5cf311a0f5ef80fe73e4f4c0f0998ec08f954a6ec72b746f3c179e37de1d210d", + "sha256:6513728873f4326999429a8b00fc7ceddb2509b01d5fd3f3be7881a257b8d463", + "sha256:7388d2ef3c55a8ba80da62ecfafa06a1c097c18032a501ffd4cabbc52d7f2b19", + "sha256:9456e90649005ad40558f4cf51dbb842e32807df75146c6d940b6f5abb4a78f3", + "sha256:c026fe9a05130e44157b98fea3ab12969e5b60691a276150db9eda71710cd10b", + "sha256:d14842362ed4cf63751648e7672f7174c9818459d169231d03c56e84daf90b7c", + "sha256:e0d072ae0f2a179c375f67e3da300b47e1a83293c554450b29c900e50afaae87", + "sha256:f07acae137b71af3bb548bd8da720956a3bc9f9a0b87733e0899226a2317aeb7", + "sha256:fbb77a75e529021e7c4a8d4e823d88ef4d23674a202be4f5addffc72cbb91430", + "sha256:fcfbb44c59af3f8ea984de67ec7c306f618a3ec771c2843804069917a8f2e255", + "sha256:feed85993dbdb1dbc29102f50bca65bdc68f2c0c8d352468c25b54874f23c39d" + ], + "version": "==4.7.6" + }, "pycountry": { "hashes": [ - "sha256:3c57aa40adcf293d59bebaffbe60d8c39976fba78d846a018dc0c2ec9c6cb3cb" + "sha256:81084a53d3454344c0292deebc20fcd0a1488c136d4900312cbd465cf552cb42" ], "index": "pypi", - "version": "==19.8.18" + "version": "==20.7.3" + }, + "requests": { + "hashes": [ + "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", + "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" + ], + "index": "pypi", + "version": "==2.24.0" + }, + "typing-extensions": { + "hashes": [ + "sha256:6e95524d8a547a91e08f404ae485bbb71962de46967e1b71a0cb89af24e761c5", + "sha256:79ee589a3caca649a9bfd2a8de4709837400dfa00b6cc81962a1e6a1815969ae", + "sha256:f8d2bd89d25bc39dabe7d23df520442fa1d8969b82544370e03d88b5a591c392" + ], + "markers": "python_version < '3.7'", + "version": "==3.7.4.2" + }, + "urllib3": { + "hashes": [ + "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", + "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" + ], + "version": "==1.25.9" + }, + "yarl": { + "hashes": [ + "sha256:0c2ab325d33f1b824734b3ef51d4d54a54e0e7a23d13b86974507602334c2cce", + 
"sha256:0ca2f395591bbd85ddd50a82eb1fde9c1066fafe888c5c7cc1d810cf03fd3cc6", + "sha256:2098a4b4b9d75ee352807a95cdf5f10180db903bc5b7270715c6bbe2551f64ce", + "sha256:25e66e5e2007c7a39541ca13b559cd8ebc2ad8fe00ea94a2aad28a9b1e44e5ae", + "sha256:26d7c90cb04dee1665282a5d1a998defc1a9e012fdca0f33396f81508f49696d", + "sha256:308b98b0c8cd1dfef1a0311dc5e38ae8f9b58349226aa0533f15a16717ad702f", + "sha256:3ce3d4f7c6b69c4e4f0704b32eca8123b9c58ae91af740481aa57d7857b5e41b", + "sha256:58cd9c469eced558cd81aa3f484b2924e8897049e06889e8ff2510435b7ef74b", + "sha256:5b10eb0e7f044cf0b035112446b26a3a2946bca9d7d7edb5e54a2ad2f6652abb", + "sha256:6faa19d3824c21bcbfdfce5171e193c8b4ddafdf0ac3f129ccf0cdfcb083e462", + "sha256:944494be42fa630134bf907714d40207e646fd5a94423c90d5b514f7b0713fea", + "sha256:a161de7e50224e8e3de6e184707476b5a989037dcb24292b391a3d66ff158e70", + "sha256:a4844ebb2be14768f7994f2017f70aca39d658a96c786211be5ddbe1c68794c1", + "sha256:c2b509ac3d4b988ae8769901c66345425e361d518aecbe4acbfc2567e416626a", + "sha256:c9959d49a77b0e07559e579f38b2f3711c2b8716b8410b320bf9713013215a1b", + "sha256:d8cdee92bc930d8b09d8bd2043cedd544d9c8bd7436a77678dd602467a993080", + "sha256:e15199cdb423316e15f108f51249e44eb156ae5dba232cb73be555324a1d49c2" + ], + "version": "==1.4.2" } }, "develop": { "astroid": { "hashes": [ - "sha256:4c17cea3e592c21b6e222f673868961bad77e1f985cb1694ed077475a89229c1", - "sha256:d8506842a3faf734b81599c8b98dcc423de863adcc1999248480b18bd31a0f38" + "sha256:2f4078c2a41bf377eea06d71c9d2ba4eb8f6b1af2135bec27bbbb7d8f12bb703", + "sha256:bc58d83eb610252fd8de6363e39d4f1d0619c894b0ed24603b881c02e64c7386" ], - "version": "==2.4.1" + "version": "==2.4.2" }, "bleach": { "hashes": [ @@ -41,10 +179,10 @@ }, "certifi": { "hashes": [ - "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304", - "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519" + "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", + "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" ], - "version": "==2020.4.5.1" + "version": "==2020.6.20" }, "cffi": { "hashes": [ @@ -86,6 +224,13 @@ ], "version": "==3.0.4" }, + "colorama": { + "hashes": [ + "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff", + "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1" + ], + "version": "==0.4.3" + }, "cryptography": { "hashes": [ "sha256:091d31c42f444c6f519485ed528d8b451d1a0c7bf30e8ca583a0cac44b8a0df6", @@ -119,10 +264,18 @@ }, "idna": { "hashes": [ - "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb", - "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa" + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], - "version": "==2.9" + "version": "==2.10" + }, + "importlib-metadata": { + "hashes": [ + "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83", + "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070" + ], + "markers": "python_version < '3.8'", + "version": "==1.7.0" }, "isort": { "hashes": [ @@ -181,10 +334,10 @@ }, "packaging": { "hashes": [ - "sha256:3c292b474fda1671ec57d46d739d072bfd495a4f51ad01a055121d81e952b7a3", - "sha256:82f77b9bee21c1bafbf35a84905d604d5d1223801d639cf3ed140bd651c08752" + "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", + "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" ], - "version": 
"==20.3" + "version": "==20.4" }, "pkginfo": { "hashes": [ @@ -209,11 +362,11 @@ }, "pylint": { "hashes": [ - "sha256:b95e31850f3af163c2283ed40432f053acbc8fc6eba6a069cb518d9dbf71848c", - "sha256:dd506acce0427e9e08fb87274bcaa953d38b50a58207170dbf5b36cf3e16957b" + "sha256:7dd78437f2d8d019717dbf287772d0b2dbdfd13fc016aa7faa08d67bccc46adc", + "sha256:d0ece7d223fe422088b0e8f13fa0a1e8eb745ebffcb8ed53d3e95394b6101a1c" ], "index": "pypi", - "version": "==2.5.2" + "version": "==2.5.3" }, "pyparsing": { "hashes": [ @@ -231,10 +384,11 @@ }, "requests": { "hashes": [ - "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", - "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", + "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" ], - "version": "==2.23.0" + "index": "pypi", + "version": "==2.24.0" }, "requests-toolbelt": { "hashes": [ @@ -243,6 +397,13 @@ ], "version": "==0.9.1" }, + "rfc3986": { + "hashes": [ + "sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d", + "sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50" + ], + "version": "==1.4.0" + }, "secretstorage": { "hashes": [ "sha256:15da8a989b65498e29be338b3b279965f1b8f09b9668bd8010da183024c8bff6", @@ -253,32 +414,59 @@ }, "six": { "hashes": [ - "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", - "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c" + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" ], - "version": "==1.14.0" + "version": "==1.15.0" }, "toml": { "hashes": [ - "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", - "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e" + "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", + "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" ], - "version": "==0.10.0" + "version": "==0.10.1" }, "tqdm": { "hashes": [ - "sha256:4733c4a10d0f2a4d098d801464bdaf5240c7dadd2a7fde4ee93b0a0efd9fb25e", - "sha256:acdafb20f51637ca3954150d0405ff1a7edde0ff19e38fb99a80a66210d2a28f" + "sha256:6baa75a88582b1db6d34ce4690da5501d2a1cb65c34664840a456b2c9f794d29", + "sha256:fcb7cb5b729b60a27f300b15c1ffd4744f080fb483b88f31dc8654b082cc8ea5" ], - "version": "==4.46.0" + "version": "==4.48.0" }, "twine": { "hashes": [ - "sha256:c1af8ca391e43b0a06bbc155f7f67db0bf0d19d284bfc88d1675da497a946124", - "sha256:d561a5e511f70275e5a485a6275ff61851c16ffcb3a95a602189161112d9f160" + "sha256:34352fd52ec3b9d29837e6072d5a2a7c6fe4290e97bba46bb8d478b5c598f7ab", + "sha256:ba9ff477b8d6de0c89dd450e70b2185da190514e91c42cc62f96850025c10472" ], "index": "pypi", - "version": "==3.1.1" + "version": "==3.2.0" + }, + "typed-ast": { + "hashes": [ + "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355", + "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919", + "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa", + "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652", + "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75", + "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01", + "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d", + 
"sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1", + "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907", + "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c", + "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3", + "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b", + "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614", + "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb", + "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b", + "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41", + "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6", + "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34", + "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe", + "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4", + "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7" + ], + "markers": "implementation_name == 'cpython' and python_version < '3.8'", + "version": "==1.4.1" }, "urllib3": { "hashes": [ @@ -299,6 +487,13 @@ "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7" ], "version": "==1.12.1" + }, + "zipp": { + "hashes": [ + "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", + "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" + ], + "version": "==3.1.0" } } } diff --git a/build/lib/m3u_parser/__init__.py b/build/lib/m3u_parser/__init__.py new file mode 100644 index 0000000..b193d5f --- /dev/null +++ b/build/lib/m3u_parser/__init__.py @@ -0,0 +1 @@ +from .m3u_parser import M3uParser \ No newline at end of file diff --git a/m3uParser.py b/m3uParser.py deleted file mode 100644 index 03e7170..0000000 --- a/m3uParser.py +++ /dev/null @@ -1,248 +0,0 @@ -import csv -import json -import os -import re -import urllib3 -# import requests -from random import random -from urllib.parse import urlparse -import pycountry - - -def is_present(regex, content): - match = re.search(re.compile(regex, flags=re.IGNORECASE), content) - return match.group(1) if match else "" - - -class M3uParser: - - def __init__(self): - self.files = [] - self.lines = [] - self.content = "" - self.url_regex = re.compile(r"^(?:(?:https?|ftp)://)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(" - r"?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1," - r"3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){" - r"2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[" - r"a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[" - r"a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:/\S*)?$") - - # Download the file from the given url - def parse_m3u(self, url): - if urlparse(url).scheme != '' or re.search(self.url_regex, url): - try: - with urllib3.PoolManager() as http: - self.content = http.request('GET', url).data.decode('utf-8') - # self.content = requests.get(url).text - except: - print("Cannot read anything from the url!!!") - exit() - else: - try: - with open(url, errors='ignore') as fp: - self.content = fp.read() - except FileNotFoundError: - print("File doesn't exist!!!") - exit() - self.__read_m3u() - - # Read the file from the given path - def __read_m3u(self): - if self.__read_all_lines() > 0: - self.__parse_file() - else: - print("No content to parse!!!") - - # Read all file 
lines - def __read_all_lines(self): - self.lines = [line.strip('\n\r') for line in self.content.split("\n") if line.strip('\n\r') != ''] - return len(self.lines) - - def __parse_file(self): - num_line = len(self.lines) - for n in range(num_line): - line = self.lines[n] - if "#EXTINF" in line: - self.__manage_line(n) - - def __manage_line(self, n): - line_info = self.lines[n] - line_link = '' - lines_link = [] - try: - for i in [1,2]: - if self.lines[n+i] and re.search(self.url_regex, self.lines[n+i]): - lines_link.append(self.lines[n+i]) - break - line_link = lines_link[0] - except IndexError: - pass - if line_info and line_link: - try: - tvg_name = is_present(r"tvg-name=\"(.*?)\"", line_info) - tvg_id = is_present(r"tvg-id=\"(.*?)\"", line_info) - logo = is_present(r"tvg-logo=\"(.*?)\"", line_info) - group = is_present(r"group-title=\"(.*?)\"", line_info) - title = is_present("[,](?!.*[,])(.*?)$", line_info) - country = is_present(r"tvg-country=\"(.*?)\"", line_info) - language = is_present(r"tvg-language=\"(.*?)\"", line_info) - tvg_url = is_present(r"tvg-url=\"(.*?)\"", line_info) - country_obj = pycountry.countries.get(alpha_2=country.upper()) - language_obj = pycountry.languages.get(name=country.capitalize()) - country_name = country_obj.name if country_obj else '' - language_code = language_obj.alpha_3 if language_obj else '' - self.files.append({ - "name": title, - "logo": logo, - "url": line_link, - "category": group, - "language": { - "code": language_code, - "name": language, - }, - "country": { - "code": country, - "name": country_name - }, - "tvg": { - "id": tvg_id, - "name": tvg_name, - "url": tvg_url, - } - }) - except AttributeError: - pass - - def filter_by(self, key, filters, retrieve=True): - if not filters: - print("Filter word/s missing!!!") - return - if not isinstance(filters, list): - filters = [filters] - if retrieve: - self.files = list(filter( - lambda file: any([re.search(re.compile(fltr, flags=re.IGNORECASE), file[key]) for fltr in filters]), - self.files)) - else: - self.files = list(filter( - lambda file: any([not re.search(re.compile(fltr, flags=re.IGNORECASE), file[key]) for fltr in filters]), - self.files)) - - # Remove files with a certain file extension - def remove_by_extension(self, extension): - self.filter_by('url', extension, retrieve=False) - - # Select only files with a certain file extension - def retrieve_by_extension(self, extension): - self.filter_by('url', extension, retrieve=True) - - # Remove files that contains a certain filterWord - def remove_by_grpname(self, filter_word): - self.filter_by('category', filter_word, retrieve=False) - - # Select only files that contains a certain filterWord - def retrieve_by_grpname(self, filter_word): - self.filter_by('category', filter_word, retrieve=True) - - def sort_by(self, key, jsonify=False, asc=True): - self.files = sorted(self.files, key=lambda file: file[key], reverse=not asc) - - # Getter for the list - def get_json(self): - return json.dumps(self.files, indent=4) - - def get_dict(self): - return self.files - - # Return a random element - def get_file(self, random_shuffle): - if random_shuffle: - random.shuffle(self.files) - if not len(self.files): - print("No files in the array, cannot extract anything") - return None - return self.files.pop() - - def to_file(self, filename, format='json'): - format = filename.split('.')[-1] if len(filename.split('.')) > 1 else format - - def with_extension(name, ext): - name, ext = name.lower(), ext.lower() - if ext in name: - return name - else: - return name 
+ f".{ext}"
-
-        if format == 'json':
-            data = json.dumps(self.files, indent=4)
-            with open(with_extension(filename, format), 'w') as fp:
-                fp.write(data)
-
-        elif format == 'csv':
-            ndict_to_csv(self.files, with_extension(filename, format))
-        else:
-            print("Unrecognised format!!!")
-
-
-def is_dict(item, ans=None):
-    if ans is None:
-        ans = []
-    tree = []
-    for k, v in item.items():
-        if isinstance(v, dict):
-            ans.append(str(k))
-            tree.extend(is_dict(v, ans))
-            ans = []
-        else:
-            if ans:
-                ans.append(str(k))
-                key = ','.join(ans).replace(',', '_')
-                tree.extend([(key, str(v))])
-                ans.remove(str(k))
-            else:
-                tree.extend([(str(k), str(v))])
-    return tree
-
-
-def get_tree(item):
-    tree = []
-    if isinstance(item, dict):
-        tree.extend(is_dict(item, ans=[]))
-    elif isinstance(item, list):
-        tree = []
-        for i in item:
-            tree.append(get_tree(i))
-    return tree
-
-
-def render_csv(header, data, out_path='output.csv'):
-    input = []
-    with open(out_path, 'w') as f:
-        dict_writer = csv.DictWriter(f, fieldnames=header)
-        dict_writer.writeheader()
-        if not isinstance(data[0], list):
-            input.append(dict(data))
-        else:
-            for i in data:
-                input.append(dict(i))
-        dict_writer.writerows(input)
-    return
-
-
-def ndict_to_csv(obj, output_path):
-    tree = get_tree(obj)
-    if isinstance(obj, list):
-        header = [i[0] for i in tree[0]]
-    else:
-        header = [i[0] for i in tree]
-    return render_csv(header, tree, output_path)
-
-if __name__ == "__main__":
-    myFile = M3uParser()
-    url = "https://pastebin.com/raw/jbqA0j82"
-    myFile.parse_m3u(url)
-    # myFile.remove_by_extension('m3u8')
-    # myFile.remove_by_grpname('Zimbabwe')
-    # myFile.filter_by('tvg-language', 'Hungarian', retrieve=False)
-    print(len(myFile.get_dict()))
-    # myFile.to_file('pawan.csv')
diff --git a/m3u_parser/__init__.py b/m3u_parser/__init__.py
new file mode 100644
index 0000000..b193d5f
--- /dev/null
+++ b/m3u_parser/__init__.py
@@ -0,0 +1 @@
+from .m3u_parser import M3uParser
\ No newline at end of file
diff --git a/m3u_parser/helper.py b/m3u_parser/helper.py
new file mode 100644
index 0000000..e717a54
--- /dev/null
+++ b/m3u_parser/helper.py
@@ -0,0 +1,61 @@
+import csv
+import re
+
+# check if the regex is present or not
+def is_present(regex, content):
+    match = re.search(re.compile(regex, flags=re.IGNORECASE), content)
+    return match.group(1) if match else ""
+
+
+def is_dict(item, ans=None):
+    if ans is None:
+        ans = []
+    tree = []
+    for k, v in item.items():
+        if isinstance(v, dict):
+            ans.append(str(k))
+            tree.extend(is_dict(v, ans))
+            ans = []
+        else:
+            if ans:
+                ans.append(str(k))
+                key = ','.join(ans).replace(',', '_')
+                tree.extend([(key, str(v))])
+                ans.remove(str(k))
+            else:
+                tree.extend([(str(k), str(v))])
+    return tree
+
+
+def get_tree(item):
+    tree = []
+    if isinstance(item, dict):
+        tree.extend(is_dict(item, ans=[]))
+    elif isinstance(item, list):
+        tree = []
+        for i in item:
+            tree.append(get_tree(i))
+    return tree
+
+
+def render_csv(header, data, out_path='output.csv'):
+    input = []
+    with open(out_path, 'w') as f:
+        dict_writer = csv.DictWriter(f, fieldnames=header)
+        dict_writer.writeheader()
+        if not isinstance(data[0], list):
+            input.append(dict(data))
+        else:
+            for i in data:
+                input.append(dict(i))
+        dict_writer.writerows(input)
+    return
+
+# convert nested dictionary to csv
+def ndict_to_csv(obj, output_path):
+    tree = get_tree(obj)
+    if isinstance(obj, list):
+        header = [i[0] for i in tree[0]]
+    else:
+        header = [i[0] for i in tree]
+    return render_csv(header, tree, output_path)
\ No newline at end of file
diff --git a/m3u_parser/m3u_parser.py b/m3u_parser/m3u_parser.py
new file mode 100644
index 0000000..b6cde0e
--- /dev/null
+++ b/m3u_parser/m3u_parser.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+import asyncio
+import csv
+import json
+import logging
+import random
+import re
+import sys
+import aiohttp
+import pycountry
+import requests
+from urllib.parse import urlparse
+
+try:
+    from .helper import is_present, ndict_to_csv
+except ImportError:
+    from helper import is_present, ndict_to_csv
+
+logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s: %(message)s")
+
+
+class M3uParser:
+    def __init__(self, useragent, timeout=5):
+        self.streams_info = []
+        self.lines = []
+        self.timeout = timeout
+        self.headers = {
+            'User-Agent': useragent
+        }
+        self.check_live = False
+        self.content = ""
+        self.url_regex = re.compile(r"^(?:(?:https?|ftp)://)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!("
+                                    r"?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,"
+                                    r"3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){"
+                                    r"2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*["
+                                    r"a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*["
+                                    r"a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:/\S*)?$")
+
+    # Download the file from the given url or use the local file path to get the content
+    def parse_m3u(self, url, check_live=False):
+        self.check_live = check_live
+        if urlparse(url).scheme != '' or re.search(self.url_regex, url):
+            try:
+                self.content = requests.get(url).text
+            except:
+                logging.info("Cannot read anything from the url!!!")
+                exit()
+        else:
+            try:
+                with open(url, errors='ignore') as fp:
+                    self.content = fp.read()
+            except FileNotFoundError:
+                logging.info("File doesn't exist!!!")
+                exit()
+        # splitting contents into lines to parse them
+        self.lines = [line.strip('\n\r') for line in self.content.split("\n") if line.strip('\n\r') != '']
+        if len(self.lines) > 0:
+            self.__parse_lines()
+        else:
+            logging.info("No content to parse!!!")
+
+    # parse each line and extract the streams information
+    def __parse_lines(self):
+        num_lines = len(self.lines)
+        try:
+            loop = asyncio.get_event_loop()
+            f = asyncio.wait(
+                [self.__parse_line(line_num) for line_num in range(num_lines) if "#EXTINF" in self.lines[line_num]], return_when=asyncio.ALL_COMPLETED)
+            loop.run_until_complete(f)
+        except:
+            pass
+        finally:
+            loop.close()
+
+    async def __parse_line(self, line_num):
+        line_info = self.lines[line_num]
+        stream_link = ''
+        streams_link = []
+        try:
+            for i in [1, 2]:
+                if self.lines[line_num + i] and re.search(self.url_regex, self.lines[line_num + i]):
+                    streams_link.append(self.lines[line_num + i])
+                    break
+            stream_link = streams_link[0]
+        except IndexError:
+            pass
+        if line_info and stream_link:
+            try:
+                tvg_name = is_present(r"tvg-name=\"(.*?)\"", line_info)
+                tvg_id = is_present(r"tvg-id=\"(.*?)\"", line_info)
+                logo = is_present(r"tvg-logo=\"(.*?)\"", line_info)
+                group = is_present(r"group-title=\"(.*?)\"", line_info)
+                title = is_present("[,](?!.*[,])(.*?)$", line_info)
+                country = is_present(r"tvg-country=\"(.*?)\"", line_info)
+                language = is_present(r"tvg-language=\"(.*?)\"", line_info)
+                tvg_url = is_present(r"tvg-url=\"(.*?)\"", line_info)
+                country_obj = pycountry.countries.get(alpha_2=country.upper())
+                language_obj = pycountry.languages.get(name=language.capitalize())
+                country_name = country_obj.name if country_obj else ''
+                language_code = language_obj.alpha_3 if language_obj else ''
+
+                timeout = aiohttp.ClientTimeout(total=self.timeout)
+                status = 'BAD'
+                if self.check_live:
+                    try:
+                        async with aiohttp.ClientSession() as session:
+                            async with session.request('get', stream_link, headers=self.headers,
+                                                       timeout=timeout) as response:
+                                if response.status == 200:
+                                    status = 'GOOD'
+                    except:
+                        pass
+                temp = {
+                    "name": title,
+                    "logo": logo,
+                    "url": stream_link,
+                    "category": group,
+                    "language": {
+                        "code": language_code,
+                        "name": language,
+                    },
+                    "country": {
+                        "code": country,
+                        "name": country_name
+                    },
+                    "tvg": {
+                        "id": tvg_id,
+                        "name": tvg_name,
+                        "url": tvg_url,
+                    }
+                }
+                if self.check_live:
+                    temp['status'] = status
+                self.streams_info.append(temp)
+            except AttributeError:
+                pass
+
+    def filter_by(self, key, filters, retrieve=True, nested_key=False):
+        key_0, key_1 = ['']*2
+        if nested_key:
+            key_0, key_1 = key.split('-')
+        if not filters:
+            logging.info("Filter word/s missing!!!")
+            return []
+        if not isinstance(filters, list):
+            filters = [filters]
+        if retrieve:
+            self.streams_info = list(filter(
+                lambda file: any(
+                    [re.search(re.compile(fltr, flags=re.IGNORECASE), file[key_0][key_1] if nested_key else file[key])
+                     for fltr in filters]),
+                self.streams_info))
+        else:
+            self.streams_info = list(filter(
+                lambda file: any([not re.search(re.compile(fltr, flags=re.IGNORECASE),
+                                                file[key_0][key_1] if nested_key else file[key]) for fltr in filters]),
+                self.streams_info))
+
+    # Remove streams_info with a certain file extension
+    def remove_by_extension(self, extension):
+        self.filter_by('url', extension, retrieve=False)
+
+    # Select only streams_info with a certain file extension
+    def retrieve_by_extension(self, extension):
+        self.filter_by('url', extension, retrieve=True)
+
+    # Remove streams_info that contains a certain filter word
+    def remove_by_grpname(self, filter_word):
+        self.filter_by('category', filter_word, retrieve=False)
+
+    # Retrieve only streams_info that contains a certain filter word
+    def retrieve_by_grpname(self, filter_word):
+        self.filter_by('category', filter_word, retrieve=True)
+
+    # sort the streams_info
+    def sort_by(self, key, asc=True, nested_key=False):
+        key_0, key_1 = ['']*2
+        if nested_key:
+            key_0, key_1 = key.split('-')
+        self.streams_info = sorted(self.streams_info, key=lambda file: file[key_0][key_1] if nested_key else file[key],
+                                   reverse=not asc)
+
+    # Get the streams info as json
+    def get_json(self):
+        return json.dumps(self.streams_info, indent=4)
+
+    # Get the streams info as dict
+    def get_dict(self):
+        return self.streams_info
+
+    # Return a random stream information
+    def get_random_stream(self, random_shuffle=True):
+        if not len(self.streams_info):
+            logging.info("No streams information so could not get any random stream.")
+            return None
+        if random_shuffle: random.shuffle(self.streams_info)
+        return random.choice(self.streams_info)
+
+    # save to file (CSV or JSON)
+    def to_file(self, filename, format='json'):
+        format = filename.split('.')[-1] if len(filename.split('.')) > 1 else format
+
+        def with_extension(name, ext):
+            name, ext = name.lower(), ext.lower()
+            if ext in name:
+                return name
+            else:
+                return name + f".{ext}"
+
+        if format == 'json':
+            data = json.dumps(self.streams_info, indent=4)
+            with open(with_extension(filename, format), 'w') as fp:
+                fp.write(data)
+
+        elif format == 'csv':
+            ndict_to_csv(self.streams_info, with_extension(filename, format))
+        else:
+            logging.info("Unrecognised format!!!")
+
+
+if __name__ == "__main__":
+    url = "https://iptv-org.github.io/iptv/categories/music.m3u"
+    timeout = 5
+    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"
+    m3u_playlist = M3uParser(timeout=timeout, useragent=useragent)
+    m3u_playlist.parse_m3u(url, check_live=True)
+    m3u_playlist.remove_by_extension('m3u8')
+    m3u_playlist.remove_by_grpname('Zimbabwe')
+    m3u_playlist.filter_by('language-name', 'Hungarian', retrieve=False, nested_key=True)
+    m3u_playlist.filter_by('status', 'GOOD')
+    print(len(m3u_playlist.get_dict()))
+    m3u_playlist.to_file('pawan.json')
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..68a2cd7
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,115 @@
+import io
+import os
+import sys
+from shutil import rmtree
+
+from setuptools import find_packages, setup, Command
+
+# Package meta-data.
+NAME = 'm3u_parser'
+DESCRIPTION = 'A useful module for parsing m3u files or links'
+URL = 'https://github.com/pawanpaudel93/m3u_parser'
+EMAIL = 'pawanpaudel93@gmail.com'
+AUTHOR = 'Pawan Paudel'
+REQUIRES_PYTHON = '>=3.6'
+VERSION = '0.1.0'
+
+REQUIRED = [
+    'requests', 'asyncio', 'aiohttp', 'pycountry'
+]
+
+EXTRAS = {
+    # 'fancy feature': ['django'],
+}
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+# Import the README and use it as the long-description.
+# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
+try:
+    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
+        long_description = '\n' + f.read()
+except FileNotFoundError:
+    long_description = DESCRIPTION
+
+# Load the package's __version__.py module as a dictionary.
+about = {}
+if not VERSION:
+    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
+    with open(os.path.join(here, project_slug, '__version__.py')) as f:
+        exec(f.read(), about)
+else:
+    about['__version__'] = VERSION
+
+
+class UploadCommand(Command):
+    """Support setup.py upload."""
+
+    description = 'Build and publish the package.'
+    user_options = []
+
+    @staticmethod
+    def status(s):
+        """Prints things in bold."""
+        print('\033[1m{0}\033[0m'.format(s))
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        try:
+            self.status('Removing previous builds…')
+            rmtree(os.path.join(here, 'dist'))
+        except OSError:
+            pass
+
+        self.status('Building Source and Wheel (universal) distribution…')
+        os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
+
+        self.status('Uploading the package to PyPI via Twine…')
+        os.system('twine upload dist/*')
+
+        self.status('Pushing git tags…')
+        os.system('git tag v{0}'.format(about['__version__']))
+        os.system('git push --tags')
+
+        sys.exit()
+
+
+# Where the magic happens:
+setup(
+    name=NAME,
+    version=about['__version__'],
+    description=DESCRIPTION,
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    author=AUTHOR,
+    author_email=EMAIL,
+    python_requires=REQUIRES_PYTHON,
+    url=URL,
+    py_modules=['m3u_parser'],
+
+    install_requires=REQUIRED,
+    extras_require=EXTRAS,
+    packages=["m3u_parser"],
+    include_package_data=True,
+    license='ISC',
+    classifiers=[
+        # Trove classifiers
+        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
+        'License :: OSI Approved :: ISC License (ISCL)',
+        'Development Status :: 3 - Alpha',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        "Operating System :: OS Independent"
+    ],
+    # $ setup.py publish support.
+    cmdclass={
+        'upload': UploadCommand,
+    },
+)
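
For reference, a minimal usage sketch of the parser API added in this patch (the playlist URL is the one used in the module's own __main__ block; the filter values and output filename are only examples):

    from m3u_parser import M3uParser

    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"
    parser = M3uParser(useragent=useragent, timeout=5)
    # Fetch and parse the playlist; check_live probes each stream URL and marks it GOOD or BAD
    parser.parse_m3u("https://iptv-org.github.io/iptv/categories/music.m3u", check_live=True)
    # Nested keys use a dash: "language-name" filters on entry["language"]["name"]
    parser.filter_by("language-name", "English", retrieve=True, nested_key=True)
    # Keep only streams that responded with HTTP 200 during the live check
    parser.filter_by("status", "GOOD")
    parser.sort_by("category")
    print(parser.get_random_stream())
    parser.to_file("streams.csv")  # example filename; nested fields are flattened into CSV columns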