2014-03-14 13:15:02 -04:00
|
|
|
#!/usr/bin/env python
|
2014-03-10 01:18:05 -04:00
|
|
|
"""
|
|
|
|
Script which takes one or more file paths and reports on their detected
|
|
|
|
encodings
|
|
|
|
|
|
|
|
Example::
|
|
|
|
|
|
|
|
% chardetect somefile someotherfile
|
|
|
|
somefile: windows-1252 with confidence 0.5
|
|
|
|
someotherfile: ascii with confidence 1.0
|
|
|
|
|
|
|
|
If no paths are provided, it takes its input from stdin.
|
|
|
|
|
|
|
|
"""
|
|
|
|
from io import open
|
|
|
|
from sys import argv, stdin
|
|
|
|
|
|
|
|
from chardet.universaldetector import UniversalDetector
|
|
|
|
|
|
|
|
|
|
|
|
def description_of(file, name='stdin'):
|
|
|
|
"""Return a string describing the probable encoding of a file."""
|
|
|
|
u = UniversalDetector()
|
|
|
|
for line in file:
|
|
|
|
u.feed(line)
|
|
|
|
u.close()
|
|
|
|
result = u.result
|
|
|
|
if result['encoding']:
|
|
|
|
return '%s: %s with confidence %s' % (name,
|
|
|
|
result['encoding'],
|
|
|
|
result['confidence'])
|
|
|
|
else:
|
|
|
|
return '%s: no result' % name
|
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
if len(argv) <= 1:
|
|
|
|
print(description_of(stdin))
|
|
|
|
else:
|
|
|
|
for path in argv[1:]:
|
|
|
|
with open(path, 'rb') as f:
|
|
|
|
print(description_of(f, path))
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
main()
|