mirror of
https://github.com/moparisthebest/Simba
synced 2024-11-24 18:22:25 -05:00
Initial MML Documentation work.
This commit is contained in:
parent
e60a542c45
commit
b9fe547613
45
Doc/Makefile
45
Doc/Makefile
@ -1,39 +1,16 @@
|
|||||||
|
.PHONY: default clean tarball
|
||||||
|
|
||||||
.PHONY: default clean intro psbook html all
|
core = client ocr
|
||||||
|
|
||||||
intro_ := mufasa_intro
|
all:
|
||||||
psbook_ := mufasa_ps_handbook
|
@python docgen.py `echo $(core) | sed -r \
|
||||||
book_ := mufasa_handbook
|
's/(\w+)/..\/Units\/MMLCore\/\1.pas/g'` > /dev/null
|
||||||
dev_ := mufasa_developers
|
@make -C sphinx/ html
|
||||||
|
|
||||||
default: tex
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rvf $(intro_)
|
@rm -f `echo $(core) | sed -r 's/(\w+)/sphinx\/\1.rst/g'`
|
||||||
rm -rvf $(psbook_)
|
@make -C sphinx/ clean
|
||||||
rm -rvf $(book_)
|
|
||||||
rm -rvf $(dev_)
|
|
||||||
find -iname "$(intro_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
|
||||||
find -iname "$(psbook_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
|
||||||
find -iname "$(book_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
|
||||||
find -iname "$(dev_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
|
||||||
$(MAKE) -C Pics/ clean
|
|
||||||
|
|
||||||
tex:
|
tarball:
|
||||||
$(MAKE) -C Pics/
|
@$(MAKE) all
|
||||||
texi2pdf $(intro_).tex #--silent
|
@tar cjf doc.tar.bz2 -C sphinx/_build/ html
|
||||||
texi2pdf $(psbook_).tex #--silent
|
|
||||||
texi2pdf $(book_).tex #--silent
|
|
||||||
texi2pdf $(dev_).tex #--silent
|
|
||||||
|
|
||||||
|
|
||||||
html:
|
|
||||||
$(MAKE) -C Pics/
|
|
||||||
latex2html $(intro_).tex -local_icons -nofootnode
|
|
||||||
latex2html $(psbook_).tex -local_icons -nofootnode
|
|
||||||
latex2html $(book_).tex -local_icons -nofootnode
|
|
||||||
latex2html $(dev_).tex -local_icons -nofootnode
|
|
||||||
|
|
||||||
sphinx:
|
|
||||||
$(MAKE) -C Pics/
|
|
||||||
$(MAKE) html -C sphinx
|
|
||||||
|
39
Doc/Makefile_outdated
Normal file
39
Doc/Makefile_outdated
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
.PHONY: default clean intro psbook html all
|
||||||
|
|
||||||
|
intro_ := mufasa_intro
|
||||||
|
psbook_ := mufasa_ps_handbook
|
||||||
|
book_ := mufasa_handbook
|
||||||
|
dev_ := mufasa_developers
|
||||||
|
|
||||||
|
default: tex
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rvf $(intro_)
|
||||||
|
rm -rvf $(psbook_)
|
||||||
|
rm -rvf $(book_)
|
||||||
|
rm -rvf $(dev_)
|
||||||
|
find -iname "$(intro_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
||||||
|
find -iname "$(psbook_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
||||||
|
find -iname "$(book_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
||||||
|
find -iname "$(dev_)*" | grep -v svn | grep -v tex | xargs rm -vf
|
||||||
|
$(MAKE) -C Pics/ clean
|
||||||
|
|
||||||
|
tex:
|
||||||
|
$(MAKE) -C Pics/
|
||||||
|
texi2pdf $(intro_).tex #--silent
|
||||||
|
texi2pdf $(psbook_).tex #--silent
|
||||||
|
texi2pdf $(book_).tex #--silent
|
||||||
|
texi2pdf $(dev_).tex #--silent
|
||||||
|
|
||||||
|
|
||||||
|
html:
|
||||||
|
$(MAKE) -C Pics/
|
||||||
|
latex2html $(intro_).tex -local_icons -nofootnode
|
||||||
|
latex2html $(psbook_).tex -local_icons -nofootnode
|
||||||
|
latex2html $(book_).tex -local_icons -nofootnode
|
||||||
|
latex2html $(dev_).tex -local_icons -nofootnode
|
||||||
|
|
||||||
|
sphinx:
|
||||||
|
$(MAKE) -C Pics/
|
||||||
|
$(MAKE) html -C sphinx
|
22
Doc/docgen.py
Normal file
22
Doc/docgen.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
"""Extract ``(* ... *)`` documentation comments from Pascal units.

Usage: python docgen.py UNIT.pas [UNIT.pas ...]

For every unit named on the command line, the text inside each
``(* ... *)`` comment is written, in order of appearance, to
``sphinx/mmlref/<unit>.rst``.
"""
import os.path
import re
from sys import argv

# Non-greedy so that each comment is matched separately; DOTALL lets a
# comment span several lines.
commentregex = re.compile(r'\(\*.+?\*\)', re.DOTALL)


def extract_docs(source):
    """Return the inner text of every ``(* ... *)`` comment in *source*.

    The two-character opening and closing delimiters are stripped; the
    text between them is returned unchanged.
    """
    return [comment[2:-2] for comment in commentregex.findall(source)]


def rst_basename(path):
    """Return the file name of *path* with everything from ``.pas`` removed.

    A name that does not contain ``.pas`` is returned whole (the previous
    code dropped its last character in that case).
    """
    name = os.path.basename(path)
    cut = name.rfind('.pas')
    if cut == -1:
        return name
    return name[:cut]


def main(paths):
    """Write one ``sphinx/mmlref/<unit>.rst`` file per path in *paths*."""
    for path in paths:
        # Progress output; single-argument form works on Python 2 and 3.
        print(path)
        with open(path) as unit:
            docs = extract_docs(unit.read())
        # The output file is created even when no comments were found.
        with open('sphinx/mmlref/%s.rst' % rst_basename(path), 'w') as out:
            out.write(''.join(docs))


if __name__ == '__main__':
    main(argv[1:])
|
@ -34,10 +34,31 @@ uses
|
|||||||
{$IFDEF MSWINDOWS} os_windows {$ENDIF}
|
{$IFDEF MSWINDOWS} os_windows {$ENDIF}
|
||||||
{$IFDEF LINUX} os_linux {$ENDIF};
|
{$IFDEF LINUX} os_linux {$ENDIF};
|
||||||
|
|
||||||
{
|
(*
|
||||||
TClient is a full-blown instance of the MML.
|
|
||||||
It binds all the components together.
|
Client Class
|
||||||
}
|
============
|
||||||
|
|
||||||
|
The ``TClient`` class is the class that glues all other MML classes together
|
||||||
|
into one usable class. Internally, quite some MML classes require other MML
|
||||||
|
classes, and they access these other classes through their "parent"
|
||||||
|
``TClient``
|
||||||
|
class.
|
||||||
|
|
||||||
|
An image tells more than a thousand words:
|
||||||
|
|
||||||
|
.. image:: ../../Pics/Client_Classes.png
|
||||||
|
|
||||||
|
|
||||||
|
And the class dependency graph: (An arrow indicates a dependency)
|
||||||
|
|
||||||
|
.. image:: ../../Pics/client_classes_dependencies.png
|
||||||
|
|
||||||
|
The client class does not do much else except creating the classes when it
|
||||||
|
is
|
||||||
|
created and destroying the classes when it is being destroyed.
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
type
|
type
|
||||||
|
|
||||||
@ -57,9 +78,31 @@ type
|
|||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
Properties:
|
||||||
|
|
||||||
|
- IOManager
|
||||||
|
- MFiles
|
||||||
|
- MFinder
|
||||||
|
- MBitmaps
|
||||||
|
- MDTMs
|
||||||
|
- MOCR
|
||||||
|
- WriteLnProc
|
||||||
|
*)
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
|
|
||||||
|
(*
|
||||||
|
|
||||||
|
TClient.WriteLn
|
||||||
|
~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
procedure TClient.WriteLn(s: string);
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
procedure TClient.WriteLn(s: string);
|
procedure TClient.WriteLn(s: string);
|
||||||
begin
|
begin
|
||||||
@ -69,6 +112,17 @@ begin
|
|||||||
mDebugLn(s);
|
mDebugLn(s);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
|
||||||
|
TClient.Create
|
||||||
|
~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
constructor TClient.Create(const plugin_dir: string = ''; const UseIOManager : TIOManager = nil);
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
// Possibly pass arguments to a default window.
|
// Possibly pass arguments to a default window.
|
||||||
constructor TClient.Create(const plugin_dir: string = ''; const UseIOManager : TIOManager = nil);
|
constructor TClient.Create(const plugin_dir: string = ''; const UseIOManager : TIOManager = nil);
|
||||||
begin
|
begin
|
||||||
@ -86,6 +140,17 @@ begin
|
|||||||
MOCR := TMOCR.Create(self);
|
MOCR := TMOCR.Create(self);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
|
||||||
|
TClient.Destroy
|
||||||
|
~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
destructor TClient.Destroy;
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
destructor TClient.Destroy;
|
destructor TClient.Destroy;
|
||||||
begin
|
begin
|
||||||
if FOwnIOManager then
|
if FOwnIOManager then
|
||||||
|
@ -33,6 +33,18 @@ uses
|
|||||||
graphtype, intfgraphics,graphics;
|
graphtype, intfgraphics,graphics;
|
||||||
{End To-Remove unit}
|
{End To-Remove unit}
|
||||||
|
|
||||||
|
(*
|
||||||
|
.. _mmlref-ocr:
|
||||||
|
|
||||||
|
TMOCR Class
|
||||||
|
===========
|
||||||
|
|
||||||
|
The TMOCR class uses the powerful ``ocrutil`` unit to create some default but
|
||||||
|
useful functions that can be used to create and identify text. It also contains
|
||||||
|
some functions used in special cases to filter noise. Specifically, these are
|
||||||
|
all the ``Filter*`` functions.
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
type
|
type
|
||||||
{ TMOCR }
|
{ TMOCR }
|
||||||
@ -131,10 +143,18 @@ begin
|
|||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{
|
(*
|
||||||
InitTOCR loads all fonts in path
|
InitTOCR
|
||||||
We don't do this in the constructor because we may not yet have the path.
|
~~~~~~~~
|
||||||
}
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.InitTOCR(const path: string): boolean;
|
||||||
|
|
||||||
|
InitTOCR loads all fonts in path
|
||||||
|
We don't do this in the constructor because we may not yet have the path.
|
||||||
|
|
||||||
|
*)
|
||||||
function TMOCR.InitTOCR(const path: string): boolean;
|
function TMOCR.InitTOCR(const path: string): boolean;
|
||||||
var
|
var
|
||||||
dirs: array of string;
|
dirs: array of string;
|
||||||
@ -188,6 +208,7 @@ begin
|
|||||||
Self.FFonts := NewFonts.Copy(Self.Client);
|
Self.FFonts := NewFonts.Copy(Self.Client);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
Filter UpText by a very rough colour comparison / range check.
|
Filter UpText by a very rough colour comparison / range check.
|
||||||
We first convert the colour to RGB, and if it falls into the following
|
We first convert the colour to RGB, and if it falls into the following
|
||||||
@ -208,6 +229,16 @@ end;
|
|||||||
We will match shadow as well; we need it later on.
|
We will match shadow as well; we need it later on.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*
|
||||||
|
FilterUpTextByColour
|
||||||
|
~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
procedure TMOCR.FilterUpTextByColour(bmp: TMufasaBitmap);
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
procedure TMOCR.FilterUpTextByColour(bmp: TMufasaBitmap);
|
procedure TMOCR.FilterUpTextByColour(bmp: TMufasaBitmap);
|
||||||
var
|
var
|
||||||
x, y,r, g, b: Integer;
|
x, y,r, g, b: Integer;
|
||||||
@ -351,6 +382,16 @@ end;
|
|||||||
We don't need to do this from the right bottom to left top.
|
We don't need to do this from the right bottom to left top.
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*
|
||||||
|
FilterUpTextByCharacteristics
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
procedure TMOCR.FilterUpTextByCharacteristics(bmp: TMufasaBitmap; w,h: integer);
|
||||||
|
*)
|
||||||
|
|
||||||
procedure TMOCR.FilterUpTextByCharacteristics(bmp: TMufasaBitmap; w,h: integer);
|
procedure TMOCR.FilterUpTextByCharacteristics(bmp: TMufasaBitmap; w,h: integer);
|
||||||
var
|
var
|
||||||
x,y: Integer;
|
x,y: Integer;
|
||||||
@ -457,7 +498,18 @@ begin
|
|||||||
setlength(result,c);
|
setlength(result,c);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ Remove anything but the shadows on the bitmap (Shadow = clPurple, remember?) }
|
|
||||||
|
(*
|
||||||
|
FilterShadowBitmap
|
||||||
|
~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
procedure TMOCR.FilterShadowBitmap(bmp: TMufasaBitmap);
|
||||||
|
|
||||||
|
Remove anything but the shadows on the bitmap (Shadow = clPurple)
|
||||||
|
*)
|
||||||
|
|
||||||
procedure TMOCR.FilterShadowBitmap(bmp: TMufasaBitmap);
|
procedure TMOCR.FilterShadowBitmap(bmp: TMufasaBitmap);
|
||||||
var
|
var
|
||||||
x,y:integer;
|
x,y:integer;
|
||||||
@ -473,13 +525,20 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{
|
(*
|
||||||
Remove all but uptext colours clWhite,clGreen, etc.
|
|
||||||
See constants above.
|
|
||||||
|
|
||||||
This assumes that the bitmap only consists of colour 0, and the other
|
FilterCharsBitmap
|
||||||
constants founds above the functionss
|
~~~~~~~~~~~~~~~~~
|
||||||
}
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
procedure TMOCR.FilterCharsBitmap(bmp: TMufasaBitmap);
|
||||||
|
|
||||||
|
Remove all but uptext colours clWhite,clGreen, etc.
|
||||||
|
|
||||||
|
This assumes that the bitmap only consists of colour 0, and the other
|
||||||
|
constants found above the functions
|
||||||
|
*)
|
||||||
procedure TMOCR.FilterCharsBitmap(bmp: TMufasaBitmap);
|
procedure TMOCR.FilterCharsBitmap(bmp: TMufasaBitmap);
|
||||||
var
|
var
|
||||||
x,y: integer;
|
x,y: integer;
|
||||||
@ -509,17 +568,27 @@ end;
|
|||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
This uses the two filters, and performs a split on the bitmap.
|
}
|
||||||
A split per character, that is. So we can more easily identify it.
|
(*
|
||||||
|
getTextPointsIn
|
||||||
|
~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
TODO:
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.getTextPointsIn(sx, sy, w, h: Integer; shadow: boolean;
|
||||||
|
var _chars, _shadows: T2DPointArray): Boolean;
|
||||||
|
|
||||||
|
This uses the two filters, and performs a split on the bitmap.
|
||||||
|
A split per character, that is. So we can more easily identify it.
|
||||||
|
|
||||||
|
TODO:
|
||||||
*
|
*
|
||||||
Remove more noise after we have split, it should be possible to identify
|
Remove more noise after we have split, it should be possible to identify
|
||||||
noise; weird positions or boxes compared to the rest, etc.
|
noise; weird positions or boxes compared to the rest, etc.
|
||||||
*
|
*
|
||||||
Split each colour separately, and combine only later, after removing noise.
|
Split each colour separately, and combine only later, after removing noise.
|
||||||
|
*)
|
||||||
|
|
||||||
}
|
|
||||||
function TMOCR.getTextPointsIn(sx, sy, w, h: Integer; shadow: boolean;
|
function TMOCR.getTextPointsIn(sx, sy, w, h: Integer; shadow: boolean;
|
||||||
var _chars, _shadows: T2DPointArray): Boolean;
|
var _chars, _shadows: T2DPointArray): Boolean;
|
||||||
var
|
var
|
||||||
@ -685,14 +754,20 @@ begin
|
|||||||
Result := true;
|
Result := true;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{
|
(*
|
||||||
GetUpTextAtEx combines/uses the functions above.
|
GetUpTextAtEx
|
||||||
|
~~~~~~~~~~~~~
|
||||||
|
|
||||||
It will identify each character, and also keep track of the previous
|
.. code-block:: pascal
|
||||||
chars' final `x' bounds. If the difference between the .x2 of the previous
|
|
||||||
character and the .x1 of the current character is bigger than 5, then there
|
function TMOCR.GetUpTextAtEx(atX, atY: integer; shadow: boolean): string;
|
||||||
was a space between them. (Add ' ' to result)
|
|
||||||
}
|
|
||||||
|
GetUpTextAtEx will identify each character, and also keep track of the previous
|
||||||
|
chars' final *x* bounds. If the difference between the .x2 of the previous
|
||||||
|
character and the .x1 of the current character is bigger than 5, then there
|
||||||
|
was a space between them. (Add ' ' to result)
|
||||||
|
*)
|
||||||
|
|
||||||
function TMOCR.GetUpTextAtEx(atX, atY: integer; shadow: boolean): string;
|
function TMOCR.GetUpTextAtEx(atX, atY: integer; shadow: boolean): string;
|
||||||
var
|
var
|
||||||
@ -763,6 +838,17 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
GetUpTextAt
|
||||||
|
~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.GetUpTextAt(atX, atY: integer; shadow: boolean): string;
|
||||||
|
|
||||||
|
Retrieves the (special) uptext.
|
||||||
|
|
||||||
|
*)
|
||||||
function TMOCR.GetUpTextAt(atX, atY: integer; shadow: boolean): string;
|
function TMOCR.GetUpTextAt(atX, atY: integer; shadow: boolean): string;
|
||||||
|
|
||||||
begin
|
begin
|
||||||
@ -772,6 +858,18 @@ begin
|
|||||||
result := GetUpTextAtEx(atX, atY, false);
|
result := GetUpTextAtEx(atX, atY, false);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
GetTextATPA
|
||||||
|
~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.GetTextATPA(const ATPA : T2DPointArray;const maxvspacing : integer; font: string): string;
|
||||||
|
|
||||||
|
Returns the text defined by the ATPA. Each TPA represents one character,
|
||||||
|
approximately.
|
||||||
|
*)
|
||||||
|
|
||||||
function TMOCR.GetTextATPA(const ATPA : T2DPointArray;const maxvspacing : integer; font: string): string;
|
function TMOCR.GetTextATPA(const ATPA : T2DPointArray;const maxvspacing : integer; font: string): string;
|
||||||
var
|
var
|
||||||
b, lb: TBox;
|
b, lb: TBox;
|
||||||
@ -831,6 +929,17 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
GetTextAt
|
||||||
|
~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.GetTextAt(xs, ys, xe,ye, minvspacing, maxvspacing, hspacing,
|
||||||
|
color, tol: integer; font: string): string;
|
||||||
|
|
||||||
|
General text-finding function.
|
||||||
|
*)
|
||||||
function TMOCR.GetTextAt(xs, ys, xe,ye, minvspacing, maxvspacing, hspacing,
|
function TMOCR.GetTextAt(xs, ys, xe,ye, minvspacing, maxvspacing, hspacing,
|
||||||
color, tol: integer; font: string): string;
|
color, tol: integer; font: string): string;
|
||||||
var
|
var
|
||||||
@ -851,6 +960,17 @@ begin;
|
|||||||
result := gettextatpa(STPA,maxvspacing,font);
|
result := gettextatpa(STPA,maxvspacing,font);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
GetTextAt (2)
|
||||||
|
~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.GetTextAt(atX, atY, minvspacing, maxvspacing, hspacing,
|
||||||
|
color, tol, len: integer; font: string): string;
|
||||||
|
|
||||||
|
General text-finding function. Different parameters than other GetTextAt.
|
||||||
|
*)
|
||||||
function TMOCR.GetTextAt(atX, atY, minvspacing, maxvspacing, hspacing,
|
function TMOCR.GetTextAt(atX, atY, minvspacing, maxvspacing, hspacing,
|
||||||
color, tol, len: integer; font: string): string;
|
color, tol, len: integer; font: string): string;
|
||||||
var
|
var
|
||||||
@ -873,6 +993,19 @@ begin
|
|||||||
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
TextToFontTPA
|
||||||
|
~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.TextToFontTPA(Text, font: String; out w, h: integer): TPointArray;
|
||||||
|
|
||||||
|
Returns a TPA of a specific *Text* of the specified *Font*.
|
||||||
|
|
||||||
|
*)
|
||||||
|
|
||||||
|
|
||||||
function TMOCR.TextToFontTPA(Text, font: String; out w, h: integer): TPointArray;
|
function TMOCR.TextToFontTPA(Text, font: String; out w, h: integer): TPointArray;
|
||||||
|
|
||||||
var
|
var
|
||||||
@ -918,6 +1051,17 @@ begin
|
|||||||
{ writeln('C: ' + inttostr(c)); }
|
{ writeln('C: ' + inttostr(c)); }
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
(*
|
||||||
|
TextToFontBitmap
|
||||||
|
~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
function TMOCR.TextToFontBitmap(Text, font: String): TMufasaBitmap;
|
||||||
|
|
||||||
|
Returns a Bitmap of the specified *Text* of the specified *Font*.
|
||||||
|
*)
|
||||||
|
|
||||||
function TMOCR.TextToFontBitmap(Text, font: String): TMufasaBitmap;
|
function TMOCR.TextToFontBitmap(Text, font: String): TMufasaBitmap;
|
||||||
var
|
var
|
||||||
TPA: TPointArray;
|
TPA: TPointArray;
|
||||||
@ -968,3 +1112,179 @@ end;
|
|||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
(*
|
||||||
|
|
||||||
|
.. _uptext-filter:
|
||||||
|
|
||||||
|
Uptext
|
||||||
|
======
|
||||||
|
|
||||||
|
To read the UpText, the TMOCR class applies several filters on the client data
|
||||||
|
before performing the actual OCR. We will take a look at the two filters first.
|
||||||
|
|
||||||
|
Filter 1: The Colour Filter
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
We first filter the raw client image with a very rough and tolerant colour
|
||||||
|
comparison / check.
|
||||||
|
We first convert the colour to RGB, and if it falls into the following
|
||||||
|
defined ranges, it may be part of the uptext. We also get the possible
|
||||||
|
shadows.
|
||||||
|
|
||||||
|
|
||||||
|
We will iterate over each pixel in the bitmap, and if it matches any of the
|
||||||
|
*rules* for the colour; we will set it to a constant colour which
|
||||||
|
represents this colour (and corresponding rule). Usually the *base*
|
||||||
|
colour. If it doesn't match any of the rules, it will be painted black.
|
||||||
|
We won't just check for colours, but also for differences between specific
|
||||||
|
R, G, B values. For example, if the colour is white; R, G and B should all
|
||||||
|
lie very close to each other. (That's what makes a colour white.)
|
||||||
|
|
||||||
|
The tolerance for getting the pixels is quite large. The reason for the
|
||||||
|
high tolerance is because the uptext colours vary quite a lot. They're also
|
||||||
|
transparent and vary thus per background.
|
||||||
|
We will store/match shadow as well; we need it later on in filter 2.
|
||||||
|
|
||||||
|
To my knowledge this algorithm doesn't remove any *valid* points. It does
|
||||||
|
not remove *all* invalid points either; but that is simply not possible
|
||||||
|
based purely on the colour. (If someone has a good idea, let me know)
|
||||||
|
|
||||||
|
In code:
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
for y := 0 to bmp.Height - 1 do
|
||||||
|
for x := 0 to bmp.Width - 1 do
|
||||||
|
begin
|
||||||
|
colortorgb(bmp.fastgetpixel(x,y),r,g,b);
|
||||||
|
|
||||||
|
if (r < ocr_Limit_Low) and (g < ocr_Limit_Low) and
|
||||||
|
(b < ocr_Limit_Low) then
|
||||||
|
begin
|
||||||
|
bmp.FastSetPixel(x,y, ocr_Purple);
|
||||||
|
continue;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Black if no match
|
||||||
|
bmp.fastsetpixel(x,y,0);
|
||||||
|
end;
|
||||||
|
|
||||||
|
Filter 2: The Characteristics Filter
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
This second filter is easy to understand but also very powerful:
|
||||||
|
|
||||||
|
- It removes *all* false shadow pixels.
|
||||||
|
- It removes uptext pixels that can't be uptext according to specific
|
||||||
|
rules. These rules are specifically designed so that it will never
|
||||||
|
throw away proper points.
|
||||||
|
|
||||||
|
It also performs another filter right at the start, but we'll disregard that
|
||||||
|
filter for now.
|
||||||
|
|
||||||
|
Removing shadow points is trivial if one understands the following insight.
|
||||||
|
|
||||||
|
If some pixel at *x, y* is shadow, then its neighbour *x+1, y+1*
|
||||||
|
may not be a shadow pixel. A shadow is always only one pixel *thick*.
|
||||||
|
|
||||||
|
With this in mind, we can easily define an algorithm which removes all false
|
||||||
|
shadow pixels. In code:
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
{
|
||||||
|
The tricky part of the algorithm is that it starts at the bottom,
|
||||||
|
removing shadow point x,y if x-1,y-1 is also shadow. This is
|
||||||
|
more efficient than the obvious way. (It is also easier to implement)
|
||||||
|
}
|
||||||
|
|
||||||
|
for y := bmp.Height - 1 downto 1 do
|
||||||
|
for x := bmp.Width - 1 downto 1 do
|
||||||
|
begin
|
||||||
|
// Is it shadow?
|
||||||
|
if bmp.fastgetpixel(x,y) <> clPurple then
|
||||||
|
continue;
|
||||||
|
// Is the point at x-1,y-1 shadow? If it is
|
||||||
|
// then x, y cannot be shadow.
|
||||||
|
if bmp.fastgetpixel(x,y) = bmp.fastgetpixel(x-1,y-1) then
|
||||||
|
begin
|
||||||
|
bmp.fastsetpixel(x,y,clSilver);
|
||||||
|
continue;
|
||||||
|
end;
|
||||||
|
if bmp.fastgetpixel(x-1,y-1) = 0 then
|
||||||
|
bmp.fastsetpixel(x,y,clSilver);
|
||||||
|
end;
|
||||||
|
|
||||||
|
We are now left with only proper shadow pixels.
|
||||||
|
Now it is time to filter out false Uptext pixels.
|
||||||
|
|
||||||
|
Realize:
|
||||||
|
|
||||||
|
- If *x, y* is uptext, then *x+1, y+1* must be either uptext or shadow.
|
||||||
|
|
||||||
|
In code:
|
||||||
|
|
||||||
|
.. code-block:: pascal
|
||||||
|
|
||||||
|
for y := bmp.Height - 2 downto 0 do
|
||||||
|
for x := bmp.Width - 2 downto 0 do
|
||||||
|
begin
|
||||||
|
if bmp.fastgetpixel(x,y) = clPurple then
|
||||||
|
continue;
|
||||||
|
if bmp.fastgetpixel(x,y) = clBlack then
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Is the other pixel also uptext?
|
||||||
|
// NOTE THAT IT ALSO HAS TO BE THE SAME COLOUR
|
||||||
|
// UPTEXT IN THIS CASE.
|
||||||
|
// I'm still not sure if this is a good idea or not.
|
||||||
|
// Perhaps it should match *any* uptext colour.
|
||||||
|
if (bmp.fastgetpixel(x,y) = bmp.fastgetpixel(x+1,y+1) ) then
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If it isn't shadow (and not the same colour uptext, see above)
|
||||||
|
// then it is not uptext.
|
||||||
|
if bmp.fastgetpixel(x+1,y+1) <> clPurple then
|
||||||
|
begin
|
||||||
|
bmp.fastsetpixel(x,y,clOlive);
|
||||||
|
continue;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// If we make it to here, it means the pixel is part of the uptext.
|
||||||
|
end;
|
||||||
|
|
||||||
|
Identifying characters
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
This part of the documentation is a bit vague and incomplete.
|
||||||
|
|
||||||
|
To actually identify the text we split it up into single characters and then
|
||||||
|
pass each character to the OCR engine.
|
||||||
|
|
||||||
|
In the function *getTextPointsIn* we will use both the filters mentioned above.
|
||||||
|
After these have been applied, we will make a bitmap that only contains the
|
||||||
|
shadows as well as a bitmap that only contains the uptext chars (not the
|
||||||
|
shadows)
|
||||||
|
|
||||||
|
Now it is a good idea to count the occurrences of all colours
|
||||||
|
(on the character bitmap); we will also use this later on.
|
||||||
|
To split the characters we use the well known *splittpaex* function.
|
||||||
|
|
||||||
|
We will then sort the points in each character TPA, as this makes
|
||||||
|
looping over them and comparing distances easier. We will also
|
||||||
|
calculate the bounding box of each character's TPA.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
Some more hackery is then used to separate the characters and find
|
||||||
|
spaces; but isn't yet documented here.
|
||||||
|
|
||||||
|
Normal OCR
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
To do :-)
|
||||||
|
A large part is already explained above.
|
||||||
|
Most of the other OCR functions are simply used for plain identifying
|
||||||
|
and have no filtering tasks.
|
||||||
|
*)
|
||||||
|
Loading…
Reference in New Issue
Block a user