From eaea86a50b8089d0c5943518bb36694240334977 Mon Sep 17 00:00:00 2001 From: moparisthebest Date: Wed, 15 Feb 2017 01:21:16 -0500 Subject: [PATCH] Initial Commit --- .gitignore | 4 + license.md | 361 ++++++++++++++++++ pom.xml | 108 ++++++ readme.md | 18 + .../phonehash/ByteArrayComparator.java | 18 + .../LockingBufferedOutputStream.java | 128 +++++++ .../phonehash/PhoneBucketGen.java | 140 +++++++ .../phonehash/PhoneComparator.java | 216 +++++++++++ .../com/moparisthebest/phonehash/Sha1.java | 357 +++++++++++++++++ .../phonehash/webservices/WebApp.java | 98 +++++ 10 files changed, 1448 insertions(+) create mode 100644 .gitignore create mode 100644 license.md create mode 100755 pom.xml create mode 100644 readme.md create mode 100644 src/main/java/com/moparisthebest/phonehash/ByteArrayComparator.java create mode 100644 src/main/java/com/moparisthebest/phonehash/LockingBufferedOutputStream.java create mode 100644 src/main/java/com/moparisthebest/phonehash/PhoneBucketGen.java create mode 100644 src/main/java/com/moparisthebest/phonehash/PhoneComparator.java create mode 100644 src/main/java/com/moparisthebest/phonehash/Sha1.java create mode 100644 src/main/java/com/moparisthebest/phonehash/webservices/WebApp.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..40d0ced --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/ +*.iml +target/ +out.xml diff --git a/license.md b/license.md new file mode 100644 index 0000000..af5153d --- /dev/null +++ b/license.md @@ -0,0 +1,361 @@ +### GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +### Preamble + +The licenses for most software are designed to take away your freedom +to share and change it. 
By contrast, the GNU General Public License is +intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if +you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + +We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, +we want its recipients to know that what they have is not the +original, so that any problems introduced by others will not reflect +on the original authors' reputations. + +Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at +all. + +The precise terms and conditions for copying, distribution and +modification follow. + +### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +**0.** This License applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work +based on the Program" means either the Program or any derivative work +under copyright law: that is to say, a work containing the Program or +a portion of it, either verbatim or with modifications and/or +translated into another language. (Hereinafter, translation is +included without limitation in the term "modification".) Each licensee +is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the Program +(independent of having been made by running the Program). Whether that +is true depends on what the Program does. 
+ +**1.** You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a +fee. + +**2.** You may modify your copy or copies of the Program or any +portion of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + +**a)** You must cause the modified files to carry prominent notices +stating that you changed the files and the date of any change. + + +**b)** You must cause any work that you distribute or publish, that in +whole or in part contains or is derived from the Program or any part +thereof, to be licensed as a whole at no charge to all third parties +under the terms of this License. + + +**c)** If the modified program normally reads commands interactively +when run, you must cause it, when started running for such interactive +use in the most ordinary way, to print or display an announcement +including an appropriate copyright notice and a notice that there is +no warranty (or else, saying that you provide a warranty) and that +users may redistribute the program under these conditions, and telling +the user how to view a copy of this License. (Exception: if the +Program itself is interactive but does not normally print such an +announcement, your work based on the Program is not required to print +an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + +**3.** You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + +**a)** Accompany it with the complete corresponding machine-readable +source code, which must be distributed under the terms of Sections 1 +and 2 above on a medium customarily used for software interchange; or, + + +**b)** Accompany it with a written offer, valid for at least three +years, to give any third party, for a charge no more than your cost of +physically performing source distribution, a complete machine-readable +copy of the corresponding source code, to be distributed under the +terms of Sections 1 and 2 above on a medium customarily used for +software interchange; or, + + +**c)** Accompany it with the information you received as to the offer +to distribute corresponding source code. 
(This alternative is allowed +only for noncommercial distribution and only if you received the +program in object code or executable form with such an offer, in +accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + +**4.** You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt otherwise +to copy, modify, sublicense or distribute the Program is void, and +will automatically terminate your rights under this License. However, +parties who have received copies, or rights, from you under this +License will not have their licenses terminated so long as such +parties remain in full compliance. + +**5.** You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +**6.** Each time you redistribute the Program (or any work based on +the Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + +**7.** If, as a consequence of a court judgment or allegation of +patent infringement or for any other reason (not limited to patent +issues), conditions are imposed on you (whether by court order, +agreement or otherwise) that contradict the conditions of this +License, they do not excuse you from the conditions of this License. +If you cannot distribute so as to satisfy simultaneously your +obligations under this License and any other pertinent obligations, +then as a consequence you may not distribute the Program at all. For +example, if a patent license would not permit royalty-free +redistribution of the Program by all those who receive copies directly +or indirectly through you, then the only way you could satisfy both it +and this License would be to refrain entirely from distribution of the +Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + +**8.** If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + +**9.** The Free Software Foundation may publish revised and/or new +versions of the General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Program does not specify a +version number of this License, you may choose any version ever +published by the Free Software Foundation. 
+ +**10.** If you wish to incorporate parts of the Program into other +free programs whose distribution conditions are different, write to +the author to ask for permission. For software which is copyrighted by +the Free Software Foundation, write to the Free Software Foundation; +we sometimes make exceptions for this. Our decision will be guided by +the two goals of preserving the free status of all derivatives of our +free software and of promoting the sharing and reuse of software +generally. + +**NO WARRANTY** + +**11.** BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +**12.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ +### END OF TERMS AND CONDITIONS + +### How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + one line to give the program's name and an idea of what it does. + Copyright (C) yyyy name of author + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +Also add information on how to contact you by electronic and paper +mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details + type `show w'. This is free software, and you are welcome + to redistribute it under certain conditions; type `show c' + for details. + +The hypothetical commands \`show w' and \`show c' should show the +appropriate parts of the General Public License. 
Of course, the +commands you use may be called something other than \`show w' and +\`show c'; they could even be mouse-clicks or menu items--whatever +suits your program. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the program, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright + interest in the program `Gnomovision' + (which makes passes at compilers) written + by James Hacker. + + signature of Ty Coon, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, +you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use the +[GNU Lesser General Public +License](http://www.gnu.org/licenses/lgpl.html) instead of this +License. \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100755 index 0000000..0681816 --- /dev/null +++ b/pom.xml @@ -0,0 +1,108 @@ + + 4.0.0 + + com.moparisthebest + phonehash + 0.0.1 + jar + + phonehash + + phonehash + + https://github.com/moparisthebest/phonehash + + 2.12 + + + + com.moparisthebest + filelists + 0.1-SNAPSHOT + + + + + org.glassfish.jersey.containers + jersey-container-servlet + ${jersey.version} + + + + org.glassfish.jersey.containers + jersey-container-grizzly2-http + ${jersey.version} + + + + + + Travis Burtrum + http://www.moparisthebest.com/ + + + + + + GNU GENERAL PUBLIC LICENSE, Version 2 + https://www.gnu.org/licenses/old-licenses/gpl-2.0.html + + + + + compile + ${project.artifactId} + + + maven-compiler-plugin + 3.1 + + 1.8 + 1.8 + true + + -Xlint + + + + + maven-shade-plugin + + + package + + shade + + + false + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com.moparisthebest.phonehash.webservices.WebApp + + + + + + + + + + + 
scm:git:git@github.com:moparisthebest/phonehash.git + scm:git:git@github.com:moparisthebest/phonehash.git + git@github.com:moparisthebest/phonehash.git + + diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..19d1906 --- /dev/null +++ b/readme.md @@ -0,0 +1,18 @@ +Quickly reverse sha1 hashes to phone numbers +-------------------------------------------- + +Kontalk JIDs are simply [hashes of phone numbers](https://github.com/kontalk/androidclient/issues/497) in the form of beb578a7ae7e6b93b49a619e6709a1c3b1063e9c@kontalk.net, which was generated from the phone number +15555555555 + +They already know this isn't much protection, but I wanted to see how fast I could go from hash to phone number, and it turned out to be harder than I thought. I'll try to walk you through my thought process here. + +We are talking about 100 billion possible numbers here, 0 - 99,999,999,999. The smallest number of bytes that number can be represented with is 5, and a sha1 hash is 20 bytes. So if you wanted to generate and store the entire list of sha1 hashes and phone numbers, you'd need 100,000,000,000 * 25 bytes of space, or 2.5 TB. I don't care to waste that much space on this, so I decided to store only the phone numbers (500 GB), but sorted by the sha1 hash, by generating the sha1 hash to sort, and only writing phone numbers to disk. The good news is that, if this list is sorted, a [binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm) only costs O(log n) in the worst case, so that's worst case computing about 37 sha1 hashes on each search (log2 of 100 billion is roughly 36.5), which will be plenty fast on even the most modest of hardware. + +Interestingly, as a side note, this is the first time I have been bitten by java only supporting 32-bit signed integers as array indices, as this requires a much bigger array. I ended up writing a [List implementation backed by a RandomAccessFile](https://github.com/moparisthebest/filelists) just for this very purpose. 
+Sorting turned out to be a challenge itself, since I'm not storing the sha1 hash, I have to generate it for each compare, and you start to care about the number of compares in your sort algorithm. Also, the best ones (merge/quicksort) require O(n*2) space, and even O(n*1.5) space is too much in this case. Also I couldn't use any of the built-in java ones because they only operate on arrays with 32-bit indices. I wrote a few implementations for RandomAccessFileList, and the fastest was heap sort, but the time it took to sort 1 million rows extrapolated out to 100 billion was looking to be somewhere around 4 years of constant runtime. Finally I had an epiphany, essentially do a bucket sort but during generation so it doesn't take n*2 space. I have multiple threads iterate over portions of the range, generate sha1 hashes for each number, and write them to files sorted out on the first 2 bytes of the sha1 hash. This gives you 65536 files about 7.3 MB each, then you just need to sort them and concatenate them together and done. This requires only 500 GB + 7.3 MB of space for a final result of a perfectly sorted 500 GB file. + +This generation and sorting to 65536 buckets part took about 26 hours with 4 threads and slow spinning drives, the sorting of each bucket and concatenating them I am limiting to one thread on account of not killing my slow drives with seek times, and it's looking at taking about 55 hours, I'll update this when it's done. + +The end result was worth it though, you can now go to https://www.moparisthebest.com/phonehash/ to reverse a Kontalk sha1 hash to a phone number in effectively constant time (seconds) with essentially no load on my server. I also included all the tools to generate and use this yourself in this repo. PhoneBucketGen will generate this massive sorted phone number file, and WebApp will run a single web service to return answers for you. + +Enjoy! 
diff --git a/src/main/java/com/moparisthebest/phonehash/ByteArrayComparator.java b/src/main/java/com/moparisthebest/phonehash/ByteArrayComparator.java new file mode 100644 index 0000000..e4dc05e --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/ByteArrayComparator.java @@ -0,0 +1,18 @@ +package com.moparisthebest.phonehash; + +import java.util.Comparator; + +/** + * Created by mopar on 2/10/17. Orders byte arrays position by position using the signed difference of the bytes; only the first o1.length positions are examined. + */ +public class ByteArrayComparator implements Comparator { + @Override + public int compare(final byte[] o1, final byte[] o2) { + for (int x = 0; x < o1.length; ++x) { + final int y = o1[x] - o2[x]; + if (y != 0) + return y; + } + return 0; + } +} diff --git a/src/main/java/com/moparisthebest/phonehash/LockingBufferedOutputStream.java b/src/main/java/com/moparisthebest/phonehash/LockingBufferedOutputStream.java new file mode 100644 index 0000000..1ca2391 --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/LockingBufferedOutputStream.java @@ -0,0 +1,128 @@ +package com.moparisthebest.phonehash; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Objects; + +/** + * Created by mopar on 2/11/17. Buffered output stream that holds the supplied lock object whenever it writes buffered data to, or flushes, the underlying stream. + */ +public class LockingBufferedOutputStream extends FilterOutputStream { + /** + * The internal buffer where data is stored. + */ + protected byte buf[]; + + /** + * The number of valid bytes in the buffer. This value is always + * in the range 0 through buf.length; elements + * buf[0] through buf[count-1] contain valid + * byte data. + */ + protected int count; + + protected final Object lock; + + /** + * Creates a new buffered output stream to write data to the + * specified underlying output stream, using the given lock and a buffer size of 8192 bytes. + * + * @param out the underlying output stream. 
+ */ + public LockingBufferedOutputStream(OutputStream out, final Object lock) { + this(out, lock, 8192); + } + + /** + * Creates a new buffered output stream to write data to the + * specified underlying output stream with the specified buffer + * size; lock must be non-null. + * + * @param out the underlying output stream. + * @param size the buffer size. + * @exception IllegalArgumentException if size <= 0. + */ + public LockingBufferedOutputStream(OutputStream out, final Object lock, int size) { + super(out); + Objects.requireNonNull(lock); + if (size <= 0) { + throw new IllegalArgumentException("Buffer size <= 0"); + } + buf = new byte[size]; + this.lock = lock; + } + + /** Flush the internal buffer */ + private void flushBuffer() throws IOException { + if (count > 0) { + out.write(buf, 0, count); + count = 0; + } + } + + /** + * Writes the specified byte to this buffered output stream. + * + * @param b the byte to be written. + * @exception IOException if an I/O error occurs. + */ + public synchronized void write(int b) throws IOException { + if (count >= buf.length) { + synchronized (lock) { + flushBuffer(); + } + } + buf[count++] = (byte)b; + } + + /** + * Writes len bytes from the specified byte array + * starting at offset off to this buffered output stream. + * + *

Ordinarily this method stores bytes from the given array into this + * stream's buffer, flushing the buffer to the underlying output stream as + * needed. If the requested length is at least as large as this stream's + * buffer, however, then this method will flush the buffer and write the + * bytes directly to the underlying output stream. Thus redundant + * BufferedOutputStreams will not copy data unnecessarily. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + * @exception IOException if an I/O error occurs. + */ + public synchronized void write(byte b[], int off, int len) throws IOException { + if (len >= buf.length) { + /* If the request length is at least the size of the output buffer, + flush the output buffer and then write the data directly. + In this way buffered streams will cascade harmlessly. */ + synchronized (lock) { + flushBuffer(); + out.write(b, off, len); + } + return; + } + if (len > buf.length - count) { + synchronized (lock) { + flushBuffer(); + } + } + System.arraycopy(b, off, buf, count, len); + count += len; + } + + /** + * Flushes this buffered output stream. This forces any buffered + * output bytes to be written out to the underlying output stream. + * + * @exception IOException if an I/O error occurs. 
+ * @see java.io.FilterOutputStream#out + */ + public synchronized void flush() throws IOException { + synchronized (lock) { + flushBuffer(); + out.flush(); + } + } +} diff --git a/src/main/java/com/moparisthebest/phonehash/PhoneBucketGen.java b/src/main/java/com/moparisthebest/phonehash/PhoneBucketGen.java new file mode 100644 index 0000000..f76ad78 --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/PhoneBucketGen.java @@ -0,0 +1,140 @@ +package com.moparisthebest.phonehash; + +import com.moparisthebest.filelist.LongConverter40Bit; +import com.moparisthebest.filelist.RandomAccessFileList; + +import java.io.*; +import java.util.*; + +/** + * Created by mopar on 2/10/17. Writes every number from start to end (inclusive) into one of 65536 bucket files chosen by the first two bytes of its hash, then sorts each bucket by hash and appends it to final.list. + */ +public class PhoneBucketGen { + + public static int index(final byte a, final byte b) { + return (((a & 0xff) << 8) | (b & 0xff)); + } + + public static void main(String[] args) throws Exception { + final Object lock = new Object(); + final File path = new File(args[0]); + final int numThreads = Integer.parseInt(args[1]); + final long start = Long.parseLong(args[2]); + final long end = Long.parseLong(args[3]) + 1; + path.mkdirs(); + + { + + final OutputStream[] osArr = new OutputStream[65536]; + + { + final Random r = new Random(); + byte a = Byte.MIN_VALUE; + do { + final int ia = a + 128; + final File folder = new File(path, String.format("%03d", ia)); + folder.mkdirs(); + byte b = Byte.MIN_VALUE; + do { + final int index = index(a, b); + final int ib = b + 128; + //System.out.printf("%d/%d: %03d/%03d: %d%n", a, b, ia, ib, index); + final File file = new File(folder, String.format("%03d.list", ib)); + //System.out.println(file.getAbsolutePath()); + //osArr[index] = new FileOutputStream(file); + //osArr[index] = new BufferedOutputStream(new FileOutputStream(file), 4100 + (5 * r.nextInt(2458))); // random cache between 4100 and 16385 + osArr[index] = new LockingBufferedOutputStream(new FileOutputStream(file), lock, 131075); // ~128 KB buffer per stream, 65536 streams will require ~8gb ram + ++b; + } while (b != 
Byte.MIN_VALUE); + //list.add((int) a); + ++a; + } while (a != Byte.MIN_VALUE); + } + System.out.println("files set up"); + + final long totalNums = end - start; + final long numsPerThread = (totalNums / numThreads) + 1; // hacky round-up + System.out.printf("numThreads: %d start: %d end: %d totalNums: %d numsPerThread: %d%n", numThreads, start, end, totalNums, numsPerThread); + final List threads = new ArrayList<>(numThreads); + for (long threadStart = start; threadStart < end; threadStart += numsPerThread) { + final long threadStartf = threadStart, threadEnd = Math.min(threadStart + numsPerThread, end); + System.out.printf("threadStart: %d threadEnd: %d%n", threadStart, threadEnd); + + final Thread thread = new Thread() { + @Override + public void run() { + final PhoneComparator pc = new PhoneComparator(); + final byte[] longToWrite = new byte[LongConverter40Bit.instance.numBytes()]; + for (long l = threadStartf; l < threadEnd; ++l) { + final byte[] hash = pc.fastHashReadOnly(l); + try { + LongConverter40Bit.instance.toBytes(l, longToWrite, 0); + osArr[index(hash[0], hash[1])].write(longToWrite); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + }; + threads.add(thread); + thread.start(); + } + + // wait for threads to finish + for (final Thread t : threads) { + t.join(); + System.out.println("thread finished"); + } + System.out.println("all threads finished"); + + for (final OutputStream os : osArr) + os.close(); + + } + + try (OutputStream finalOut = new BufferedOutputStream(new FileOutputStream(new File(path, "final.list")), 2097155)) { // ~2 MB output buffer + final byte[] buf = new byte[LongConverter40Bit.instance.numBytes()]; + final PhoneComparator pc = new PhoneComparator(); + final TreeMap phone = new TreeMap<>(new ByteArrayComparator()); + + byte a = Byte.MIN_VALUE; + do { + final int ia = a + 128; + final File folder = new File(path, String.format("%03d", ia)); + System.out.println(new Date() + ": starting folder " + ia); + byte b = 
Byte.MIN_VALUE; + do { + final int ib = b + 128; + //System.out.printf("%d/%d: %03d/%03d: %d%n", a, b, ia, ib, index); + final File file = new File(folder, String.format("%03d.list", ib)); + + if (ib % 32 == 0) + System.out.println(new Date() + ": starting file " + ib); + + if (!file.exists()) { + ++b; + continue; + } + + phone.clear(); + try (RandomAccessFileList raf = new RandomAccessFileList(file, LongConverter40Bit.instance)) { + for (final Long l : raf) + phone.put(pc.fastHashCopy(l), l); + } + for (final Long l : phone.values()) { + LongConverter40Bit.instance.toBytes(l, buf, 0); + finalOut.write(buf); + } + file.delete(); + ++b; + } while (b != Byte.MIN_VALUE); + //System.out.println(new Date() + ": done with folder "+ia); + folder.delete(); + ++a; + } while (a != Byte.MIN_VALUE); + + System.out.println("closing " + new Date()); + } + System.out.println("written " + new Date()); + } +} diff --git a/src/main/java/com/moparisthebest/phonehash/PhoneComparator.java b/src/main/java/com/moparisthebest/phonehash/PhoneComparator.java new file mode 100644 index 0000000..98befb8 --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/PhoneComparator.java @@ -0,0 +1,216 @@ +package com.moparisthebest.phonehash; + +import com.moparisthebest.filelist.TransformComparator; + +/** + * Created by mopar on 2/10/17. 
+ */ +public class PhoneComparator implements java.util.Comparator, TransformComparator { + + private static final boolean debug = false; + + // format is +00000000000 + // +14404746223 -> 3a6da5520baee5353448fd95cb036279da7f6b5f + // +00000000023 -> 18ff74bfab994d0b7f5a52fbbdcce34c0bc309e3 + + private static final byte[][] prefix = new byte[][]{ + "+00000000000".getBytes(), + "+0000000000".getBytes(), + "+000000000".getBytes(), + "+00000000".getBytes(), + "+0000000".getBytes(), + "+000000".getBytes(), + "+00000".getBytes(), + "+0000".getBytes(), + "+000".getBytes(), + "+00".getBytes(), + "+0".getBytes(), + "+".getBytes(), + }; + + private static final String[] stringPrefix = new String[]{ + "+00000000000", + "+0000000000", + "+000000000", + "+00000000", + "+0000000", + "+000000", + "+00000", + "+0000", + "+000", + "+00", + "+0", + "+", + }; + + public static String formatPhoneNumber(final Long num){ + final String numString = num.toString(); + return stringPrefix[numString.length()] + numString; + } + + /* + public PhoneComparator() throws NoSuchAlgorithmException { + } + + // safe method + private final MessageDigest md = MessageDigest.getInstance("SHA-1"); + + private byte[] safeHash(final Long o) { + final byte[] s = o.toString().getBytes(); + md.reset(); + md.update(prefix[s.length]); + final byte[] ret = md.digest(s); + if (debug) + System.out.printf("num '%s%s' hash '%s'%n", + new String(prefix[s.length]), new String(s), String.format("%040x", new java.math.BigInteger(1, ret))); + return ret; + } + */ + + // fast and dirty method + private final Sha1 digest = new Sha1(); + // 2 to store sha1 hashes, 1 to store longs as byte[] for hashing + private final byte[] s1hash = new byte[20], s2hash = new byte[20], lArr = new byte[11]; + + public void fastHash(final long o, final byte[] hash) { + final int size = stringSize(o); + getChars(o, size, lArr); + digest.engineReset(); + final byte[] sPrefix = prefix[size]; + digest.engineUpdate(sPrefix, 0, sPrefix.length); + 
digest.engineUpdate(lArr, 0, size); + digest.engineDigest(hash, 0, hash.length); + if (debug) + System.out.printf("num '%s%s' hash '%s'%n", + new String(prefix[size]), new String(lArr, 0, size), String.format("%040x", new java.math.BigInteger(1, hash))); + } + + public byte[] fastHashCopy(final long o) { + final byte[] hash = new byte[20]; + this.fastHash(o, hash); + return hash; + } + + public byte[] fastHashReadOnly(final long o) { + this.fastHash(o, s1hash); + return s1hash; + } + + @Override + public int compare(final Long o1, final Long o2) { + //final byte[] s1hash = safeHash(o1); + fastHash(o1, s1hash); + return compareTransform(s1hash, o2); + } + + @Override + public int compareTransform(final byte[] s1hash, final Long o2) { + //final byte[] s2hash = safeHash(o2); + fastHash(o2, s2hash); + for (int x = 0; x < s1hash.length; ++x) { + final int y = s1hash[x] - s2hash[x]; + if (y != 0) + return y; + } + return 0; + } + + // below was pulled and modified from Long and Integer to convert positive longs directly to byte[], producing the same effect + // as someLong.toString().getBytes(), but MUCH faster, and writing into existing buffers instead of creating new + + /** + * All possible chars for representing a number as a String + */ + final static byte[] digits = { + '0' , '1' , '2' , '3' , '4' , '5' , + '6' , '7' , '8' , '9' , 'a' , 'b' , + 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , + 'i' , 'j' , 'k' , 'l' , 'm' , 'n' , + 'o' , 'p' , 'q' , 'r' , 's' , 't' , + 'u' , 'v' , 'w' , 'x' , 'y' , 'z' + }; + + final static byte [] DigitTens = { + '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', + '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', + '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', + '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', + '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', + '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', + '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', + '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', + '8', '8', '8', '8', 
'8', '8', '8', '8', '8', '8', + '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', + } ; + + final static byte [] DigitOnes = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + } ; + + /** + * Places the ASCII decimal digits representing the long i into the + * byte array buf. The digits are placed into + * the buffer backwards starting with the least significant + * digit at the specified index (exclusive), and working + * backwards from there. + * + * Requires i >= 0: no sign is written and negative values are not handled. + */ + static void getChars(long i, final int index, final byte[] buf) { + long q; + int r; + int charPos = index; + + // Get 2 digits/iteration using longs until quotient fits into an int + while (i > Integer.MAX_VALUE) { + q = i / 100; + // really: r = i - (q * 100); + r = (int)(i - ((q << 6) + (q << 5) + (q << 2))); + i = q; + buf[--charPos] = DigitOnes[r]; + buf[--charPos] = DigitTens[r]; + } + + // Get 2 digits/iteration using ints + int q2; + int i2 = (int)i; + while (i2 >= 65536) { + q2 = i2 / 100; + // really: r = i2 - (q2 * 100); + r = i2 - ((q2 << 6) + (q2 << 5) + (q2 << 2)); + i2 = q2; + buf[--charPos] = DigitOnes[r]; + buf[--charPos] = DigitTens[r]; + } + + // Fall thru to fast mode for smaller numbers + // assert(i2 <= 65536, i2); + for (;;) { + q2 = (i2 * 52429) >>> (16+3); + r = i2 - ((q2 << 3) + (q2 << 1)); // r = i2-(q2*10) ... 
+ buf[--charPos] = digits[r]; + i2 = q2; + if (i2 == 0) break; + } + } + + // Requires positive x + static int stringSize(final long x) { + long p = 10; + for (int i=1; i<19; ++i) { + if (x < p) + return i; + p = 10*p; + } + return 19; + } +} diff --git a/src/main/java/com/moparisthebest/phonehash/Sha1.java b/src/main/java/com/moparisthebest/phonehash/Sha1.java new file mode 100644 index 0000000..6d4f016 --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/Sha1.java @@ -0,0 +1,357 @@ +package com.moparisthebest.phonehash; + +import sun.misc.Unsafe; +import sun.security.action.GetPropertyAction; + +import java.lang.reflect.Field; +import java.nio.ByteOrder; +import java.security.AccessController; + +/** + * Created by mopar on 2/10/17. + */ +public class Sha1 { + private int[] W = new int[80]; + private int[] state = new int[5]; + private static final int round1_kt = 1518500249; + private static final int round2_kt = 1859775393; + private static final int round3_kt = -1894007588; + private static final int round4_kt = -899497514; + + public Sha1() { + this("SHA-1", 20, 64); + this.implReset(); + } + + void implReset() { + this.state[0] = 1732584193; + this.state[1] = -271733879; + this.state[2] = -1732584194; + this.state[3] = 271733878; + this.state[4] = -1009589776; + } + + void implDigest(byte[] var1, int var2) { + long var3 = this.bytesProcessed << 3; + int var5 = (int)this.bytesProcessed & 63; + int var6 = var5 < 56?56 - var5:120 - var5; + this.engineUpdate(padding, 0, var6); + i2bBig4((int)(var3 >>> 32), this.buffer, 56); + i2bBig4((int)var3, this.buffer, 60); + this.implCompress(this.buffer, 0); + i2bBig(this.state, 0, var1, var2, 20); + } + + void implCompress(byte[] var1, int var2) { + b2iBig64(var1, var2, this.W); + + int var3; + int var4; + for(var3 = 16; var3 <= 79; ++var3) { + var4 = this.W[var3 - 3] ^ this.W[var3 - 8] ^ this.W[var3 - 14] ^ this.W[var3 - 16]; + this.W[var3] = var4 << 1 | var4 >>> 31; + } + + var3 = this.state[0]; + var4 = 
this.state[1]; + int var5 = this.state[2]; + int var6 = this.state[3]; + int var7 = this.state[4]; + + int var8; + int var9; + for(var8 = 0; var8 < 20; ++var8) { + var9 = (var3 << 5 | var3 >>> 27) + (var4 & var5 | ~var4 & var6) + var7 + this.W[var8] + 1518500249; + var7 = var6; + var6 = var5; + var5 = var4 << 30 | var4 >>> 2; + var4 = var3; + var3 = var9; + } + + for(var8 = 20; var8 < 40; ++var8) { + var9 = (var3 << 5 | var3 >>> 27) + (var4 ^ var5 ^ var6) + var7 + this.W[var8] + 1859775393; + var7 = var6; + var6 = var5; + var5 = var4 << 30 | var4 >>> 2; + var4 = var3; + var3 = var9; + } + + for(var8 = 40; var8 < 60; ++var8) { + var9 = (var3 << 5 | var3 >>> 27) + (var4 & var5 | var4 & var6 | var5 & var6) + var7 + this.W[var8] + -1894007588; + var7 = var6; + var6 = var5; + var5 = var4 << 30 | var4 >>> 2; + var4 = var3; + var3 = var9; + } + + for(var8 = 60; var8 < 80; ++var8) { + var9 = (var3 << 5 | var3 >>> 27) + (var4 ^ var5 ^ var6) + var7 + this.W[var8] + -899497514; + var7 = var6; + var6 = var5; + var5 = var4 << 30 | var4 >>> 2; + var4 = var3; + var3 = var9; + } + + this.state[0] += var3; + this.state[1] += var4; + this.state[2] += var5; + this.state[3] += var6; + this.state[4] += var7; + } + + // from DigestBase + private byte[] oneByte; + private final String algorithm; + private final int digestLength; + private final int blockSize; + byte[] buffer; + private int bufOfs; + long bytesProcessed; + static final byte[] padding = new byte[136]; + + Sha1(String var1, int var2, int var3) { + this.algorithm = var1; + this.digestLength = var2; + this.blockSize = var3; + this.buffer = new byte[var3]; + } + + protected final int engineGetDigestLength() { + return this.digestLength; + } + + protected final void engineUpdate(byte var1) { + if(this.oneByte == null) { + this.oneByte = new byte[1]; + } + + this.oneByte[0] = var1; + this.engineUpdate(this.oneByte, 0, 1); + } + + protected final void engineUpdate(byte[] var1, int var2, int var3) { + if(var3 != 0) { + if(var2 >= 
0 && var3 >= 0 && var2 <= var1.length - var3) { + if(this.bytesProcessed < 0L) { + this.engineReset(); + } + + this.bytesProcessed += (long)var3; + int var4; + if(this.bufOfs != 0) { + var4 = Math.min(var3, this.blockSize - this.bufOfs); + System.arraycopy(var1, var2, this.buffer, this.bufOfs, var4); + this.bufOfs += var4; + var2 += var4; + var3 -= var4; + if(this.bufOfs >= this.blockSize) { + this.implCompress(this.buffer, 0); + this.bufOfs = 0; + } + } + + if(var3 >= this.blockSize) { + var4 = var2 + var3; + var2 = this.implCompressMultiBlock(var1, var2, var4 - this.blockSize); + var3 = var4 - var2; + } + + if(var3 > 0) { + System.arraycopy(var1, var2, this.buffer, 0, var3); + this.bufOfs = var3; + } + + } else { + throw new ArrayIndexOutOfBoundsException(); + } + } + } + + private int implCompressMultiBlock(byte[] var1, int var2, int var3) { + while(var2 <= var3) { + this.implCompress(var1, var2); + var2 += this.blockSize; + } + + return var2; + } + + protected final void engineReset() { + if(this.bytesProcessed != 0L) { + this.implReset(); + this.bufOfs = 0; + this.bytesProcessed = 0L; + } + } + + protected final int engineDigest(byte[] var1, int var2, int var3) { + if(var3 < this.digestLength) { + throw new ArrayIndexOutOfBoundsException("Length must be at least " + this.digestLength + " for " + this.algorithm + "digests"); + } else if(var2 >= 0 && var3 >= 0 && var2 <= var1.length - var3) { + if(this.bytesProcessed < 0L) { + this.engineReset(); + } + + this.implDigest(var1, var2); + this.bytesProcessed = -1L; + return this.digestLength; + } else { + throw new ArrayIndexOutOfBoundsException("Length must be at least " + this.digestLength + " for " + this.algorithm + "digests"); + } + } + + // from ByteArrayAccess + private static final Unsafe unsafe; + private static final boolean littleEndianUnaligned; + private static final boolean bigEndian; + private static final int byteArrayOfs; + + static { + // from DigestBase + padding[0] = -128; + // for the following 
+ try { + final Field field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + unsafe = (Unsafe)field.get(null); + } catch (Exception e) { + throw new RuntimeException(e); + } + // from ByteArrayAccess + byteArrayOfs = unsafe.arrayBaseOffset(byte[].class); + boolean var0 = unsafe.arrayIndexScale(byte[].class) == 1 && unsafe.arrayIndexScale(int[].class) == 4 && unsafe.arrayIndexScale(long[].class) == 8 && (byteArrayOfs & 3) == 0; + ByteOrder var1 = ByteOrder.nativeOrder(); + littleEndianUnaligned = var0 && unaligned() && var1 == ByteOrder.LITTLE_ENDIAN; + bigEndian = var0 && var1 == ByteOrder.BIG_ENDIAN; + } + + private static boolean unaligned() { + String var0 = (String) AccessController.doPrivileged(new GetPropertyAction("os.arch", "")); + return var0.equals("i386") || var0.equals("x86") || var0.equals("amd64") || var0.equals("x86_64"); + } + + static void i2bBig4(int var0, byte[] var1, int var2) { + if(var2 >= 0 && var1.length - var2 >= 4) { + if(littleEndianUnaligned) { + unsafe.putInt(var1, (long)(byteArrayOfs + var2), Integer.reverseBytes(var0)); + } else if(bigEndian && (var2 & 3) == 0) { + unsafe.putInt(var1, (long)(byteArrayOfs + var2), var0); + } else { + var1[var2] = (byte)(var0 >> 24); + var1[var2 + 1] = (byte)(var0 >> 16); + var1[var2 + 2] = (byte)(var0 >> 8); + var1[var2 + 3] = (byte)var0; + } + + } else { + throw new ArrayIndexOutOfBoundsException(); + } + } + + static void i2bBig(int[] var0, int var1, byte[] var2, int var3, int var4) { + if(var1 >= 0 && var0.length - var1 >= var4 / 4 && var3 >= 0 && var2.length - var3 >= var4) { + if(littleEndianUnaligned) { + var3 += byteArrayOfs; + + for(var4 += var3; var3 < var4; var3 += 4) { + unsafe.putInt(var2, (long)var3, Integer.reverseBytes(var0[var1++])); + } + } else { + int var5; + if(bigEndian && (var3 & 3) == 0) { + var3 += byteArrayOfs; + + for(var4 += var3; var3 < var4; var3 += 4) { + unsafe.putInt(var2, (long)var3, var0[var1++]); + } + } else { + for(var4 += var3; var3 < 
var4; var2[var3++] = (byte)var5) { + var5 = var0[var1++]; + var2[var3++] = (byte)(var5 >> 24); + var2[var3++] = (byte)(var5 >> 16); + var2[var3++] = (byte)(var5 >> 8); + } + } + } + + } else { + throw new ArrayIndexOutOfBoundsException(); + } + } + + static void b2iBig64(byte[] var0, int var1, int[] var2) { + if(var1 >= 0 && var0.length - var1 >= 64 && var2.length >= 16) { + if(littleEndianUnaligned) { + var1 += byteArrayOfs; + var2[0] = Integer.reverseBytes(unsafe.getInt(var0, (long)var1)); + var2[1] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 4))); + var2[2] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 8))); + var2[3] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 12))); + var2[4] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 16))); + var2[5] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 20))); + var2[6] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 24))); + var2[7] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 28))); + var2[8] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 32))); + var2[9] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 36))); + var2[10] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 40))); + var2[11] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 44))); + var2[12] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 48))); + var2[13] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 52))); + var2[14] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 56))); + var2[15] = Integer.reverseBytes(unsafe.getInt(var0, (long)(var1 + 60))); + } else if(bigEndian && (var1 & 3) == 0) { + var1 += byteArrayOfs; + var2[0] = unsafe.getInt(var0, (long)var1); + var2[1] = unsafe.getInt(var0, (long)(var1 + 4)); + var2[2] = unsafe.getInt(var0, (long)(var1 + 8)); + var2[3] = unsafe.getInt(var0, (long)(var1 + 12)); + var2[4] = unsafe.getInt(var0, (long)(var1 + 16)); + var2[5] = unsafe.getInt(var0, (long)(var1 + 20)); + var2[6] = 
unsafe.getInt(var0, (long)(var1 + 24)); + var2[7] = unsafe.getInt(var0, (long)(var1 + 28)); + var2[8] = unsafe.getInt(var0, (long)(var1 + 32)); + var2[9] = unsafe.getInt(var0, (long)(var1 + 36)); + var2[10] = unsafe.getInt(var0, (long)(var1 + 40)); + var2[11] = unsafe.getInt(var0, (long)(var1 + 44)); + var2[12] = unsafe.getInt(var0, (long)(var1 + 48)); + var2[13] = unsafe.getInt(var0, (long)(var1 + 52)); + var2[14] = unsafe.getInt(var0, (long)(var1 + 56)); + var2[15] = unsafe.getInt(var0, (long)(var1 + 60)); + } else { + b2iBig(var0, var1, var2, 0, 64); + } + + } else { + throw new ArrayIndexOutOfBoundsException(); + } + } + + static void b2iBig(byte[] var0, int var1, int[] var2, int var3, int var4) { + if(var1 >= 0 && var0.length - var1 >= var4 && var3 >= 0 && var2.length - var3 >= var4 / 4) { + if(littleEndianUnaligned) { + var1 += byteArrayOfs; + + for(var4 += var1; var1 < var4; var1 += 4) { + var2[var3++] = Integer.reverseBytes(unsafe.getInt(var0, (long)var1)); + } + } else if(bigEndian && (var1 & 3) == 0) { + var1 += byteArrayOfs; + + for(var4 += var1; var1 < var4; var1 += 4) { + var2[var3++] = unsafe.getInt(var0, (long)var1); + } + } else { + for(var4 += var1; var1 < var4; var1 += 4) { + var2[var3++] = var0[var1 + 3] & 255 | (var0[var1 + 2] & 255) << 8 | (var0[var1 + 1] & 255) << 16 | var0[var1] << 24; + } + } + + } else { + throw new ArrayIndexOutOfBoundsException(); + } + } +} diff --git a/src/main/java/com/moparisthebest/phonehash/webservices/WebApp.java b/src/main/java/com/moparisthebest/phonehash/webservices/WebApp.java new file mode 100644 index 0000000..d2cbd6f --- /dev/null +++ b/src/main/java/com/moparisthebest/phonehash/webservices/WebApp.java @@ -0,0 +1,98 @@ +package com.moparisthebest.phonehash.webservices; + +import com.moparisthebest.filelist.LongConverter40Bit; +import com.moparisthebest.filelist.RandomAccessFileList; +import com.moparisthebest.phonehash.PhoneComparator; +import org.glassfish.grizzly.http.server.CLStaticHttpHandler; +import 
org.glassfish.grizzly.http.server.HttpServer; +import org.glassfish.grizzly.http.server.StaticHttpHandler; +import org.glassfish.grizzly.http.server.StaticHttpHandlerBase; +import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory; +import org.glassfish.jersey.server.ResourceConfig; + +import javax.ws.rs.*; +import javax.ws.rs.core.MediaType; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.regex.Pattern; + +/** + * Created by mopar on 9/12/14. + */ +@ApplicationPath("rest") +@Path("") +public class WebApp extends ResourceConfig { + + private static File sortedPhoneNumberList; + private static final Pattern sha1regex = Pattern.compile("^[0-9a-fA-F]{40}$"); + + @GET + @Path("hashToPhone/{sha1}") + @Produces(MediaType.TEXT_PLAIN) + public String hashToPhone(@PathParam("sha1") final String sha1) throws IOException { + if (sha1 == null || sha1.length() != 40 || !sha1regex.matcher(sha1).matches()) + return "sha1 hash must be 40 character hexadecimal"; + final PhoneComparator pc = new PhoneComparator(); + final byte[] needle = hexToBytes(sha1); + try (RandomAccessFileList list = new RandomAccessFileList<>(sortedPhoneNumberList, LongConverter40Bit.instance)) { + final long index = list.indexedBinarySearch(needle, pc); + return index < 0 ? 
"Not found" : PhoneComparator.formatPhoneNumber(list.get(index)); + } + } + + private static byte[] hexToBytes(final String hex) { + final int len = hex.length(); + final byte[] array = new byte[len / 2]; + for (int i = 0; i < len; i += 2) { + array[i / 2] = (byte) ((Character.digit(hex.charAt(i), 16) << 4) + Character.digit(hex.charAt(i + 1), 16)); + } + return array; + } + + public WebApp() { + packages(this.getClass().getPackage().getName()); + } + + public static void main(String[] args) throws IOException, InterruptedException { + if (args.length < 1) { + System.err.println("WebApp sortedPhoneNumbers.list [http://localhost:8080/phonehash/]"); + return; + } + sortedPhoneNumberList = new File(args[0]); + String contextPath = args.length > 1 && !args[1].trim().isEmpty() ? args[1] : "http://localhost:8080/phonehash/"; + + if (!contextPath.endsWith("/")) + contextPath += "/"; + + final ResourceConfig rc = new WebApp(); + + final ApplicationPath ap = rc.getClass().getAnnotation(ApplicationPath.class); + + final HttpServer server = GrizzlyHttpServerFactory.createHttpServer(URI.create(ap == null ? contextPath : contextPath + ap.value() + "/"), rc, false); + + final File webapp = new File("./src/main/webapp/"); + final StaticHttpHandlerBase staticHttpHandler; + + if (new File(webapp, "index.html").canRead()) { + //staticHttpHandler = new CLStaticHttpHandler(new URLClassLoader(new URL[]{webapp.toURI().toURL()})); + staticHttpHandler = new StaticHttpHandler(webapp.getAbsolutePath()); + staticHttpHandler.setFileCacheEnabled(false); // don't cache files, because we are in development? 
+ System.out.println("File Caching disabled!"); + } else { + staticHttpHandler = new CLStaticHttpHandler(WebApp.class.getClassLoader()); // jar class loader, leave cache enabled + } + + server.getServerConfiguration().addHttpHandler(staticHttpHandler, + contextPath.replaceFirst("^[^/]+//[^/]+", "") + ); + + try { + server.start(); + System.out.printf("Application started on '%s'.\nHit enter to stop it...", contextPath); + System.in.read(); + } finally { + server.shutdownNow(); + } + } +}