diff --git a/.travis.yml b/.travis.yml index 3f0172e..c42582a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,20 +8,20 @@ addons: sources: - ubuntu-toolchain-r-test packages: - - g++-4.8 + - g++-6 notifications: email: false env: matrix: - - TRAVIS_NODE_VERSION="0.10" - - TRAVIS_NODE_VERSION="0.12" - TRAVIS_NODE_VERSION="4" - - TRAVIS_NODE_VERSION="5" - TRAVIS_NODE_VERSION="6" + - TRAVIS_NODE_VERSION="8" + - TRAVIS_NODE_VERSION="9" install: - "rm -rf ~/.nvm && mkdir -p ~/.nvm && curl -sL `curl -sI https://github.com/creationix/nvm/releases/latest|sed -En 's/^Location: (.+)\\/releases\\/tag\\/(.+)/\\1\\/tarball\\/\\2/p'|tr -d '\r\n'`|tar zx --strip=1 -C ~/.nvm && source ~/.nvm/nvm.sh && nvm install $TRAVIS_NODE_VERSION" + - node --version - npm --version - - if [[ $TRAVIS_OS_NAME == "linux" ]]; then export CXX=g++-4.8; export CC=gcc-4.8; fi + - if [[ $TRAVIS_OS_NAME == "linux" ]]; then export CXX=g++-6; export CC=gcc-6; fi - $CXX --version - npm install script: npm test diff --git a/README.md b/README.md index 38466fd..28c9b7e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ An async libmagic binding for [node.js](http://nodejs.org/) for detecting conten Requirements ============ -* [node.js](http://nodejs.org/) -- v0.10.0 or newer +* [node.js](http://nodejs.org/) -- v4.0.0 or newer Install diff --git a/appveyor.yml b/appveyor.yml index d714d65..9673e9f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,20 +1,31 @@ # http://www.appveyor.com/docs/appveyor-yml +# This image contains both 2013 and 2015 +os: Visual Studio 2015 + # Test against these versions of Node.js. 
environment: matrix: - - nodejs_version: "0.10" - - nodejs_version: "0.12" - nodejs_version: "4" + GYP_MSVS_VERSION: "2013" + - nodejs_version: "4" + GYP_MSVS_VERSION: "2015" + - nodejs_version: "6" + GYP_MSVS_VERSION: "2013" - nodejs_version: "6" + GYP_MSVS_VERSION: "2015" + - nodejs_version: "8" + GYP_MSVS_VERSION: "2013" + - nodejs_version: "8" + GYP_MSVS_VERSION: "2015" + - nodejs_version: "9" + GYP_MSVS_VERSION: "2015" # Install scripts. (runs after repo cloning) install: # Get the latest stable version of Node - ps: Install-Product node $env:nodejs_version # Typical npm stuff. - - IF %nodejs_version% LSS 1 npm -g install npm - - IF %nodejs_version% LSS 1 set PATH=%APPDATA%\npm;%PATH% - set CL=-DDELAYIMP_INSECURE_WRITABLE_HOOKS - npm install diff --git a/deps/libmagic/ChangeLog b/deps/libmagic/ChangeLog index 345b32e..2063a23 100644 --- a/deps/libmagic/ChangeLog +++ b/deps/libmagic/ChangeLog @@ -1,3 +1,86 @@ +2017-09-02 11:53 Christos Zoulas + + * release 5.32 + +2017-08-28 16:37 Christos Zoulas + + * Always reset state in {file,buffer}_apprentice (Krzysztof Wilczynski) + +2017-08-27 03:55 Christos Zoulas + + * Fix always true condition (Thomas Jarosch) + +2017-05-24 17:30 Christos Zoulas + + * pickier parsing of numeric values in magic files. 
+ +2017-05-23 17:55 Christos Zoulas + + * PR/615 add magic_getflags() + +2017-05-23 13:55 Christos Zoulas + + * release 5.31 + +2017-03-17 20:32 Christos Zoulas + + * remove trailing spaces from magic files + * refactor is_tar + * better bounds checks for cdf + +2017-02-10 12:24 Christos Zoulas + + * release 5.30 + +2017-02-07 23:27 Christos Zoulas + + * If we exceeded the offset in a search return no match + (Christoph Biedl) + * Be more lenient on corrupt CDF files (Christoph Biedl) + +2017-02-04 16:46 Christos Zoulas + + * pacify ubsan sign extension (oss-fuzz/524) + +2017-02-01 12:42 Christos Zoulas + + * off by one in cdf parsing (PR/593) + * report debugging sections in elf (PR/591) + +2016-11-06 10:52 Christos Zoulas + + * Allow @@@ in extensions + * Add missing overflow check in der magic (Jonas Wagner) + +2016-10-25 10:40 Christos Zoulas + + * release 5.29 + +2016-10-24 11:20 Christos Zoulas + + * der getlength overflow (Jonas Wagner) + * multiple magic file load failure (Christoph Biedl) + +2016-10-17 11:26 Christos Zoulas + + * CDF parsing improvements (Guy Helmer) + +2016-07-20 7:26 Christos Zoulas + + * Add support for signed indirect offsets + +2016-07-18 7:41 Christos Zoulas + + * cat /dev/null | file - should print empty (Christoph Biedl) + +2016-07-05 15:20 Christos Zoulas + + * Bump string size from 64 to 96. + +2016-06-13 20:20 Christos Zoulas + + * PR/556: Fix separators on annotations. + 2016-06-13 19:40 Christos Zoulas * release 5.28 @@ -358,7 +441,7 @@ ` 2013-11-06 14:40 Christos Zoulas - * fix erroneous non-zero exit code from non-existant file and message + * fix erroneous non-zero exit code from non-existent file and message 2013-10-29 14:25 Christos Zoulas diff --git a/deps/libmagic/TODO b/deps/libmagic/TODO index 5c2ed49..78890cf 100644 --- a/deps/libmagic/TODO +++ b/deps/libmagic/TODO @@ -6,7 +6,6 @@ file, not here. More speculative material can live here. listed in the BUGS section of the man page had been fixed!) 
--- - It would be nice to simplify file considerably. For example, reimplement the apprentice and non-pattern magic methods in Python, and compile the magic patterns to a giant regex (or something similar; @@ -15,8 +14,23 @@ small amount of C is needed (because fast execution is typically only required for soft magic, not the more detailed information given by hard-wired routines). In this regard, note that hplip, which is BSD-licensed, has a magic reimplementation in Python. - +--- Read the kerberos magic entry for more ideas. - +--- Write a string merger to make magic entry sizes dynamic. Strings will be converted to offsets from the string table. +--- +Programming language support, we can introduce the concept of a group +of rules where n rules need to match before the rule is positive. This +could require structural changes to the matching code :-( + +0 group 2 # require 2 matches +# rule 1 +>0 .... +... +# rule 2 +>0 .... +... + +christos + diff --git a/deps/libmagic/libmagic.gyp b/deps/libmagic/libmagic.gyp index 32a1d70..76be697 100644 --- a/deps/libmagic/libmagic.gyp +++ b/deps/libmagic/libmagic.gyp @@ -3,11 +3,8 @@ { 'target_name': 'libmagic', 'type': 'static_library', - 'include_dirs': [ '.', 'src', 'pcre' ], - 'dependencies': [ - 'pcre/pcre.gyp:libpcre', - ], - 'defines': [ 'HAVE_CONFIG_H', 'VERSION="5.28"' ], + 'include_dirs': [ '.', 'src' ], + 'defines': [ 'HAVE_CONFIG_H', 'VERSION="5.32"' ], 'conditions': [ [ 'OS!="freebsd" and OS!="mac"', { 'sources': [ 'src/fmtcheck.c' ], @@ -27,8 +24,10 @@ 'src/strlcat.c', 'src/strlcpy.c', 'src/vasprintf.c', + # POSIX regex implementation + 'msvc/libgnurx-2.5/regex.c', ], - 'include_dirs': [ 'msvc', 'config/win' ], + 'include_dirs': [ 'config/win', 'msvc', 'msvc/libgnurx-2.5' ], 'link_settings': { 'libraries': [ '-lshlwapi.lib', diff --git a/deps/libmagic/msvc/libgnurx-2.5/COPYING.LIB b/deps/libmagic/msvc/libgnurx-2.5/COPYING.LIB new file mode 100644 index 0000000..cf9b6b9 --- /dev/null +++ 
b/deps/libmagic/msvc/libgnurx-2.5/COPYING.LIB @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. 
+ + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff --git a/deps/libmagic/msvc/libgnurx-2.5/Makefile b/deps/libmagic/msvc/libgnurx-2.5/Makefile new file mode 100644 index 0000000..f777eea --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/Makefile @@ -0,0 +1,41 @@ +all: libgnurx-0.dll libgnurx.dll.a libregex.a + +THIS = libgnurx +VERSION = 2.5 + +CC = x86_64-w64-mingw32-gcc -mthreads +CFLAGS = -I . + +SOURCES = $(wildcard *.c *.h) +OBJECTS = regex.o + +libgnurx-0.dll libgnurx.dll.a: $(OBJECTS) Makefile + $(CC) -shared -o libgnurx-0.dll -Wl,--enable-auto-image-base -Wl,--out-implib,libgnurx.dll.a -Wl,--output-def,libgnurx.def $(OBJECTS) + +libregex.a: libgnurx.dll.a + cp -p libgnurx.dll.a $@ + +dist: $(THIS)-$(VERSION).zip $(THIS)-dev-$(VERSION).zip $(THIS)-src-$(VERSION).zip + +$(THIS)-$(VERSION).zip: libgnurx-0.dll + mkdir -p runtime/bin + cp -p libgnurx-0.dll runtime/bin + (cd runtime; zip -r ../$(THIS)-$(VERSION).zip bin) + rm -rf runtime + +$(THIS)-dev-$(VERSION).zip: regex.h libgnurx.dll.a libregex.a + mkdir -p dev/include dev/lib + cp -p regex.h dev/include + cp -p libgnurx.dll.a libregex.a gnurx.lib dev/lib + (cd dev; zip -r ../$(THIS)-dev-$(VERSION).zip .) + rm -rf dev + +$(THIS)-src-$(VERSION).zip: Makefile README COPYING.LIB $(SOURCES) + mkdir $(THIS)-$(VERSION) + cp -p Makefile README COPYING.LIB $(SOURCES) $(THIS)-$(VERSION) + zip -r $@ $(THIS)-$(VERSION) + rm -rf $(THIS)-$(VERSION) + +clean: + rm -f *~ *.o *.dll *.def *.exp *.a *.zip + rm -rf runtime dev $(THIS)-$(VERSION) diff --git a/deps/libmagic/msvc/libgnurx-2.5/README b/deps/libmagic/msvc/libgnurx-2.5/README new file mode 100644 index 0000000..534cc3c --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/README @@ -0,0 +1,33 @@ +This is the regex functionality from glibc 2.5 extracted into a +separate library, for Win32. + +I call the DLL libgnurx-0.dll which hopefully should be unique. At +least it isn't "regex.dll" which has been used by the +gnuwin32.sourceforge.net site for *two* incompatible DLLs. 
(That mess, +and the mess with their build of Henry Spencer's regex library, was +what led me to build my own GNU regex library. See the gnuwin32-users mailing list archives from December 2006.) + +The "-0" is so that if at some point I build a release that isn't +binary compatible, I can then increment that and use a different name. + +The import library for gcc is called libgnurx.dll.a, but I also +distribute a copy of it called libregex.a so that configure scripts +that look for -lregex will work. + +Note that none of the wide-character and i18n functionality which is +built when this is part of glibc gets compiled. Thus things like +character classes most probably work only for single-byte codepages. + +Compiling that stuff would drag in lots of glibc's locale handling +stuff which is completely incompatible with Microsoft's C library's +locale handling anyway. Also, I am not sure whether the GNU regex code +is prepared to handle a two-byte wchar_t, or does it assume that +wchar_t is int as it is on Linux? Hmm, actually there is lots of +sizeof(wchar_t) in glibc, so maybe it *is* prepared? Maybe +later... But anyway, it would presumably mean we should have not just +the regex functionality but a larger subset of glibc that would +include all locale, ctype, wchar, mbs, etc stuff, presumably ending up +with a very large part of glibc (not the system calls, +obviously). Indeed, something to save for later, or never... + +--Tor Lillqvist , diff --git a/deps/libmagic/msvc/libgnurx-2.5/README.md b/deps/libmagic/msvc/libgnurx-2.5/README.md new file mode 100644 index 0000000..6e761b1 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/README.md @@ -0,0 +1,10 @@ +This version of libgnurx was taken from https://github.com/nscaife/file-windows/tree/master/libgnurx-2.5. 
+ +Only one modification has been made to the code taken from the above source: regexec.c contained a ternary expression with a missing expression, which is not supported by the Visual C compiler, so the missing expression has been re-added. + +Original README.md below +======================== + +The contents of this directory are from http://ftp.gnome.org/pub/gnome/binaries/win32/dependencies/. + +I have made a few modifications to the Makefile to disable creation of gnurx.lib. The creation of this file relies on lib.exe from Visual Studio (forcing a Windows build environment), and is not needed to build libmagic or file. diff --git a/deps/libmagic/msvc/libgnurx-2.5/regcomp.c b/deps/libmagic/msvc/libgnurx-2.5/regcomp.c new file mode 100644 index 0000000..78a1218 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regcomp.c @@ -0,0 +1,3800 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t 
*parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const unsigned char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, + int non_match, reg_errcode_t *err); 
+static bin_tree_t *create_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +const char __re_error_msgid[] attribute_hidden = + { /* Single string literal: the messages are NUL-separated, and each REG_xxx_IDX macro is the byte offset of its message. */ +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + 
sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { /* Offsets into __re_error_msgid, in the same order as the messages above. */ + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. 
*/ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; /* zero error code: nothing to report */ + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); /* message located through the offset table */ +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. 
*/ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ /* Rebuilds BUFP's fastmap from each distinct initial DFA state; always returns 0. */ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); /* start from an all-zero map */ + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ /* Marks CH in FASTMAP and, when ICASE is nonzero, its tolower() form as well. */ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); /* tolower() folding in re_set_fastmap is requested only when mb_cur_max is 1 */ + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = alloca (dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + int i; + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes + || cset->nranges || cset->nchar_classes) + { +# ifdef _LIBC + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) + { + /* In this case we want to catch the bytes which are + the first byte of any collation elements. + e.g. In da_DK, we want to catch 'a' since "aa" + is a valid collation element, and don't catch + 'b' since 'b' is the only collation element + which starts from 'b'. 
*/ + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# else + if (dfa->mb_cur_max > 1) + for (i = 0; i < SBC_MAX; ++i) + if (__btowc (i) == WEOF) + re_set_fastmap (fastmap, icase, i); +# endif /* not _LIBC */ + } + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, 0, *(unsigned char *) buf); + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); /* mark every byte, then stop scanning the remaining nodes */ + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. 
+ Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *__restrict preg; + const char *__restrict pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. 
*/ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; /* REG_NOERROR when compilation succeeded, otherwise the error code */ +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *__restrict preg; + char *__restrict errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; +#endif + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; /* full message size including the NUL, even when ERRBUF received a truncated copy */ +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +static const bitset_t utf8_sb_map = +{ + /* Set the first 128 bits. */ + [0 ... 
0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#endif + + +static void +free_dfa_content (re_dfa_t *dfa) +{ /* Frees the arrays hanging off DFA and finally DFA itself. */ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) /* inclusive bound: visits entries 0 .. state_hash_mask */ + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); /* also frees the re_dfa_t object itself */ + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. 
*/ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) /* NULL pattern: succeeds only if an earlier pattern is still compiled */ + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; /* keep the fastmap allocation across the reset; it is reattached after the memset */ + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. 
*/ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. */ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (&regexp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: /* shared failure path, also reached by goto from the parse and analyze steps below */ + free_workarea_compile (preg); + re_string_destruct (&regexp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (&regexp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. 
*/ + free_workarea_compile (preg); + re_string_destruct (&regexp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. */ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. 
*/ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) /* the nodes and state_table allocations made earlier in this function are only checked here */ + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; /* read the link before this node is freed */ + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. 
*/ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. 
*/ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.idx) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. 
*/ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = 
calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. 
*/ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. 
*/ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. 
*/ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert 
(!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. */ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + if (dfa->nodes[org_node].type == ANCHOR) + { + /* In case of the node has another constraint, append it. 
*/ + if (org_node == root_node && clone_node != org_node) + { + /* ...but if the node is root_node itself, it means the + epsilon closure have a loop, then tie it to the + destination of the root_node. */ + ret = re_node_set_insert (dfa->edests + clone_node, + org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + constraint |= dfa->nodes[org_node].opr.ctx_type; + } + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There are no such a duplicated node, create a new one. */ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There are a duplicated node which satisfy the constraint, + use it to avoid infinite loop. 
*/ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + if (dfa->nodes[org_idx].type == ANCHOR) + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + unsigned int constraint; + int i, incomplete; + re_node_set eclosure; + incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + constraint = ((dfa->nodes[node].type == ANCHOR) + ? dfa->nodes[node].opr.ctx_type : 0); + /* If the current node has constraints, duplicate all nodes. + Since they must inherit the constraints. */ + if (constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + re_node_set_merge (&eclosure, &eclosure_elem); + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* Epsilon closures include itself. */ + re_node_set_insert (&eclosure, node); + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/
+
+/* Reads the next token into RESULT with peek_token and advances INPUT by
+   the length peek_token reports.  */
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+  re_string_skip_bytes (input, peek_token (result, input, syntax));
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function inside bracket expressions.
+
+   TOKEN->type, TOKEN->opr and TOKEN->word_char are filled in according to
+   SYNTAX.  The return value is 0 at end of input (type END_OF_RE), 2 for
+   a backslash followed by another byte, and 1 otherwise.  */
+
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+  token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+  token->mb_partial = 0;
+  /* A byte that is not the first byte of a character is returned as a
+     CHARACTER token flagged mb_partial.  */
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      /* A backslash that is the last byte of the input.  */
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+        {
+          token->type = BACK_SLASH;
+          return 1;
+        }
+
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      /* Default: the escaped byte is an ordinary character.  The switch
+         below overrides the type when SYNTAX gives the sequence a special
+         meaning.  */
+      token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+        {
+          wint_t wc = re_string_wchar_at (input,
+                                          re_string_cur_idx (input) + 1);
+          token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+        }
+      else
+#endif
+        token->word_char = IS_WORD_CHAR (c2) != 0;
+
+      switch (c2)
+        {
+        case '|':
+          if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+            token->type = OP_ALT;
+          break;
+        case '1': case '2': case '3': case '4': case '5':
+        case '6': case '7': case '8': case '9':
+          if (!(syntax & RE_NO_BK_REFS))
+            {
+              token->type = OP_BACK_REF;
+              /* Back-reference indices are zero-based: "\1" is index 0.  */
+              token->opr.idx = c2 - '1';
+            }
+          break;
+        case '<':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = WORD_FIRST;
+            }
+          break;
+        case '>':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = WORD_LAST;
+            }
+          break;
+        case 'b':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = WORD_DELIM;
+            }
+          break;
+        case 'B':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = NOT_WORD_DELIM;
+            }
+          break;
+        case 'w':
+          if (!(syntax & RE_NO_GNU_OPS))
+            token->type = OP_WORD;
+          break;
+        case 'W':
+          if (!(syntax & RE_NO_GNU_OPS))
+            token->type = OP_NOTWORD;
+          break;
+        case 's':
+          if (!(syntax & RE_NO_GNU_OPS))
+            token->type = OP_SPACE;
+          break;
+        case 'S':
+          if (!(syntax & RE_NO_GNU_OPS))
+            token->type = OP_NOTSPACE;
+          break;
+        case '`':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = BUF_FIRST;
+            }
+          break;
+        case '\'':
+          if (!(syntax & RE_NO_GNU_OPS))
+            {
+              token->type = ANCHOR;
+              token->opr.ctx_type = BUF_LAST;
+            }
+          break;
+        case '(':
+          if (!(syntax & RE_NO_BK_PARENS))
+            token->type = OP_OPEN_SUBEXP;
+          break;
+        case ')':
+          if (!(syntax & RE_NO_BK_PARENS))
+            token->type = OP_CLOSE_SUBEXP;
+          break;
+        case '+':
+          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+            token->type = OP_DUP_PLUS;
+          break;
+        case '?':
+          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+            token->type = OP_DUP_QUESTION;
+          break;
+        case '{':
+          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+            token->type = OP_OPEN_DUP_NUM;
+          break;
+        case '}':
+          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+            token->type = OP_CLOSE_DUP_NUM;
+          break;
+        default:
+          break;
+        }
+      return 2;
+    }
+
+  /* An unescaped byte: again CHARACTER unless the switch below says
+     otherwise.  */
+  token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+    }
+  else
+#endif
+    token->word_char = IS_WORD_CHAR (token->opr.c);
+
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+        token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+        token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+        token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+        token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+        token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+        token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+        token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+        token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      /* Away from the start of the pattern, and unless SYNTAX makes '^'
+         an anchor everywhere, '^' is an anchor only right after a newline
+         when RE_NEWLINE_ALT is set.  */
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+          re_string_cur_idx (input) != 0)
+        {
+          char prev = re_string_peek_byte (input, -1);
+          if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+            break;
+        }
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_FIRST;
+      break;
+    case '$':
+      /* Before the last byte, and unless SYNTAX makes '$' an anchor
+         everywhere, '$' is an anchor only when the token after it is an
+         alternation or a closing group.  The input is stepped forward one
+         byte to peek at that token and then stepped back.  */
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+          re_string_cur_idx (input) + 1 != re_string_length (input))
+        {
+          re_token_t next;
+          re_string_skip_bytes (input, 1);
+          peek_token (&next, input, syntax);
+          re_string_skip_bytes (input, -1);
+          if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+            break;
+        }
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function out of bracket expressions.
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. + This function build the following tree, from regular expression : + CAT + / \ + / \ + EOR + + CAT means concatenation. 
+ EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + |: + ALT + / \ + / \ + + + ALT means alternative, which represents the operator `|'. */ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + : + CAT + / \ + / \ + + + CAT means concatenation. 
*/
+
+/* Parses a run of expressions up to the next '|', the end of the pattern
+   or, when NEST is non-zero, a closing group, and chains them with CONCAT
+   nodes.  Returns the tree, or NULL with *ERR set on failure.  Expressions
+   that yield a NULL tree without an error are left out of the chain.  */
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+              reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  bin_tree_t *tree, *exp;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  tree = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type != OP_ALT && token->type != END_OF_RE
+         && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+    {
+      exp = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && exp == NULL, 0))
+        {
+          return NULL;
+        }
+      if (tree != NULL && exp != NULL)
+        {
+          tree = create_tree (dfa, tree, exp, CONCAT);
+          if (tree == NULL)
+            {
+              *err = REG_ESPACE;
+              return NULL;
+            }
+        }
+      else if (tree == NULL)
+        tree = exp;
+      /* Otherwise exp == NULL, we don't need to create new tree.  */
+    }
+  return tree;
+}
+
+/* This function build the following tree, from regular expression a*:
+         *
+         |
+         a
+*/
+
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+                  reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  switch (token->type)
+    {
+    case CHARACTER:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+        {
+          while (!re_string_eoi (regexp)
+                 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+            {
+              bin_tree_t *mbc_remain;
+              fetch_token (token, regexp, syntax);
+              mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+              tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+              if (BE (mbc_remain == NULL || tree == NULL, 0))
+                {
+                  *err = REG_ESPACE;
+                  return NULL;
+                }
+            }
+        }
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+        return NULL;
+      break;
+    case
OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); + return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. 
*/ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "", + it must not be "". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "alnum", + (const unsigned char *) "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "space", + (const unsigned char *) "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. 
*/ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (): + SUBEXP + | + +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); + re_token_t start_token = *token; + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* {} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE (end != -1 && start > end, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "{n,m}" to "...{0,}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\0'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [::], [..], and + [==]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + /* Reject names that do not fit in the name buffer. */ + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + /* The name ends where the token's delimiter character is + followed by ']'. */ + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The results are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes; + it is a pointer argument since we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include <locale/weight.h> + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matching table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + if (len == weights[idx2]) + { + int cnt = 0; + while (cnt <= len && + weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. */ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + /* Without collation rules only a one-byte NAME is accepted. */ + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The results are written to MBCSET and SBCSET. 
+ CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes; + it is a pointer argument since we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const unsigned char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const unsigned char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + const char *name = (const char *) class_name; + + /* With RE_ICASE, "upper" and "lower" must match both upper and + lower case, so treat them as "alpha". */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (name, "blank") == 0) + BUILD_CHARCLASS_LOOP (isblank); + else if (strcmp (name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +/* Build a bracket tree for the character class CLASS_NAME extended with + the single-byte characters listed in EXTRA, complemented when + NON_MATCH is nonzero. On failure return NULL and store the error + code in *ERR. */ + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int alloc = 0; +#endif /* RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = 
(re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + /* Either allocation may have succeeded on its own; release it + (freeing a null pointer is a no-op) instead of leaking it. */ + re_free (sbcset); +#ifdef RE_ENABLE_I18N + re_free (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + /* + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set(cset->sbcset, '\0'); + */ + mbcset->non_match = 1; +#endif /* RE_ENABLE_I18N */ + } + + /* We don't care about the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w matches '_' also: add every character listed in EXTRA. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. 
*/ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + /* Test the joined tree, not MBC_TREE (already known to be non-null): + if create_tree failed, fall through to the ESPACE cleanup. */ + if (BE (tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1 if the number field is empty, like "{,1}". + Return -2 if an error occurred. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + /* Once NUM is -2 it stays -2: a non-digit or an overflow is + remembered until the field ends. */ + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +/* Free CSET together with the arrays it owns. CSET must not be NULL. */ +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. 
*/ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + /* Zero the whole token first: create_token_tree copies it by value, + so no member should be left indeterminate. */ + memset (&t, 0, sizeof (t)); + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +/* Take the next node from the DFA's tree storage, allocating a new + storage block when the current one is full, and initialize it with + a copy of TOKEN and the children LEFT and RIGHT. Return NULL if a + new storage block cannot be allocated. */ + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + /* Push the new block on the front of the storage list. */ + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark NODE as an optional subexpression if it is the SUBEXP node whose + index is passed, cast to a pointer, in EXTRA. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. Sets owned by a token flagged + as duplicated are left alone. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside + NODE's token; EXTRA is unused. */ + +static reg_errcode_t +free_tree (void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. 
This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. Every copied token is flagged as + `duplicated'. Return NULL if a node cannot be created. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + /* P_NEW is the link the next copy is stored through; DUP_NODE is the + parent that copy receives (initially ROOT's own parent). */ + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + /* Climb, in both trees at once, while the right child is + missing or is the subtree we just came from; running off + the top (null parent) means the copy is complete. */ + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} diff --git a/deps/libmagic/msvc/libgnurx-2.5/regex.c b/deps/libmagic/msvc/libgnurx-2.5/regex.c new file mode 100644 index 0000000..d2d4f28 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regex.c @@ -0,0 +1,74 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before , which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include + +#include +#include "regex_internal.h" + +#include "regex_internal.c" +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility. 
*/ +#if _LIBC +# include +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif diff --git a/deps/libmagic/msvc/libgnurx-2.5/regex.h b/deps/libmagic/msvc/libgnurx-2.5/regex.h new file mode 100644 index 0000000..807c404 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regex.h @@ -0,0 +1,556 @@ +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. 
The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? 
and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. 
*/ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. 
*/ +#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +# undef RE_DUP_MAX +#endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. 
*/ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Invalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. 
Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE unsigned char * +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long int allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long int used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + RE_TRANSLATE_TYPE translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. 
*/ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +# define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. 
Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. 
STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); + +#if defined _REGEX_RE_COMP || defined _LIBC +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. 
*/ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/deps/libmagic/msvc/libgnurx-2.5/regex_internal.c b/deps/libmagic/msvc/libgnurx-2.5/regex_internal.c new file mode 100644 index 0000000..66154e0 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regex_internal.c @@ -0,0 +1,1717 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +/* Functions for string operation. */ + +/* This function allocate the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocate the buffers, and initialize them. 
*/ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ /* Zeroes PSTR, records STR/LEN/TRANS/ICASE via re_string_construct_common, then builds the converted buffers. Returns REG_NOERROR, or the error code of a failed re_string_realloc_buffers or build_wcs_upper_buffer call. */ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) /* No buffers are requested for an empty string. */ + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; /* No private copy: read STR in place. */ + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) /* Rebuild, doubling the buffers each round, until all LEN raw bytes are converted or more than mb_cur_max bytes of buffer remain unused. */ + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else /* Nothing to convert: the whole buffer length is marked valid. */ + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct.
*/ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ /* Resizes PSTR's buffers to NEW_BUF_LEN elements: wcs (and offsets, if already allocated) when mb_cur_max > 1, and mbs when PSTR owns it. Returns REG_ESPACE as soon as one reallocation fails; bufs_len is updated only when all of them succeeded. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ /* Records the raw string and the conversion settings in PSTR; allocates nothing. A private mbs copy is requested (mbs_allocated) whenever TRANS or ICASE is in effect. */ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string is: + MB1(0), MB1(1), MB2(0), MB2(1), SB3 + (two 2-byte characters followed by a single-byte one), + then the wide character buffer will be: + WC1 , WEOF , WC2 , WEOF , WC3 + We use WEOF for padding; it indicates that the position isn't + the first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN.
*/ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ /* Fills PSTR->wcs (and PSTR->mbs when translating) and stores the new end position in valid_len and valid_raw_len. */ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len, whichever is smaller. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation table, if any: the translated bytes are + stored in PSTR->mbs and decoded from BUF. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The remaining bytes form an incomplete multibyte character: + restore the shift state and stop building. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* Invalid sequence or '\0': treat the raw byte as a + single-byte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Pad the trailing bytes of the character with WEOF. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE.
*/ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ + /* Returns REG_NOERROR, or REG_ESPACE if the offsets array that maps + converted bytes back to raw bytes cannot be allocated. */ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { + /* The upper-case form has a different byte length: + continue in the general loop, which tracks offsets. */ + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte.
*/ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* Incomplete multibyte character at the end of the buffer: + restore the shift state and stop building. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ?
i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* Incomplete multibyte character at the end of the buffer: + restore the shift state and stop building. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Decode and skip raw characters, starting at raw index + PSTR->raw_mbs_idx + PSTR->valid_raw_len, until the raw index is no + longer less than NEW_RAW_IDX. Return that index and store the last + decoded wide character (WEOF if nothing was skipped) in *LAST_WC. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wchar_t wc = WEOF; + + /* Skip the characters which are not necessary to check.
*/ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + int remain_len; + /* RAWBUF_IDX is an absolute index into raw_mbs, so the remaining + input is measured against raw_len; PSTR->len is relative to + raw_mbs_idx and would understate (or negate) the length. */ + remain_len = pstr->raw_len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + /* Then proceed to the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE: every byte from + PSTR->valid_len up to the smaller of PSTR->len and PSTR->bufs_len is + translated (if TRANS is set) and converted to upper case. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR, from PSTR->valid_len up to the + smaller of PSTR->len and PSTR->bufs_len. PSTR->trans must be set. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function re-construct the buffers.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *q, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? 
CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = 
dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ + +static reg_errcode_t +internal_function +re_node_set_init_union (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, id; + if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) + { + dest->alloc = src1->nelem + src2->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + return REG_ESPACE; + } + else + { + if (src1 != NULL && src1->nelem > 0) + return re_node_set_init_copy (dest, src1); + else if (src2 != NULL && src2->nelem > 0) + return re_node_set_init_copy (dest, src2); + else + re_node_set_init_empty (dest); + return REG_NOERROR; + } + for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) + { + if (src1->elems[i1] > src2->elems[i2]) + { + dest->elems[id++] = src2->elems[i2++]; + continue; + } + if (src1->elems[i1] == src2->elems[i2]) + ++i2; + dest->elems[id++] = src1->elems[i1++]; + } + if (i1 < src1->nelem) + { + memcpy (dest->elems + id, src1->elems + i1, + (src1->nelem - i1) * sizeof (int)); + id += src1->nelem - i1; + } + else if (i2 < src2->nelem) + { + memcpy (dest->elems + id, src2->elems + i2, + (src2->nelem - i2) * sizeof (int)); + id += src2->nelem - i2; + } + dest->nelem = id; + return REG_NOERROR; +} + +/* Calculate the union set of the sets DEST and SRC. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. 
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + int type = token.type; + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows. 
*/ + if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+		  const re_node_set *nodes)
+{
+  unsigned int key;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  /* An empty node set maps to the NULL state; that is not an error.  */
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* Look through the bucket for a state with the same hash and nodes.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == key
+	  && re_node_set_compare (&candidate->nodes, nodes))
+	return candidate;
+    }
+
+  /* Nothing suitable is registered yet, so build a fresh state.  */
+  created = create_ci_newstate (dfa, nodes, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.
*/
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+			  const re_node_set *nodes, unsigned int context)
+{
+  unsigned int key;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  /* An empty node set maps to the NULL state; that is not an error.  */
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* A match needs the same hash, the same context and the same
+     entrance nodes.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash != key)
+	continue;
+      if (candidate->context != context)
+	continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+	return candidate;
+    }
+
+  /* Nothing suitable is registered in `dfa' yet, so build a fresh state.  */
+  created = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.
*/
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+		unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  /* non_eps_nodes is a subset of nodes, so nodes.nelem slots suffice.  */
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
+	{
+	  /* Do not silently drop a node if the insertion fails.  */
+	  if (BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem)
+		  < 0, 0))
+	    return REG_ESPACE;
+	}
+    }
+
+  /* Append NEWSTATE to its hash bucket, growing the bucket if needed.  */
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+					      new_alloc);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE and everything it owns.  entrance_nodes is freed only
+   when it is a separate allocation, i.e. neither NULL nor an alias of
+   STATE's own node set.  */
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  /* The NULL test guards against a partially constructed state: without
+     it re_node_set_free would dereference a NULL pointer.  */
+  if (state->entrance_nodes != NULL
+      && state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independent of contexts.
+   Return the new state if succeeded, otherwise return NULL.
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) +{ + int i, nctx_nodes = 0; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->context = context; + newstate->entrance_nodes = &newstate->nodes; + + for (i = 0 ; i < nodes->nelem ; i++) + { + unsigned int constraint = 0; + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (node->constraint) + constraint = node->constraint; + + if (type == CHARACTER && !constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. 
*/ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR) + constraint = node->opr.ctx_type; + + if (constraint) + { + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); + if (BE (newstate->entrance_nodes == NULL, 0)) + { + free_state (newstate); + return NULL; + } + re_node_set_init_copy (newstate->entrance_nodes, nodes); + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} diff --git a/deps/libmagic/msvc/libgnurx-2.5/regex_internal.h b/deps/libmagic/msvc/libgnurx-2.5/regex_internal.h new file mode 100644 index 0000000..b0c4d35 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regex_internal.h @@ -0,0 +1,778 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include +#include +#include +#include +#include + +#if defined(__MINGW32_VERSION) || defined(_MSC_VER) +#define strcasecmp stricmp +#endif + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include +#else +typedef enum { false, true } bool; +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if defined HAVE_STDINT_H || defined _LIBC +# include +#endif /* HAVE_STDINT_H || _LIBC */ +#if defined _LIBC +# include +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include +# include +# include +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. 
*/ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. */ +#ifndef _LIBC +# define __wctype wctype +# define __iswctype iswctype +# define __btowc btowc +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. 
*/
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+/* Single-bit operations on a bitset.  The arguments are parenthesized
+   so that expressions such as `c + 1' can be passed safely.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+/* A set of node indices stored in a growable int array.  */
+typedef struct
+{
+  int alloc;	/* Number of slots allocated in ELEMS.  */
+  int nelem;	/* Number of slots in use.  */
+  int *elems;
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  int *offsets;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The corresponding number of bytes in raw_mbs array.  */
+  int valid_raw_len;
+  /* The length of the buffers MBS and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* length of RAW_MBS array.  */
+  int raw_len;
+  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int raw_stop;
+  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* Copy of re_dfa_t's word_char.  */
+  re_const_bitset_ptr_t word_char;
+  /* 1 if REG_ICASE.  */
+  unsigned char icase;
+  unsigned char is_utf8;
+  unsigned char map_notascii;
+  unsigned char mbs_allocated;
+  unsigned char offsets_needed;
+  unsigned char newline_anchor;
+  unsigned char word_ops_used;
+  int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+#  define internal_function   __attribute ((regparm (3), stdcall))
+# else
+#  define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+						int new_buf_len)
+     internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+					  int eflags)
+     internal_function __attribute ((pure));
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+				|| (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#ifdef __GNUC__
+# define alloca(size)   __builtin_alloca (size)
+# define HAVE_ALLOCA 1
+#elif defined(_MSC_VER)
+/* The Microsoft CRT declares _alloca in <malloc.h>.  */
+# include <malloc.h>
+# define alloca _alloca
+# define HAVE_ALLOCA 1
+#else
+# error No alloca()
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  Also care for the possibility
+   of a few compiler-allocated temporary stack slots.  */
+#  define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc.  */
+#  define __libc_use_alloca(n) 0
+# endif
+#endif
+
+/* Typed allocation helpers: N counts elements of type T, not bytes.  */
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+  struct bin_tree_t *first;
+  struct bin_tree_t *next;
+
+  re_token_t token;
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicate the type of this node.  */
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Non-zero when CONTEXT violates one of the PREV_* bits of CONSTRAINT.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+/* Non-zero when CONTEXT violates one of the NEXT_* bits of CONSTRAINT.  */
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+/* One bucket of the DFA state hash table.  */
+struct re_state_table_entry
+{
+  int num;	/* Number of states stored in ARRAY.  */
+  int alloc;	/* Number of slots allocated in ARRAY.  */
+  re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position NODE match at.  */
+  state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+   And information about the node, whose type is OP_CLOSE_SUBEXP,
+   corresponding to NODE is stored in LASTS.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;
+  int subexp_to;
+  char more;
+  char unused;
+  unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+  /* The string object corresponding to the input string.  */
+  re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  const re_dfa_t *const dfa;
+#else
+  const re_dfa_t *dfa;
+#endif
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **limited_states;
+  int last_node;
+  int last_str_idx;
+  re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+  int num;
+  int alloc;
+  struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+  re_token_t *nodes;
+  size_t nodes_alloc;
+  size_t nodes_len;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  re_dfastate_t *init_state;
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
+  int str_tree_storage_idx;
+
+  /* number of subexpressions `re_nsub' is in regex_t.  */
+  unsigned int state_hash_mask;
+  int init_node;
+  int nbackref; /* The number of backreference in this dfa.  */
+
+  /* Bitmap expressing which backreference is used.  */
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
+
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;
+  unsigned int map_notascii : 1;
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  __libc_lock_define (, lock)
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+
+/* Complement every word of SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    set[bitset_i] = ~set[bitset_i];
+}
+
+/* OR every word of SRC into DEST.  */
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    dest[bitset_i] |= src[bitset_i];
+}
+
+/* AND every word of DEST with the matching word of SRC.  */
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int bitset_i;
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+    dest[bitset_i] &= src[bitset_i];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.
*/ +static inline int +internal_function __attribute ((pure)) +re_string_char_size_at (const re_string_t *pstr, int idx) +{ + int byte_idx; + if (pstr->mb_cur_max == 1) + return 1; + for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) + if (pstr->wcs[idx + byte_idx] != WEOF) + break; + return byte_idx; +} + +static inline wint_t +internal_function __attribute ((pure)) +re_string_wchar_at (const re_string_t *pstr, int idx) +{ + if (pstr->mb_cur_max == 1) + return (wint_t) pstr->mbs[idx]; + return (wint_t) pstr->wcs[idx]; +} + +static int +internal_function __attribute ((pure)) +re_string_elem_size_at (const re_string_t *pstr, int idx) +{ +# ifdef _LIBC + const unsigned char *p, *extra; + const int32_t *table, *indirect; + int32_t tmp; +# include <locale/weight.h> + uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + + if (nrules != 0) + { + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + p = pstr->mbs + idx; + tmp = findidx (&p); + return p - pstr->mbs - idx; + } + else +# endif /* _LIBC */ + return 1; +} +#endif /* RE_ENABLE_I18N */ + +#endif /* _REGEX_INTERNAL_H */ diff --git a/deps/libmagic/msvc/libgnurx-2.5/regexec.c b/deps/libmagic/msvc/libgnurx-2.5/regexec.c new file mode 100644 index 0000000..3c41f54 --- /dev/null +++ b/deps/libmagic/msvc/libgnurx-2.5/regexec.c @@ -0,0 +1,4329 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) internal_function; +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop, int ret_len) internal_function; +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len) 
internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated) internal_function; +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) + internal_function; +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, 
int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static 
reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. 
*/ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *__restrict preg; + const char *__restrict string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + reg_errcode_t err; + int start, length; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + + if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) + return REG_BADPAT; + + if (eflags & REG_STARTEND) + { + start = pmatch[0].rm_so; + length = pmatch[0].rm_eo; + } + else + { + start = 0; + length = strlen (string); + } + + __libc_lock_lock (dfa->lock); + if (preg->no_sub) + err = re_search_internal (preg, string, length, start, length - start, + length, 0, NULL, eflags); + else + err = re_search_internal (preg, string, length, start, length - start, + length, nmatch, pmatch, eflags); + __libc_lock_unlock (dfa->lock); + return err != REG_NOERROR; +} + +#ifdef _LIBC +# include <shlib-compat.h> +versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); + +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +__typeof__ (__regexec) __compat_regexec; + +int +attribute_compat_text_section +__compat_regexec (const regex_t *__restrict preg, + const char *__restrict string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + return regexec (preg, string, nmatch, pmatch, + eflags & (REG_NOTBOL | REG_NOTEOL)); +} +compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); +# endif +#endif + +/* Entry points for GNU code. 
*/ + +/* re_match, re_search, re_match_2, re_search_2 + + The former two functions operate on STRING with length LENGTH, + while the latter two operate on the concatenation of STRING1 and STRING2 + with lengths LENGTH1 and LENGTH2, respectively. + + re_match() matches the compiled pattern in BUFP against the string, + starting at index START. + + re_search() first tries matching at index START, then it tries to match + starting from index START + 1, and so on. The last start position tried + is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same + way as re_match().) + + The parameter STOP of re_{match,search}_2 specifies that no match exceeding + the first STOP characters of the concatenation of the strings should be + considered. + + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups are stored in REGS. (For the "_2" variants, the offsets are + computed relative to the concatenation, not relative to the individual + strings.) + + On success, re_match* functions return the length of the match, re_search* + return the position of the start of the match. Return value -1 means no + match was found and -2 indicates an internal error. 
*/ + +int +re_match (bufp, string, length, start, regs) + struct re_pattern_buffer *bufp; + const char *string; + int length, start; + struct re_registers *regs; +{ + return re_search_stub (bufp, string, length, start, 0, length, regs, 1); +} +#ifdef _LIBC +weak_alias (__re_match, re_match) +#endif + +int +re_search (bufp, string, length, start, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int length, start, range; + struct re_registers *regs; +{ + return re_search_stub (bufp, string, length, start, range, length, regs, 0); +} +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif + +int +re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, stop; + struct re_registers *regs; +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, 0, regs, stop, 1); +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +int +re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, range, stop; + struct re_registers *regs; +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, range, regs, stop, 0); +} +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + +static int +re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, + stop, ret_len) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, range, stop, ret_len; + struct re_registers *regs; +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. 
*/ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; +#ifdef _LIBC + memcpy (__mempcpy (s, string1, length1), string2, length2); +#else + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); +#endif + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, + ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) + struct re_pattern_buffer *bufp; + const char *string; + int length, start, range, stop, ret_len; + struct re_registers *regs; +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. */ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. 
*/ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (regs, pmatch, nregs, regs_allocated) + struct re_registers *regs; + regmatch_t *pmatch; + int nregs, regs_allocated; +{ + int rval = REGS_REALLOCATE; + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. */ + regs->start = re_malloc (regoff_t, need_regs); + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) + return REGS_UNALLOCATED; + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. 
*/ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) + return REGS_UNALLOCATED; + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. 
*/ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, + eflags) + const regex_t *preg; + const char *string; + int length, start, range, stop, eflags; + size_t nmatch; + regmatch_t pmatch[]; +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. 
*/ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? 
-1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. 
*/ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? 
&match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? 
mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} +/* Sift MCTX->state_log backward from MCTX->last_node at MCTX->match_last. On success the state log is replaced by the sifted states and last_node/match_last are updated. Returns REG_NOERROR, REG_NOMATCH when no earlier halt state is left to retry from, REG_ESPACE when an allocation fails, or the error reported by a helper. */ +static reg_errcode_t +prune_impossible_nodes (mctx) + re_match_context_t *mctx; +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + /* Nothing survived at index 0: step back to the previous index whose logged state is a halt state and retry from there. */ do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while 
(mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select the appropriate initial state depending on the context + just before IDX, since initial states may have constraints like "\<", + "^", etc. When the context is begin-of-buffer without newline, the state + is built on demand by re_acquire_state_context, which receives ERR. */ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? 
*/ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression matches the input string of MCTX, + and return the index where the match ends, -1 if there is no match, + or -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assumes that the matching starts from the current + index of the buffer. + NOTE(review): the two back-reference setup failures below return ERR + (a reg_errcode_t) instead of -2 -- confirm how the caller treats it. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). */ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. 
*/ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. */ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } +/* While the state is unchanged by the transition, record the next index as the offset later added to *P_MATCH_FIRST. */ + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. 
*/ +/* Returns 1 only when NODE is an END_OF_RE node whose constraint, if any, is satisfied by CONTEXT; otherwise returns 0. */ +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check whether the halt state STATE matches the context at IDX. + Return the first node of STATE that is a halt node and matches that + context, or 0 if there is no such node. */ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transits from NODE ("NFA" is an NFA + corresponding to the DFA). + Return the destination node and update EPS_VIA_NODES; return -1 if no + destination is found, or -2 if a node-set insertion or a fail-stack push + fails. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. 
*/ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. */ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } +/* Zero-length back reference: record NODE in EPS_VIA_NODES and follow its first epsilon destination when the logged state contains it. */ + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} +/* Push a backtracking entry holding STR_IDX, DEST_NODE and copies of REGS and EPS_VIA_NODES on FS; the stack array is doubled when the incremented count reaches the allocated size. Returns REG_ESPACE when an allocation fails. NOTE(review): fs->num is incremented before the allocations, so after a failure the new top entry is only partly initialized -- confirm the cleanup paths tolerate that. */ +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} +/* Pop the top entry of FS: restore *PIDX and REGS from it, free the old contents of EPS_VIA_NODES and replace them with the saved set, and return the saved node. Asserts that the stack was not empty. */ +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions start/end in the registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ +/* Walks the nodes from dfa->init_node over the span pmatch[0].rm_so .. pmatch[0].rm_eo, calling update_regs at each step. When FL_BACKTRACK is nonzero a fail stack is allocated so that alternative transitions can be retried. Returns REG_NOERROR, REG_NOMATCH or REG_ESPACE. */ +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); +/* At the end of the match on the last node: with a fail stack, finish only if no register is left open (rm_so set but rm_eo == -1), otherwise pop and retry; without one, finish at once. */ + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} +/* Release every entry still on FS (its node set and its saved registers) and the stack array itself; a NULL FS is accepted. Always returns REG_NOERROR. */ +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} +/* Update PMATCH for CUR_NODE at string index CUR_IDX: an OP_OPEN_SUBEXP node opens its register and an OP_CLOSE_SUBEXP node closes it. PREV_IDX_MATCH holds the last accepted copy of the registers. Registers whose number is not below NMATCH are ignored. */ +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sifts the nodes in each state according to the following rules. + The sifted result is stored in SCTX->sifted_states. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ +/* Nonzero when STATE is not NULL and its node set contains NODE. */ +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) +/* Returns REG_NOERROR on success, otherwise the error reported by one of the helpers it calls. */ +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each state in the state_log. */ + while (str_idx > 0) + { + /* Count consecutive indices whose sifted state is NULL; when the run exceeds max_mb_elem_len, clear the entries below STR_IDX and return. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} +/* Collect into CUR_DEST the non-epsilon nodes of state_log[STR_IDX] that accept the input at STR_IDX and whose next node is kept in the later sifted state, skipping those rejected by check_dst_limits. Returns REG_ESPACE when an insertion fails. */ +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ +/* Extend the input buffers when NEXT_STATE_LOG_IDX lies beyond them, then zero the state_log entries above the current top up to NEXT_STATE_LOG_IDX and advance state_log_top. */ +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} +/* For each of the first NUM slots, set DST[i] to SRC[i] when DST[i] is NULL, or to the state acquired for the union of both node sets when both are non-NULL. */ +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} +/* Compute sifted_states[STR_IDX] from DEST_NODES: add epsilon sources found among the nodes of the logged state, apply the subexpression limits, then process back references when the logged state has any. */ +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +/* Add to DEST_NODES the members of CANDIDATES that belong to the merged inverse epsilon closures of DEST_NODES; the merged set is built once and kept in the DFA state acquired for DEST_NODES. NOTE(review): the result of re_node_set_merge is not checked here -- confirm it cannot fail. */ +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} +/* Remove from DEST_NODES the nodes in the inverse epsilon closure of NODE, keeping those collected in except_nodes: candidates in the inverse closure of an epsilon node that has a destination inside DEST_NODES but outside the closure of NODE. */ +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = dfa->edests[cur_node].elems[0]; + int edst2 = 
((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} +/* Return 1 when, for some entry of LIMITS, the source (SRC_NODE, SRC_IDX) and the destination (DST_NODE, DST_IDX) get different positions from check_dst_limits_calc_pos; return 0 otherwise. */ +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + ( ) + ( ) + ( ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} +/* Boundary case of check_dst_limits_calc_pos: scan the epsilon closure of FROM_NODE. Bit 0 of BOUNDARIES is tested for the -1 results and bit 1 for the 0 results; when nothing decides, the result is 1 if bit 1 is set and 0 otherwise. */ +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} +/* Classify STR_IDX relative to the subexpression span stored in back-reference entry LIMIT: -1 before subexp_from, 1 after subexp_to, 0 strictly between them; on either boundary the epsilon closure of FROM_NODE decides. */ +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that here ent->subexp_to == str_idx and str_idx > ent->subexp_from. */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it from the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it from the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} +/* For each OP_BACK_REF node of CANDIDATES that has a cache entry at STR_IDX and whose destination is kept in the sifted states, run sift_states_backward again from that node with the entry added to the limits, and merge the result into SCTX->limited_states when that array is present. */ +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + +/* Return the byte count check_node_accept_bytes reports for node NODE_IDX at STR_IDX, reset to 0 when that count is positive, the end index does not exceed MAX_STR_IDX, and the next node is absent from the sifted state at the end index. */ +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + couldn't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/ + +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR 
is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? 
naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. 
*/ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. 
*/ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. 
*/ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. 
*/ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? 
*/ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. + TODO: This function isn't efficient... + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need. 
*/ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? path->next_idx : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + 
if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? 
*/ + +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; + reg_errcode_t err = REG_NOERROR; + re_node_set union_set; + re_node_set_init_empty (&union_set); + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - inside the sub expression whose 
number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES. 
*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT is appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state. 
*/ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build transition table for the state. + Return 1 if succeeded, otherwise return NULL. */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'. 
*/ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. 
*/ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts. 
*/ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. 
*/ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* k-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DEST_CH[i]. This function return the number of destinations. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept. 
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + int32_t idx; + /* This #include defines a local function! */ +# include + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. 
*/ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx]; + if (weight_len == weights[equiv_class_idx]) + { + int cnt = 0; + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Double the lengthes of the buffers. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. 
If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/deps/libmagic/pcre/AUTHORS b/deps/libmagic/pcre/AUTHORS deleted file mode 100644 index ba4753d..0000000 --- a/deps/libmagic/pcre/AUTHORS +++ /dev/null @@ -1,45 +0,0 @@ -THE MAIN PCRE LIBRARY ---------------------- - -Written by: Philip Hazel -Email local part: ph10 -Email domain: cam.ac.uk - -University of Cambridge Computing Service, -Cambridge, England. - -Copyright (c) 1997-2012 University of Cambridge -All rights reserved - - -PCRE JUST-IN-TIME COMPILATION SUPPORT -------------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2010-2012 Zoltan Herczeg -All rights reserved. - - -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2009-2012 Zoltan Herczeg -All rights reserved. 
- - -THE C++ WRAPPER LIBRARY ------------------------ - -Written by: Google Inc. - -Copyright (c) 2007-2012 Google Inc -All rights reserved - -#### diff --git a/deps/libmagic/pcre/COPYING b/deps/libmagic/pcre/COPYING deleted file mode 100644 index 58eed01..0000000 --- a/deps/libmagic/pcre/COPYING +++ /dev/null @@ -1,5 +0,0 @@ -PCRE LICENCE - -Please see the file LICENCE in the PCRE distribution for licensing details. - -End diff --git a/deps/libmagic/pcre/ChangeLog b/deps/libmagic/pcre/ChangeLog deleted file mode 100644 index 1b016ec..0000000 --- a/deps/libmagic/pcre/ChangeLog +++ /dev/null @@ -1,4981 +0,0 @@ -ChangeLog for PCRE ------------------- - -Version 8.32 30-November-2012 ------------------------------ - -1. Improved JIT compiler optimizations for first character search and single - character iterators. - -2. Supporting IBM XL C compilers for PPC architectures in the JIT compiler. - Patch by Daniel Richard G. - -3. Single character iterator optimizations in the JIT compiler. - -4. Improved JIT compiler optimizations for character ranges. - -5. Rename the "leave" variable names to "quit" to improve WinCE compatibility. - Reported by Giuseppe D'Angelo. - -6. The PCRE_STARTLINE bit, indicating that a match can occur only at the start - of a line, was being set incorrectly in cases where .* appeared inside - atomic brackets at the start of a pattern, or where there was a subsequent - *PRUNE or *SKIP. - -7. Improved instruction cache flush for POWER/PowerPC. - Patch by Daniel Richard G. - -8. Fixed a number of issues in pcregrep, making it more compatible with GNU - grep: - - (a) There is now no limit to the number of patterns to be matched. - - (b) An error is given if a pattern is too long. - - (c) Multiple uses of --exclude, --exclude-dir, --include, and --include-dir - are now supported. - - (d) --exclude-from and --include-from (multiple use) have been added. 
- - (e) Exclusions and inclusions now apply to all files and directories, not - just to those obtained from scanning a directory recursively. - - (f) Multiple uses of -f and --file-list are now supported. - - (g) In a Windows environment, the default for -d has been changed from - "read" (the GNU grep default) to "skip", because otherwise the presence - of a directory in the file list provokes an error. - - (h) The documentation has been revised and clarified in places. - -9. Improve the matching speed of capturing brackets. - -10. Changed the meaning of \X so that it now matches a Unicode extended - grapheme cluster. - -11. Patch by Daniel Richard G to the autoconf files to add a macro for sorting - out POSIX threads when JIT support is configured. - -12. Added support for PCRE_STUDY_EXTRA_NEEDED. - -13. In the POSIX wrapper regcomp() function, setting re_nsub field in the preg - structure could go wrong in environments where size_t is not the same size - as int. - -14. Applied user-supplied patch to pcrecpp.cc to allow PCRE_NO_UTF8_CHECK to be - set. - -15. The EBCDIC support had decayed; later updates to the code had included - explicit references to (e.g.) \x0a instead of CHAR_LF. There has been a - general tidy up of EBCDIC-related issues, and the documentation was also - not quite right. There is now a test that can be run on ASCII systems to - check some of the EBCDIC-related things (but is it not a full test). - -16. The new PCRE_STUDY_EXTRA_NEEDED option is now used by pcregrep, resulting - in a small tidy to the code. - -17. Fix JIT tests when UTF is disabled and both 8 and 16 bit mode are enabled. - -18. If the --only-matching (-o) option in pcregrep is specified multiple - times, each one causes appropriate output. For example, -o1 -o2 outputs the - substrings matched by the 1st and 2nd capturing parentheses. A separating - string can be specified by --om-separator (default empty). - -19. Improving the first n character searches. - -20. 
Turn case lists for horizontal and vertical white space into macros so that - they are defined only once. - -21. This set of changes together give more compatible Unicode case-folding - behaviour for characters that have more than one other case when UCP - support is available. - - (a) The Unicode property table now has offsets into a new table of sets of - three or more characters that are case-equivalent. The MultiStage2.py - script that generates these tables (the pcre_ucd.c file) now scans - CaseFolding.txt instead of UnicodeData.txt for character case - information. - - (b) The code for adding characters or ranges of characters to a character - class has been abstracted into a generalized function that also handles - case-independence. In UTF-mode with UCP support, this uses the new data - to handle characters with more than one other case. - - (c) A bug that is fixed as a result of (b) is that codepoints less than 256 - whose other case is greater than 256 are now correctly matched - caselessly. Previously, the high codepoint matched the low one, but not - vice versa. - - (d) The processing of \h, \H, \v, and \ in character classes now makes use - of the new class addition function, using character lists defined as - macros alongside the case definitions of 20 above. - - (e) Caseless back references now work with characters that have more than - one other case. - - (f) General caseless matching of characters with more than one other case - is supported. - -22. Unicode character properties were updated from Unicode 6.2.0 - -23. Improved CMake support under Windows. Patch by Daniel Richard G. - -24. Add support for 32-bit character strings, and UTF-32 - -25. Major JIT compiler update (code refactoring and bugfixing). - Experimental Sparc 32 support is added. - -26. Applied a modified version of Daniel Richard G's patch to create - pcre.h.generic and config.h.generic by "make" instead of in the - PrepareRelease script. - -27. 
Added a definition for CHAR_NULL (helpful for the z/OS port), and use it in - pcre_compile.c when checking for a zero character. - -28. Introducing a native interface for JIT. Through this interface, the compiled - machine code can be directly executed. The purpose of this interface is to - provide fast pattern matching, so several sanity checks are not performed. - However, feature tests are still performed. The new interface provides - 1.4x speedup compared to the old one. - -29. If pcre_exec() or pcre_dfa_exec() was called with a negative value for - the subject string length, the error given was PCRE_ERROR_BADOFFSET, which - was confusing. There is now a new error PCRE_ERROR_BADLENGTH for this case. - -30. In 8-bit UTF-8 mode, pcretest failed to give an error for data codepoints - greater than 0x7fffffff (which cannot be represented in UTF-8, even under - the "old" RFC 2279). Instead, it ended up passing a negative length to - pcre_exec(). - -31. Add support for GCC's visibility feature to hide internal functions. - -32. Running "pcretest -C pcre8" or "pcretest -C pcre16" gave a spurious error - "unknown -C option" after outputting 0 or 1. - -33. There is now support for generating a code coverage report for the test - suite in environments where gcc is the compiler and lcov is installed. This - is mainly for the benefit of the developers. - -34. If PCRE is built with --enable-valgrind, certain memory regions are marked - unaddressable using valgrind annotations, allowing valgrind to detect - invalid memory accesses. This is mainly for the benefit of the developers. - -25. (*UTF) can now be used to start a pattern in any of the three libraries. - -26. Give configure error if --enable-cpp but no C++ compiler found. - - -Version 8.31 06-July-2012 -------------------------- - -1. Fixing a wrong JIT test case and some compiler warnings. - -2. Removed a bashism from the RunTest script. - -3. 
Add a cast to pcre_exec.c to fix the warning "unary minus operator applied - to unsigned type, result still unsigned" that was given by an MS compiler - on encountering the code "-sizeof(xxx)". - -4. Partial matching support is added to the JIT compiler. - -5. Fixed several bugs concerned with partial matching of items that consist - of more than one character: - - (a) /^(..)\1/ did not partially match "aba" because checking references was - done on an "all or nothing" basis. This also applied to repeated - references. - - (b) \R did not give a hard partial match if \r was found at the end of the - subject. - - (c) \X did not give a hard partial match after matching one or more - characters at the end of the subject. - - (d) When newline was set to CRLF, a pattern such as /a$/ did not recognize - a partial match for the string "\r". - - (e) When newline was set to CRLF, the metacharacter "." did not recognize - a partial match for a CR character at the end of the subject string. - -6. If JIT is requested using /S++ or -s++ (instead of just /S+ or -s+) when - running pcretest, the text "(JIT)" added to the output whenever JIT is - actually used to run the match. - -7. Individual JIT compile options can be set in pcretest by following -s+[+] - or /S+[+] with a digit between 1 and 7. - -8. OP_NOT now supports any UTF character not just single-byte ones. - -9. (*MARK) control verb is now supported by the JIT compiler. - -10. The command "./RunTest list" lists the available tests without actually - running any of them. (Because I keep forgetting what they all are.) - -11. Add PCRE_INFO_MAXLOOKBEHIND. - -12. Applied a (slightly modified) user-supplied patch that improves performance - when the heap is used for recursion (compiled with --disable-stack-for- - recursion). Instead of malloc and free for each heap frame each time a - logical recursion happens, frames are retained on a chain and re-used where - possible. This sometimes gives as much as 30% improvement. - -13. 
As documented, (*COMMIT) is now confined to within a recursive subpattern - call. - -14. As documented, (*COMMIT) is now confined to within a positive assertion. - -15. It is now possible to link pcretest with libedit as an alternative to - libreadline. - -16. (*COMMIT) control verb is now supported by the JIT compiler. - -17. The Unicode data tables have been updated to Unicode 6.1.0. - -18. Added --file-list option to pcregrep. - -19. Added binary file support to pcregrep, including the -a, --binary-files, - -I, and --text options. - -20. The madvise function is renamed for posix_madvise for QNX compatibility - reasons. Fixed by Giuseppe D'Angelo. - -21. Fixed a bug for backward assertions with REVERSE 0 in the JIT compiler. - -22. Changed the option for creating symbolic links for 16-bit man pages from - -s to -sf so that re-installing does not cause issues. - -23. Support PCRE_NO_START_OPTIMIZE in JIT as (*MARK) support requires it. - -24. Fixed a very old bug in pcretest that caused errors with restarted DFA - matches in certain environments (the workspace was not being correctly - retained). Also added to pcre_dfa_exec() a simple plausibility check on - some of the workspace data at the beginning of a restart. - -25. \s*\R was auto-possessifying the \s* when it should not, whereas \S*\R - was not doing so when it should - probably a typo introduced by SVN 528 - (change 8.10/14). - -26. When PCRE_UCP was not set, \w+\x{c4} was incorrectly auto-possessifying the - \w+ when the character tables indicated that \x{c4} was a word character. - There were several related cases, all because the tests for doing a table - lookup were testing for characters less than 127 instead of 255. - -27. If a pattern contains capturing parentheses that are not used in a match, - their slots in the ovector are set to -1. For those that are higher than - any matched groups, this happens at the end of processing. 
In the case when - there were back references that the ovector was too small to contain - (causing temporary malloc'd memory to be used during matching), and the - highest capturing number was not used, memory off the end of the ovector - was incorrectly being set to -1. (It was using the size of the temporary - memory instead of the true size.) - -28. To catch bugs like 27 using valgrind, when pcretest is asked to specify an - ovector size, it uses memory at the end of the block that it has got. - -29. Check for an overlong MARK name and give an error at compile time. The - limit is 255 for the 8-bit library and 65535 for the 16-bit library. - -30. JIT compiler update. - -31. JIT is now supported on jailbroken iOS devices. Thanks for Ruiger - Rill for the patch. - -32. Put spaces around SLJIT_PRINT_D in the JIT compiler. Required by CXX11. - -33. Variable renamings in the PCRE-JIT compiler. No functionality change. - -34. Fixed typos in pcregrep: in two places there was SUPPORT_LIBZ2 instead of - SUPPORT_LIBBZ2. This caused a build problem when bzip2 but not gzip (zlib) - was enabled. - -35. Improve JIT code generation for greedy plus quantifier. - -36. When /((?:a?)*)*c/ or /((?>a?)*)*c/ was matched against "aac", it set group - 1 to "aa" instead of to an empty string. The bug affected repeated groups - that could potentially match an empty string. - -37. Optimizing single character iterators in JIT. - -38. Wide characters specified with \uxxxx in JavaScript mode are now subject to - the same checks as \x{...} characters in non-JavaScript mode. Specifically, - codepoints that are too big for the mode are faulted, and in a UTF mode, - disallowed codepoints are also faulted. - -39. If PCRE was compiled with UTF support, in three places in the DFA - matcher there was code that should only have been obeyed in UTF mode, but - was being obeyed unconditionally. In 8-bit mode this could cause incorrect - processing when bytes with values greater than 127 were present. 
In 16-bit - mode the bug would be provoked by values in the range 0xfc00 to 0xdc00. In - both cases the values are those that cannot be the first data item in a UTF - character. The three items that might have provoked this were recursions, - possessively repeated groups, and atomic groups. - -40. Ensure that libpcre is explicitly listed in the link commands for pcretest - and pcregrep, because some OS require shared objects to be explicitly - passed to ld, causing the link step to fail if they are not. - -41. There were two incorrect #ifdefs in pcre_study.c, meaning that, in 16-bit - mode, patterns that started with \h* or \R* might be incorrectly matched. - - -Version 8.30 04-February-2012 ------------------------------ - -1. Renamed "isnumber" as "is_a_number" because in some Mac environments this - name is defined in ctype.h. - -2. Fixed a bug in fixed-length calculation for lookbehinds that would show up - only in quite long subpatterns. - -3. Removed the function pcre_info(), which has been obsolete and deprecated - since it was replaced by pcre_fullinfo() in February 2000. - -4. For a non-anchored pattern, if (*SKIP) was given with a name that did not - match a (*MARK), and the match failed at the start of the subject, a - reference to memory before the start of the subject could occur. This bug - was introduced by fix 17 of release 8.21. - -5. A reference to an unset group with zero minimum repetition was giving - totally wrong answers (in non-JavaScript-compatibility mode). For example, - /(another)?(\1?)test/ matched against "hello world test". This bug was - introduced in release 8.13. - -6. Add support for 16-bit character strings (a large amount of work involving - many changes and refactorings). - -7. RunGrepTest failed on msys because \r\n was replaced by whitespace when the - command "pattern=`printf 'xxx\r\njkl'`" was run. The pattern is now taken - from a file. - -8. 
Ovector size of 2 is also supported by JIT based pcre_exec (the ovector size - rounding is not applied in this particular case). - -9. The invalid Unicode surrogate codepoints U+D800 to U+DFFF are now rejected - if they appear, or are escaped, in patterns. - -10. Get rid of a number of -Wunused-but-set-variable warnings. - -11. The pattern /(?=(*:x))(q|)/ matches an empty string, and returns the mark - "x". The similar pattern /(?=(*:x))((*:y)q|)/ did not return a mark at all. - Oddly, Perl behaves the same way. PCRE has been fixed so that this pattern - also returns the mark "x". This bug applied to capturing parentheses, - non-capturing parentheses, and atomic parentheses. It also applied to some - assertions. - -12. Stephen Kelly's patch to CMakeLists.txt allows it to parse the version - information out of configure.ac instead of relying on pcre.h.generic, which - is not stored in the repository. - -13. Applied Dmitry V. Levin's patch for a more portable method for linking with - -lreadline. - -14. ZH added PCRE_CONFIG_JITTARGET; added its output to pcretest -C. - -15. Applied Graycode's patch to put the top-level frame on the stack rather - than the heap when not using the stack for recursion. This gives a - performance improvement in many cases when recursion is not deep. - -16. Experimental code added to "pcretest -C" to output the stack frame size. - - -Version 8.21 12-Dec-2011 ------------------------- - -1. Updating the JIT compiler. - -2. JIT compiler now supports OP_NCREF, OP_RREF and OP_NRREF. New test cases - are added as well. - -3. Fix cache-flush issue on PowerPC (It is still an experimental JIT port). - PCRE_EXTRA_TABLES is not suported by JIT, and should be checked before - calling _pcre_jit_exec. Some extra comments are added. - -4. (*MARK) settings inside atomic groups that do not contain any capturing - parentheses, for example, (?>a(*:m)), were not being passed out. This bug - was introduced by change 18 for 8.20. - -5. 
Supporting of \x, \U and \u in JavaScript compatibility mode based on the - ECMA-262 standard. - -6. Lookbehinds such as (?<=a{2}b) that contained a fixed repetition were - erroneously being rejected as "not fixed length" if PCRE_CASELESS was set. - This bug was probably introduced by change 9 of 8.13. - -7. While fixing 6 above, I noticed that a number of other items were being - incorrectly rejected as "not fixed length". This arose partly because newer - opcodes had not been added to the fixed-length checking code. I have (a) - corrected the bug and added tests for these items, and (b) arranged for an - error to occur if an unknown opcode is encountered while checking for fixed - length instead of just assuming "not fixed length". The items that were - rejected were: (*ACCEPT), (*COMMIT), (*FAIL), (*MARK), (*PRUNE), (*SKIP), - (*THEN), \h, \H, \v, \V, and single character negative classes with fixed - repetitions, e.g. [^a]{3}, with and without PCRE_CASELESS. - -8. A possessively repeated conditional subpattern such as (?(?=c)c|d)++ was - being incorrectly compiled and would have given unpredicatble results. - -9. A possessively repeated subpattern with minimum repeat count greater than - one behaved incorrectly. For example, (A){2,}+ behaved as if it was - (A)(A)++ which meant that, after a subsequent mismatch, backtracking into - the first (A) could occur when it should not. - -10. Add a cast and remove a redundant test from the code. - -11. JIT should use pcre_malloc/pcre_free for allocation. - -12. Updated pcre-config so that it no longer shows -L/usr/lib, which seems - best practice nowadays, and helps with cross-compiling. (If the exec_prefix - is anything other than /usr, -L is still shown). - -13. In non-UTF-8 mode, \C is now supported in lookbehinds and DFA matching. - -14. Perl does not support \N without a following name in a [] class; PCRE now - also gives an error. - -15. 
If a forward reference was repeated with an upper limit of around 2000, - it caused the error "internal error: overran compiling workspace". The - maximum number of forward references (including repeats) was limited by the - internal workspace, and dependent on the LINK_SIZE. The code has been - rewritten so that the workspace expands (via pcre_malloc) if necessary, and - the default depends on LINK_SIZE. There is a new upper limit (for safety) - of around 200,000 forward references. While doing this, I also speeded up - the filling in of repeated forward references. - -16. A repeated forward reference in a pattern such as (a)(?2){2}(.) was - incorrectly expecting the subject to contain another "a" after the start. - -17. When (*SKIP:name) is activated without a corresponding (*MARK:name) earlier - in the match, the SKIP should be ignored. This was not happening; instead - the SKIP was being treated as NOMATCH. For patterns such as - /A(*MARK:A)A+(*SKIP:B)Z|AAC/ this meant that the AAC branch was never - tested. - -18. The behaviour of (*MARK), (*PRUNE), and (*THEN) has been reworked and is - now much more compatible with Perl, in particular in cases where the result - is a non-match for a non-anchored pattern. For example, if - /b(*:m)f|a(*:n)w/ is matched against "abc", the non-match returns the name - "m", where previously it did not return a name. A side effect of this - change is that for partial matches, the last encountered mark name is - returned, as for non matches. A number of tests that were previously not - Perl-compatible have been moved into the Perl-compatible test files. The - refactoring has had the pleasing side effect of removing one argument from - the match() function, thus reducing its stack requirements. - -19. If the /S+ option was used in pcretest to study a pattern using JIT, - subsequent uses of /S (without +) incorrectly behaved like /S+. - -21. Retrieve executable code size support for the JIT compiler and fixing - some warnings. - -22. 
A caseless match of a UTF-8 character whose other case uses fewer bytes did - not work when the shorter character appeared right at the end of the - subject string. - -23. Added some (int) casts to non-JIT modules to reduce warnings on 64-bit - systems. - -24. Added PCRE_INFO_JITSIZE to pass on the value from (21) above, and also - output it when the /M option is used in pcretest. - -25. The CheckMan script was not being included in the distribution. Also, added - an explicit "perl" to run Perl scripts from the PrepareRelease script - because this is reportedly needed in Windows. - -26. If study data was being save in a file and studying had not found a set of - "starts with" bytes for the pattern, the data written to the file (though - never used) was taken from uninitialized memory and so caused valgrind to - complain. - -27. Updated RunTest.bat as provided by Sheri Pierce. - -28. Fixed a possible uninitialized memory bug in pcre_jit_compile.c. - -29. Computation of memory usage for the table of capturing group names was - giving an unnecessarily large value. - - -Version 8.20 21-Oct-2011 ------------------------- - -1. Change 37 of 8.13 broke patterns like [:a]...[b:] because it thought it had - a POSIX class. After further experiments with Perl, which convinced me that - Perl has bugs and confusions, a closing square bracket is no longer allowed - in a POSIX name. This bug also affected patterns with classes that started - with full stops. - -2. If a pattern such as /(a)b|ac/ is matched against "ac", there is no - captured substring, but while checking the failing first alternative, - substring 1 is temporarily captured. If the output vector supplied to - pcre_exec() was not big enough for this capture, the yield of the function - was still zero ("insufficient space for captured substrings"). This cannot - be totally fixed without adding another stack variable, which seems a lot - of expense for a edge case. 
However, I have improved the situation in cases - such as /(a)(b)x|abc/ matched against "abc", where the return code - indicates that fewer than the maximum number of slots in the ovector have - been set. - -3. Related to (2) above: when there are more back references in a pattern than - slots in the output vector, pcre_exec() uses temporary memory during - matching, and copies in the captures as far as possible afterwards. It was - using the entire output vector, but this conflicts with the specification - that only 2/3 is used for passing back captured substrings. Now it uses - only the first 2/3, for compatibility. This is, of course, another edge - case. - -4. Zoltan Herczeg's just-in-time compiler support has been integrated into the - main code base, and can be used by building with --enable-jit. When this is - done, pcregrep automatically uses it unless --disable-pcregrep-jit or the - runtime --no-jit option is given. - -5. When the number of matches in a pcre_dfa_exec() run exactly filled the - ovector, the return from the function was zero, implying that there were - other matches that did not fit. The correct "exactly full" value is now - returned. - -6. If a subpattern that was called recursively or as a subroutine contained - (*PRUNE) or any other control that caused it to give a non-standard return, - invalid errors such as "Error -26 (nested recursion at the same subject - position)" or even infinite loops could occur. - -7. If a pattern such as /a(*SKIP)c|b(*ACCEPT)|/ was studied, it stopped - computing the minimum length on reaching *ACCEPT, and so ended up with the - wrong value of 1 rather than 0. Further investigation indicates that - computing a minimum subject length in the presence of *ACCEPT is difficult - (think back references, subroutine calls), and so I have changed the code - so that no minimum is registered for a pattern that contains *ACCEPT. - -8. 
If (*THEN) was present in the first (true) branch of a conditional group, - it was not handled as intended. [But see 16 below.] - -9. Replaced RunTest.bat and CMakeLists.txt with improved versions provided by - Sheri Pierce. - -10. A pathological pattern such as /(*ACCEPT)a/ was miscompiled, thinking that - the first byte in a match must be "a". - -11. Change 17 for 8.13 increased the recursion depth for patterns like - /a(?:.)*?a/ drastically. I've improved things by remembering whether a - pattern contains any instances of (*THEN). If it does not, the old - optimizations are restored. It would be nice to do this on a per-group - basis, but at the moment that is not feasible. - -12. In some environments, the output of pcretest -C is CRLF terminated. This - broke RunTest's code that checks for the link size. A single white space - character after the value is now allowed for. - -13. RunTest now checks for the "fr" locale as well as for "fr_FR" and "french". - For "fr", it uses the Windows-specific input and output files. - -14. If (*THEN) appeared in a group that was called recursively or as a - subroutine, it did not work as intended. [But see next item.] - -15. Consider the pattern /A (B(*THEN)C) | D/ where A, B, C, and D are complex - pattern fragments (but not containing any | characters). If A and B are - matched, but there is a failure in C so that it backtracks to (*THEN), PCRE - was behaving differently to Perl. PCRE backtracked into A, but Perl goes to - D. In other words, Perl considers parentheses that do not contain any | - characters to be part of a surrounding alternative, whereas PCRE was - treading (B(*THEN)C) the same as (B(*THEN)C|(*FAIL)) -- which Perl handles - differently. PCRE now behaves in the same way as Perl, except in the case - of subroutine/recursion calls such as (?1) which have in any case always - been different (but PCRE had them first :-). - -16. 
Related to 15 above: Perl does not treat the | in a conditional group as - creating alternatives. Such a group is treated in the same way as an - ordinary group without any | characters when processing (*THEN). PCRE has - been changed to match Perl's behaviour. - -17. If a user had set PCREGREP_COLO(U)R to something other than 1:31, the - RunGrepTest script failed. - -18. Change 22 for version 13 caused atomic groups to use more stack. This is - inevitable for groups that contain captures, but it can lead to a lot of - stack use in large patterns. The old behaviour has been restored for atomic - groups that do not contain any capturing parentheses. - -19. If the PCRE_NO_START_OPTIMIZE option was set for pcre_compile(), it did not - suppress the check for a minimum subject length at run time. (If it was - given to pcre_exec() or pcre_dfa_exec() it did work.) - -20. Fixed an ASCII-dependent infelicity in pcretest that would have made it - fail to work when decoding hex characters in data strings in EBCDIC - environments. - -21. It appears that in at least one Mac OS environment, the isxdigit() function - is implemented as a macro that evaluates to its argument more than once, - contravening the C 90 Standard (I haven't checked a later standard). There - was an instance in pcretest which caused it to go wrong when processing - \x{...} escapes in subject strings. The has been rewritten to avoid using - things like p++ in the argument of isxdigit(). - - -Version 8.13 16-Aug-2011 ------------------------- - -1. The Unicode data tables have been updated to Unicode 6.0.0. - -2. Two minor typos in pcre_internal.h have been fixed. - -3. Added #include to pcre_scanner_unittest.cc, pcrecpp.cc, and - pcrecpp_unittest.cc. They are needed for strcmp(), memset(), and strchr() - in some environments (e.g. Solaris 10/SPARC using Sun Studio 12U2). - -4. 
There were a number of related bugs in the code for matching backrefences - caselessly in UTF-8 mode when codes for the characters concerned were - different numbers of bytes. For example, U+023A and U+2C65 are an upper - and lower case pair, using 2 and 3 bytes, respectively. The main bugs were: - (a) A reference to 3 copies of a 2-byte code matched only 2 of a 3-byte - code. (b) A reference to 2 copies of a 3-byte code would not match 2 of a - 2-byte code at the end of the subject (it thought there wasn't enough data - left). - -5. Comprehensive information about what went wrong is now returned by - pcre_exec() and pcre_dfa_exec() when the UTF-8 string check fails, as long - as the output vector has at least 2 elements. The offset of the start of - the failing character and a reason code are placed in the vector. - -6. When the UTF-8 string check fails for pcre_compile(), the offset that is - now returned is for the first byte of the failing character, instead of the - last byte inspected. This is an incompatible change, but I hope it is small - enough not to be a problem. It makes the returned offset consistent with - pcre_exec() and pcre_dfa_exec(). - -7. pcretest now gives a text phrase as well as the error number when - pcre_exec() or pcre_dfa_exec() fails; if the error is a UTF-8 check - failure, the offset and reason code are output. - -8. When \R was used with a maximizing quantifier it failed to skip backwards - over a \r\n pair if the subsequent match failed. Instead, it just skipped - back over a single character (\n). This seems wrong (because it treated the - two characters as a single entity when going forwards), conflicts with the - documentation that \R is equivalent to (?>\r\n|\n|...etc), and makes the - behaviour of \R* different to (\R)*, which also seems wrong. The behaviour - has been changed. - -9. 
Some internal refactoring has changed the processing so that the handling - of the PCRE_CASELESS and PCRE_MULTILINE options is done entirely at compile - time (the PCRE_DOTALL option was changed this way some time ago: version - 7.7 change 16). This has made it possible to abolish the OP_OPT op code, - which was always a bit of a fudge. It also means that there is one less - argument for the match() function, which reduces its stack requirements - slightly. This change also fixes an incompatibility with Perl: the pattern - (?i:([^b]))(?1) should not match "ab", but previously PCRE gave a match. - -10. More internal refactoring has drastically reduced the number of recursive - calls to match() for possessively repeated groups such as (abc)++ when - using pcre_exec(). - -11. While implementing 10, a number of bugs in the handling of groups were - discovered and fixed: - - (?<=(a)+) was not diagnosed as invalid (non-fixed-length lookbehind). - (a|)*(?1) gave a compile-time internal error. - ((a|)+)+ did not notice that the outer group could match an empty string. - (^a|^)+ was not marked as anchored. - (.*a|.*)+ was not marked as matching at start or after a newline. - -12. Yet more internal refactoring has removed another argument from the match() - function. Special calls to this function are now indicated by setting a - value in a variable in the "match data" data block. - -13. Be more explicit in pcre_study() instead of relying on "default" for - opcodes that mean there is no starting character; this means that when new - ones are added and accidentally left out of pcre_study(), testing should - pick them up. - -14. The -s option of pcretest has been documented for ages as being an old - synonym of -m (show memory usage). I have changed it to mean "force study - for every regex", that is, assume /S for every regex. This is similar to -i - and -d etc. It's slightly incompatible, but I'm hoping nobody is still - using it. 
It makes it easier to run collections of tests with and without - study enabled, and thereby test pcre_study() more easily. All the standard - tests are now run with and without -s (but some patterns can be marked as - "never study" - see 20 below). - -15. When (*ACCEPT) was used in a subpattern that was called recursively, the - restoration of the capturing data to the outer values was not happening - correctly. - -16. If a recursively called subpattern ended with (*ACCEPT) and matched an - empty string, and PCRE_NOTEMPTY was set, pcre_exec() thought the whole - pattern had matched an empty string, and so incorrectly returned a no - match. - -17. There was optimizing code for the last branch of non-capturing parentheses, - and also for the obeyed branch of a conditional subexpression, which used - tail recursion to cut down on stack usage. Unfortunately, now that there is - the possibility of (*THEN) occurring in these branches, tail recursion is - no longer possible because the return has to be checked for (*THEN). These - two optimizations have therefore been removed. [But see 8.20/11 above.] - -18. If a pattern containing \R was studied, it was assumed that \R always - matched two bytes, thus causing the minimum subject length to be - incorrectly computed because \R can also match just one byte. - -19. If a pattern containing (*ACCEPT) was studied, the minimum subject length - was incorrectly computed. - -20. If /S is present twice on a test pattern in pcretest input, it now - *disables* studying, thereby overriding the use of -s on the command line - (see 14 above). This is necessary for one or two tests to keep the output - identical in both cases. - -21. When (*ACCEPT) was used in an assertion that matched an empty string and - PCRE_NOTEMPTY was set, PCRE applied the non-empty test to the assertion. - -22. 
When an atomic group that contained a capturing parenthesis was - successfully matched, but the branch in which it appeared failed, the - capturing was not being forgotten if a higher numbered group was later - captured. For example, /(?>(a))b|(a)c/ when matching "ac" set capturing - group 1 to "a", when in fact it should be unset. This applied to multi- - branched capturing and non-capturing groups, repeated or not, and also to - positive assertions (capturing in negative assertions does not happen - in PCRE) and also to nested atomic groups. - -23. Add the ++ qualifier feature to pcretest, to show the remainder of the - subject after a captured substring, to make it easier to tell which of a - number of identical substrings has been captured. - -24. The way atomic groups are processed by pcre_exec() has been changed so that - if they are repeated, backtracking one repetition now resets captured - values correctly. For example, if ((?>(a+)b)+aabab) is matched against - "aaaabaaabaabab" the value of captured group 2 is now correctly recorded as - "aaa". Previously, it would have been "a". As part of this code - refactoring, the way recursive calls are handled has also been changed. - -25. If an assertion condition captured any substrings, they were not passed - back unless some other capturing happened later. For example, if - (?(?=(a))a) was matched against "a", no capturing was returned. - -26. When studying a pattern that contained subroutine calls or assertions, - the code for finding the minimum length of a possible match was handling - direct recursions such as (xxx(?1)|yyy) but not mutual recursions (where - group 1 called group 2 while simultaneously a separate group 2 called group - 1). A stack overflow occurred in this case. I have fixed this by limiting - the recursion depth to 10. - -27. Updated RunTest.bat in the distribution to the version supplied by Tom - Fortmann. 
This supports explicit test numbers on the command line, and has - argument validation and error reporting. - -28. An instance of \X with an unlimited repeat could fail if at any point the - first character it looked at was a mark character. - -29. Some minor code refactoring concerning Unicode properties and scripts - should reduce the stack requirement of match() slightly. - -30. Added the '=' option to pcretest to check the setting of unused capturing - slots at the end of the pattern, which are documented as being -1, but are - not included in the return count. - -31. If \k was not followed by a braced, angle-bracketed, or quoted name, PCRE - compiled something random. Now it gives a compile-time error (as does - Perl). - -32. A *MARK encountered during the processing of a positive assertion is now - recorded and passed back (compatible with Perl). - -33. If --only-matching or --colour was set on a pcregrep call whose pattern - had alternative anchored branches, the search for a second match in a line - was done as if at the line start. Thus, for example, /^01|^02/ incorrectly - matched the line "0102" twice. The same bug affected patterns that started - with a backwards assertion. For example /\b01|\b02/ also matched "0102" - twice. - -34. Previously, PCRE did not allow quantification of assertions. However, Perl - does, and because of capturing effects, quantifying parenthesized - assertions may at times be useful. Quantifiers are now allowed for - parenthesized assertions. - -35. A minor code tidy in pcre_compile() when checking options for \R usage. - -36. \g was being checked for fancy things in a character class, when it should - just be a literal "g". - -37. PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the - appearance of a nested POSIX class supersedes an apparent external class. - For example, [:a[:digit:]b:] matches "a", "b", ":", or a digit. Also, - unescaped square brackets may also appear as part of class names. 
For - example, [:a[:abc]b:] gives unknown class "[:abc]b:]". PCRE now behaves - more like Perl. (But see 8.20/1 above.) - -38. PCRE was giving an error for \N with a braced quantifier such as {1,} (this - was because it thought it was \N{name}, which is not supported). - -39. Add minix to OS list not supporting the -S option in pcretest. - -40. PCRE tries to detect cases of infinite recursion at compile time, but it - cannot analyze patterns in sufficient detail to catch mutual recursions - such as ((?1))((?2)). There is now a runtime test that gives an error if a - subgroup is called recursively as a subpattern for a second time at the - same position in the subject string. In previous releases this might have - been caught by the recursion limit, or it might have run out of stack. - -41. A pattern such as /(?(R)a+|(?R)b)/ is quite safe, as the recursion can - happen only once. PCRE was, however incorrectly giving a compile time error - "recursive call could loop indefinitely" because it cannot analyze the - pattern in sufficient detail. The compile time test no longer happens when - PCRE is compiling a conditional subpattern, but actual runaway loops are - now caught at runtime (see 40 above). - -42. It seems that Perl allows any characters other than a closing parenthesis - to be part of the NAME in (*MARK:NAME) and other backtracking verbs. PCRE - has been changed to be the same. - -43. Updated configure.ac to put in more quoting round AC_LANG_PROGRAM etc. so - as not to get warnings when autogen.sh is called. Also changed - AC_PROG_LIBTOOL (deprecated) to LT_INIT (the current macro). - -44. To help people who use pcregrep to scan files containing exceedingly long - lines, the following changes have been made: - - (a) The default value of the buffer size parameter has been increased from - 8K to 20K. (The actual buffer used is three times this size.) - - (b) The default can be changed by ./configure --with-pcregrep-bufsize when - PCRE is built. 
- - (c) A --buffer-size=n option has been added to pcregrep, to allow the size - to be set at run time. - - (d) Numerical values in pcregrep options can be followed by K or M, for - example --buffer-size=50K. - - (e) If a line being scanned overflows pcregrep's buffer, an error is now - given and the return code is set to 2. - -45. Add a pointer to the latest mark to the callout data block. - -46. The pattern /.(*F)/, when applied to "abc" with PCRE_PARTIAL_HARD, gave a - partial match of an empty string instead of no match. This was specific to - the use of ".". - -47. The pattern /f.*/8s, when applied to "for" with PCRE_PARTIAL_HARD, gave a - complete match instead of a partial match. This bug was dependent on both - the PCRE_UTF8 and PCRE_DOTALL options being set. - -48. For a pattern such as /\babc|\bdef/ pcre_study() was failing to set up the - starting byte set, because \b was not being ignored. - - -Version 8.12 15-Jan-2011 ------------------------- - -1. Fixed some typos in the markup of the man pages, and wrote a script that - checks for such things as part of the documentation building process. - -2. On a big-endian 64-bit system, pcregrep did not correctly process the - --match-limit and --recursion-limit options (added for 8.11). In - particular, this made one of the standard tests fail. (The integer value - went into the wrong half of a long int.) - -3. If the --colour option was given to pcregrep with -v (invert match), it - did strange things, either producing crazy output, or crashing. It should, - of course, ignore a request for colour when reporting lines that do not - match. - -4. Another pcregrep bug caused similar problems if --colour was specified with - -M (multiline) and the pattern match finished with a line ending. - -5. In pcregrep, when a pattern that ended with a literal newline sequence was - matched in multiline mode, the following line was shown as part of the - match. This seems wrong, so I have changed it. - -6. 
Another pcregrep bug in multiline mode, when --colour was specified, caused - the check for further matches in the same line (so they could be coloured) - to overrun the end of the current line. If another match was found, it was - incorrectly shown (and then shown again when found in the next line). - -7. If pcregrep was compiled under Windows, there was a reference to the - function pcregrep_exit() before it was defined. I am assuming this was - the cause of the "error C2371: 'pcregrep_exit' : redefinition;" that was - reported by a user. I've moved the definition above the reference. - - -Version 8.11 10-Dec-2010 ------------------------- - -1. (*THEN) was not working properly if there were untried alternatives prior - to it in the current branch. For example, in ((a|b)(*THEN)(*F)|c..) it - backtracked to try for "b" instead of moving to the next alternative branch - at the same level (in this case, to look for "c"). The Perl documentation - is clear that when (*THEN) is backtracked onto, it goes to the "next - alternative in the innermost enclosing group". - -2. (*COMMIT) was not overriding (*THEN), as it does in Perl. In a pattern - such as (A(*COMMIT)B(*THEN)C|D) any failure after matching A should - result in overall failure. Similarly, (*COMMIT) now overrides (*PRUNE) and - (*SKIP), (*SKIP) overrides (*PRUNE) and (*THEN), and (*PRUNE) overrides - (*THEN). - -3. If \s appeared in a character class, it removed the VT character from - the class, even if it had been included by some previous item, for example - in [\x00-\xff\s]. (This was a bug related to the fact that VT is not part - of \s, but is part of the POSIX "space" class.) - -4. A partial match never returns an empty string (because you can always - match an empty string at the end of the subject); however the checking for - an empty string was starting at the "start of match" point. 
This has been - changed to the "earliest inspected character" point, because the returned - data for a partial match starts at this character. This means that, for - example, /(?<=abc)def/ gives a partial match for the subject "abc" - (previously it gave "no match"). - -5. Changes have been made to the way PCRE_PARTIAL_HARD affects the matching - of $, \z, \Z, \b, and \B. If the match point is at the end of the string, - previously a full match would be given. However, setting PCRE_PARTIAL_HARD - has an implication that the given string is incomplete (because a partial - match is preferred over a full match). For this reason, these items now - give a partial match in this situation. [Aside: previously, the one case - /t\b/ matched against "cat" with PCRE_PARTIAL_HARD set did return a partial - match rather than a full match, which was wrong by the old rules, but is - now correct.] - -6. There was a bug in the handling of #-introduced comments, recognized when - PCRE_EXTENDED is set, when PCRE_NEWLINE_ANY and PCRE_UTF8 were also set. - If a UTF-8 multi-byte character included the byte 0x85 (e.g. +U0445, whose - UTF-8 encoding is 0xd1,0x85), this was misinterpreted as a newline when - scanning for the end of the comment. (*Character* 0x85 is an "any" newline, - but *byte* 0x85 is not, in UTF-8 mode). This bug was present in several - places in pcre_compile(). - -7. Related to (6) above, when pcre_compile() was skipping #-introduced - comments when looking ahead for named forward references to subpatterns, - the only newline sequence it recognized was NL. It now handles newlines - according to the set newline convention. - -8. SunOS4 doesn't have strerror() or strtoul(); pcregrep dealt with the - former, but used strtoul(), whereas pcretest avoided strtoul() but did not - cater for a lack of strerror(). These oversights have been fixed. - -9. Added --match-limit and --recursion-limit to pcregrep. - -10. 
Added two casts needed to build with Visual Studio when NO_RECURSE is set. - -11. When the -o option was used, pcregrep was setting a return code of 1, even - when matches were found, and --line-buffered was not being honoured. - -12. Added an optional parentheses number to the -o and --only-matching options - of pcregrep. - -13. Imitating Perl's /g action for multiple matches is tricky when the pattern - can match an empty string. The code to do it in pcretest and pcredemo - needed fixing: - - (a) When the newline convention was "crlf", pcretest got it wrong, skipping - only one byte after an empty string match just before CRLF (this case - just got forgotten; "any" and "anycrlf" were OK). - - (b) The pcretest code also had a bug, causing it to loop forever in UTF-8 - mode when an empty string match preceded an ASCII character followed by - a non-ASCII character. (The code for advancing by one character rather - than one byte was nonsense.) - - (c) The pcredemo.c sample program did not have any code at all to handle - the cases when CRLF is a valid newline sequence. - -14. Neither pcre_exec() nor pcre_dfa_exec() was checking that the value given - as a starting offset was within the subject string. There is now a new - error, PCRE_ERROR_BADOFFSET, which is returned if the starting offset is - negative or greater than the length of the string. In order to test this, - pcretest is extended to allow the setting of negative starting offsets. - -15. In both pcre_exec() and pcre_dfa_exec() the code for checking that the - starting offset points to the beginning of a UTF-8 character was - unnecessarily clumsy. I tidied it up. - -16. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a - bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD. - -17. Nobody had reported that the --include_dir option, which was added in - release 7.7 should have been called --include-dir (hyphen, not underscore) - for compatibility with GNU grep. 
I have changed it to --include-dir, but - left --include_dir as an undocumented synonym, and the same for - --exclude-dir, though that is not available in GNU grep, at least as of - release 2.5.4. - -18. At a user's suggestion, the macros GETCHAR and friends (which pick up UTF-8 - characters from a string of bytes) have been redefined so as not to use - loops, in order to improve performance in some environments. At the same - time, I abstracted some of the common code into auxiliary macros to save - repetition (this should not affect the compiled code). - -19. If \c was followed by a multibyte UTF-8 character, bad things happened. A - compile-time error is now given if \c is not followed by an ASCII - character, that is, a byte less than 128. (In EBCDIC mode, the code is - different, and any byte value is allowed.) - -20. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_ - START_OPTIMIZE option, which is now allowed at compile time - but just - passed through to pcre_exec() or pcre_dfa_exec(). This makes it available - to pcregrep and other applications that have no direct access to PCRE - options. The new /Y option in pcretest sets this option when calling - pcre_compile(). - -21. Change 18 of release 8.01 broke the use of named subpatterns for recursive - back references. Groups containing recursive back references were forced to - be atomic by that change, but in the case of named groups, the amount of - memory required was incorrectly computed, leading to "Failed: internal - error: code overflow". This has been fixed. - -22. Some patches to pcre_stringpiece.h, pcre_stringpiece_unittest.cc, and - pcretest.c, to avoid build problems in some Borland environments. - - -Version 8.10 25-Jun-2010 ------------------------- - -1. Added support for (*MARK:ARG) and for ARG additions to PRUNE, SKIP, and - THEN. - -2. (*ACCEPT) was not working when inside an atomic group. - -3. 
Inside a character class, \B is treated as a literal by default, but - faulted if PCRE_EXTRA is set. This mimics Perl's behaviour (the -w option - causes the error). The code is unchanged, but I tidied the documentation. - -4. Inside a character class, PCRE always treated \R and \X as literals, - whereas Perl faults them if its -w option is set. I have changed PCRE so - that it faults them when PCRE_EXTRA is set. - -5. Added support for \N, which always matches any character other than - newline. (It is the same as "." when PCRE_DOTALL is not set.) - -6. When compiling pcregrep with newer versions of gcc which may have - FORTIFY_SOURCE set, several warnings "ignoring return value of 'fwrite', - declared with attribute warn_unused_result" were given. Just casting the - result to (void) does not stop the warnings; a more elaborate fudge is - needed. I've used a macro to implement this. - -7. Minor change to pcretest.c to avoid a compiler warning. - -8. Added four artificial Unicode properties to help with an option to make - \s etc use properties (see next item). The new properties are: Xan - (alphanumeric), Xsp (Perl space), Xps (POSIX space), and Xwd (word). - -9. Added PCRE_UCP to make \b, \d, \s, \w, and certain POSIX character classes - use Unicode properties. (*UCP) at the start of a pattern can be used to set - this option. Modified pcretest to add /W to test this facility. Added - REG_UCP to make it available via the POSIX interface. - -10. Added --line-buffered to pcregrep. - -11. In UTF-8 mode, if a pattern that was compiled with PCRE_CASELESS was - studied, and the match started with a letter with a code point greater than - 127 whose first byte was different to the first byte of the other case of - the letter, the other case of this starting letter was not recognized - (#976). - -12.
If a pattern that was studied started with a repeated Unicode property - test, for example, \p{Nd}+, there was the theoretical possibility of - setting up an incorrect bitmap of starting bytes, but fortunately it could - not have actually happened in practice until change 8 above was made (it - added property types that matched character-matching opcodes). - -13. pcre_study() now recognizes \h, \v, and \R when constructing a bit map of - possible starting bytes for non-anchored patterns. - -14. Extended the "auto-possessify" feature of pcre_compile(). It now recognizes - \R, and also a number of cases that involve Unicode properties, both - explicit and implicit when PCRE_UCP is set. - -15. If a repeated Unicode property match (e.g. \p{Lu}*) was used with non-UTF-8 - input, it could crash or give wrong results if characters with values - greater than 0xc0 were present in the subject string. (Detail: it assumed - UTF-8 input when processing these items.) - -16. Added a lot of (int) casts to avoid compiler warnings in systems where - size_t is 64-bit (#991). - -17. Added a check for running out of memory when PCRE is compiled with - --disable-stack-for-recursion (#990). - -18. If the last data line in a file for pcretest does not have a newline on - the end, a newline was missing in the output. - -19. The default pcre_chartables.c file recognizes only ASCII characters (values - less than 128) in its various bitmaps. However, there is a facility for - generating tables according to the current locale when PCRE is compiled. It - turns out that in some environments, 0x85 and 0xa0, which are Unicode space - characters, are recognized by isspace() and therefore were getting set in - these tables, and indeed these tables seem to approximate to ISO 8859. This - caused a problem in UTF-8 mode when pcre_study() was used to create a list - of bytes that can start a match. For \s, it was including 0x85 and 0xa0, - which of course cannot start UTF-8 characters. 
I have changed the code so - that only real ASCII characters (less than 128) and the correct starting - bytes for UTF-8 encodings are set for characters greater than 127 when in - UTF-8 mode. (When PCRE_UCP is set - see 9 above - the code is different - altogether.) - -20. Added the /T option to pcretest so as to be able to run tests with non- - standard character tables, thus making it possible to include the tests - used for 19 above in the standard set of tests. - -21. A pattern such as (?&t)(?#()(?(DEFINE)(?<t>a)) which has a forward - reference to a subpattern the other side of a comment that contains an - opening parenthesis caused either an internal compiling error, or a - reference to the wrong subpattern. - - -Version 8.02 19-Mar-2010 ------------------------- - -1. The Unicode data tables have been updated to Unicode 5.2.0. - -2. Added the option --libs-cpp to pcre-config, but only when C++ support is - configured. - -3. Updated the licensing terms in the pcregexp.pas file, as agreed with the - original author of that file, following a query about its status. - -4. On systems that do not have stdint.h (e.g. Solaris), check for and include - inttypes.h instead. This fixes a bug that was introduced by change 8.01/8. - -5. A pattern such as (?&t)*+(?(DEFINE)(?<t>.)) which has a possessive - quantifier applied to a forward-referencing subroutine call, could compile - incorrect code or give the error "internal error: previously-checked - referenced subpattern not found". - -6. Both MS Visual Studio and Symbian OS have problems with initializing - variables to point to external functions. For these systems, therefore, - pcre_malloc etc. are now initialized to local functions that call the - relevant global functions. - -7. There were two entries missing in the vectors called coptable and poptable - in pcre_dfa_exec.c. This could lead to memory accesses outside the vectors.
- I've fixed the data, and added a kludgy way of testing at compile time that - the lengths are correct (equal to the number of opcodes). - -8. Following on from 7, I added a similar kludge to check the length of the - eint vector in pcreposix.c. - -9. Error texts for pcre_compile() are held as one long string to avoid too - much relocation at load time. To find a text, the string is searched, - counting zeros. There was no check for running off the end of the string, - which could happen if a new error number was added without updating the - string. - -10. \K gave a compile-time error if it appeared in a lookbehind assertion. - -11. \K was not working if it appeared in an atomic group or in a group that - was called as a "subroutine", or in an assertion. Perl 5.11 documents that - \K is "not well defined" if used in an assertion. PCRE now accepts it if - the assertion is positive, but not if it is negative. - -12. Change 11 fortuitously reduced the size of the stack frame used in the - "match()" function of pcre_exec.c by one pointer. Forthcoming - implementation of support for (*MARK) will need an extra pointer on the - stack; I have reserved it now, so that the stack frame size does not - decrease. - -13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other - item in branch that calls a recursion is a subroutine call - as in the - second branch in the above example - was incorrectly given the compile- - time error "recursive call could loop indefinitely" because pcre_compile() - was not correctly checking the subroutine for matching a non-empty string. - -14. The checks for overrunning compiling workspace could trigger after an - overrun had occurred. This is a "should never occur" error, but it can be - triggered by pathological patterns such as hundreds of nested parentheses. - The checks now trigger 100 bytes before the end of the workspace. - -15. Fix typo in configure.ac: "srtoq" should be "strtoq".
- - -Version 8.01 19-Jan-2010 ------------------------- - -1. If a pattern contained a conditional subpattern with only one branch (in - particular, this includes all (*DEFINE) patterns), a call to pcre_study() - computed the wrong minimum data length (which is of course zero for such - subpatterns). This could cause incorrect "no match" results. - -2. For patterns such as (?i)a(?-i)b|c where an option setting at the start of - the pattern is reset in the first branch, pcre_compile() failed with - "internal error: code overflow at offset...". This happened only when - the reset was to the original external option setting. (An optimization - abstracts leading options settings into an external setting, which was the - cause of this.) - -3. A pattern such as ^(?!a(*SKIP)b) where a negative assertion contained one - of the verbs SKIP, PRUNE, or COMMIT, did not work correctly. When the - assertion pattern did not match (meaning that the assertion was true), it - was incorrectly treated as false if the SKIP had been reached during the - matching. This also applied to assertions used as conditions. - -4. If an item that is not supported by pcre_dfa_exec() was encountered in an - assertion subpattern, including such a pattern used as a condition, - unpredictable results occurred, instead of the error return - PCRE_ERROR_DFA_UITEM. - -5. The C++ GlobalReplace function was not working like Perl for the special - situation when an empty string is matched. It now does the fancy magic - stuff that is necessary. - -6. In pcre_internal.h, obsolete includes to setjmp.h and stdarg.h have been - removed. (These were left over from very, very early versions of PCRE.) - -7. Some cosmetic changes to the code to make life easier when compiling it - as part of something else: - - (a) Change DEBUG to PCRE_DEBUG. 
- - (b) In pcre_compile(), rename the member of the "branch_chain" structure - called "current" as "current_branch", to prevent a collision with the - Linux macro when compiled as a kernel module. - - (c) In pcre_study(), rename the function set_bit() as set_table_bit(), to - prevent a collision with the Linux macro when compiled as a kernel - module. - -8. In pcre_compile() there are some checks for integer overflows that used to - cast potentially large values to (double). This has been changed so that - when building, a check for int64_t is made, and if it is found, it is used - instead, thus avoiding the use of floating point arithmetic. (There is no - other use of FP in PCRE.) If int64_t is not found, the fallback is to - double. - -9. Added two casts to avoid signed/unsigned warnings from VS Studio Express - 2005 (difference between two addresses compared to an unsigned value). - -10. Change the standard AC_CHECK_LIB test for libbz2 in configure.ac to a - custom one, because of the following reported problem in Windows: - - - libbz2 uses the Pascal calling convention (WINAPI) for the functions - under Win32. - - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", - therefore missing the function definition. - - The compiler thus generates a "C" signature for the test function. - - The linker fails to find the "C" function. - - PCRE fails to configure if asked to do so against libbz2. - -11. When running libtoolize from libtool-2.2.6b as part of autogen.sh, these - messages were output: - - Consider adding `AC_CONFIG_MACRO_DIR([m4])' to configure.ac and - rerunning libtoolize, to keep the correct libtool macros in-tree. - Consider adding `-I m4' to ACLOCAL_AMFLAGS in Makefile.am. - - I have done both of these things. - -12. Although pcre_dfa_exec() does not use nearly as much stack as pcre_exec() - most of the time, it *can* run out if it is given a pattern that contains a - runaway infinite recursion.
I updated the discussion in the pcrestack man - page. - -13. Now that we have gone to the x.xx style of version numbers, the minor - version may start with zero. Using 08 or 09 is a bad idea because users - might check the value of PCRE_MINOR in their code, and 08 or 09 may be - interpreted as invalid octal numbers. I've updated the previous comment in - configure.ac, and also added a check that gives an error if 08 or 09 are - used. - -14. Change 8.00/11 was not quite complete: code had been accidentally omitted, - causing partial matching to fail when the end of the subject matched \W - in a UTF-8 pattern where \W was quantified with a minimum of 3. - -15. There were some discrepancies between the declarations in pcre_internal.h - of _pcre_is_newline(), _pcre_was_newline(), and _pcre_valid_utf8() and - their definitions. The declarations used "const uschar *" and the - definitions used USPTR. Even though USPTR is normally defined as "const - unsigned char *" (and uschar is typedeffed as "unsigned char"), it was - reported that: "This difference in casting confuses some C++ compilers, for - example, SunCC recognizes above declarations as different functions and - generates broken code for hbpcre." I have changed the declarations to use - USPTR. - -16. GNU libtool is named differently on some systems. The autogen.sh script now - tries several variants such as glibtoolize (MacOSX) and libtoolize1x - (FreeBSD). - -17. Applied Craig's patch that fixes an HP aCC compile error in pcre 8.00 - (strtoXX undefined when compiling pcrecpp.cc). The patch contains this - comment: "Figure out how to create a longlong from a string: strtoll and - equivalent. It's not enough to call AC_CHECK_FUNCS: hpux has a strtoll, for - instance, but it only takes 2 args instead of 3!" - -18. A subtle bug concerned with back references has been fixed by a change of - specification, with a corresponding code fix. 
A pattern such as - ^(xa|=?\1a)+$ which contains a back reference inside the group to which it - refers, was giving matches when it shouldn't. For example, xa=xaaa would - match that pattern. Interestingly, Perl (at least up to 5.11.3) has the - same bug. Such groups have to be quantified to be useful, or contained - inside another quantified group. (If there's no repetition, the reference - can never match.) The problem arises because, having left the group and - moved on to the rest of the pattern, a later failure that backtracks into - the group uses the captured value from the final iteration of the group - rather than the correct earlier one. I have fixed this in PCRE by forcing - any group that contains a reference to itself to be an atomic group; that - is, there cannot be any backtracking into it once it has completed. This is - similar to recursive and subroutine calls. - - -Version 8.00 19-Oct-09 ----------------------- - -1. The table for translating pcre_compile() error codes into POSIX error codes - was out-of-date, and there was no check on the pcre_compile() error code - being within the table. This could lead to an OK return being given in - error. - -2. Changed the call to open a subject file in pcregrep from fopen(pathname, - "r") to fopen(pathname, "rb"), which fixed a problem with some of the tests - in a Windows environment. - -3. The pcregrep --count option prints the count for each file even when it is - zero, as does GNU grep. However, pcregrep was also printing all files when - --files-with-matches was added. Now, when both options are given, it prints - counts only for those files that have at least one match. (GNU grep just - prints the file name in this circumstance, but including the count seems - more useful - otherwise, why use --count?) Also ensured that the - combination -clh just lists non-zero counts, with no names. - -4. 
The long form of the pcregrep -F option was incorrectly implemented as - --fixed_strings instead of --fixed-strings. This is an incompatible change, - but it seems right to fix it, and I didn't think it was worth preserving - the old behaviour. - -5. The command line items --regex=pattern and --regexp=pattern were not - recognized by pcregrep, which required --regex pattern or --regexp pattern - (with a space rather than an '='). The man page documented the '=' forms, - which are compatible with GNU grep; these now work. - -6. No libpcreposix.pc file was created for pkg-config; there was just - libpcre.pc and libpcrecpp.pc. The omission has been rectified. - -7. Added #ifndef SUPPORT_UCP into the pcre_ucd.c module, to reduce its size - when UCP support is not needed, by modifying the Python script that - generates it from Unicode data files. This should not matter if the module - is correctly used as a library, but I received one complaint about 50K of - unwanted data. My guess is that the person linked everything into his - program rather than using a library. Anyway, it does no harm. - -8. A pattern such as /\x{123}{2,2}+/8 was incorrectly compiled; the trigger - was a minimum greater than 1 for a wide character in a possessive - repetition. The same bug could also affect patterns like /(\x{ff}{0,2})*/8 - which had an unlimited repeat of a nested, fixed maximum repeat of a wide - character. Chaos in the form of incorrect output or a compiling loop could - result. - -9. The restrictions on what a pattern can contain when partial matching is - requested for pcre_exec() have been removed. All patterns can now be - partially matched by this function. In addition, if there are at least two - slots in the offset vector, the offset of the earliest inspected character - for the match and the offset of the end of the subject are set in them when - PCRE_ERROR_PARTIAL is returned. - -10. 
Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is - synonymous with PCRE_PARTIAL, for backwards compatibility, and - PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match, - and may be more useful for multi-segment matching. - -11. Partial matching with pcre_exec() is now more intuitive. A partial match - used to be given if ever the end of the subject was reached; now it is - given only if matching could not proceed because another character was - needed. This makes a difference in some odd cases such as Z(*FAIL) with the - string "Z", which now yields "no match" instead of "partial match". In the - case of pcre_dfa_exec(), "no match" is given if every matching path for the - final character ended with (*FAIL). - -12. Restarting a match using pcre_dfa_exec() after a partial match did not work - if the pattern had a "must contain" character that was already found in the - earlier partial match, unless partial matching was again requested. For - example, with the pattern /dog.(body)?/, the "must contain" character is - "g". If the first part-match was for the string "dog", restarting with - "sbody" failed. This bug has been fixed. - -13. The string returned by pcre_dfa_exec() after a partial match has been - changed so that it starts at the first inspected character rather than the - first character of the match. This makes a difference only if the pattern - starts with a lookbehind assertion or \b or \B (\K is not supported by - pcre_dfa_exec()). It's an incompatible change, but it makes the two - matching functions compatible, and I think it's the right thing to do. - -14. Added a pcredemo man page, created automatically from the pcredemo.c file, - so that the demonstration program is easily available in environments where - PCRE has not been installed from source. - -15. 
Arranged to add -DPCRE_STATIC to cflags in libpcre.pc, libpcreposix.pc, - libpcrecpp.pc and pcre-config when PCRE is not compiled as a shared - library. - -16. Added REG_UNGREEDY to the pcreposix interface, at the request of a user. - It maps to PCRE_UNGREEDY. It is not, of course, POSIX-compatible, but it - is not the first non-POSIX option to be added. Clearly some people find - these options useful. - -17. If a caller to the POSIX matching function regexec() passes a non-zero - value for nmatch with a NULL value for pmatch, the value of - nmatch is forced to zero. - -18. RunGrepTest did not have a test for the availability of the -u option of - the diff command, as RunTest does. It now checks in the same way as - RunTest, and also checks for the -b option. - -19. If an odd number of negated classes containing just a single character - interposed, within parentheses, between a forward reference to a named - subpattern and the definition of the subpattern, compilation crashed with - an internal error, complaining that it could not find the referenced - subpattern. An example of a crashing pattern is /(?&A)(([^m])(?<A>))/. - [The bug was that it was starting one character too far in when skipping - over the character class, thus treating the ] as data rather than - terminating the class. This meant it could skip too much.] - -20. Added PCRE_NOTEMPTY_ATSTART in order to be able to correctly implement the - /g option in pcretest when the pattern contains \K, which makes it possible - to have an empty string match not at the start, even when the pattern is - anchored. Updated pcretest and pcredemo to use this option. - -21. If the maximum number of capturing subpatterns in a recursion was greater - than the maximum at the outer level, the higher number was returned, but - with unset values at the outer level. The correct (outer level) value is - now given. - -22. 
If (*ACCEPT) appeared inside capturing parentheses, previous releases of - PCRE did not set those parentheses (unlike Perl). I have now found a way to - make it do so. The string so far is captured, making this feature - compatible with Perl. - -23. The tests have been re-organized, adding tests 11 and 12, to make it - possible to check the Perl 5.10 features against Perl 5.10. - -24. Perl 5.10 allows subroutine calls in lookbehinds, as long as the subroutine - pattern matches a fixed length string. PCRE did not allow this; now it - does. Neither allows recursion. - -25. I finally figured out how to implement a request to provide the minimum - length of subject string that was needed in order to match a given pattern. - (It was back references and recursion that I had previously got hung up - on.) This code has now been added to pcre_study(); it finds a lower bound - to the length of subject needed. It is not necessarily the greatest lower - bound, but using it to avoid searching strings that are too short does give - some useful speed-ups. The value is available to calling programs via - pcre_fullinfo(). - -26. While implementing 25, I discovered to my embarrassment that pcretest had - not been passing the result of pcre_study() to pcre_dfa_exec(), so the - study optimizations had never been tested with that matching function. - Oops. What is worse, even when it was passed study data, there was a bug in - pcre_dfa_exec() that meant it never actually used it. Double oops. There - were also very few tests of studied patterns with pcre_dfa_exec(). - -27. If (?| is used to create subpatterns with duplicate numbers, they are now - allowed to have the same name, even if PCRE_DUPNAMES is not set. However, - on the other side of the coin, they are no longer allowed to have different - names, because these cannot be distinguished in PCRE, and this has caused - confusion. (This is a difference from Perl.) - -28. 
When duplicate subpattern names are present (necessarily with different - numbers, as required by 27 above), and a test is made by name in a - conditional pattern, either for a subpattern having been matched, or for - recursion in such a pattern, all the associated numbered subpatterns are - tested, and the overall condition is true if the condition is true for any - one of them. This is the way Perl works, and is also more like the way - testing by number works. - - -Version 7.9 11-Apr-09 ---------------------- - -1. When building with support for bzlib/zlib (pcregrep) and/or readline - (pcretest), all targets were linked against these libraries. This included - libpcre, libpcreposix, and libpcrecpp, even though they do not use these - libraries. This caused unwanted dependencies to be created. This problem - has been fixed, and now only pcregrep is linked with bzlib/zlib and only - pcretest is linked with readline. - -2. The "typedef int BOOL" in pcre_internal.h that was included inside the - "#ifndef FALSE" condition by an earlier change (probably 7.8/18) has been - moved outside it again, because FALSE and TRUE are already defined in AIX, - but BOOL is not. - -3. The pcre_config() function was treating the PCRE_MATCH_LIMIT and - PCRE_MATCH_LIMIT_RECURSION values as ints, when they should be long ints. - -4. The pcregrep documentation said spaces were inserted as well as colons (or - hyphens) following file names and line numbers when outputting matching - lines. This is not true; no spaces are inserted. I have also clarified the - wording for the --colour (or --color) option. - -5. In pcregrep, when --colour was used with -o, the list of matching strings - was not coloured; this is different to GNU grep, so I have changed it to be - the same. - -6. When --colo(u)r was used in pcregrep, only the first matching substring in - each matching line was coloured. 
Now it goes on to look for further matches - of any of the test patterns, which is the same behaviour as GNU grep. - -7. A pattern that could match an empty string could cause pcregrep to loop; it - doesn't make sense to accept an empty string match in pcregrep, so I have - locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this - seems to be how GNU grep behaves. - -8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at - start or after a newline", because the conditional assertion was not being - correctly handled. The rule now is that both the assertion and what follows - in the first alternative must satisfy the test. - -9. If auto-callout was enabled in a pattern with a conditional group whose - condition was an assertion, PCRE could crash during matching, both with - pcre_exec() and pcre_dfa_exec(). - -10. The PCRE_DOLLAR_ENDONLY option was not working when pcre_dfa_exec() was - used for matching. - -11. Unicode property support in character classes was not working for - characters (bytes) greater than 127 when not in UTF-8 mode. - -12. Added the -M command line option to pcretest. - -14. Added the non-standard REG_NOTEMPTY option to the POSIX interface. - -15. Added the PCRE_NO_START_OPTIMIZE match-time option. - -16. Added comments and documentation about mis-use of no_arg in the C++ - wrapper. - -17. Implemented support for UTF-8 encoding in EBCDIC environments, a patch - from Martin Jerabek that uses macro names for all relevant character and - string constants. - -18. Added to pcre_internal.h two configuration checks: (a) If both EBCDIC and - SUPPORT_UTF8 are set, give an error; (b) If SUPPORT_UCP is set without - SUPPORT_UTF8, define SUPPORT_UTF8. The "configure" script handles both of - these, but not everybody uses configure. - -19. A conditional group that had only one branch was not being correctly - recognized as an item that could match an empty string. 
This meant that an - enclosing group might also not be so recognized, causing infinite looping - (and probably a segfault) for patterns such as ^"((?(?=[a])[^"])|b)*"$ - with the subject "ab", where knowledge that the repeated group can match - nothing is needed in order to break the loop. - -20. If a pattern that was compiled with callouts was matched using pcre_dfa_ - exec(), but without supplying a callout function, matching went wrong. - -21. If PCRE_ERROR_MATCHLIMIT occurred during a recursion, there was a memory - leak if the size of the offset vector was greater than 30. When the vector - is smaller, the saved offsets during recursion go onto a local stack - vector, but for larger vectors malloc() is used. It was failing to free - when the recursion yielded PCRE_ERROR_MATCHLIMIT (or any other "abnormal" - error, in fact). - -22. There was a missing #ifdef SUPPORT_UTF8 round one of the variables in the - heapframe that is used only when UTF-8 support is enabled. This caused no - problem, but was untidy. - -23. Steven Van Ingelgem's patch to CMakeLists.txt to change the name - CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE is - included within another project. - -24. Steven Van Ingelgem's patches to add more options to the CMake support, - slightly modified by me: - - (a) PCRE_BUILD_TESTS can be set OFF not to build the tests, including - not building pcregrep. - - (b) PCRE_BUILD_PCREGREP can be set OFF not to build pcregrep, but only - if PCRE_BUILD_TESTS is also set OFF, because the tests use pcregrep. - -25. Forward references, both numeric and by name, in patterns that made use of - duplicate group numbers, could behave incorrectly or give incorrect errors, - because when scanning forward to find the reference group, PCRE was not - taking into account the duplicate group numbers. A pattern such as - ^X(?3)(a)(?|(b)|(q))(Y) is an example. - -26. 
Changed a few more instances of "const unsigned char *" to USPTR, making - the feature of a custom pointer more persuasive (as requested by a user). - -27. Wrapped the definitions of fileno and isatty for Windows, which appear in - pcretest.c, inside #ifndefs, because it seems they are sometimes already - pre-defined. - -28. Added support for (*UTF8) at the start of a pattern. - -29. Arrange for flags added by the "release type" setting in CMake to be shown - in the configuration summary. - - -Version 7.8 05-Sep-08 ---------------------- - -1. Replaced UCP searching code with optimized version as implemented for Ad - Muncher (http://www.admuncher.com/) by Peter Kankowski. This uses a two- - stage table and inline lookup instead of a function, giving speed ups of 2 - to 5 times on some simple patterns that I tested. Permission was given to - distribute the MultiStage2.py script that generates the tables (it's not in - the tarball, but is in the Subversion repository). - -2. Updated the Unicode datatables to Unicode 5.1.0. This adds yet more - scripts. - -3. Change 12 for 7.7 introduced a bug in pcre_study() when a pattern contained - a group with a zero qualifier. The result of the study could be incorrect, - or the function might crash, depending on the pattern. - -4. Caseless matching was not working for non-ASCII characters in back - references. For example, /(\x{de})\1/8i was not matching \x{de}\x{fe}. - It now works when Unicode Property Support is available. - -5. In pcretest, an escape such as \x{de} in the data was always generating - a UTF-8 string, even in non-UTF-8 mode. Now it generates a single byte in - non-UTF-8 mode. If the value is greater than 255, it gives a warning about - truncation. - -6. Minor bugfix in pcrecpp.cc (change "" == ... to NULL == ...). - -7. Added two (int) casts to pcregrep when printing the difference of two - pointers, in case they are 64-bit values. - -8. 
Added comments about Mac OS X stack usage to the pcrestack man page and to - test 2 if it fails. - -9. Added PCRE_CALL_CONVENTION just before the names of all exported functions, - and a #define of that name to empty if it is not externally set. This is to - allow users of MSVC to set it if necessary. - -10. The PCRE_EXP_DEFN macro which precedes exported functions was missing from - the convenience functions in the pcre_get.c source file. - -11. An option change at the start of a pattern that had top-level alternatives - could cause overwriting and/or a crash. This command provoked a crash in - some environments: - - printf "/(?i)[\xc3\xa9\xc3\xbd]|[\xc3\xa9\xc3\xbdA]/8\n" | pcretest - - This potential security problem was recorded as CVE-2008-2371. - -12. For a pattern where the match had to start at the beginning or immediately - after a newline (e.g /.*anything/ without the DOTALL flag), pcre_exec() and - pcre_dfa_exec() could read past the end of the passed subject if there was - no match. To help with detecting such bugs (e.g. with valgrind), I modified - pcretest so that it places the subject at the end of its malloc-ed buffer. - -13. The change to pcretest in 12 above threw up a couple more cases when pcre_ - exec() might read past the end of the data buffer in UTF-8 mode. - -14. A similar bug to 7.3/2 existed when the PCRE_FIRSTLINE option was set and - the data contained the byte 0x85 as part of a UTF-8 character within its - first line. This applied both to normal and DFA matching. - -15. Lazy qualifiers were not working in some cases in UTF-8 mode. For example, - /^[^d]*?$/8 failed to match "abc". - -16. Added a missing copyright notice to pcrecpp_internal.h. - -17. Make it more clear in the documentation that values returned from - pcre_exec() in ovector are byte offsets, not character counts. - -18. Tidied a few places to stop certain compilers from issuing warnings. - -19. 
Updated the Virtual Pascal + BCC files to compile the latest v7.7, as - supplied by Stefan Weber. I made a further small update for 7.8 because - there is a change of source arrangements: the pcre_searchfuncs.c module is - replaced by pcre_ucd.c. - - -Version 7.7 07-May-08 ---------------------- - -1. Applied Craig's patch to sort out a long long problem: "If we can't convert - a string to a long long, pretend we don't even have a long long." This is - done by checking for the strtoq, strtoll, and _strtoi64 functions. - -2. Applied Craig's patch to pcrecpp.cc to restore ABI compatibility with - pre-7.6 versions, which defined a global no_arg variable instead of putting - it in the RE class. (See also #8 below.) - -3. Remove a line of dead code, identified by coverity and reported by Nuno - Lopes. - -4. Fixed two related pcregrep bugs involving -r with --include or --exclude: - - (1) The include/exclude patterns were being applied to the whole pathnames - of files, instead of just to the final components. - - (2) If there was more than one level of directory, the subdirectories were - skipped unless they satisfied the include/exclude conditions. This is - inconsistent with GNU grep (and could even be seen as contrary to the - pcregrep specification - which I improved to make it absolutely clear). - The action now is always to scan all levels of directory, and just - apply the include/exclude patterns to regular files. - -5. Added the --include_dir and --exclude_dir patterns to pcregrep, and used - --exclude_dir in the tests to avoid scanning .svn directories. - -6. Applied Craig's patch to the QuoteMeta function so that it escapes the - NUL character as backslash + 0 rather than backslash + NUL, because PCRE - doesn't support NULs in patterns. - -7. Added some missing "const"s to declarations of static tables in - pcre_compile.c and pcre_dfa_exec.c. - -8. Applied Craig's patch to pcrecpp.cc to fix a problem in OS X that was - caused by fix #2 above. 
(Subsequently also a second patch to fix the - first patch. And a third patch - this was a messy problem.) - -9. Applied Craig's patch to remove the use of push_back(). - -10. Applied Alan Lehotsky's patch to add REG_STARTEND support to the POSIX - matching function regexec(). - -11. Added support for the Oniguruma syntax \g<name>, \g<n>, \g'name', \g'n', - which, however, unlike Perl's \g{...}, are subroutine calls, not back - references. PCRE supports relative numbers with this syntax (I don't think - Oniguruma does). - -12. Previously, a group with a zero repeat such as (...){0} was completely - omitted from the compiled regex. However, this means that if the group - was called as a subroutine from elsewhere in the pattern, things went wrong - (an internal error was given). Such groups are now left in the compiled - pattern, with a new opcode that causes them to be skipped at execution - time. - -13. Added the PCRE_JAVASCRIPT_COMPAT option. This makes the following changes - to the way PCRE behaves: - - (a) A lone ] character is dis-allowed (Perl treats it as data). - - (b) A back reference to an unmatched subpattern matches an empty string - (Perl fails the current match path). - - (c) A data ] in a character class must be notated as \] because if the - first data character in a class is ], it defines an empty class. (In - Perl it is not possible to have an empty class.) The empty class [] - never matches; it forces failure and is equivalent to (*FAIL) or (?!). - The negative empty class [^] matches any one character, independently - of the DOTALL setting. - -14. A pattern such as /(?2)[]a()b](abc)/ which had a forward reference to a - non-existent subpattern following a character class starting with ']' and - containing () gave an internal compiling error instead of "reference to - non-existent subpattern". Fortunately, when the pattern did exist, the - compiled code was correct. 
(When scanning forwards to check for the - existence of the subpattern, it was treating the data ']' as terminating - the class, so got the count wrong. When actually compiling, the reference - was subsequently set up correctly.) - -15. The "always fail" assertion (?!) is optimized to (*FAIL) by pcre_compile; - it was being rejected as not supported by pcre_dfa_exec(), even though - other assertions are supported. I have made pcre_dfa_exec() support - (*FAIL). - -16. The implementation of 13c above involved the invention of a new opcode, - OP_ALLANY, which is like OP_ANY but doesn't check the /s flag. Since /s - cannot be changed at match time, I realized I could make a small - improvement to matching performance by compiling OP_ALLANY instead of - OP_ANY for "." when DOTALL was set, and then removing the runtime tests - on the OP_ANY path. - -17. Compiling pcretest on Windows with readline support failed without the - following two fixes: (1) Make the unistd.h include conditional on - HAVE_UNISTD_H; (2) #define isatty and fileno as _isatty and _fileno. - -18. Changed CMakeLists.txt and cmake/FindReadline.cmake to arrange for the - ncurses library to be included for pcretest when ReadLine support is - requested, but also to allow for it to be overridden. This patch came from - Daniel Bergström. - -19. There was a typo in the file ucpinternal.h where f0_rangeflag was defined - as 0x00f00000 instead of 0x00800000. Luckily, this would not have caused - any errors with the current Unicode tables. Thanks to Peter Kankowski for - spotting this. - - -Version 7.6 28-Jan-08 ---------------------- - -1. A character class containing a very large number of characters with - codepoints greater than 255 (in UTF-8 mode, of course) caused a buffer - overflow. - -2. Patch to cut out the "long long" test in pcrecpp_unittest when - HAVE_LONG_LONG is not defined. - -3. 
Applied Christian Ehrlicher's patch to update the CMake build files to - bring them up to date and include new features. This patch includes: - - - Fixed PH's badly added libz and libbz2 support. - - Fixed a problem with static linking. - - Added pcredemo. [But later removed - see 7 below.] - - Fixed dftables problem and added an option. - - Added a number of HAVE_XXX tests, including HAVE_WINDOWS_H and - HAVE_LONG_LONG. - - Added readline support for pcretest. - - Added a listing of the option settings after cmake has run. - -4. A user submitted a patch to Makefile that makes it easy to create - "pcre.dll" under mingw when using Configure/Make. I added stuff to - Makefile.am that causes it to include this special target, without - affecting anything else. Note that the same mingw target plus all - the other distribution libraries and programs are now supported - when configuring with CMake (see 6 below) instead of with - Configure/Make. - -5. Applied Craig's patch that moves no_arg into the RE class in the C++ code. - This is an attempt to solve the reported problem "pcrecpp::no_arg is not - exported in the Windows port". It has not yet been confirmed that the patch - solves the problem, but it does no harm. - -6. Applied Sheri's patch to CMakeLists.txt to add NON_STANDARD_LIB_PREFIX and - NON_STANDARD_LIB_SUFFIX for dll names built with mingw when configured - with CMake, and also correct the comment about stack recursion. - -7. Remove the automatic building of pcredemo from the ./configure system and - from CMakeLists.txt. The whole idea of pcredemo.c is that it is an example - of a program that users should build themselves after PCRE is installed, so - building it automatically is not really right. What is more, it gave - trouble in some build environments. - -8. Further tidies to CMakeLists.txt from Sheri and Christian. - - -Version 7.5 10-Jan-08 ---------------------- - -1. 
Applied a patch from Craig: "This patch makes it possible to 'ignore' - values in parens when parsing an RE using the C++ wrapper." - -2. Negative specials like \S did not work in character classes in UTF-8 mode. - Characters greater than 255 were excluded from the class instead of being - included. - -3. The same bug as (2) above applied to negated POSIX classes such as - [:^space:]. - -4. PCRECPP_STATIC was referenced in pcrecpp_internal.h, but nowhere was it - defined or documented. It seems to have been a typo for PCRE_STATIC, so - I have changed it. - -5. The construct (?&) was not diagnosed as a syntax error (it referenced the - first named subpattern) and a construct such as (?&a) would reference the - first named subpattern whose name started with "a" (in other words, the - length check was missing). Both these problems are fixed. "Subpattern name - expected" is now given for (?&) (a zero-length name), and this patch also - makes it give the same error for \k'' (previously it complained that that - was a reference to a non-existent subpattern). - -6. The erroneous patterns (?+-a) and (?-+a) give different error messages; - this is right because (?- can be followed by option settings as well as by - digits. I have, however, made the messages clearer. - -7. Patterns such as (?(1)a|b) (a pattern that contains fewer subpatterns - than the number used in the conditional) now cause a compile-time error. - This is actually not compatible with Perl, which accepts such patterns, but - treats the conditional as always being FALSE (as PCRE used to), but it - seems to me that giving a diagnostic is better. - -8. Change "alphameric" to the more common word "alphanumeric" in comments - and messages. - -9. Fix two occurrences of "backslash" in comments that should have been - "backspace". - -10. Remove two redundant lines of code that can never be obeyed (their function - was moved elsewhere). - -11. 
The program that makes PCRE's Unicode character property table had a bug - which caused it to generate incorrect table entries for sequences of - characters that have the same character type, but are in different scripts. - It amalgamated them into a single range, with the script of the first of - them. In other words, some characters were in the wrong script. There were - thirteen such cases, affecting characters in the following ranges: - - U+002b0 - U+002c1 - U+0060c - U+0060d - U+0061e - U+00612 - U+0064b - U+0065e - U+0074d - U+0076d - U+01800 - U+01805 - U+01d00 - U+01d77 - U+01d9b - U+01dbf - U+0200b - U+0200f - U+030fc - U+030fe - U+03260 - U+0327f - U+0fb46 - U+0fbb1 - U+10450 - U+1049d - -12. The -o option (show only the matching part of a line) for pcregrep was not - compatible with GNU grep in that, if there was more than one match in a - line, it showed only the first of them. It now behaves in the same way as - GNU grep. - -13. If the -o and -v options were combined for pcregrep, it printed a blank - line for every non-matching line. GNU grep prints nothing, and pcregrep now - does the same. The return code can be used to tell if there were any - non-matching lines. - -14. Added --file-offsets and --line-offsets to pcregrep. - -15. The pattern (?=something)(?R) was not being diagnosed as a potentially - infinitely looping recursion. The bug was that positive lookaheads were not - being skipped when checking for a possible empty match (negative lookaheads - and both kinds of lookbehind were skipped). - -16. Fixed two typos in the Windows-only code in pcregrep.c, and moved the - inclusion of <windows.h> to before rather than after the definition of - INVALID_FILE_ATTRIBUTES (patch from David Byron). - -17. Specifying a possessive quantifier with a specific limit for a Unicode - character property caused pcre_compile() to compile bad code, which led at - runtime to PCRE_ERROR_INTERNAL (-14). 
Examples of patterns that caused this - are: /\p{Zl}{2,3}+/8 and /\p{Cc}{2}+/8. It was the possessive "+" that - caused the error; without that there was no problem. - -18. Added --enable-pcregrep-libz and --enable-pcregrep-libbz2. - -19. Added --enable-pcretest-libreadline. - -20. In pcrecpp.cc, the variable 'count' was incremented twice in - RE::GlobalReplace(). As a result, the number of replacements returned was - double what it should be. I removed one of the increments, but Craig sent a - later patch that removed the other one (the right fix) and added unit tests - that check the return values (which was not done before). - -21. Several CMake things: - - (1) Arranged that, when cmake is used on Unix, the libraries end up with - the names libpcre and libpcreposix, not just pcre and pcreposix. - - (2) The above change means that pcretest and pcregrep are now correctly - linked with the newly-built libraries, not previously installed ones. - - (3) Added PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, PCRE_SUPPORT_LIBBZ2. - -22. In UTF-8 mode, with newline set to "any", a pattern such as .*a.*=.b.* - crashed when matching a string such as a\x{2029}b (note that \x{2029} is a - UTF-8 newline character). The key issue is that the pattern starts .*; - this means that the match must be either at the beginning, or after a - newline. The bug was in the code for advancing after a failed match and - checking that the new position followed a newline. It was not taking - account of UTF-8 characters correctly. - -23. PCRE was behaving differently from Perl in the way it recognized POSIX - character classes. PCRE was not treating the sequence [:...:] as a - character class unless the ... were all letters. Perl, however, seems to - allow any characters between [: and :], though of course it rejects as - unknown any "names" that contain non-letters, because all the known class - names consist only of letters. 
Thus, Perl gives an error for [[:1234:]], - for example, whereas PCRE did not - it did not recognize a POSIX character - class. This seemed a bit dangerous, so the code has been changed to be - closer to Perl. The behaviour is not identical to Perl, because PCRE will - diagnose an unknown class for, for example, [[:l\ower:]] where Perl will - treat it as [[:lower:]]. However, PCRE does now give "unknown" errors where - Perl does, and where it didn't before. - -24. Rewrite so as to remove the single use of %n from pcregrep because in some - Windows environments %n is disabled by default. - - -Version 7.4 21-Sep-07 ---------------------- - -1. Change 7.3/28 was implemented for classes by looking at the bitmap. This - means that a class such as [\s] counted as "explicit reference to CR or - LF". That isn't really right - the whole point of the change was to try to - help when there was an actual mention of one of the two characters. So now - the change happens only if \r or \n (or a literal CR or LF) character is - encountered. - -2. The 32-bit options word was also used for 6 internal flags, but the numbers - of both had grown to the point where there were only 3 bits left. - Fortunately, there was spare space in the data structure, and so I have - moved the internal flags into a new 16-bit field to free up more option - bits. - -3. The appearance of (?J) at the start of a pattern set the DUPNAMES option, - but did not set the internal JCHANGED flag - either of these is enough to - control the way the "get" function works - but the PCRE_INFO_JCHANGED - facility is supposed to tell if (?J) was ever used, so now (?J) at the - start sets both bits. - -4. Added options (at build time, compile time, exec time) to change \R from - matching any Unicode line ending sequence to just matching CR, LF, or CRLF. - -5. doc/pcresyntax.html was missing from the distribution. - -6. 
Put back the definition of PCRE_ERROR_NULLWSLIMIT, for backward - compatibility, even though it is no longer used. - -7. Added macro for snprintf to pcrecpp_unittest.cc and also for strtoll and - strtoull to pcrecpp.cc to select the available functions in WIN32 when the - windows.h file is present (where different names are used). [This was - reversed later after testing - see 16 below.] - -8. Changed all #include <config.h> to #include "config.h". There were also - some further cases that I changed to "pcre.h". - -9. When pcregrep was used with the --colour option, it missed the line ending - sequence off the lines that it output. - -10. It was pointed out to me that arrays of string pointers cause lots of - relocations when a shared library is dynamically loaded. A technique of - using a single long string with a table of offsets can drastically reduce - these. I have refactored PCRE in four places to do this. The result is - dramatic: - - Originally: 290 - After changing UCP table: 187 - After changing error message table: 43 - After changing table of "verbs" 36 - After changing table of Posix names 22 - - Thanks to the folks working on Gregex for glib for this insight. - -11. --disable-stack-for-recursion caused compiling to fail unless -enable- - unicode-properties was also set. - -12. Updated the tests so that they work when \R is defaulted to ANYCRLF. - -13. Added checks for ANY and ANYCRLF to pcrecpp.cc where it previously - checked only for CRLF. - -14. Added casts to pcretest.c to avoid compiler warnings. - -15. Added Craig's patch to various pcrecpp modules to avoid compiler warnings. - -16. Added Craig's patch to remove the WINDOWS_H tests, that were not working, - and instead check for _strtoi64 explicitly, and avoid the use of snprintf() - entirely. This removes changes made in 7 above. - -17. The CMake files have been updated, and there is now more information about - building with CMake in the NON-UNIX-USE document. 
- - -Version 7.3 28-Aug-07 ---------------------- - - 1. In the rejigging of the build system that eventually resulted in 7.1, the - line "#include <pcre.h>" was included in pcre_internal.h. The use of angle - brackets there is not right, since it causes compilers to look for an - installed pcre.h, not the version that is in the source that is being - compiled (which of course may be different). I have changed it back to: - - #include "pcre.h" - - I have a vague recollection that the change was concerned with compiling in - different directories, but in the new build system, that is taken care of - by the VPATH setting in the Makefile. - - 2. The pattern .*$ when run in not-DOTALL UTF-8 mode with newline=any failed - when the subject happened to end in the byte 0x85 (e.g. if the last - character was \x{1ec5}). *Character* 0x85 is one of the "any" newline - characters but of course it shouldn't be taken as a newline when it is part - of another character. The bug was that, for an unlimited repeat of . in - not-DOTALL UTF-8 mode, PCRE was advancing by bytes rather than by - characters when looking for a newline. - - 3. A small performance improvement in the DOTALL UTF-8 mode .* case. - - 4. Debugging: adjusted the names of opcodes for different kinds of parentheses - in debug output. - - 5. Arrange to use "%I64d" instead of "%lld" and "%I64u" instead of "%llu" for - long printing in the pcrecpp unittest when running under MinGW. - - 6. ESC_K was left out of the EBCDIC table. - - 7. Change 7.0/38 introduced a new limit on the number of nested non-capturing - parentheses; I made it 1000, which seemed large enough. Unfortunately, the - limit also applies to "virtual nesting" when a pattern is recursive, and in - this case 1000 isn't so big. I have been able to remove this limit at the - expense of backing off one optimization in certain circumstances. 
Normally, - when pcre_exec() would call its internal match() function recursively and - immediately return the result unconditionally, it uses a "tail recursion" - feature to save stack. However, when a subpattern that can match an empty - string has an unlimited repetition quantifier, it no longer makes this - optimization. That gives it a stack frame in which to save the data for - checking that an empty string has been matched. Previously this was taken - from the 1000-entry workspace that had been reserved. So now there is no - explicit limit, but more stack is used. - - 8. Applied Daniel's patches to solve problems with the import/export magic - syntax that is required for Windows, and which was going wrong for the - pcreposix and pcrecpp parts of the library. These were overlooked when this - problem was solved for the main library. - - 9. There were some crude static tests to avoid integer overflow when computing - the size of patterns that contain repeated groups with explicit upper - limits. As the maximum quantifier is 65535, the maximum group length was - set at 30,000 so that the product of these two numbers did not overflow a - 32-bit integer. However, it turns out that people want to use groups that - are longer than 30,000 bytes (though not repeat them that many times). - Change 7.0/17 (the refactoring of the way the pattern size is computed) has - made it possible to implement the integer overflow checks in a much more - dynamic way, which I have now done. The artificial limitation on group - length has been removed - we now have only the limit on the total length of - the compiled pattern, which depends on the LINK_SIZE setting. - -10. Fixed a bug in the documentation for get/copy named substring when - duplicate names are permitted. If none of the named substrings are set, the - functions return PCRE_ERROR_NOSUBSTRING (7); the doc said they returned an - empty string. - -11. 
Because Perl interprets \Q...\E at a high level, and ignores orphan \E - instances, patterns such as [\Q\E] or [\E] or even [^\E] cause an error, - because the ] is interpreted as the first data character and the - terminating ] is not found. PCRE has been made compatible with Perl in this - regard. Previously, it interpreted [\Q\E] as an empty class, and [\E] could - cause memory overwriting. - -10. Like Perl, PCRE automatically breaks an unlimited repeat after an empty - string has been matched (to stop an infinite loop). It was not recognizing - a conditional subpattern that could match an empty string if that - subpattern was within another subpattern. For example, it looped when - trying to match (((?(1)X|))*) but it was OK with ((?(1)X|)*) where the - condition was not nested. This bug has been fixed. - -12. A pattern like \X?\d or \P{L}?\d in non-UTF-8 mode could cause a backtrack - past the start of the subject in the presence of bytes with the top bit - set, for example "\x8aBCD". - -13. Added Perl 5.10 experimental backtracking controls (*FAIL), (*F), (*PRUNE), - (*SKIP), (*THEN), (*COMMIT), and (*ACCEPT). - -14. Optimized (?!) to (*FAIL). - -15. Updated the test for a valid UTF-8 string to conform to the later RFC 3629. - This restricts code points to be within the range 0 to 0x10FFFF, excluding - the "low surrogate" sequence 0xD800 to 0xDFFF. Previously, PCRE allowed the - full range 0 to 0x7FFFFFFF, as defined by RFC 2279. Internally, it still - does: it's just the validity check that is more restrictive. - -16. Inserted checks for integer overflows during escape sequence (backslash) - processing, and also fixed erroneous offset values for syntax errors during - backslash processing. - -17. Fixed another case of looking too far back in non-UTF-8 mode (cf 12 above) - for patterns like [\PPP\x8a]{1,}\x80 with the subject "A\x80". - -18. An unterminated class in a pattern like (?1)\c[ with a "forward reference" - caused an overrun. - -19. 
A pattern like (?:[\PPa*]*){8,} which had an "extended class" (one with - something other than just ASCII characters) inside a group that had an - unlimited repeat caused a loop at compile time (while checking to see - whether the group could match an empty string). - -20. Debugging a pattern containing \p or \P could cause a crash. For example, - [\P{Any}] did so. (Error in the code for printing property names.) - -21. An orphan \E inside a character class could cause a crash. - -22. A repeated capturing bracket such as (A)? could cause a wild memory - reference during compilation. - -23. There are several functions in pcre_compile() that scan along a compiled - expression for various reasons (e.g. to see if it's fixed length for look - behind). There were bugs in these functions when a repeated \p or \P was - present in the pattern. These operators have additional parameters compared - with \d, etc, and these were not being taken into account when moving along - the compiled data. Specifically: - - (a) An item such as \p{Yi}{3} in a lookbehind was not treated as fixed - length. - - (b) An item such as \pL+ within a repeated group could cause crashes or - loops. - - (c) A pattern such as \p{Yi}+(\P{Yi}+)(?1) could give an incorrect - "reference to non-existent subpattern" error. - - (d) A pattern like (\P{Yi}{2}\277)? could loop at compile time. - -24. A repeated \S or \W in UTF-8 mode could give wrong answers when multibyte - characters were involved (for example /\S{2}/8g with "A\x{a3}BC"). - -25. Using pcregrep in multiline, inverted mode (-Mv) caused it to loop. - -26. Patterns such as [\P{Yi}A] which include \p or \P and just one other - character were causing crashes (broken optimization). - -27. Patterns such as (\P{Yi}*\277)* (group with possible zero repeat containing - \p or \P) caused a compile-time loop. - -28. More problems have arisen in unanchored patterns when CRLF is a valid line - break. 
For example, the unstudied pattern [\r\n]A does not match the string - "\r\nA" because change 7.0/46 below moves the current point on by two - characters after failing to match at the start. However, the pattern \nA - *does* match, because it doesn't start till \n, and if [\r\n]A is studied, - the same is true. There doesn't seem any very clean way out of this, but - what I have chosen to do makes the common cases work: PCRE now takes note - of whether there can be an explicit match for \r or \n anywhere in the - pattern, and if so, 7.0/46 no longer applies. As part of this change, - there's a new PCRE_INFO_HASCRORLF option for finding out whether a compiled - pattern has explicit CR or LF references. - -29. Added (*CR) etc for changing newline setting at start of pattern. - - -Version 7.2 19-Jun-07 ---------------------- - - 1. If the fr_FR locale cannot be found for test 3, try the "french" locale, - which is apparently normally available under Windows. - - 2. Re-jig the pcregrep tests with different newline settings in an attempt - to make them independent of the local environment's newline setting. - - 3. Add code to configure.ac to remove -g from the CFLAGS default settings. - - 4. Some of the "internals" tests were previously cut out when the link size - was not 2, because the output contained actual offsets. The recent new - "Z" feature of pcretest means that these can be cut out, making the tests - usable with all link sizes. - - 5. Implemented Stan Switzer's goto replacement for longjmp() when not using - stack recursion. This gives a massive performance boost under BSD, but just - a small improvement under Linux. However, it saves one field in the frame - in all cases. - - 6. Added more features from the forthcoming Perl 5.10: - - (a) (?-n) (where n is a string of digits) is a relative subroutine or - recursion call. It refers to the nth most recently opened parentheses. 
- - (b) (?+n) is also a relative subroutine call; it refers to the nth next - to be opened parentheses. - - (c) Conditions that refer to capturing parentheses can be specified - relatively, for example, (?(-2)... or (?(+3)... - - (d) \K resets the start of the current match so that everything before - is not part of it. - - (e) \k{name} is synonymous with \k<name> and \k'name' (.NET compatible). - - (f) \g{name} is another synonym - part of Perl 5.10's unification of - reference syntax. - - (g) (?| introduces a group in which the numbering of parentheses in each - alternative starts with the same number. - - (h) \h, \H, \v, and \V match horizontal and vertical whitespace. - - 7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and - PCRE_INFO_JCHANGED. - - 8. A pattern such as (.*(.)?)* caused pcre_exec() to fail by either not - terminating or by crashing. Diagnosed by Viktor Griph; it was in the code - for detecting groups that can match an empty string. - - 9. A pattern with a very large number of alternatives (more than several - hundred) was running out of internal workspace during the pre-compile - phase, where pcre_compile() figures out how much memory will be needed. A - bit of new cunning has reduced the workspace needed for groups with - alternatives. The 1000-alternative test pattern now uses 12 bytes of - workspace instead of running out of the 4096 that are available. - -10. Inserted some missing (unsigned int) casts to get rid of compiler warnings. - -11. Applied patch from Google to remove an optimization that didn't quite work. - The report of the bug said: - - pcrecpp::RE("a*").FullMatch("aaa") matches, while - pcrecpp::RE("a*?").FullMatch("aaa") does not, and - pcrecpp::RE("a*?\\z").FullMatch("aaa") does again. - -12. If \p or \P was used in non-UTF-8 mode on a character greater than 127 - it matched the wrong number of bytes. - - -Version 7.1 24-Apr-07 ---------------------- - - 1. 
Applied Bob Rossi and Daniel G's patches to convert the build system to one - that is more "standard", making use of automake and other Autotools. There - is some re-arrangement of the files and adjustment of comments consequent - on this. - - 2. Part of the patch fixed a problem with the pcregrep tests. The test of -r - for recursive directory scanning broke on some systems because the files - are not scanned in any specific order and on different systems the order - was different. A call to "sort" has been inserted into RunGrepTest for the - appropriate test as a short-term fix. In the longer term there may be an - alternative. - - 3. I had an email from Eric Raymond about problems translating some of PCRE's - man pages to HTML (despite the fact that I distribute HTML pages, some - people do their own conversions for various reasons). The problems - concerned the use of low-level troff macros .br and .in. I have therefore - removed all such uses from the man pages (some were redundant, some could - be replaced by .nf/.fi pairs). The 132html script that I use to generate - HTML has been updated to handle .nf/.fi and to complain if it encounters - .br or .in. - - 4. Updated comments in configure.ac that get placed in config.h.in and also - arranged for config.h to be included in the distribution, with the name - config.h.generic, for the benefit of those who have to compile without - Autotools (compare pcre.h, which is now distributed as pcre.h.generic). - - 5. Updated the support (such as it is) for Virtual Pascal, thanks to Stefan - Weber: (1) pcre_internal.h was missing some function renames; (2) updated - makevp.bat for the current PCRE, using the additional files - makevp_c.txt, makevp_l.txt, and pcregexp.pas. - - 6. A Windows user reported a minor discrepancy with test 2, which turned out - to be caused by a trailing space on an input line that had got lost in his - copy. The trailing space was an accident, so I've just removed it. - - 7. Add -Wl,-R... 
flags in pcre-config.in for *BSD* systems, as I'm told - that is needed. - - 8. Mark ucp_table (in ucptable.h) and ucp_gentype (in pcre_ucp_searchfuncs.c) - as "const" (a) because they are and (b) because it helps the PHP - maintainers who have recently made a script to detect big data structures - in the php code that should be moved to the .rodata section. I remembered - to update Builducptable as well, so it won't revert if ucptable.h is ever - re-created. - - 9. Added some extra #ifdef SUPPORT_UTF8 conditionals into pcretest.c, - pcre_printint.src, pcre_compile.c, pcre_study.c, and pcre_tables.c, in - order to be able to cut out the UTF-8 tables in the latter when UTF-8 - support is not required. This saves 1.5-2K of code, which is important in - some applications. - - Later: more #ifdefs are needed in pcre_ord2utf8.c and pcre_valid_utf8.c - so as not to refer to the tables, even though these functions will never be - called when UTF-8 support is disabled. Otherwise there are problems with a - shared library. - -10. Fixed two bugs in the emulated memmove() function in pcre_internal.h: - - (a) It was defining its arguments as char * instead of void *. - - (b) It was assuming that all moves were upwards in memory; this was true - a long time ago when I wrote it, but is no longer the case. - - The emulated memmove() is provided for those environments that have neither - memmove() nor bcopy(). I didn't think anyone used it these days, but that - is clearly not the case, as these two bugs were recently reported. - -11. The script PrepareRelease is now distributed: it calls 132html, CleanTxt, - and Detrail to create the HTML documentation, the .txt form of the man - pages, and it removes trailing spaces from listed files. It also creates - pcre.h.generic and config.h.generic from pcre.h and config.h. In the latter - case, it wraps all the #defines with #ifndefs. This script should be run - before "make dist". - -12. 
Fixed two fairly obscure bugs concerned with quantified caseless matching - with Unicode property support. - - (a) For a maximizing quantifier, if the two different cases of the - character were of different lengths in their UTF-8 codings (there are - some cases like this - I found 11), and the matching function had to - back up over a mixture of the two cases, it incorrectly assumed they - were both the same length. - - (b) When PCRE was configured to use the heap rather than the stack for - recursion during matching, it was not correctly preserving the data for - the other case of a UTF-8 character when checking ahead for a match - while processing a minimizing repeat. If the check also involved - matching a wide character, but failed, corruption could cause an - erroneous result when trying to check for a repeat of the original - character. - -13. Some tidying changes to the testing mechanism: - - (a) The RunTest script now detects the internal link size and whether there - is UTF-8 and UCP support by running ./pcretest -C instead of relying on - values substituted by "configure". (The RunGrepTest script already did - this for UTF-8.) The configure.ac script no longer substitutes the - relevant variables. - - (b) The debugging options /B and /D in pcretest show the compiled bytecode - with length and offset values. This means that the output is different - for different internal link sizes. Test 2 is skipped for link sizes - other than 2 because of this, bypassing the problem. Unfortunately, - there was also a test in test 3 (the locale tests) that used /B and - failed for link sizes other than 2. Rather than cut the whole test out, - I have added a new /Z option to pcretest that replaces the length and - offset values with spaces. This is now used to make test 3 independent - of link size. (Test 2 will be tidied up later.) - -14. 
If erroroffset was passed as NULL to pcre_compile, it provoked a - segmentation fault instead of returning the appropriate error message. - -15. In multiline mode when the newline sequence was set to "any", the pattern - ^$ would give a match between the \r and \n of a subject such as "A\r\nB". - This doesn't seem right; it now treats the CRLF combination as the line - ending, and so does not match in that case. It's only a pattern such as ^$ - that would hit this one: something like ^ABC$ would have failed after \r - and then tried again after \r\n. - -16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub" - in an attempt to make files that differ only in their line terminators - compare equal. This works on Linux. - -17. Under certain error circumstances pcregrep might try to free random memory - as it exited. This is now fixed, thanks to valgrind. - -19. In pcretest, if the pattern /(?m)^$/g was matched against the string - "abc\r\n\r\n", it found an unwanted second match after the second \r. This - was because its rules for how to advance for /g after matching an empty - string at the end of a line did not allow for this case. They now check for - it specially. - -20. pcretest is supposed to handle patterns and data of any length, by - extending its buffers when necessary. It was getting this wrong when the - buffer for a data line had to be extended. - -21. Added PCRE_NEWLINE_ANYCRLF which is like ANY, but matches only CR, LF, or - CRLF as a newline sequence. - -22. Code for handling Unicode properties in pcre_dfa_exec() wasn't being cut - out by #ifdef SUPPORT_UCP. This did no harm, as it could never be used, but - I have nevertheless tidied it up. - -23. Added some casts to kill warnings from HP-UX ia64 compiler. - -24. Added a man page for pcre-config. - - -Version 7.0 19-Dec-06 ---------------------- - - 1. Fixed a signed/unsigned compiler warning in pcre_compile.c, shown up by - moving to gcc 4.1.1. - - 2. 
The -S option for pcretest uses setrlimit(); I had omitted to #include - sys/time.h, which is documented as needed for this function. It doesn't - seem to matter on Linux, but it showed up on some releases of OS X. - - 3. It seems that there are systems where bytes whose values are greater than - 127 match isprint() in the "C" locale. The "C" locale should be the - default when a C program starts up. In most systems, only ASCII printing - characters match isprint(). This difference caused the output from pcretest - to vary, making some of the tests fail. I have changed pcretest so that: - - (a) When it is outputting text in the compiled version of a pattern, bytes - other than 32-126 are always shown as hex escapes. - - (b) When it is outputting text that is a matched part of a subject string, - it does the same, unless a different locale has been set for the match - (using the /L modifier). In this case, it uses isprint() to decide. - - 4. Fixed a major bug that caused incorrect computation of the amount of memory - required for a compiled pattern when options that changed within the - pattern affected the logic of the preliminary scan that determines the - length. The relevant options are -x, and -i in UTF-8 mode. The result was - that the computed length was too small. The symptoms of this bug were - either the PCRE error "internal error: code overflow" from pcre_compile(), - or a glibc crash with a message such as "pcretest: free(): invalid next - size (fast)". Examples of patterns that provoked this bug (shown in - pcretest format) are: - - /(?-x: )/x - /(?x)(?-x: \s*#\s*)/ - /((?i)[\x{c0}])/8 - /(?i:[\x{c0}])/8 - - HOWEVER: Change 17 below makes this fix obsolete as the memory computation - is now done differently. - - 5. 
Applied patches from Google to: (a) add a QuoteMeta function to the C++ - wrapper classes; (b) implement a new function in the C++ scanner that is - more efficient than the old way of doing things because it avoids levels of - recursion in the regex matching; (c) add a paragraph to the documentation - for the FullMatch() function. - - 6. The escape sequence \n was being treated as whatever was defined as - "newline". Not only was this contrary to the documentation, which states - that \n is character 10 (hex 0A), but it also went horribly wrong when - "newline" was defined as CRLF. This has been fixed. - - 7. In pcre_dfa_exec.c the value of an unsigned integer (the variable called c) - was being set to -1 for the "end of line" case (supposedly a value that no - character can have). Though this value is never used (the check for end of - line is "zero bytes in current character"), it caused compiler complaints. - I've changed it to 0xffffffff. - - 8. In pcre_version.c, the version string was being built by a sequence of - C macros that, in the event of PCRE_PRERELEASE being defined as an empty - string (as it is for production releases) called a macro with an empty - argument. The C standard says the result of this is undefined. The gcc - compiler treats it as an empty string (which was what was wanted) but it is - reported that Visual C gives an error. The source has been hacked around to - avoid this problem. - - 9. On the advice of a Windows user, included <io.h> and <fcntl.h> in Windows - builds of pcretest, and changed the call to _setmode() to use _O_BINARY - instead of 0x8000. Made all the #ifdefs test both _WIN32 and WIN32 (not all - of them did). - -10. Originally, pcretest opened its input and output without "b"; then I was - told that "b" was needed in some environments, so it was added for release - 5.0 to both the input and output. (It makes no difference on Unix-like - systems.) Later I was told that it is wrong for the input on Windows. 
I've - now abstracted the modes into two macros, to make it easier to fiddle with - them, and removed "b" from the input mode under Windows. - -11. Added pkgconfig support for the C++ wrapper library, libpcrecpp. - -12. Added -help and --help to pcretest as an official way of being reminded - of the options. - -13. Removed some redundant semicolons after macro calls in pcrecpparg.h.in - and pcrecpp.cc because they annoy compilers at high warning levels. - -14. A bit of tidying/refactoring in pcre_exec.c in the main bumpalong loop. - -15. Fixed an occurrence of == in configure.ac that should have been = (shell - scripts are not C programs :-) and which was not noticed because it works - on Linux. - -16. pcretest is supposed to handle any length of pattern and data line (as one - line or as a continued sequence of lines) by extending its input buffer if - necessary. This feature was broken for very long pattern lines, leading to - a string of junk being passed to pcre_compile() if the pattern was longer - than about 50K. - -17. I have done a major re-factoring of the way pcre_compile() computes the - amount of memory needed for a compiled pattern. Previously, there was code - that made a preliminary scan of the pattern in order to do this. That was - OK when PCRE was new, but as the facilities have expanded, it has become - harder and harder to keep it in step with the real compile phase, and there - have been a number of bugs (see for example, 4 above). I have now found a - cunning way of running the real compile function in a "fake" mode that - enables it to compute how much memory it would need, while actually only - ever using a few hundred bytes of working memory and without too many - tests of the mode. This should make future maintenance and development - easier. A side effect of this work is that the limit of 200 on the nesting - depth of parentheses has been removed (though this was never a serious - limitation, I suspect). 
However, there is a downside: pcre_compile() now - runs more slowly than before (30% or more, depending on the pattern). I - hope this isn't a big issue. There is no effect on runtime performance. - -18. Fixed a minor bug in pcretest: if a pattern line was not terminated by a - newline (only possible for the last line of a file) and it was a - pattern that set a locale (followed by /Lsomething), pcretest crashed. - -19. Added additional timing features to pcretest. (1) The -tm option now times - matching only, not compiling. (2) Both -t and -tm can be followed, as a - separate command line item, by a number that specifies the number of - repeats to use when timing. The default is 50000; this gives better - precision, but takes uncomfortably long for very large patterns. - -20. Extended pcre_study() to be more clever in cases where a branch of a - subpattern has no definite first character. For example, (a*|b*)[cd] would - previously give no result from pcre_study(). Now it recognizes that the - first character must be a, b, c, or d. - -21. There was an incorrect error "recursive call could loop indefinitely" if - a subpattern (or the entire pattern) that was being tested for matching an - empty string contained only one non-empty item after a nested subpattern. - For example, the pattern (?>\x{100}*)\d(?R) provoked this error - incorrectly, because the \d was being skipped in the check. - -22. The pcretest program now has a new pattern option /B and a command line - option -b, which is equivalent to adding /B to every pattern. This causes - it to show the compiled bytecode, without the additional information that - -d shows. The effect of -d is now the same as -b with -i (and similarly, /D - is the same as /B/I). - -23. A new optimization is now able automatically to treat some sequences such - as a*b as a*+b. 
More specifically, if something simple (such as a character - or a simple class like \d) has an unlimited quantifier, and is followed by - something that cannot possibly match the quantified thing, the quantifier - is automatically "possessified". - -24. A recursive reference to a subpattern whose number was greater than 39 - went wrong under certain circumstances in UTF-8 mode. This bug could also - have affected the operation of pcre_study(). - -25. Realized that a little bit of performance could be had by replacing - (c & 0xc0) == 0xc0 with c >= 0xc0 when processing UTF-8 characters. - -26. Timing data from pcretest is now shown to 4 decimal places instead of 3. - -27. Possessive quantifiers such as a++ were previously implemented by turning - them into atomic groups such as (?>a+). Now they have their own opcodes, - which improves performance. This includes the automatically created ones - from 23 above. - -28. A pattern such as (?=(\w+))\1: which simulates an atomic group using a - lookahead was broken if it was not anchored. PCRE was mistakenly expecting - the first matched character to be a colon. This applied both to named and - numbered groups. - -29. The ucpinternal.h header file was missing its idempotency #ifdef. - -30. I was sent a "project" file called libpcre.a.dev which I understand makes - building PCRE on Windows easier, so I have included it in the distribution. - -31. There is now a check in pcretest against a ridiculously large number being - returned by pcre_exec() or pcre_dfa_exec(). If this happens in a /g or /G - loop, the loop is abandoned. - -32. Forward references to subpatterns in conditions such as (?(2)...) where - subpattern 2 is defined later cause pcre_compile() to search forwards in - the pattern for the relevant set of parentheses. This search went wrong - when there were unescaped parentheses in a character class, parentheses - escaped with \Q...\E, or parentheses in a #-comment in /x mode. - -33. 
"Subroutine" calls and backreferences were previously restricted to - referencing subpatterns earlier in the regex. This restriction has now - been removed. - -34. Added a number of extra features that are going to be in Perl 5.10. On the - whole, these are just syntactic alternatives for features that PCRE had - previously implemented using the Python syntax or my own invention. The - other formats are all retained for compatibility. - - (a) Named groups can now be defined as (?<name>...) or (?'name'...) as well - as (?P<name>...). The new forms, as well as being in Perl 5.10, are - also .NET compatible. - - (b) A recursion or subroutine call to a named group can now be defined as - (?&name) as well as (?P>name). - - (c) A backreference to a named group can now be defined as \k<name> or - \k'name' as well as (?P=name). The new forms, as well as being in Perl - 5.10, are also .NET compatible. - - (d) A conditional reference to a named group can now use the syntax - (?(<name>) or (?('name') as well as (?(name). - - (e) A "conditional group" of the form (?(DEFINE)...) can be used to define - groups (named and numbered) that are never evaluated inline, but can be - called as "subroutines" from elsewhere. In effect, the DEFINE condition - is always false. There may be only one alternative in such a group. - - (f) A test for recursion can be given as (?(R1).. or (?(R&name)... as well - as the simple (?(R). The condition is true only if the most recent - recursion is that of the given number or name. It does not search out - through the entire recursion stack. - - (g) The escape \gN or \g{N} has been added, where N is a positive or - negative number, specifying an absolute or relative reference. - -35. Tidied to get rid of some further signed/unsigned compiler warnings and - some "unreachable code" warnings. - -36. Updated the Unicode property tables to Unicode version 5.0.0. Amongst other - things, this adds five new scripts. - -37. Perl ignores orphaned \E escapes completely. 
PCRE now does the same. - There were also incompatibilities regarding the handling of \Q..\E inside - character classes, for example with patterns like [\Qa\E-\Qz\E] where the - hyphen was adjacent to \Q or \E. I hope I've cleared all this up now. - -38. Like Perl, PCRE detects when an indefinitely repeated parenthesized group - matches an empty string, and forcibly breaks the loop. There were bugs in - this code in non-simple cases. For a pattern such as ^(a()*)* matched - against aaaa the result was just "a" rather than "aaaa", for example. Two - separate and independent bugs (that affected different cases) have been - fixed. - -39. Refactored the code to abolish the use of different opcodes for small - capturing bracket numbers. This is a tidy that I avoided doing when I - removed the limit on the number of capturing brackets for 3.5 back in 2001. - The new approach is not only tidier, it makes it possible to reduce the - memory needed to fix the previous bug (38). - -40. Implemented PCRE_NEWLINE_ANY to recognize any of the Unicode newline - sequences (http://unicode.org/unicode/reports/tr18/) as "newline" when - processing dot, circumflex, or dollar metacharacters, or #-comments in /x - mode. - -41. Add \R to match any Unicode newline sequence, as suggested in the Unicode - report. - -42. Applied patch, originally from Ari Pollak, modified by Google, to allow - copy construction and assignment in the C++ wrapper. - -43. Updated pcregrep to support "--newline=any". In the process, I fixed a - couple of bugs that could have given wrong results in the "--newline=crlf" - case. - -44. Added a number of casts and did some reorganization of signed/unsigned int - variables following suggestions from Dair Grant. Also renamed the variable - "this" as "item" because it is a C++ keyword. - -45. 
Arranged for dftables to add - - #include "pcre_internal.h" - - to pcre_chartables.c because without it, gcc 4.x may remove the array - definition from the final binary if PCRE is built into a static library and - dead code stripping is activated. - -46. For an unanchored pattern, if a match attempt fails at the start of a - newline sequence, and the newline setting is CRLF or ANY, and the next two - characters are CRLF, advance by two characters instead of one. - - -Version 6.7 04-Jul-06 ---------------------- - - 1. In order to handle tests when input lines are enormously long, pcretest has - been re-factored so that it automatically extends its buffers when - necessary. The code is crude, but this _is_ just a test program. The - default size has been increased from 32K to 50K. - - 2. The code in pcre_study() was using the value of the re argument before - testing it for NULL. (Of course, in any sensible call of the function, it - won't be NULL.) - - 3. The memmove() emulation function in pcre_internal.h, which is used on - systems that lack both memmove() and bcopy() - that is, hardly ever - - was missing a "static" storage class specifier. - - 4. When UTF-8 mode was not set, PCRE looped when compiling certain patterns - containing an extended class (one that cannot be represented by a bitmap - because it contains high-valued characters or Unicode property items, e.g. - [\pZ]). Almost always one would set UTF-8 mode when processing such a - pattern, but PCRE should not loop if you do not (it no longer does). - [Detail: two cases were found: (a) a repeated subpattern containing an - extended class; (b) a recursive reference to a subpattern that followed a - previous extended class. It wasn't skipping over the extended class - correctly when UTF-8 mode was not set.] - - 5. 
A negated single-character class was not being recognized as fixed-length - in lookbehind assertions such as (?<=[^f]), leading to an incorrect - compile error "lookbehind assertion is not fixed length". - - 6. The RunPerlTest auxiliary script was showing an unexpected difference - between PCRE and Perl for UTF-8 tests. It turns out that it is hard to - write a Perl script that can interpret lines of an input file either as - byte characters or as UTF-8, which is what "perltest" was being required to - do for the non-UTF-8 and UTF-8 tests, respectively. Essentially what you - can't do is switch easily at run time between having the "use utf8;" pragma - or not. In the end, I fudged it by using the RunPerlTest script to insert - "use utf8;" explicitly for the UTF-8 tests. - - 7. In multiline (/m) mode, PCRE was matching ^ after a terminating newline at - the end of the subject string, contrary to the documentation and to what - Perl does. This was true of both matching functions. Now it matches only at - the start of the subject and immediately after *internal* newlines. - - 8. A call of pcre_fullinfo() from pcretest to get the option bits was passing - a pointer to an int instead of a pointer to an unsigned long int. This - caused problems on 64-bit systems. - - 9. Applied a patch from the folks at Google to pcrecpp.cc, to fix "another - instance of the 'standard' template library not being so standard". - -10. There was no check on the number of named subpatterns nor the maximum - length of a subpattern name. The product of these values is used to compute - the size of the memory block for a compiled pattern. By supplying a very - long subpattern name and a large number of named subpatterns, the size - computation could be caused to overflow. This is now prevented by limiting - the length of names to 32 characters, and the number of named subpatterns - to 10,000. - -11. 
Subpatterns that are repeated with specific counts have to be replicated in - the compiled pattern. The size of memory for this was computed from the - length of the subpattern and the repeat count. The latter is limited to - 65535, but there was no limit on the former, meaning that integer overflow - could in principle occur. The compiled length of a repeated subpattern is - now limited to 30,000 bytes in order to prevent this. - -12. Added the optional facility to have named substrings with the same name. - -13. Added the ability to use a named substring as a condition, using the - Python syntax: (?(name)yes|no). This overloads (?(R)... and names that - are numbers (not recommended). Forward references are permitted. - -14. Added forward references in named backreferences (if you see what I mean). - -15. In UTF-8 mode, with the PCRE_DOTALL option set, a quantified dot in the - pattern could run off the end of the subject. For example, the pattern - "(?s)(.{1,5})"8 did this with the subject "ab". - -16. If PCRE_DOTALL or PCRE_MULTILINE were set, pcre_dfa_exec() behaved as if - PCRE_CASELESS was set when matching characters that were quantified with ? - or *. - -17. A character class other than a single negated character that had a minimum - but no maximum quantifier - for example [ab]{6,} - was not handled - correctly by pcre_dfa_exec(). It would match only one character. - -18. A valid (though odd) pattern that looked like a POSIX character - class but used an invalid character after [ (for example [[,abc,]]) caused - pcre_compile() to give the error "Failed: internal error: code overflow" or - in some cases to crash with a glibc free() error. This could even happen if - the pattern terminated after [[ but there just happened to be a sequence of - letters, a binary zero, and a closing ] in the memory that followed. - -19. Perl's treatment of octal escapes in the range \400 to \777 has changed - over the years. 
Originally (before any Unicode support), just the bottom 8 - bits were taken. Thus, for example, \500 really meant \100. Nowadays the - output from "man perlunicode" includes this: - - The regular expression compiler produces polymorphic opcodes. That - is, the pattern adapts to the data and automatically switches to - the Unicode character scheme when presented with Unicode data--or - instead uses a traditional byte scheme when presented with byte - data. - - Sadly, a wide octal escape does not cause a switch, and in a string with - no other multibyte characters, these octal escapes are treated as before. - Thus, in Perl, the pattern /\500/ actually matches \100 but the pattern - /\500|\x{1ff}/ matches \500 or \777 because the whole thing is treated as a - Unicode string. - - I have not perpetrated such confusion in PCRE. Up till now, it took just - the bottom 8 bits, as in old Perl. I have now made octal escapes with - values greater than \377 illegal in non-UTF-8 mode. In UTF-8 mode they - translate to the appropriate multibyte character. - -20. Applied some refactoring to reduce the number of warnings from Microsoft - and Borland compilers. This has included removing the fudge introduced - seven years ago for the OS/2 compiler (see 2.02/2 below) because it caused - a warning about an unused variable. - -21. PCRE has not included VT (character 0x0b) in the set of whitespace - characters since release 4.0, because Perl (from release 5.004) does not. - [Or at least, is documented not to: some releases seem to be in conflict - with the documentation.] However, when a pattern was studied with - pcre_study() and all its branches started with \s, PCRE still included VT - as a possible starting character. Of course, this did no harm; it just - caused an unnecessary match attempt. - -22. Removed a now-redundant internal flag bit that recorded the fact that case - dependency changed within the pattern. 
This was once needed for "required - byte" processing, but is no longer used. This recovers a now-scarce options - bit. Also moved the least significant internal flag bit to the most- - significant bit of the word, which was not previously used (hangover from - the days when it was an int rather than a uint) to free up another bit for - the future. - -23. Added support for CRLF line endings as well as CR and LF. As well as the - default being selectable at build time, it can now be changed at runtime - via the PCRE_NEWLINE_xxx flags. There are now options for pcregrep to - specify that it is scanning data with non-default line endings. - -24. Changed the definition of CXXLINK to make it agree with the definition of - LINK in the Makefile, by replacing LDFLAGS to CXXFLAGS. - -25. Applied Ian Taylor's patches to avoid using another stack frame for tail - recursions. This makes a big difference to stack usage for some patterns. - -26. If a subpattern containing a named recursion or subroutine reference such - as (?P>B) was quantified, for example (xxx(?P>B)){3}, the calculation of - the space required for the compiled pattern went wrong and gave too small a - value. Depending on the environment, this could lead to "Failed: internal - error: code overflow at offset 49" or "glibc detected double free or - corruption" errors. - -27. Applied patches from Google (a) to support the new newline modes and (b) to - advance over multibyte UTF-8 characters in GlobalReplace. - -28. Change free() to pcre_free() in pcredemo.c. Apparently this makes a - difference for some implementation of PCRE in some Windows version. - -29. Added some extra testing facilities to pcretest: - - \q in a data line sets the "match limit" value - \Q in a data line sets the "match recursion limit" value - -S <size> sets the stack size, where <size> is in megabytes - - The -S option isn't available for Windows. - - -Version 6.6 06-Feb-06 ---------------------- - - 1. 
Change 16(a) for 6.5 broke things, because PCRE_DATA_SCOPE was not defined - in pcreposix.h. I have copied the definition from pcre.h. - - 2. Change 25 for 6.5 broke compilation in a build directory out-of-tree - because pcre.h is no longer a built file. - - 3. Added Jeff Friedl's additional debugging patches to pcregrep. These are - not normally included in the compiled code. - - -Version 6.5 01-Feb-06 ---------------------- - - 1. When using the partial match feature with pcre_dfa_exec(), it was not - anchoring the second and subsequent partial matches at the new starting - point. This could lead to incorrect results. For example, with the pattern - /1234/, partially matching against "123" and then "a4" gave a match. - - 2. Changes to pcregrep: - - (a) All non-match returns from pcre_exec() were being treated as failures - to match the line. Now, unless the error is PCRE_ERROR_NOMATCH, an - error message is output. Some extra information is given for the - PCRE_ERROR_MATCHLIMIT and PCRE_ERROR_RECURSIONLIMIT errors, which are - probably the only errors that are likely to be caused by users (by - specifying a regex that has nested indefinite repeats, for instance). - If there are more than 20 of these errors, pcregrep is abandoned. - - (b) A binary zero was treated as data while matching, but terminated the - output line if it was written out. This has been fixed: binary zeroes - are now no different to any other data bytes. - - (c) Whichever of the LC_ALL or LC_CTYPE environment variables is set is - used to set a locale for matching. The --locale=xxxx long option has - been added (no short equivalent) to specify a locale explicitly on the - pcregrep command, overriding the environment variables. - - (d) When -B was used with -n, some line numbers in the output were one less - than they should have been. - - (e) Added the -o (--only-matching) option. 
- - (f) If -A or -C was used with -c (count only), some lines of context were - accidentally printed for the final match. - - (g) Added the -H (--with-filename) option. - - (h) The combination of options -rh failed to suppress file names for files - that were found from directory arguments. - - (i) Added the -D (--devices) and -d (--directories) options. - - (j) Added the -F (--fixed-strings) option. - - (k) Allow "-" to be used as a file name for -f as well as for a data file. - - (l) Added the --colo(u)r option. - - (m) Added Jeffrey Friedl's -S testing option, but within #ifdefs so that it - is not present by default. - - 3. A nasty bug was discovered in the handling of recursive patterns, that is, - items such as (?R) or (?1), when the recursion could match a number of - alternatives. If it matched one of the alternatives, but subsequently, - outside the recursion, there was a failure, the code tried to back up into - the recursion. However, because of the way PCRE is implemented, this is not - possible, and the result was an incorrect result from the match. - - In order to prevent this happening, the specification of recursion has - been changed so that all such subpatterns are automatically treated as - atomic groups. Thus, for example, (?R) is treated as if it were (?>(?R)). - - 4. I had overlooked the fact that, in some locales, there are characters for - which isalpha() is true but neither isupper() nor islower() are true. In - the fr_FR locale, for instance, the \xAA and \xBA characters (ordmasculine - and ordfeminine) are like this. This affected the treatment of \w and \W - when they appeared in character classes, but not when they appeared outside - a character class. The bit map for "word" characters is now created - separately from the results of isalnum() instead of just taking it from the - upper, lower, and digit maps. (Plus the underscore character, of course.) - - 5. 
The above bug also affected the handling of POSIX character classes such as - [[:alpha:]] and [[:alnum:]]. These do not have their own bit maps in PCRE's - permanent tables. Instead, the bit maps for such a class were previously - created as the appropriate unions of the upper, lower, and digit bitmaps. - Now they are created by subtraction from the [[:word:]] class, which has - its own bitmap. - - 6. The [[:blank:]] character class matches horizontal, but not vertical space. - It is created by subtracting the vertical space characters (\x09, \x0a, - \x0b, \x0c) from the [[:space:]] bitmap. Previously, however, the - subtraction was done in the overall bitmap for a character class, meaning - that a class such as [\x0c[:blank:]] was incorrect because \x0c would not - be recognized. This bug has been fixed. - - 7. Patches from the folks at Google: - - (a) pcrecpp.cc: "to handle a corner case that may or may not happen in - real life, but is still worth protecting against". - - (b) pcrecpp.cc: "corrects a bug when negative radixes are used with - regular expressions". - - (c) pcre_scanner.cc: avoid use of std::count() because not all systems - have it. - - (d) Split off pcrecpparg.h from pcrecpp.h and had the former built by - "configure" and the latter not, in order to fix a problem somebody had - with compiling the Arg class on HP-UX. - - (e) Improve the error-handling of the C++ wrapper a little bit. - - (f) New tests for checking recursion limiting. - - 8. The pcre_memmove() function, which is used only if the environment does not - have a standard memmove() function (and is therefore rarely compiled), - contained two bugs: (a) use of int instead of size_t, and (b) it was not - returning a result (though PCRE never actually uses the result). - - 9. 
In the POSIX regexec() interface, if nmatch is specified as a ridiculously - large number - greater than INT_MAX/(3*sizeof(int)) - REG_ESPACE is - returned instead of calling malloc() with an overflowing number that would - most likely cause subsequent chaos. - -10. The debugging option of pcretest was not showing the NO_AUTO_CAPTURE flag. - -11. The POSIX flag REG_NOSUB is now supported. When a pattern that was compiled - with this option is matched, the nmatch and pmatch options of regexec() are - ignored. - -12. Added REG_UTF8 to the POSIX interface. This is not defined by POSIX, but is - provided in case anyone wants to use the POSIX interface with UTF-8 - strings. - -13. Added CXXLDFLAGS to the Makefile parameters to provide settings only on the - C++ linking (needed for some HP-UX environments). - -14. Avoid compiler warnings in get_ucpname() when compiled without UCP support - (unused parameter) and in the pcre_printint() function (omitted "default" - switch label when the default is to do nothing). - -15. Added some code to make it possible, when PCRE is compiled as a C++ - library, to replace subject pointers for pcre_exec() with a smart pointer - class, thus making it possible to process discontinuous strings. - -16. The two macros PCRE_EXPORT and PCRE_DATA_SCOPE are confusing, and perform - much the same function. They were added by different people who were trying - to make PCRE easy to compile on non-Unix systems. It has been suggested - that PCRE_EXPORT be abolished now that there is more automatic apparatus - for compiling on Windows systems. I have therefore replaced it with - PCRE_DATA_SCOPE. This is set automatically for Windows; if not set it - defaults to "extern" for C or "extern C" for C++, which works fine on - Unix-like systems. It is now possible to override the value of PCRE_DATA_ - SCOPE with something explicit in config.h. 
In addition: - - (a) pcreposix.h still had just "extern" instead of either of these macros; - I have replaced it with PCRE_DATA_SCOPE. - - (b) Functions such as _pcre_xclass(), which are internal to the library, - but external in the C sense, all had PCRE_EXPORT in their definitions. - This is apparently wrong for the Windows case, so I have removed it. - (It makes no difference on Unix-like systems.) - -17. Added a new limit, MATCH_LIMIT_RECURSION, which limits the depth of nesting - of recursive calls to match(). This is different to MATCH_LIMIT because - that limits the total number of calls to match(), not all of which increase - the depth of recursion. Limiting the recursion depth limits the amount of - stack (or heap if NO_RECURSE is set) that is used. The default can be set - when PCRE is compiled, and changed at run time. A patch from Google adds - this functionality to the C++ interface. - -18. Changes to the handling of Unicode character properties: - - (a) Updated the table to Unicode 4.1.0. - - (b) Recognize characters that are not in the table as "Cn" (undefined). - - (c) I revised the way the table is implemented to a much improved format - which includes recognition of ranges. It now supports the ranges that - are defined in UnicodeData.txt, and it also amalgamates other - characters into ranges. This has reduced the number of entries in the - table from around 16,000 to around 3,000, thus reducing its size - considerably. I realized I did not need to use a tree structure after - all - a binary chop search is just as efficient. Having reduced the - number of entries, I extended their size from 6 bytes to 8 bytes to - allow for more data. - - (d) Added support for Unicode script names via properties such as \p{Han}. - -19. In UTF-8 mode, a backslash followed by a non-Ascii character was not - matching that character. - -20. 
When matching a repeated Unicode property with a minimum greater than zero, - (for example \pL{2,}), PCRE could look past the end of the subject if it - reached it while seeking the minimum number of characters. This could - happen only if some of the characters were more than one byte long, because - there is a check for at least the minimum number of bytes. - -21. Refactored the implementation of \p and \P so as to be more general, to - allow for more different types of property in future. This has changed the - compiled form incompatibly. Anybody with saved compiled patterns that use - \p or \P will have to recompile them. - -22. Added "Any" and "L&" to the supported property types. - -23. Recognize \x{...} as a code point specifier, even when not in UTF-8 mode, - but give a compile time error if the value is greater than 0xff. - -24. The man pages for pcrepartial, pcreprecompile, and pcre_compile2 were - accidentally not being installed or uninstalled. - -25. The pcre.h file was built from pcre.h.in, but the only changes that were - made were to insert the current release number. This seemed silly, because - it made things harder for people building PCRE on systems that don't run - "configure". I have turned pcre.h into a distributed file, no longer built - by "configure", with the version identification directly included. There is - no longer a pcre.h.in file. - - However, this change necessitated a change to the pcre-config script as - well. It is built from pcre-config.in, and one of the substitutions was the - release number. I have updated configure.ac so that ./configure now finds - the release number by grepping pcre.h. - -26. Added the ability to run the tests under valgrind. - - -Version 6.4 05-Sep-05 ---------------------- - - 1. Change 6.0/10/(l) to pcregrep introduced a bug that caused separator lines - "--" to be printed when multiple files were scanned, even when none of the - -A, -B, or -C options were used. 
This is not compatible with Gnu grep, so I - consider it to be a bug, and have restored the previous behaviour. - - 2. A couple of code tidies to get rid of compiler warnings. - - 3. The pcretest program used to cheat by referring to symbols in the library - whose names begin with _pcre_. These are internal symbols that are not - really supposed to be visible externally, and in some environments it is - possible to suppress them. The cheating is now confined to including - certain files from the library's source, which is a bit cleaner. - - 4. Renamed pcre.in as pcre.h.in to go with pcrecpp.h.in; it also makes the - file's purpose clearer. - - 5. Reorganized pcre_ucp_findchar(). - - -Version 6.3 15-Aug-05 ---------------------- - - 1. The file libpcre.pc.in did not have general read permission in the tarball. - - 2. There were some problems when building without C++ support: - - (a) If C++ support was not built, "make install" and "make test" still - tried to test it. - - (b) There were problems when the value of CXX was explicitly set. Some - changes have been made to try to fix these, and ... - - (c) --disable-cpp can now be used to explicitly disable C++ support. - - (d) The use of @CPP_OBJ@ directly caused a blank line preceded by a - backslash in a target when C++ was disabled. This confuses some - versions of "make", apparently. Using an intermediate variable solves - this. (Same for CPP_LOBJ.) - - 3. $(LINK_FOR_BUILD) now includes $(CFLAGS_FOR_BUILD) and $(LINK) - (non-Windows) now includes $(CFLAGS) because these flags are sometimes - necessary on certain architectures. - - 4. Added a setting of -export-symbols-regex to the link command to remove - those symbols that are exported in the C sense, but actually are local - within the library, and not documented. Their names all begin with - "_pcre_". 
This is not a perfect job, because (a) we have to except some - symbols that pcretest ("illegally") uses, and (b) the facility isn't always - available (and never for static libraries). I have made a note to try to - find a way round (a) in the future. - - -Version 6.2 01-Aug-05 ---------------------- - - 1. There was no test for integer overflow of quantifier values. A construction - such as {1111111111111111} would give undefined results. What is worse, if - a minimum quantifier for a parenthesized subpattern overflowed and became - negative, the calculation of the memory size went wrong. This could have - led to memory overwriting. - - 2. Building PCRE using VPATH was broken. Hopefully it is now fixed. - - 3. Added "b" to the 2nd argument of fopen() in dftables.c, for non-Unix-like - operating environments where this matters. - - 4. Applied Giuseppe Maxia's patch to add additional features for controlling - PCRE options from within the C++ wrapper. - - 5. Named capturing subpatterns were not being correctly counted when a pattern - was compiled. This caused two problems: (a) If there were more than 100 - such subpatterns, the calculation of the memory needed for the whole - compiled pattern went wrong, leading to an overflow error. (b) Numerical - back references of the form \12, where the number was greater than 9, were - not recognized as back references, even though there were sufficient - previous subpatterns. - - 6. Two minor patches to pcrecpp.cc in order to allow it to compile on older - versions of gcc, e.g. 2.95.4. - - -Version 6.1 21-Jun-05 ---------------------- - - 1. There was one reference to the variable "posix" in pcretest.c that was not - surrounded by "#if !defined NOPOSIX". - - 2. Make it possible to compile pcretest without DFA support, UTF8 support, or - the cross-check on the old pcre_info() function, for the benefit of the - cut-down version of PCRE that is currently imported into Exim. - - 3. 
A (silly) pattern starting with (?i)(?-i) caused an internal space - allocation error. I've done the easy fix, which wastes 2 bytes for sensible - patterns that start (?i) but I don't think that matters. The use of (?i) is - just an example; this all applies to the other options as well. - - 4. Since libtool seems to echo the compile commands it is issuing, the output - from "make" can be reduced a bit by putting "@" in front of each libtool - compile command. - - 5. Patch from the folks at Google for configure.in to be a bit more thorough - in checking for a suitable C++ installation before trying to compile the - C++ stuff. This should fix a reported problem when a compiler was present, - but no suitable headers. - - 6. The man pages all had just "PCRE" as their title. I have changed them to - be the relevant file name. I have also arranged that these names are - retained in the file doc/pcre.txt, which is a concatenation in text format - of all the man pages except the little individual ones for each function. - - 7. The NON-UNIX-USE file had not been updated for the different set of source - files that come with release 6. I also added a few comments about the C++ - wrapper. - - -Version 6.0 07-Jun-05 ---------------------- - - 1. Some minor internal re-organization to help with my DFA experiments. - - 2. Some missing #ifdef SUPPORT_UCP conditionals in pcretest and printint that - didn't matter for the library itself when fully configured, but did matter - when compiling without UCP support, or within Exim, where the ucp files are - not imported. - - 3. Refactoring of the library code to split up the various functions into - different source modules. The addition of the new DFA matching code (see - below) to a single monolithic source would have made it really too - unwieldy, quite apart from causing all the code to be included in a - statically linked application, when only some functions are used. 
This is - relevant even without the DFA addition now that patterns can be compiled in - one application and matched in another. - - The downside of splitting up is that there have to be some external - functions and data tables that are used internally in different modules of - the library but which are not part of the API. These have all had their - names changed to start with "_pcre_" so that they are unlikely to clash - with other external names. - - 4. Added an alternate matching function, pcre_dfa_exec(), which matches using - a different (DFA) algorithm. Although it is slower than the original - function, it does have some advantages for certain types of matching - problem. - - 5. Upgrades to pcretest in order to test the features of pcre_dfa_exec(), - including restarting after a partial match. - - 6. A patch for pcregrep that defines INVALID_FILE_ATTRIBUTES if it is not - defined when compiling for Windows was sent to me. I have put it into the - code, though I have no means of testing or verifying it. - - 7. Added the pcre_refcount() auxiliary function. - - 8. Added the PCRE_FIRSTLINE option. This constrains an unanchored pattern to - match before or at the first newline in the subject string. In pcretest, - the /f option on a pattern can be used to set this. - - 9. A repeated \w when used in UTF-8 mode with characters greater than 256 - would behave wrongly. This has been present in PCRE since release 4.0. - -10. A number of changes to the pcregrep command: - - (a) Refactored how -x works; insert ^(...)$ instead of setting - PCRE_ANCHORED and checking the length, in preparation for adding - something similar for -w. - - (b) Added the -w (match as a word) option. - - (c) Refactored the way lines are read and buffered so as to have more - than one at a time available. - - (d) Implemented a pcregrep test script. - - (e) Added the -M (multiline match) option. This allows patterns to match - over several lines of the subject. 
The buffering ensures that at least - 8K, or the rest of the document (whichever is the shorter) is available - for matching (and similarly the previous 8K for lookbehind assertions). - - (f) Changed the --help output so that it now says - - -w, --word-regex(p) - - instead of two lines, one with "regex" and the other with "regexp" - because that confused at least one person since the short forms are the - same. (This required a bit of code, as the output is generated - automatically from a table. It wasn't just a text change.) - - (g) -- can be used to terminate pcregrep options if the next thing isn't an - option but starts with a hyphen. Could be a pattern or a path name - starting with a hyphen, for instance. - - (h) "-" can be given as a file name to represent stdin. - - (i) When file names are being printed, "(standard input)" is used for - the standard input, for compatibility with GNU grep. Previously - "<stdin>" was used. - - (j) The option --label=xxx can be used to supply a name to be used for - stdin when file names are being printed. There is no short form. - - (k) Re-factored the options decoding logic because we are going to add - two more options that take data. Such options can now be given in four - different ways, e.g. "-fname", "-f name", "--file=name", "--file name". - - (l) Added the -A, -B, and -C options for requesting that lines of context - around matches be printed. - - (m) Added the -L option to print the names of files that do not contain - any matching lines, that is, the complement of -l. - - (n) The return code is 2 if any file cannot be opened, but pcregrep does - continue to scan other files. - - (o) The -s option was incorrectly implemented. For compatibility with other - greps, it now suppresses the error message for a non-existent or non- - accessible file (but not the return code). There is a new option called - -q that suppresses the output of matching lines, which was what -s was - previously doing. 
- - (p) Added --include and --exclude options to specify files for inclusion - and exclusion when recursing. - -11. The Makefile was not using the Autoconf-supported LDFLAGS macro properly. - Hopefully, it now does. - -12. Missing cast in pcre_study(). - -13. Added an "uninstall" target to the makefile. - -14. Replaced "extern" in the function prototypes in Makefile.in with - "PCRE_DATA_SCOPE", which defaults to 'extern' or 'extern "C"' in the Unix - world, but is set differently for Windows. - -15. Added a second compiling function called pcre_compile2(). The only - difference is that it has an extra argument, which is a pointer to an - integer error code. When there is a compile-time failure, this is set - non-zero, in addition to the error test pointer being set to point to an - error message. The new argument may be NULL if no error number is required - (but then you may as well call pcre_compile(), which is now just a - wrapper). This facility is provided because some applications need a - numeric error indication, but it has also enabled me to tidy up the way - compile-time errors are handled in the POSIX wrapper. - -16. Added VPATH=.libs to the makefile; this should help when building with one - prefix path and installing with another. (Or so I'm told by someone who - knows more about this stuff than I do.) - -17. Added a new option, REG_DOTALL, to the POSIX function regcomp(). This - passes PCRE_DOTALL to the pcre_compile() function, making the "." character - match everything, including newlines. This is not POSIX-compatible, but - somebody wanted the feature. From pcretest it can be activated by using - both the P and the s flags. - -18. AC_PROG_LIBTOOL appeared twice in Makefile.in. Removed one. - -19. libpcre.pc was being incorrectly installed as executable. - -20. A couple of places in pcretest check for end-of-line by looking for '\n'; - it now also looks for '\r' so that it will work unmodified on Windows. - -21. 
Added Google's contributed C++ wrapper to the distribution. - -22. Added some untidy missing memory free() calls in pcretest, to keep - Electric Fence happy when testing. - - - -Version 5.0 13-Sep-04 ---------------------- - - 1. Internal change: literal characters are no longer packed up into items - containing multiple characters in a single byte-string. Each character - is now matched using a separate opcode. However, there may be more than one - byte in the character in UTF-8 mode. - - 2. The pcre_callout_block structure has two new fields: pattern_position and - next_item_length. These contain the offset in the pattern to the next match - item, and its length, respectively. - - 3. The PCRE_AUTO_CALLOUT option for pcre_compile() requests the automatic - insertion of callouts before each pattern item. Added the /C option to - pcretest to make use of this. - - 4. On the advice of a Windows user, the lines - - #if defined(_WIN32) || defined(WIN32) - _setmode( _fileno( stdout ), 0x8000 ); - #endif /* defined(_WIN32) || defined(WIN32) */ - - have been added to the source of pcretest. This apparently does useful - magic in relation to line terminators. - - 5. Changed "r" and "w" in the calls to fopen() in pcretest to "rb" and "wb" - for the benefit of those environments where the "b" makes a difference. - - 6. The icc compiler has the same options as gcc, but "configure" doesn't seem - to know about it. I have put a hack into configure.in that adds in code - to set GCC=yes if CC=icc. This seems to end up at a point in the - generated configure script that is early enough to affect the setting of - compiler options, which is what is needed, but I have no means of testing - whether it really works. (The user who reported this had patched the - generated configure script, which of course I cannot do.) - - LATER: After change 22 below (new libtool files), the configure script - seems to know about icc (and also ecc). 
Therefore, I have commented out - this hack in configure.in. - - 7. Added support for pkg-config (2 patches were sent in). - - 8. Negated POSIX character classes that used a combination of internal tables - were completely broken. These were [[:^alpha:]], [[:^alnum:]], and - [[:^ascii]]. Typically, they would match almost any characters. The other - POSIX classes were not broken in this way. - - 9. Matching the pattern "\b.*?" against "ab cd", starting at offset 1, failed - to find the match, as PCRE was deluded into thinking that the match had to - start at the start point or following a newline. The same bug applied to - patterns with negative forward assertions or any backward assertions - preceding ".*" at the start, unless the pattern required a fixed first - character. This was a failing pattern: "(?!.bcd).*". The bug is now fixed. - -10. In UTF-8 mode, when moving forwards in the subject after a failed match - starting at the last subject character, bytes beyond the end of the subject - string were read. - -11. Renamed the variable "class" as "classbits" to make life easier for C++ - users. (Previously there was a macro definition, but it apparently wasn't - enough.) - -12. Added the new field "tables" to the extra data so that tables can be passed - in at exec time, or the internal tables can be re-selected. This allows - a compiled regex to be saved and re-used at a later time by a different - program that might have everything at different addresses. - -13. Modified the pcre-config script so that, when run on Solaris, it shows a - -R library as well as a -L library. - -14. The debugging options of pcretest (-d on the command line or D on a - pattern) showed incorrect output for anything following an extended class - that contained multibyte characters and which was followed by a quantifier. - -15. Added optional support for general category Unicode character properties - via the \p, \P, and \X escapes. Unicode property support implies UTF-8 - support. 
It adds about 90K to the size of the library. The meanings of the - inbuilt class escapes such as \d and \s have NOT been changed. - -16. Updated pcredemo.c to include calls to free() to release the memory for the - compiled pattern. - -17. The generated file chartables.c was being created in the source directory - instead of in the building directory. This caused the build to fail if the - source directory was different from the building directory, and was - read-only. - -18. Added some sample Win commands from Mark Tetrode into the NON-UNIX-USE - file. No doubt somebody will tell me if they don't make sense... Also added - Dan Mooney's comments about building on OpenVMS. - -19. Added support for partial matching via the PCRE_PARTIAL option for - pcre_exec() and the \P data escape in pcretest. - -20. Extended pcretest with 3 new pattern features: - - (i) A pattern option of the form ">rest-of-line" causes pcretest to - write the compiled pattern to the file whose name is "rest-of-line". - This is a straight binary dump of the data, with the saved pointer to - the character tables forced to be NULL. The study data, if any, is - written too. After writing, pcretest reads a new pattern. - - (ii) If, instead of a pattern, ": new target - : new target - : use native compiler - : use native linker - : handle Windows platform correctly - : ditto - : ditto - copy DLL to top builddir before testing - - As part of these changes, -no-undefined was removed again. This was reported - to give trouble on HP-UX 11.0, so getting rid of it seems like a good idea - in any case. - -3. Some tidies to get rid of compiler warnings: - - . In the match_data structure, match_limit was an unsigned long int, whereas - match_call_count was an int. I've made them both unsigned long ints. - - . In pcretest the fact that a const uschar * doesn't automatically cast to - a void * provoked a warning. - - . 
Turning on some more compiler warnings threw up some "shadow" variables - and a few more missing casts. - -4. If PCRE was compiled with UTF-8 support, but called without the PCRE_UTF8 - option, a class that contained a single character with a value between 128 - and 255 (e.g. /[\xFF]/) caused PCRE to crash. - -5. If PCRE was compiled with UTF-8 support, but called without the PCRE_UTF8 - option, a class that contained several characters, but with at least one - whose value was between 128 and 255 caused PCRE to crash. - - -Version 4.1 12-Mar-03 ---------------------- - -1. Compiling with gcc -pedantic found a couple of places where casts were -needed, and a string in dftables.c that was longer than standard compilers are -required to support. - -2. Compiling with Sun's compiler found a few more places where the code could -be tidied up in order to avoid warnings. - -3. The variables for cross-compiling were called HOST_CC and HOST_CFLAGS; the -first of these names is deprecated in the latest Autoconf in favour of the name -CC_FOR_BUILD, because "host" is typically used to mean the system on which the -compiled code will be run. I can't find a reference for HOST_CFLAGS, but by -analogy I have changed it to CFLAGS_FOR_BUILD. - -4. Added -no-undefined to the linking command in the Makefile, because this is -apparently helpful for Windows. To make it work, also added "-L. -lpcre" to the -linking step for the pcreposix library. - -5. PCRE was failing to diagnose the case of two named groups with the same -name. - -6. A problem with one of PCRE's optimizations was discovered. PCRE remembers a -literal character that is needed in the subject for a match, and scans along to -ensure that it is present before embarking on the full matching process. This -saves time in cases of nested unlimited repeats that are never going to match. -Problem: the scan can take a lot of time if the subject is very long (e.g. -megabytes), thus penalizing straightforward matches.
It is now done only if the -amount of subject to be scanned is less than 1000 bytes. - -7. A lesser problem with the same optimization is that it was recording the -first character of an anchored pattern as "needed", thus provoking a search -right along the subject, even when the first match of the pattern was going to -fail. The "needed" character is now not set for anchored patterns, unless it -follows something in the pattern that is of non-fixed length. Thus, it still -fulfils its original purpose of finding quick non-matches in cases of nested -unlimited repeats, but isn't used for simple anchored patterns such as /^abc/. - - -Version 4.0 17-Feb-03 ---------------------- - -1. If a comment in an extended regex that started immediately after a meta-item -extended to the end of string, PCRE compiled incorrect data. This could lead to -all kinds of weird effects. Example: /#/ was bad; /()#/ was bad; /a#/ was not. - -2. Moved to autoconf 2.53 and libtool 1.4.2. - -3. Perl 5.8 no longer needs "use utf8" for doing UTF-8 things. Consequently, -the special perltest8 script is no longer needed - all the tests can be run -from a single perltest script. - -4. From 5.004, Perl has not included the VT character (0x0b) in the set defined -by \s. It has now been removed in PCRE. This means it isn't recognized as -whitespace in /x regexes too, which is the same as Perl. Note that the POSIX -class [:space:] *does* include VT, thereby creating a mess. - -5. Added the class [:blank:] (a GNU extension from Perl 5.8) to match only -space and tab. - -6. Perl 5.005 was a long time ago. It's time to amalgamate the tests that use -its new features into the main test script, reducing the number of scripts. - -7. Perl 5.8 has changed the meaning of patterns like /a(?i)b/. Earlier versions -were backward compatible, and made the (?i) apply to the whole pattern, as if -/i were given. Now it behaves more logically, and applies the option setting -only to what follows. 
PCRE has been changed to follow suit. However, if it -finds options settings right at the start of the pattern, it extracts them into -the global options, as before. Thus, they show up in the info data. - -8. Added support for the \Q...\E escape sequence. Characters in between are -treated as literals. This is slightly different from Perl in that $ and @ are -also handled as literals inside the quotes. In Perl, they will cause variable -interpolation. Note the following examples: - - Pattern PCRE matches Perl matches - - \Qabc$xyz\E abc$xyz abc followed by the contents of $xyz - \Qabc\$xyz\E abc\$xyz abc\$xyz - \Qabc\E\$\Qxyz\E abc$xyz abc$xyz - -For compatibility with Perl, \Q...\E sequences are recognized inside character -classes as well as outside them. - -9. Re-organized 3 code statements in pcretest to avoid "overflow in -floating-point constant arithmetic" warnings from a Microsoft compiler. Added a -(size_t) cast to one statement in pcretest and one in pcreposix to avoid -signed/unsigned warnings. - -10. SunOS4 doesn't have strtoul(). This was used only for unpicking the -o -option for pcretest, so I've replaced it by a simple function that does just -that job. - -11. pcregrep was ending with code 0 instead of 2 for the commands "pcregrep" or -"pcregrep -". - -12. Added "possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's -Java package. This provides some syntactic sugar for simple cases of what my -documentation calls "once-only subpatterns". A pattern such as x*+ is the same -as (?>x*). In other words, if what is inside (?>...) is just a single repeated -item, you can use this simplified notation. Note that only makes sense with -greedy quantifiers. Consequently, the use of the possessive quantifier forces -greediness, whatever the setting of the PCRE_UNGREEDY option. - -13. A change of greediness default within a pattern was not taking effect at -the current level for patterns like /(b+(?U)a+)/. 
It did apply to parenthesized -subpatterns that followed. Patterns like /b+(?U)a+/ worked because the option -was abstracted outside. - -14. PCRE now supports the \G assertion. It is true when the current matching -position is at the start point of the match. This differs from \A when the -starting offset is non-zero. Used with the /g option of pcretest (or similar -code), it works in the same way as it does for Perl's /g option. If all -alternatives of a regex begin with \G, the expression is anchored to the start -match position, and the "anchored" flag is set in the compiled expression. - -15. Some bugs concerning the handling of certain option changes within patterns -have been fixed. These applied to options other than (?ims). For example, -"a(?x: b c )d" did not match "XabcdY" but did match "Xa b c dY". It should have -been the other way round. Some of this was related to change 7 above. - -16. PCRE now gives errors for /[.x.]/ and /[=x=]/ as unsupported POSIX -features, as Perl does. Previously, PCRE gave the warnings only for /[[.x.]]/ -and /[[=x=]]/. PCRE now also gives an error for /[:name:]/ because it supports -POSIX classes only within a class (e.g. /[[:alpha:]]/). - -17. Added support for Perl's \C escape. This matches one byte, even in UTF8 -mode. Unlike ".", it always matches newline, whatever the setting of -PCRE_DOTALL. However, PCRE does not permit \C to appear in lookbehind -assertions. Perl allows it, but it doesn't (in general) work because it can't -calculate the length of the lookbehind. At least, that's the case for Perl -5.8.0 - I've been told they are going to document that it doesn't work in -future. - -18. Added an error diagnosis for escapes that PCRE does not support: these are -\L, \l, \N, \P, \p, \U, \u, and \X. - -19. Although correctly diagnosing a missing ']' in a character class, PCRE was -reading past the end of the pattern in cases such as /[abcd/. - -20. 
PCRE was getting more memory than necessary for patterns with classes that -contained both POSIX named classes and other characters, e.g. /[[:space:]abc/. - -21. Added some code, conditional on #ifdef VPCOMPAT, to make life easier for -compiling PCRE for use with Virtual Pascal. - -22. Small fix to the Makefile to make it work properly if the build is done -outside the source tree. - -23. Added a new extension: a condition to go with recursion. If a conditional -subpattern starts with (?(R) the "true" branch is used if recursion has -happened, whereas the "false" branch is used only at the top level. - -24. When there was a very long string of literal characters (over 255 bytes -without UTF support, over 250 bytes with UTF support), the computation of how -much memory was required could be incorrect, leading to segfaults or other -strange effects. - -25. PCRE was incorrectly assuming anchoring (either to start of subject or to -start of line for a non-DOTALL pattern) when a pattern started with (.*) and -there was a subsequent back reference to those brackets. This meant that, for -example, /(.*)\d+\1/ failed to match "abc123bc". Unfortunately, it isn't -possible to check for precisely this case. All we can do is abandon the -optimization if .* occurs inside capturing brackets when there are any back -references whatsoever. (See below for a better fix that came later.) - -26. The handling of the optimization for finding the first character of a -non-anchored pattern, and for finding a character that is required later in the -match were failing in some cases. This didn't break the matching; it just -failed to optimize when it could. The way this is done has been re-implemented. - -27. Fixed typo in error message for invalid (?R item (it said "(?p"). - -28. Added a new feature that provides some of the functionality that Perl -provides with (?{...}). The facility is termed a "callout". 
The way it is done -in PCRE is for the caller to provide an optional function, by setting -pcre_callout to its entry point. Like pcre_malloc and pcre_free, this is a -global variable. By default it is unset, which disables all calling out. To get -the function called, the regex must include (?C) at appropriate points. This -is, in fact, equivalent to (?C0), and any number <= 255 may be given with (?C). -This provides a means of identifying different callout points. When PCRE -reaches such a point in the regex, if pcre_callout has been set, the external -function is called. It is provided with data in a structure called -pcre_callout_block, which is defined in pcre.h. If the function returns 0, -matching continues; if it returns a non-zero value, the match at the current -point fails. However, backtracking will occur if possible. [This was changed -later and other features added - see item 49 below.] - -29. pcretest is upgraded to test the callout functionality. It provides a -callout function that displays information. By default, it shows the start of -the match and the current position in the text. There are some new data escapes -to vary what happens: - - \C+ in addition, show current contents of captured substrings - \C- do not supply a callout function - \C!n return 1 when callout number n is reached - \C!n!m return 1 when callout number n is reached for the mth time - -30. If pcregrep was called with the -l option and just a single file name, it -output "<stdin>" if a match was found, instead of the file name. - -31. Improve the efficiency of the POSIX API to PCRE. If the number of capturing -slots is less than POSIX_MALLOC_THRESHOLD, use a block on the stack to pass to -pcre_exec(). This saves a malloc/free per call. The default value of -POSIX_MALLOC_THRESHOLD is 10; it can be changed by --with-posix-malloc-threshold -when configuring. - -32. The default maximum size of a compiled pattern is 64K. There have been a -few cases of people hitting this limit.
The code now uses macros to handle the -storing of links as offsets within the compiled pattern. It defaults to 2-byte -links, but this can be changed to 3 or 4 bytes by --with-link-size when -configuring. Tests 2 and 5 work only with 2-byte links because they output -debugging information about compiled patterns. - -33. Internal code re-arrangements: - -(a) Moved the debugging function for printing out a compiled regex into - its own source file (printint.c) and used #include to pull it into - pcretest.c and, when DEBUG is defined, into pcre.c, instead of having two - separate copies. - -(b) Defined the list of op-code names for debugging as a macro in - internal.h so that it is next to the definition of the opcodes. - -(c) Defined a table of op-code lengths for simpler skipping along compiled - code. This is again a macro in internal.h so that it is next to the - definition of the opcodes. - -34. Added support for recursive calls to individual subpatterns, along the -lines of Robin Houston's patch (but implemented somewhat differently). - -35. Further mods to the Makefile to help Win32. Also, added code to pcregrep to -allow it to read and process whole directories in Win32. This code was -contributed by Lionel Fourquaux; it has not been tested by me. - -36. Added support for named subpatterns. The Python syntax (?P<name>...) is -used to name a group. Names consist of alphanumerics and underscores, and must -be unique. Back references use the syntax (?P=name) and recursive calls use -(?P>name) which is a PCRE extension to the Python extension. Groups still have -numbers. The function pcre_fullinfo() can be used after compilation to extract -a name/number map. There are three relevant calls: - - PCRE_INFO_NAMEENTRYSIZE yields the size of each entry in the map - PCRE_INFO_NAMECOUNT yields the number of entries - PCRE_INFO_NAMETABLE yields a pointer to the map. - -The map is a vector of fixed-size entries.
The size of each entry depends on -the length of the longest name used. The first two bytes of each entry are the -group number, most significant byte first. There follows the corresponding -name, zero terminated. The names are in alphabetical order. - -37. Make the maximum literal string in the compiled code 250 for the non-UTF-8 -case instead of 255. Making it the same both with and without UTF-8 support -means that the same test output works with both. - -38. There was a case of malloc(0) in the POSIX testing code in pcretest. Avoid -calling malloc() with a zero argument. - -39. Change 25 above had to resort to a heavy-handed test for the .* anchoring -optimization. I've improved things by keeping a bitmap of backreferences with -numbers 1-31 so that if .* occurs inside capturing brackets that are not in -fact referenced, the optimization can be applied. It is unlikely that a -relevant occurrence of .* (i.e. one which might indicate anchoring or forcing -the match to follow \n) will appear inside brackets with a number greater than -31, but if it does, any back reference > 31 suppresses the optimization. - -40. Added a new compile-time option PCRE_NO_AUTO_CAPTURE. This has the effect -of disabling numbered capturing parentheses. Any opening parenthesis that is -not followed by ? behaves as if it were followed by ?: but named parentheses -can still be used for capturing (and they will acquire numbers in the usual -way). - -41. Redesigned the return codes from the match() function into yes/no/error so -that errors can be passed back from deep inside the nested calls. A malloc -failure while inside a recursive subpattern call now causes the -PCRE_ERROR_NOMEMORY return instead of quietly going wrong. - -42. It is now possible to set a limit on the number of times the match() -function is called in a call to pcre_exec(). 
This facility makes it possible to -limit the amount of recursion and backtracking, though not in a directly -obvious way, because the match() function is used in a number of different -circumstances. The count starts from zero for each position in the subject -string (for non-anchored patterns). The default limit is, for compatibility, a -large number, namely 10 000 000. You can change this in two ways: - -(a) When configuring PCRE before making, you can use --with-match-limit=n - to set a default value for the compiled library. - -(b) For each call to pcre_exec(), you can pass a pcre_extra block in which - a different value is set. See 45 below. - -If the limit is exceeded, pcre_exec() returns PCRE_ERROR_MATCHLIMIT. - -43. Added a new function pcre_config(int, void *) to enable run-time extraction -of things that can be changed at compile time. The first argument specifies -what is wanted and the second points to where the information is to be placed. -The current list of available information is: - - PCRE_CONFIG_UTF8 - -The output is an integer that is set to one if UTF-8 support is available; -otherwise it is set to zero. - - PCRE_CONFIG_NEWLINE - -The output is an integer that is set to the value of the code that is used for -newline. It is either LF (10) or CR (13). - - PCRE_CONFIG_LINK_SIZE - -The output is an integer that contains the number of bytes used for internal -linkage in compiled expressions. The value is 2, 3, or 4. See item 32 above. - - PCRE_CONFIG_POSIX_MALLOC_THRESHOLD - -The output is an integer that contains the threshold above which the POSIX -interface uses malloc() for output vectors. See item 31 above. - - PCRE_CONFIG_MATCH_LIMIT - -The output is an unsigned integer that contains the default limit of the number -of match() calls in a pcre_exec() execution. See 42 above. - -44. pcretest has been upgraded by the addition of the -C option.
This causes it -to extract all the available output from the new pcre_config() function, and to -output it. The program then exits immediately. - -45. A need has arisen to pass over additional data with calls to pcre_exec() in -order to support additional features. One way would have been to define -pcre_exec2() (for example) with extra arguments, but this would not have been -extensible, and would also have required all calls to the original function to -be mapped to the new one. Instead, I have chosen to extend the mechanism that -is used for passing in "extra" data from pcre_study(). - -The pcre_extra structure is now exposed and defined in pcre.h. It currently -contains the following fields: - - flags a bitmap indicating which of the following fields are set - study_data opaque data from pcre_study() - match_limit a way of specifying a limit on match() calls for a specific - call to pcre_exec() - callout_data data for callouts (see 49 below) - -The flag bits are also defined in pcre.h, and are - - PCRE_EXTRA_STUDY_DATA - PCRE_EXTRA_MATCH_LIMIT - PCRE_EXTRA_CALLOUT_DATA - -The pcre_study() function now returns one of these new pcre_extra blocks, with -the actual study data pointed to by the study_data field, and the -PCRE_EXTRA_STUDY_DATA flag set. This can be passed directly to pcre_exec() as -before. That is, this change is entirely upwards-compatible and requires no -change to existing code. - -If you want to pass in additional data to pcre_exec(), you can either place it -in a pcre_extra block provided by pcre_study(), or create your own pcre_extra -block. - -46. pcretest has been extended to test the PCRE_EXTRA_MATCH_LIMIT feature. If a -data string contains the escape sequence \M, pcretest calls pcre_exec() several -times with different match limits, until it finds the minimum value needed for -pcre_exec() to complete. The value is then output. 
This can be instructive; for -most simple matches the number is quite small, but for pathological cases it -gets very large very quickly. - -47. There's a new option for pcre_fullinfo() called PCRE_INFO_STUDYSIZE. It -returns the size of the data block pointed to by the study_data field in a -pcre_extra block, that is, the value that was passed as the argument to -pcre_malloc() when PCRE was getting memory in which to place the information -created by pcre_study(). The fourth argument should point to a size_t variable. -pcretest has been extended so that this information is shown after a successful -pcre_study() call when information about the compiled regex is being displayed. - -48. Cosmetic change to Makefile: there's no need to have / after $(DESTDIR) -because what follows is always an absolute path. (Later: it turns out that this -is more than cosmetic for MinGW, because it doesn't like empty path -components.) - -49. Some changes have been made to the callout feature (see 28 above): - -(i) A callout function now has three choices for what it returns: - - 0 => success, carry on matching - > 0 => failure at this point, but backtrack if possible - < 0 => serious error, return this value from pcre_exec() - - Negative values should normally be chosen from the set of PCRE_ERROR_xxx - values. In particular, returning PCRE_ERROR_NOMATCH forces a standard - "match failed" error. The error number PCRE_ERROR_CALLOUT is reserved for - use by callout functions. It will never be used by PCRE itself. - -(ii) The pcre_extra structure (see 45 above) has a void * field called - callout_data, with corresponding flag bit PCRE_EXTRA_CALLOUT_DATA. The - pcre_callout_block structure has a field of the same name. The contents of - the field passed in the pcre_extra structure are passed to the callout - function in the corresponding field in the callout block. This makes it - easier to use the same callout-containing regex from multiple threads. 
For - testing, the pcretest program has a new data escape - - \C*n pass the number n (may be negative) as callout_data - - If the callout function in pcretest receives a non-zero value as - callout_data, it returns that value. - -50. Makefile wasn't handling CFLAGS properly when compiling dftables. Also, -there were some redundant $(CFLAGS) in commands that are now specified as -$(LINK), which already includes $(CFLAGS). - -51. Extensions to UTF-8 support are listed below. These all apply when (a) PCRE -has been compiled with UTF-8 support *and* pcre_compile() has been compiled -with the PCRE_UTF8 flag. Patterns that are compiled without that flag assume -one-byte characters throughout. Note that case-insensitive matching applies -only to characters whose values are less than 256. PCRE doesn't support the -notion of cases for higher-valued characters. - -(i) A character class whose characters are all within 0-255 is handled as - a bit map, and the map is inverted for negative classes. Previously, a - character > 255 always failed to match such a class; however it should - match if the class was a negative one (e.g. [^ab]). This has been fixed. - -(ii) A negated character class with a single character < 255 is coded as - "not this character" (OP_NOT). This wasn't working properly when the test - character was multibyte, either singly or repeated. - -(iii) Repeats of multibyte characters are now handled correctly in UTF-8 - mode, for example: \x{100}{2,3}. - -(iv) The character escapes \b, \B, \d, \D, \s, \S, \w, and \W (either - singly or repeated) now correctly test multibyte characters. However, - PCRE doesn't recognize any characters with values greater than 255 as - digits, spaces, or word characters. Such characters always match \D, \S, - and \W, and never match \d, \s, or \w. - -(v) Classes may now contain characters and character ranges with values - greater than 255. For example: [ab\x{100}-\x{400}]. 
- -(vi) pcregrep now has a --utf-8 option (synonym -u) which makes it call - PCRE in UTF-8 mode. - -52. The info request value PCRE_INFO_FIRSTCHAR has been renamed -PCRE_INFO_FIRSTBYTE because it is a byte value. However, the old name is -retained for backwards compatibility. (Note that LASTLITERAL is also a byte -value.) - -53. The single man page has become too large. I have therefore split it up into -a number of separate man pages. These also give rise to individual HTML pages; -these are now put in a separate directory, and there is an index.html page that -lists them all. Some hyperlinking between the pages has been installed. - -54. Added convenience functions for handling named capturing parentheses. - -55. Unknown escapes inside character classes (e.g. [\M]) and escapes that -aren't interpreted therein (e.g. [\C]) are literals in Perl. This is now also -true in PCRE, except when the PCRE_EXTENDED option is set, in which case they -are faulted. - -56. Introduced HOST_CC and HOST_CFLAGS which can be set in the environment when -calling configure. These values are used when compiling the dftables.c program -which is run to generate the source of the default character tables. They -default to the values of CC and CFLAGS. If you are cross-compiling PCRE, -you will need to set these values. - -57. Updated the building process for Windows DLL, as provided by Fred Cox. - - -Version 3.9 02-Jan-02 ---------------------- - -1. A bit of extraneous text had somehow crept into the pcregrep documentation. - -2. If --disable-static was given, the building process failed when trying to -build pcretest and pcregrep. (For some reason it was using libtool to compile -them, which is not right, as they aren't part of the library.) - - -Version 3.8 18-Dec-01 ---------------------- - -1. The experimental UTF-8 code was completely screwed up. It was packing the -bytes in the wrong order. How dumb can you get? - - -Version 3.7 29-Oct-01 ---------------------- - -1. 
In updating pcretest to check change 1 of version 3.6, I screwed up. -This caused pcretest, when used on the test data, to segfault. Unfortunately, -this didn't happen under Solaris 8, where I normally test things. - -2. The Makefile had to be changed to make it work on BSD systems, where 'make' -doesn't seem to recognize that ./xxx and xxx are the same file. (This entry -isn't in ChangeLog distributed with 3.7 because I forgot when I hastily made -this fix an hour or so after the initial 3.7 release.) - - -Version 3.6 23-Oct-01 ---------------------- - -1. Crashed with /(sens|respons)e and \1ibility/ and "sense and sensibility" if -offsets passed as NULL with zero offset count. - -2. The config.guess and config.sub files had not been updated when I moved to -the latest autoconf. - - -Version 3.5 15-Aug-01 ---------------------- - -1. Added some missing #if !defined NOPOSIX conditionals in pcretest.c that -had been forgotten. - -2. By using declared but undefined structures, we can avoid using "void" -definitions in pcre.h while keeping the internal definitions of the structures -private. - -3. The distribution is now built using autoconf 2.50 and libtool 1.4. From a -user point of view, this means that both static and shared libraries are built -by default, but this can be individually controlled. More of the work of -handling this static/shared cases is now inside libtool instead of PCRE's make -file. - -4. The pcretest utility is now installed along with pcregrep because it is -useful for users (to test regexs) and by doing this, it automatically gets -relinked by libtool. The documentation has been turned into a man page, so -there are now .1, .txt, and .html versions in /doc. - -5. Upgrades to pcregrep: - (i) Added long-form option names like gnu grep. - (ii) Added --help to list all options with an explanatory phrase. - (iii) Added -r, --recursive to recurse into sub-directories. - (iv) Added -f, --file to read patterns from a file. - -6. 
pcre_exec() was referring to its "code" argument before testing that -argument for NULL (and giving an error if it was NULL). - -7. Upgraded Makefile.in to allow for compiling in a different directory from -the source directory. - -8. Tiny buglet in pcretest: when pcre_fullinfo() was called to retrieve the -options bits, the pointer it was passed was to an int instead of to an unsigned -long int. This mattered only on 64-bit systems. - -9. Fixed typo (3.4/1) in pcre.h again. Sigh. I had changed pcre.h (which is -generated) instead of pcre.in, which is its source. Also made the same change -in several of the .c files. - -10. A new release of gcc defines printf() as a macro, which broke pcretest -because it had an ifdef in the middle of a string argument for printf(). Fixed -by using separate calls to printf(). - -11. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure -script, to force use of CR or LF instead of \n in the source. On non-Unix -systems, the value can be set in config.h. - -12. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an -absolute limit. Changed the text of the error message to make this clear, and -likewise updated the man page. - -13. The limit of 99 on the number of capturing subpatterns has been removed. -The new limit is 65535, which I hope will not be a "real" limit. - - -Version 3.4 22-Aug-00 ---------------------- - -1. Fixed typo in pcre.h: unsigned const char * changed to const unsigned char *. - -2. Diagnose condition (?(0) as an error instead of crashing on matching. - - -Version 3.3 01-Aug-00 ---------------------- - -1. If an octal character was given, but the value was greater than \377, it -was not getting masked to the least significant bits, as documented. This could -lead to crashes in some systems. - -2. Perl 5.6 (if not earlier versions) accepts classes like [a-\d] and treats -the hyphen as a literal. PCRE used to give an error; it now behaves like Perl. - -3.
Added the functions pcre_free_substring() and pcre_free_substring_list(). -These just pass their arguments on to (pcre_free)(), but they are provided -because some uses of PCRE bind it to non-C systems that can call its functions, -but cannot call free() or pcre_free() directly. - -4. Add "make test" as a synonym for "make check". Corrected some comments in -the Makefile. - -5. Add $(DESTDIR)/ in front of all the paths in the "install" target in the -Makefile. - -6. Changed the name of pgrep to pcregrep, because Solaris has introduced a -command called pgrep for grepping around the active processes. - -7. Added the beginnings of support for UTF-8 character strings. - -8. Arranged for the Makefile to pass over the settings of CC, CFLAGS, and -RANLIB to ./ltconfig so that they are used by libtool. I think these are all -the relevant ones. (AR is not passed because ./ltconfig does its own figuring -out for the ar command.) - - -Version 3.2 12-May-00 ---------------------- - -This is purely a bug fixing release. - -1. If the pattern /((Z)+|A)*/ was matched against ZABCDEFG it matched Z instead -of ZA. This was just one example of several cases that could provoke this bug, -which was introduced by change 9 of version 2.00. The code for breaking -infinite loops after an iteration that matches an empty string wasn't working -correctly. - -2. The pcretest program was not imitating Perl correctly for the pattern /a*/g -when matched against abbab (for example). After matching an empty string, it -wasn't forcing anchoring when setting PCRE_NOTEMPTY for the next attempt; this -caused it to match further down the string than it should. - -3. The code contained an inclusion of sys/types.h. It isn't clear why this -was there because it doesn't seem to be needed, and it causes trouble on some -systems, as it is not a Standard C header. It has been removed. - -4. Made 4 silly changes to the source to avoid stupid compiler warnings that -were reported on the Macintosh. 
The changes were from - - while ((c = *(++ptr)) != 0 && c != '\n'); -to - while ((c = *(++ptr)) != 0 && c != '\n') ; - -Totally extraordinary, but if that's what it takes... - -5. PCRE is being used in one environment where neither memmove() nor bcopy() is -available. Added HAVE_BCOPY and an autoconf test for it; if neither -HAVE_MEMMOVE nor HAVE_BCOPY is set, use a built-in emulation function which -assumes the way PCRE uses memmove() (always moving upwards). - -6. PCRE is being used in one environment where strchr() is not available. There -was only one use in pcre.c, and writing it out to avoid strchr() probably gives -faster code anyway. - - -Version 3.1 09-Feb-00 ---------------------- - -The only change in this release is the fixing of some bugs in Makefile.in for -the "install" target: - -(1) It was failing to install pcreposix.h. - -(2) It was overwriting the pcre.3 man page with the pcreposix.3 man page. - - -Version 3.0 01-Feb-00 ---------------------- - -1. Add support for the /+ modifier to perltest (to output $` like it does in -pcretest). - -2. Add support for the /g modifier to perltest. - -3. Fix pcretest so that it behaves even more like Perl for /g when the pattern -matches null strings. - -4. Fix perltest so that it doesn't do unwanted things when fed an empty -pattern. Perl treats empty patterns specially - it reuses the most recent -pattern, which is not what we want. Replace // by /(?#)/ in order to avoid this -effect. - -5. The POSIX interface was broken in that it was just handing over the POSIX -captured string vector to pcre_exec(), but (since release 2.00) PCRE has -required a bigger vector, with some working space on the end. This means that -the POSIX wrapper now has to get and free some memory, and copy the results. - -6. Added some simple autoconf support, placing the test data and the -documentation in separate directories, re-organizing some of the -information files, and making it build pcre-config (a GNU standard). 
Also added -libtool support for building PCRE as a shared library, which is now the -default. - -7. Got rid of the leading zero in the definition of PCRE_MINOR because 08 and -09 are not valid octal constants. Single digits will be used for minor values -less than 10. - -8. Defined REG_EXTENDED and REG_NOSUB as zero in the POSIX header, so that -existing programs that set these in the POSIX interface can use PCRE without -modification. - -9. Added a new function, pcre_fullinfo() with an extensible interface. It can -return all that pcre_info() returns, plus additional data. The pcre_info() -function is retained for compatibility, but is considered to be obsolete. - -10. Added experimental recursion feature (?R) to handle one common case that -Perl 5.6 will be able to do with (?p{...}). - -11. Added support for POSIX character classes like [:alpha:], which Perl is -adopting. - - -Version 2.08 31-Aug-99 ----------------------- - -1. When startoffset was not zero and the pattern began with ".*", PCRE was not -trying to match at the startoffset position, but instead was moving forward to -the next newline as if a previous match had failed. - -2. pcretest was not making use of PCRE_NOTEMPTY when repeating for /g and /G, -and could get into a loop if a null string was matched other than at the start -of the subject. - -3. Added definitions of PCRE_MAJOR and PCRE_MINOR to pcre.h so the version can -be distinguished at compile time, and for completeness also added PCRE_DATE. - -5. Added Paul Sokolovsky's minor changes to make it easy to compile a Win32 DLL -in GnuWin32 environments. - - -Version 2.07 29-Jul-99 ----------------------- - -1. The documentation is now supplied in plain text form and HTML as well as in -the form of man page sources. - -2. C++ compilers don't like assigning (void *) values to other pointer types. -In particular this affects malloc(). Although there is no problem in Standard -C, I've put in casts to keep C++ compilers happy. - -3. 
Typo on pcretest.c; a cast of (unsigned char *) in the POSIX regexec() call -should be (const char *). - -4. If NOPOSIX is defined, pcretest.c compiles without POSIX support. This may -be useful for non-Unix systems who don't want to bother with the POSIX stuff. -However, I haven't made this a standard facility. The documentation doesn't -mention it, and the Makefile doesn't support it. - -5. The Makefile now contains an "install" target, with editable destinations at -the top of the file. The pcretest program is not installed. - -6. pgrep -V now gives the PCRE version number and date. - -7. Fixed bug: a zero repetition after a literal string (e.g. /abcde{0}/) was -causing the entire string to be ignored, instead of just the last character. - -8. If a pattern like /"([^\\"]+|\\.)*"/ is applied in the normal way to a -non-matching string, it can take a very, very long time, even for strings of -quite modest length, because of the nested recursion. PCRE now does better in -some of these cases. It does this by remembering the last required literal -character in the pattern, and pre-searching the subject to ensure it is present -before running the real match. In other words, it applies a heuristic to detect -some types of certain failure quickly, and in the above example, if presented -with a string that has no trailing " it gives "no match" very quickly. - -9. A new runtime option PCRE_NOTEMPTY causes null string matches to be ignored; -other alternatives are tried instead. - - -Version 2.06 09-Jun-99 ----------------------- - -1. Change pcretest's output for amount of store used to show just the code -space, because the remainder (the data block) varies in size between 32-bit and -64-bit systems. - -2. Added an extra argument to pcre_exec() to supply an offset in the subject to -start matching at. This allows lookbehinds to work when searching for multiple -occurrences in a string. - -3. 
Added additional options to pcretest for testing multiple occurrences: - - /+ outputs the rest of the string that follows a match - /g loops for multiple occurrences, using the new startoffset argument - /G loops for multiple occurrences by passing an incremented pointer - -4. PCRE wasn't doing the "first character" optimization for patterns starting -with \b or \B, though it was doing it for other lookbehind assertions. That is, -it wasn't noticing that a match for a pattern such as /\bxyz/ has to start with -the letter 'x'. On long subject strings, this gives a significant speed-up. - - -Version 2.05 21-Apr-99 ----------------------- - -1. Changed the type of magic_number from int to long int so that it works -properly on 16-bit systems. - -2. Fixed a bug which caused patterns starting with .* not to work correctly -when the subject string contained newline characters. PCRE was assuming -anchoring for such patterns in all cases, which is not correct because .* will -not pass a newline unless PCRE_DOTALL is set. It now assumes anchoring only if -DOTALL is set at top level; otherwise it knows that patterns starting with .* -must be retried after every newline in the subject. - - -Version 2.04 18-Feb-99 ----------------------- - -1. For parenthesized subpatterns with repeats whose minimum was zero, the -computation of the store needed to hold the pattern was incorrect (too large). -If such patterns were nested a few deep, this could multiply and become a real -problem. - -2. Added /M option to pcretest to show the memory requirement of a specific -pattern. Made -m a synonym of -s (which does this globally) for compatibility. - -3. Subpatterns of the form (regex){n,m} (i.e. limited maximum) were being -compiled in such a way that the backtracking after subsequent failure was -pessimal. Something like (a){0,3} was compiled as (a)?(a)?(a)? instead of -((a)((a)(a)?)?)? with disastrous performance if the maximum was of any size. 
- - -Version 2.03 02-Feb-99 ----------------------- - -1. Fixed typo and small mistake in man page. - -2. Added 4th condition (GPL supersedes if conflict) and created separate -LICENCE file containing the conditions. - -3. Updated pcretest so that patterns such as /abc\/def/ work like they do in -Perl, that is the internal \ allows the delimiter to be included in the -pattern. Locked out the use of \ as a delimiter. If \ immediately follows -the final delimiter, add \ to the end of the pattern (to test the error). - -4. Added the convenience functions for extracting substrings after a successful -match. Updated pcretest to make it able to test these functions. - - -Version 2.02 14-Jan-99 ----------------------- - -1. Initialized the working variables associated with each extraction so that -their saving and restoring doesn't refer to uninitialized store. - -2. Put dummy code into study.c in order to trick the optimizer of the IBM C -compiler for OS/2 into generating correct code. Apparently IBM isn't going to -fix the problem. - -3. Pcretest: the timing code wasn't using LOOPREPEAT for timing execution -calls, and wasn't printing the correct value for compiling calls. Increased the -default value of LOOPREPEAT, and the number of significant figures in the -times. - -4. Changed "/bin/rm" in the Makefile to "-rm" so it works on Windows NT. - -5. Renamed "deftables" as "dftables" to get it down to 8 characters, to avoid -a building problem on Windows NT with a FAT file system. - - -Version 2.01 21-Oct-98 ----------------------- - -1. Changed the API for pcre_compile() to allow for the provision of a pointer -to character tables built by pcre_maketables() in the current locale. If NULL -is passed, the default tables are used. - - -Version 2.00 24-Sep-98 ----------------------- - -1. Since the (>?) facility is in Perl 5.005, don't require PCRE_EXTRA to enable -it any more. - -2. Allow quantification of (?>) groups, and make it work correctly. - -3. 
The first character computation wasn't working for (?>) groups. - -4. Correct the implementation of \Z (it is permitted to match on the \n at the -end of the subject) and add 5.005's \z, which really does match only at the -very end of the subject. - -5. Remove the \X "cut" facility; Perl doesn't have it, and (?> is neater. - -6. Remove the ability to specify CASELESS, MULTILINE, DOTALL, and -DOLLAR_END_ONLY at runtime, to make it possible to implement the Perl 5.005 -localized options. All options to pcre_study() were also removed. - -7. Add other new features from 5.005: - - $(?<= positive lookbehind - $(?a*))*/ (a PCRE_EXTRA facility). - - -Version 1.00 18-Nov-97 ----------------------- - -1. Added compile-time macros to support systems such as SunOS4 which don't have -memmove() or strerror() but have other things that can be used instead. - -2. Arranged that "make clean" removes the executables. - - -Version 0.99 27-Oct-97 ----------------------- - -1. Fixed bug in code for optimizing classes with only one character. It was -initializing a 32-byte map regardless, which could cause it to run off the end -of the memory it had got. - -2. Added, conditional on PCRE_EXTRA, the proposed (?>REGEX) construction. - - -Version 0.98 22-Oct-97 ----------------------- - -1. Fixed bug in code for handling temporary memory usage when there are more -back references than supplied space in the ovector. This could cause segfaults. - - -Version 0.97 21-Oct-97 ----------------------- - -1. Added the \X "cut" facility, conditional on PCRE_EXTRA. - -2. Optimized negated single characters not to use a bit map. - -3. Brought error texts together as macro definitions; clarified some of them; -fixed one that was wrong - it said "range out of order" when it meant "invalid -escape sequence". - -4. Changed some char * arguments to const char *. - -5. Added PCRE_NOTBOL and PCRE_NOTEOL (from POSIX). - -6. Added the POSIX-style API wrapper in pcreposix.a and testing facilities in -pcretest. 
- - -Version 0.96 16-Oct-97 ----------------------- - -1. Added a simple "pgrep" utility to the distribution. - -2. Fixed an incompatibility with Perl: "{" is now treated as a normal character -unless it appears in one of the precise forms "{ddd}", "{ddd,}", or "{ddd,ddd}" -where "ddd" means "one or more decimal digits". - -3. Fixed serious bug. If a pattern had a back reference, but the call to -pcre_exec() didn't supply a large enough ovector to record the related -identifying subpattern, the match always failed. PCRE now remembers the number -of the largest back reference, and gets some temporary memory in which to save -the offsets during matching if necessary, in order to ensure that -backreferences always work. - -4. Increased the compatibility with Perl in a number of ways: - - (a) . no longer matches \n by default; an option PCRE_DOTALL is provided - to request this handling. The option can be set at compile or exec time. - - (b) $ matches before a terminating newline by default; an option - PCRE_DOLLAR_ENDONLY is provided to override this (but not in multiline - mode). The option can be set at compile or exec time. - - (c) The handling of \ followed by a digit other than 0 is now supposed to be - the same as Perl's. If the decimal number it represents is less than 10 - or there aren't that many previous left capturing parentheses, an octal - escape is read. Inside a character class, it's always an octal escape, - even if it is a single digit. - - (d) An escaped but undefined alphabetic character is taken as a literal, - unless PCRE_EXTRA is set. Currently this just reserves the remaining - escapes. - - (e) {0} is now permitted. (The previous item is removed from the compiled - pattern). - -5. Changed all the names of code files so that the basic parts are no longer -than 10 characters, and abolished the teeny "globals.c" file. - -6. Changed the handling of character classes; they are now done with a 32-byte -bit map always. - -7. 
Added the -d and /D options to pcretest to make it possible to look at the -internals of compilation without having to recompile pcre. - - -Version 0.95 23-Sep-97 ----------------------- - -1. Fixed bug in pre-pass concerning escaped "normal" characters such as \x5c or -\x20 at the start of a run of normal characters. These were being treated as -real characters, instead of the source characters being re-checked. - - -Version 0.94 18-Sep-97 ----------------------- - -1. The functions are now thread-safe, with the caveat that the global variables -containing pointers to malloc() and free() or alternative functions are the -same for all threads. - -2. Get pcre_study() to generate a bitmap of initial characters for non- -anchored patterns when this is possible, and use it if passed to pcre_exec(). - - -Version 0.93 15-Sep-97 ----------------------- - -1. /(b)|(:+)/ was computing an incorrect first character. - -2. Add pcre_study() to the API and the passing of pcre_extra to pcre_exec(), -but not actually doing anything yet. - -3. Treat "-" characters in classes that cannot be part of ranges as literals, -as Perl does (e.g. [-az] or [az-]). - -4. Set the anchored flag if a branch starts with .* or .*? because that tests -all possible positions. - -5. Split up into different modules to avoid including unneeded functions in a -compiled binary. However, compile and exec are still in one module. The "study" -function is split off. - -6. The character tables are now in a separate module whose source is generated -by an auxiliary program - but can then be edited by hand if required. There are -now no calls to isalnum(), isspace(), isdigit(), isxdigit(), tolower() or -toupper() in the code. - -7. Turn the malloc/free function variables into pcre_malloc and pcre_free and -make them global. Abolish the function for setting them, as the caller can now -set them directly. - - -Version 0.92 11-Sep-97 ----------------------- - -1. 
A repeat with a fixed maximum and a minimum of 1 for an ordinary character -(e.g. /a{1,3}/) was broken (I mis-optimized it). - -2. Caseless matching was not working in character classes if the characters in -the pattern were in upper case. - -3. Make ranges like [W-c] work in the same way as Perl for caseless matching. - -4. Make PCRE_ANCHORED public and accept as a compile option. - -5. Add an options word to pcre_exec() and accept PCRE_ANCHORED and -PCRE_CASELESS at run time. Add escapes \A and \I to pcretest to cause it to -pass them. - -6. Give an error if bad option bits passed at compile or run time. - -7. Add PCRE_MULTILINE at compile and exec time, and (?m) as well. Add \M to -pcretest to cause it to pass that flag. - -8. Add pcre_info(), to get the number of identifying subpatterns, the stored -options, and the first character, if set. - -9. Recognize C+ or C{n,m} where n >= 1 as providing a fixed starting character. - - -Version 0.91 10-Sep-97 ----------------------- - -1. PCRE was failing to diagnose unlimited repeats of subpatterns that could -match the empty string as in /(a*)*/. It was looping and ultimately crashing. - -2. PCRE was looping on encountering an indefinitely repeated back reference to -a subpattern that had matched an empty string, e.g. /(a|)\1*/. It now does what -Perl does - treats the match as successful. - -**** diff --git a/deps/libmagic/pcre/HACKING b/deps/libmagic/pcre/HACKING deleted file mode 100644 index a90ddf8..0000000 --- a/deps/libmagic/pcre/HACKING +++ /dev/null @@ -1,473 +0,0 @@ -Technical Notes about PCRE --------------------------- - -These are very rough technical notes that record potentially useful information -about PCRE internals. For information about testing PCRE, see the pcretest -documentation and the comment at the head of the RunTest file. - - -Historical note 1 ------------------ - -Many years ago I implemented some regular expression functions to an algorithm -suggested by Martin Richards. 
These were not Unix-like in form, and were quite -restricted in what they could do by comparison with Perl. The interesting part -about the algorithm was that the amount of space required to hold the compiled -form of an expression was known in advance. The code to apply an expression did -not operate by backtracking, as the original Henry Spencer code and current -Perl code does, but instead checked all possibilities simultaneously by keeping -a list of current states and checking all of them as it advanced through the -subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA -algorithm", though it was not a traditional Finite State Machine (FSM). When -the pattern was all used up, all remaining states were possible matches, and -the one matching the longest subset of the subject string was chosen. This did -not necessarily maximize the individual wild portions of the pattern, as is -expected in Unix and Perl-style regular expressions. - - -Historical note 2 ------------------ - -By contrast, the code originally written by Henry Spencer (which was -subsequently heavily modified for Perl) compiles the expression twice: once in -a dummy mode in order to find out how much store will be needed, and then for -real. (The Perl version probably doesn't do this any more; I'm talking about -the original library.) The execution function operates by backtracking and -maximizing (or, optionally, minimizing in Perl) the amount of the subject that -matches individual wild portions of the pattern. This is an "NFA algorithm" in -Friedl's terminology. - - -OK, here's the real stuff -------------------------- - -For the set of functions that form the "basic" PCRE library (which are -unrelated to those mentioned above), I tried at first to invent an algorithm -that used an amount of store bounded by a multiple of the number of characters -in the pattern, to save on compiling time. 
However, because of the greater -complexity in Perl regular expressions, I couldn't do this. In any case, a -first pass through the pattern is helpful for other reasons. - - -Support for 16-bit and 32-bit data strings -------------------------------------------- - -From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from -release 8.32, PCRE supports 32-bit data strings. The library can be compiled -in any combination of 8-bit, 16-bit or 32-bit modes, creating different -libraries. In the description that follows, the word "short" is -used for a 16-bit data quantity, and the word "unit" is used for a quantity -that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned -integer in 32-bit mode. However, so as not to over-complicate the text, the -names of PCRE functions are given in 8-bit form only. - - -Computing the memory requirement: how it was --------------------------------------------- - -Up to and including release 6.7, PCRE worked by running a very degenerate first -pass to calculate a maximum store size, and then a second pass to do the real -compile - which might use a bit less than the predicted amount of memory. The -idea was that this would turn out faster than the Henry Spencer code because -the first pass is degenerate and the second pass can just store stuff straight -into the vector, which it knows is big enough. - - -Computing the memory requirement: how it is -------------------------------------------- - -By the time I was working on a potential 6.8 release, the degenerate first pass -had become very complicated and hard to maintain. Indeed one of the early -things I did for 6.8 was to fix Yet Another Bug in the memory computation. 
Then -I had a flash of inspiration as to how I could run the real compile function in -a "fake" mode that enables it to compute how much memory it would need, while -actually only ever using a few hundred bytes of working memory, and without too -many tests of the mode that might slow it down. So I refactored the compiling -functions to work this way. This got rid of about 600 lines of source. It -should make future maintenance and development easier. As this was such a major -change, I never released 6.8, instead upping the number to 7.0 (other quite -major changes were also present in the 7.0 release). - -A side effect of this work was that the previous limit of 200 on the nesting -depth of parentheses was removed. However, there is a downside: pcre_compile() -runs more slowly than before (30% or more, depending on the pattern) because it -is doing a full analysis of the pattern. My hope was that this would not be a -big issue, and in the event, nobody has commented on it. - - -Traditional matching function ------------------------------ - -The "traditional", and original, matching function is called pcre_exec(), and -it implements an NFA algorithm, similar to the original Henry Spencer algorithm -and the way that Perl works. This is not surprising, since it is intended to be -as compatible with Perl as possible. This is the function most users of PCRE -will use most of the time. From release 8.20, if PCRE is compiled with -just-in-time (JIT) support, and studying a compiled pattern with JIT is -successful, the JIT code is run instead of the normal pcre_exec() code, but the -result is the same. - - -Supplementary matching function -------------------------------- - -From PCRE 6.0, there is also a supplementary matching function called -pcre_dfa_exec(). This implements a DFA matching algorithm that searches -simultaneously for all possible matches that start at one point in the subject -string. (Going back to my roots: see Historical Note 1 above.) 
This function -interprets the same compiled pattern data as pcre_exec(); however, not all the -facilities are available, and those that are do not always work in quite the -same way. See the user documentation for details. - -The algorithm that is used for pcre_dfa_exec() is not a traditional FSM, -because it may have a number of states active at one time. More work would be -needed at compile time to produce a traditional FSM where only one state is -ever active at once. I believe some other regex matchers work this way. - - -Changeable options ------------------- - -The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may -change in the middle of patterns. From PCRE 8.13, their processing is handled -entirely at compile time by generating different opcodes for the different -settings. The runtime functions do not need to keep track of an options state -any more. - - -Format of compiled patterns ---------------------------- - -The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or -shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing -items of variable length. The first unit in an item contains an opcode, and -the length of the item is either implicit in the opcode or contained in the -data that follows it. - -In many cases listed below, LINK_SIZE data values are specified for offsets -within the compiled pattern. LINK_SIZE always specifies a number of bytes. The -default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or -4-byte values for these offsets, although this impairs the performance. (3-byte -LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE -larger than 2 is necessary only when patterns whose compiled length is greater -than 64K are going to be processed. In this description, we assume the "normal" -compilation options. Data values that are counts (e.g. for quantifiers) are -always just two bytes long (one short in 16-bit mode). 
- -Opcodes with no following data ------------------------------- - -These items are all just one unit long - - OP_END end of pattern - OP_ANY match any one character other than newline - OP_ALLANY match any one character, including newline - OP_ANYBYTE match any single byte, even in UTF-8 mode - OP_SOD match start of data: \A - OP_SOM start of match (subject + offset): \G - OP_SET_SOM set start of match (\K) - OP_CIRC ^ (start of data) - OP_CIRCM ^ multiline mode (start of data or after newline) - OP_NOT_WORD_BOUNDARY \B - OP_WORD_BOUNDARY \b - OP_NOT_DIGIT \D - OP_DIGIT \d - OP_NOT_HSPACE \H - OP_HSPACE \h - OP_NOT_WHITESPACE \S - OP_WHITESPACE \s - OP_NOT_VSPACE \V - OP_VSPACE \v - OP_NOT_WORDCHAR \W - OP_WORDCHAR \w - OP_EODN match end of data or \n at end: \Z - OP_EOD match end of data: \z - OP_DOLL $ (end of data, or before final newline) - OP_DOLLM $ multiline mode (end of data or before newline) - OP_EXTUNI match an extended Unicode character - OP_ANYNL match any Unicode newline sequence - - OP_ACCEPT ) These are Perl 5.10's "backtracking control - OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing - OP_FAIL ) parentheses, it may be preceded by one or more - OP_PRUNE ) OP_CLOSE, followed by a 2-byte number, - OP_SKIP ) indicating which parentheses must be closed. - - -Backtracking control verbs with (optional) data ------------------------------------------------ - -(*THEN) without an argument generates the opcode OP_THEN and no following data. -OP_MARK is followed by the mark name, preceded by a one-unit length, and -followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments, -the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name -following in the same format. - - -Matching literal characters ---------------------------- - -The OP_CHAR opcode is followed by a single character that is to be matched -casefully. For caseless matching, OP_CHARI is used. 
In UTF-8 or UTF-16 modes, -the character may be more than one unit long. In UTF-32 mode, characters -are always exactly one unit long. - - -Repeating single characters ---------------------------- - -The common repeats (*, +, ?), when applied to a single character, use the -following opcodes, which come in caseful and caseless versions: - - Caseful Caseless - OP_STAR OP_STARI - OP_MINSTAR OP_MINSTARI - OP_POSSTAR OP_POSSTARI - OP_PLUS OP_PLUSI - OP_MINPLUS OP_MINPLUSI - OP_POSPLUS OP_POSPLUSI - OP_QUERY OP_QUERYI - OP_MINQUERY OP_MINQUERYI - OP_POSQUERY OP_POSQUERYI - -Each opcode is followed by the character that is to be repeated. In ASCII mode, -these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in -UTF-32 mode these are one-unit items. -Those with "MIN" in their names are the minimizing versions. Those with "POS" -in their names are possessive versions. Other repeats make use of these -opcodes: - - Caseful Caseless - OP_UPTO OP_UPTOI - OP_MINUPTO OP_MINUPTOI - OP_POSUPTO OP_POSUPTOI - OP_EXACT OP_EXACTI - -Each of these is followed by a two-byte (one short) count (most significant -byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from -0 to the given number. A repeat with a non-zero minimum and a fixed maximum is -coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OP_POSUPTO). - - -Repeating character types -------------------------- - -Repeats of things like \d are done exactly as for single characters, except -that instead of a character, the opcode for the type is stored in the data -unit. 
The opcodes are: - - OP_TYPESTAR - OP_TYPEMINSTAR - OP_TYPEPOSSTAR - OP_TYPEPLUS - OP_TYPEMINPLUS - OP_TYPEPOSPLUS - OP_TYPEQUERY - OP_TYPEMINQUERY - OP_TYPEPOSQUERY - OP_TYPEUPTO - OP_TYPEMINUPTO - OP_TYPEPOSUPTO - OP_TYPEEXACT - - -Match by Unicode property -------------------------- - -OP_PROP and OP_NOTPROP are used for positive and negative matches of a -character by testing its Unicode property (the \p and \P escape sequences). -Each is followed by two units that encode the desired property as a type and a -value. - -Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by -three units: OP_PROP or OP_NOTPROP, and then the desired property type and -value. - - -Character classes ------------------ - -If there is only one character in the class, OP_CHAR or OP_CHARI is used for a -positive class, and OP_NOT or OP_NOTI for a negative one (that is, for -something like [^a]). - -Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for -repeated, negated, single-character classes. The normal single-character -opcodes (OP_STAR, etc.) are used for repeated positive single-character -classes. - -When there is more than one character in a class and all the characters are -less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a -negative one. In either case, the opcode is followed by a 32-byte (16-short) -bit map containing a 1 bit for every character that is acceptable. The bits are -counted from the least significant end of each unit. In caseless mode, bits for -both cases are set. - -The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode, -subject characters with values greater than 255 can be handled correctly. For -OP_CLASS they do not match, whereas for OP_NCLASS they do. - -For classes containing characters with values greater than 255, OP_XCLASS is -used. It optionally uses a bit map (if any characters lie within it), followed -by a list of pairs (for a range) and single characters. 
In caseless mode, both -cases are explicitly listed. There is a flag character that indicates whether -it is a positive or a negative class. - - -Back references ---------------- - -OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short) -containing the reference number. - - -Repeating character classes and back references ------------------------------------------------ - -Single-character classes are handled specially (see above). This section -applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information -follows the base item. The matching code looks at the following opcode to see -if it is one of - - OP_CRSTAR - OP_CRMINSTAR - OP_CRPLUS - OP_CRMINPLUS - OP_CRQUERY - OP_CRMINQUERY - OP_CRRANGE - OP_CRMINRANGE - -All but the last two are just single-unit items. The others are followed by -four bytes (two shorts) of data, comprising the minimum and maximum repeat -counts. There are no special possessive opcodes for these repeats; a possessive -repeat is compiled into an atomic group. - - -Brackets and alternation ------------------------- - -A pair of non-capturing (round) brackets is wrapped round each expression at -compile time, so alternation always happens in the context of brackets. - -[Note for North Americans: "bracket" to some English speakers, including -myself, can be round, square, curly, or pointy. Hence this usage rather than -"parentheses".] - -Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99 -capturing brackets and it used a different opcode for each one. From release -3.5, the limit was removed by putting the bracket number into the data for -higher-numbered brackets. From release 7.0 all capturing brackets are handled -this way, using the single opcode OP_CBRA. - -A bracket opcode is followed by LINK_SIZE bytes which give the offset to the -next alternative OP_ALT or, if there aren't any branches, to the matching -OP_KET opcode. 
Each OP_ALT is followed by LINK_SIZE bytes giving the offset to -the next one, or to the OP_KET opcode. For capturing brackets, the bracket -number immediately follows the offset, always as a 2-byte (one short) item. - -OP_KET is used for subpatterns that do not repeat indefinitely, and -OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or -maximally respectively (see below for possessive repetitions). All three are -followed by LINK_SIZE bytes giving (as a positive number) the offset back to -the matching bracket opcode. - -If a subpattern is quantified such that it is permitted to match zero times, it -is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are -single-unit opcodes that tell the matcher that skipping the following -subpattern entirely is a valid branch. In the case of the first two, not -skipping the pattern is also valid (greedy and non-greedy). The third is used -when a pattern has the quantifier {0,0}. It cannot be entirely discarded, -because it may be called as a subroutine from elsewhere in the regex. - -A subpattern with an indefinite maximum repetition is replicated in the -compiled data its minimum number of times (or once with OP_BRAZERO if the -minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX -as appropriate. - -A subpattern with a bounded maximum repetition is replicated in a nested -fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO -before each replication after the minimum, so that, for example, (abc){2,5} is -compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group -has the same number. - -When a repeated subpattern has an unbounded upper limit, it is checked to see -whether it could match an empty string. If this is the case, the opcode in the -final replication is changed to OP_SBRA or OP_SCBRA. 
This tells the matcher -that it needs to check for matching an empty string when it hits OP_KETRMIN or -OP_KETRMAX, and if so, to break the loop. - -Possessive brackets -------------------- - -When a repeated group (capturing or non-capturing) is marked as possessive by -the "+" notation, e.g. (abc)++, different opcodes are used. Their names all -have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead -of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum -repetition is zero, the group is preceded by OP_BRAPOSZERO. - - -Assertions ----------- - -Forward assertions are just like other subpatterns, but starting with one of -the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes -OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion -is OP_REVERSE, followed by a two byte (one short) count of the number of -characters to move back the pointer in the subject string. In ASCII mode, the -count is a number of units, but in UTF-8/16 mode each character may occupy more -than one unit; in UTF-32 mode each character occupies exactly one unit. -A separate count is present in each alternative of a lookbehind -assertion, allowing them to have different fixed lengths. - - -Once-only (atomic) subpatterns ------------------------------- - -These are also just like other subpatterns, but they start with the opcode -OP_ONCE. The check for matching an empty string in an unbounded repeat is -handled entirely at runtime, so there is just this one opcode. - - -Conditional subpatterns ------------------------ - -These are like other subpatterns, but they start with the opcode OP_COND, or -OP_SCOND for one that might match an empty string in an unbounded repeat. If -the condition is a back reference, this is stored at the start of the -subpattern using the opcode OP_CREF followed by two bytes (one short) -containing the reference number. 
OP_NCREF is used instead if the reference was -generated by name (so that the runtime code knows to check for duplicate -names). - -If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of -group x" (coded as "(?(Rx)"), the group number is stored at the start of the -subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of -zero for "the whole pattern". For a DEFINE condition, just the single unit -OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern -always starts with one of the assertions. - - -Recursion ---------- - -Recursion either matches the current regex, or some subexpression. The opcode -OP_RECURSE is followed by a value which is the offset to the starting bracket -from the start of the whole pattern. From release 6.5, OP_RECURSE is -automatically wrapped inside OP_ONCE brackets (because otherwise some patterns -broke it). OP_RECURSE is also used for "subroutine" calls, even though they -are not strictly a recursion. - - -Callout -------- - -OP_CALLOUT is followed by one unit of data that holds a callout number in the -range 0 to 254 for manual callouts, or 255 for an automatic callout. In both -cases there follows a two-byte (one short) value giving the offset in the -pattern to the start of the following item, and another two-byte (one short) -item giving the length of the next item. - - -Philip Hazel -February 2012 diff --git a/deps/libmagic/pcre/INSTALL b/deps/libmagic/pcre/INSTALL deleted file mode 100644 index a1e89e1..0000000 --- a/deps/libmagic/pcre/INSTALL +++ /dev/null @@ -1,370 +0,0 @@ -Installation Instructions -************************* - -Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation, -Inc. - - Copying and distribution of this file, with or without modification, -are permitted in any medium without royalty provided the copyright -notice and this notice are preserved. This file is offered as-is, -without warranty of any kind. 
- -Basic Installation -================== - - Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. Some packages provide this -`INSTALL' file but do not implement all of the features documented -below. The lack of an optional feature in a given package is not -necessarily a bug. More recommendations for GNU packages can be found -in *note Makefile Conventions: (standards)Makefile Conventions. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. 
- - The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package, generally using the just-built uninstalled binaries. - - 4. Type `make install' to install the programs and any data files and - documentation. When installing into a prefix owned by root, it is - recommended that the package be configured and built as a regular - user, and only the `make install' phase executed with root - privileges. - - 5. Optionally, type `make installcheck' to repeat any self-tests, but - this time using the binaries in their final installed location. - This target does not install anything. Running this target as a - regular user, particularly if the prior `make install' required - root privileges, verifies that the installation completed - correctly. - - 6. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - - 7. Often, you can also type `make uninstall' to remove the installed - files again. In practice, not all packages have tested that - uninstallation works correctly, even though it is required by the - GNU Coding Standards. - - 8. 
Some packages, particularly those that use Automake, provide `make - distcheck', which can be used by developers to test that all other - targets like `make install' and `make uninstall' work correctly. - This target is generally not run by end users. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the `configure' script does not know about. Run `./configure --help' -for details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. This -is known as a "VPATH" build. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - - On MacOS X 10.5 and later systems, you can create libraries and -executables that work on multiple system types--known as "fat" or -"universal" binaries--by specifying multiple `-arch' options to the -compiler but only a single `-arch' option to the preprocessor. 
Like -this: - - ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ - CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ - CPP="gcc -E" CXXCPP="g++ -E" - - This is not guaranteed to produce working output in all cases, you -may have to build one architecture at a time and combine the results -using the `lipo' tool if you have problems. - -Installation Names -================== - - By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX', where PREFIX must be an -absolute file name. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. In general, the -default for these options is expressed in terms of `${prefix}', so that -specifying just `--prefix' will affect all of the other directory -specifications that were not explicitly provided. - - The most portable way to affect installation locations is to pass the -correct locations to `configure'; however, many packages provide one or -both of the following shortcuts of passing variable assignments to the -`make install' command line to change installation locations without -having to reconfigure or recompile. - - The first method involves providing an override variable for each -affected directory. 
For example, `make install -prefix=/alternate/directory' will choose an alternate location for all -directory configuration variables that were expressed in terms of -`${prefix}'. Any directories that were specified during `configure', -but not in terms of `${prefix}', must each be overridden at install -time for the entire installation to be relocated. The approach of -makefile variable overrides for each directory variable is required by -the GNU Coding Standards, and ideally causes no recompilation. -However, some platforms have known limitations with the semantics of -shared libraries that end up requiring recompilation when using this -method, particularly noticeable in packages that use GNU Libtool. - - The second method involves providing the `DESTDIR' variable. For -example, `make install DESTDIR=/alternate/directory' will prepend -`/alternate/directory' before all installation names. The approach of -`DESTDIR' overrides is not required by the GNU Coding Standards, and -does not work on platforms that have drive letters. On the other hand, -it does better at avoiding recompilation issues, and works well even -when some directory options were not specified in terms of `${prefix}' -at `configure' time. - -Optional Features -================= - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - - Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. 
- - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - - Some packages offer the ability to configure how verbose the -execution of `make' will be. For these packages, running `./configure ---enable-silent-rules' sets the default to minimal output, which can be -overridden with `make V=1'; while running `./configure ---disable-silent-rules' sets the default to verbose, which can be -overridden with `make V=0'. - -Particular systems -================== - - On HP-UX, the default C compiler is not ANSI C compatible. If GNU -CC is not installed, it is recommended to use the following options in -order to use an ANSI C compiler: - - ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" - -and if that doesn't work, install pre-built binaries of GCC for HP-UX. - - HP-UX `make' updates targets which have the same time stamps as -their prerequisites, which makes it generally unusable when shipped -generated files such as `configure' are involved. Use GNU `make' -instead. - - On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot -parse its `<wchar.h>' header file. The option `-nodtk' can be used as -a workaround. If GNU CC is not installed, it is therefore recommended -to try - - ./configure CC="cc" - -and if that doesn't work, try - - ./configure CC="cc -nodtk" - - On Solaris, don't put `/usr/ucb' early in your `PATH'. This -directory contains several dysfunctional programs; working variants of -these programs are available in `/usr/bin'. So, if you need `/usr/ucb' -in your `PATH', put it _after_ `/usr/bin'. - - On Haiku, software installed for all users goes in `/boot/common', -not `/usr/local'. 
It is recommended to use the following options: - - ./configure --prefix=/boot/common - -Specifying the System Type -========================== - - There may be some features `configure' cannot figure out -automatically, but needs to determine by the type of machine the package -will run on. Usually, assuming the package is built to be run on the -_same_ architectures, `configure' can figure that out, but if it prints -a message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS - KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - -Sharing Defaults -================ - - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - - Variables not defined in a site shell script can be set in the -environment passed to `configure'. 
However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - -`configure' Invocation -====================== - - `configure' recognizes the following options to control how it -operates. - -`--help' -`-h' - Print a summary of all of the options to `configure', and exit. - -`--help=short' -`--help=recursive' - Print a summary of the options unique to this package's - `configure', and exit. The `short' variant lists options used - only in the top level, while the `recursive' variant lists options - also present in any nested packages. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`--prefix=DIR' - Use DIR as the installation prefix. *note Installation Names:: - for more details, including other options available for fine-tuning - the installation locations. 
- -`--no-create' -`-n' - Run the configure checks, but stop before creating any output - files. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - diff --git a/deps/libmagic/pcre/LICENCE b/deps/libmagic/pcre/LICENCE deleted file mode 100644 index 5ce31a8..0000000 --- a/deps/libmagic/pcre/LICENCE +++ /dev/null @@ -1,92 +0,0 @@ -PCRE LICENCE ------------- - -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Release 8 of PCRE is distributed under the terms of the "BSD" licence, as -specified below. The documentation for PCRE, supplied in the "doc" -directory, is distributed under the same terms as the software itself. - -The basic library functions are written in C and are freestanding. Also -included in the distribution is a set of C++ wrapper functions, and a -just-in-time compiler that can be used to optimize pattern matching. These -are both optional features that can be omitted when the library is built. - - -THE BASIC LIBRARY FUNCTIONS ---------------------------- - -Written by: Philip Hazel -Email local part: ph10 -Email domain: cam.ac.uk - -University of Cambridge Computing Service, -Cambridge, England. - -Copyright (c) 1997-2012 University of Cambridge -All rights reserved. - - -PCRE JUST-IN-TIME COMPILATION SUPPORT -------------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2010-2012 Zoltan Herczeg -All rights reserved. - - -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2009-2012 Zoltan Herczeg -All rights reserved. - - -THE C++ WRAPPER FUNCTIONS -------------------------- - -Contributed by: Google Inc. - -Copyright (c) 2007-2012, Google Inc. -All rights reserved. 
- - -THE "BSD" LICENCE ------------------ - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the name of Google - Inc. nor the names of their contributors may be used to endorse or - promote products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -End diff --git a/deps/libmagic/pcre/NEWS b/deps/libmagic/pcre/NEWS deleted file mode 100644 index ebd9c5e..0000000 --- a/deps/libmagic/pcre/NEWS +++ /dev/null @@ -1,611 +0,0 @@ -News about PCRE releases ------------------------- - -Release 8.32 30-November-2012 ------------------------------ - -This release fixes a number of bugs, but also has some new features. These are -the highlights: - -. 
There is now support for 32-bit character strings and UTF-32. Like the - 16-bit support, this is done by compiling a separate 32-bit library. - -. \X now matches a Unicode extended grapheme cluster. - -. Case-independent matching of Unicode characters that have more than one - "other case" now makes all three (or more) characters equivalent. This - applies, for example, to Greek Sigma, which has two lowercase versions. - -. Unicode character properties are updated to Unicode 6.2.0. - -. The EBCDIC support, which had decayed, has had a spring clean. - -. A number of JIT optimizations have been added, which give faster JIT - execution speed. In addition, a new direct interface to JIT execution is - available. This bypasses some of the sanity checks of pcre_exec() to give a - noticeable speed-up. - -. A number of issues in pcregrep have been fixed, making it more compatible - with GNU grep. In particular, --exclude and --include (and variants) apply - to all files now, not just those obtained from scanning a directory - recursively. In Windows environments, the default action for directories is - now "skip" instead of "read" (which provokes an error). - -. If the --only-matching (-o) option in pcregrep is specified multiple - times, each one causes appropriate output. For example, -o1 -o2 outputs the - substrings matched by the 1st and 2nd capturing parentheses. A separating - string can be specified by --om-separator (default empty). - -. When PCRE is built via Autotools using a version of gcc that has the - "visibility" feature, it is used to hide internal library functions that are - not part of the public API. - - -Release 8.31 06-July-2012 -------------------------- - -This is mainly a bug-fixing release, with a small number of developments: - -. The JIT compiler now supports partial matching and the (*MARK) and - (*COMMIT) verbs. - -. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a - pattern. - -. 
There should be a performance improvement when using the heap instead of the - stack for recursion. - -. pcregrep can now be linked with libedit as an alternative to libreadline. - -. pcregrep now has a --file-list option where the list of files to scan is - given as a file. - -. pcregrep now recognizes binary files and there are related options. - -. The Unicode tables have been updated to 6.1.0. - -As always, the full list of changes is in the ChangeLog file. - - -Release 8.30 04-February-2012 ------------------------------ - -Release 8.30 introduces a major new feature: support for 16-bit character -strings, compiled as a separate library. There are a few changes to the -8-bit library, in addition to some bug fixes. - -. The pcre_info() function, which has been obsolete for over 10 years, has - been removed. - -. When a compiled pattern was saved to a file and later reloaded on a host - with different endianness, PCRE used automatically to swap the bytes in some - of the data fields. With the advent of the 16-bit library, where more of this - swapping is needed, it is no longer done automatically. Instead, the bad - endianness is detected and a specific error is given. The user can then call - a new function called pcre_pattern_to_host_byte_order() (or an equivalent - 16-bit function) to do the swap. - -. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode - code points and are now faulted. (They are the so-called "surrogates" - that are reserved for coding high values in UTF-16.) - - -Release 8.21 12-Dec-2011 ------------------------- - -This is almost entirely a bug-fix release. The only new feature is the ability -to obtain the size of the memory used by the JIT compiler. - - -Release 8.20 21-Oct-2011 ------------------------- - -The main change in this release is the inclusion of Zoltan Herczeg's -just-in-time compiler support, which can be accessed by building PCRE with ---enable-jit. Large performance benefits can be had in many situations. 
8.20 -also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up -a number of infelicities and differences from Perl. - - -Release 8.13 16-Aug-2011 ------------------------- - -This is mainly a bug-fix release. There has been a lot of internal refactoring. -The Unicode tables have been updated. The only new feature in the library is -the passing of *MARK information to callouts. Some additions have been made to -pcretest to make testing easier and more comprehensive. There is a new option -for pcregrep to adjust its internal buffer size. - - -Release 8.12 15-Jan-2011 ------------------------- - -This release fixes some bugs in pcregrep, one of which caused the tests to fail -on 64-bit big-endian systems. There are no changes to the code of the library. - - -Release 8.11 10-Dec-2010 ------------------------- - -A number of bugs in the library and in pcregrep have been fixed. As always, see -ChangeLog for details. The following are the non-bug-fix changes: - -. Added --match-limit and --recursion-limit to pcregrep. - -. Added an optional parentheses number to the -o and --only-matching options - of pcregrep. - -. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and - \B. - -. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a - bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD. - -. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_ - START_OPTIMIZE option, which is now allowed at compile time - - -Release 8.10 25-Jun-2010 ------------------------- - -There are two major additions: support for (*MARK) and friends, and the option -PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their -opposites) so that they make use of Unicode properties. There are also a number -of lesser new features, and several bugs have been fixed. A new option, ---line-buffered, has been added to pcregrep, for use when it is connected to -pipes. 
- - -Release 8.02 19-Mar-2010 ------------------------- - -Another bug-fix release. - - -Release 8.01 19-Jan-2010 ------------------------- - -This is a bug-fix release. Several bugs in the code itself and some bugs and -infelicities in the build system have been fixed. - - -Release 8.00 19-Oct-09 ----------------------- - -Bugs have been fixed in the library and in pcregrep. There are also some -enhancements. Restrictions on patterns used for partial matching have been -removed, extra information is given for partial matches, the partial matching -process has been improved, and an option to make a partial match override a -full match is available. The "study" process has been enhanced by finding a -lower bound matching length. Groups with duplicate numbers may now have -duplicated names without the use of PCRE_DUPNAMES. However, they may not have -different names. The documentation has been revised to reflect these changes. -The version number has been expanded to 3 digits as it is clear that the rate -of change is not slowing down. - - -Release 7.9 11-Apr-09 ---------------------- - -Mostly bugfixes and tidies with just a couple of minor functional additions. - - -Release 7.8 05-Sep-08 ---------------------- - -More bug fixes, plus a performance improvement in Unicode character property -lookup. - - -Release 7.7 07-May-08 ---------------------- - -This is once again mainly a bug-fix release, but there are a couple of new -features. - - -Release 7.6 28-Jan-08 ---------------------- - -The main reason for having this release so soon after 7.5 is because it fixes a -potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In -addition, the CMake configuration files have been brought up to date. - - -Release 7.5 10-Jan-08 ---------------------- - -This is mainly a bug-fix release. However the ability to link pcregrep with -libz or libbz2 and the ability to link pcretest with libreadline have been -added. 
Also the --line-offsets and --file-offsets options were added to -pcregrep. - - -Release 7.4 21-Sep-07 ---------------------- - -The only change of specification is the addition of options to control whether -\R matches any Unicode line ending (the default) or just CR, LF, and CRLF. -Otherwise, the changes are bug fixes and a refactoring to reduce the number of -relocations needed in a shared library. There have also been some documentation -updates, in particular, some more information about using CMake to build PCRE -has been added to the NON-UNIX-USE file. - - -Release 7.3 28-Aug-07 ---------------------- - -Most changes are bug fixes. Some that are not: - -1. There is some support for Perl 5.10's experimental "backtracking control - verbs" such as (*PRUNE). - -2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more - restrictive in the strings it accepts. - -3. Checking for potential integer overflow has been made more dynamic, and as a - consequence there is no longer a hard limit on the size of a subpattern that - has a limited repeat count. - -4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec() - no longer advance by two characters instead of one when an unanchored match - fails at CRLF if there are explicit CR or LF matches within the pattern. - This gets rid of some anomalous effects that previously occurred. - -5. Some PCRE-specific settings for varying the newline options at the start of - a pattern have been added. - - -Release 7.2 19-Jun-07 ---------------------- - -WARNING: saved patterns that were compiled by earlier versions of PCRE must be -recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v, -and \V). - -Correction to the notes for 7.1: the note about shared libraries for Windows is -wrong. Previously, three libraries were built, but each could function -independently. For example, the pcreposix library also included all the -functions from the basic pcre library. 
The change is that the three libraries -are no longer independent. They are like the Unix libraries. To use the -pcreposix functions, for example, you need to link with both the pcreposix and -the basic pcre library. - -Some more features from Perl 5.10 have been added: - - (?-n) and (?+n) relative references for recursion and subroutines. - - (?(-n) and (?(+n) relative references as conditions. - - \k{name} and \g{name} are synonyms for \k<name>. - - \K to reset the start of the matched string; for example, (foo)\Kbar - matches bar preceded by foo, but only sets bar as the matched string. - - (?| introduces a group where the capturing parentheses in each alternative - start from the same number; for example, (?|(abc)|(xyz)) sets capturing - parentheses number 1 in both cases. - - \h, \H, \v, \V match horizontal and vertical whitespace, respectively. - - -Release 7.1 24-Apr-07 ---------------------- - -There is only one new feature in this release: a linebreak setting of -PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which -recognizes only CRLF, CR, and LF as linebreaks. - -A few bugs are fixed (see ChangeLog for details), but the major change is a -complete re-implementation of the build system. This now has full Autotools -support and so is now "standard" in some sense. It should help with compiling -PCRE in a wide variety of environments. - -NOTE: when building shared libraries for Windows, three dlls are now built, -called libpcre, libpcreposix, and libpcrecpp. Previously, everything was -included in a single dll. - -Another important change is that the dftables auxiliary program is no longer -compiled and run at "make" time by default. Instead, a default set of character -tables (assuming ASCII coding) is used. If you want to use dftables to generate -the character tables as previously, add --enable-rebuild-chartables to the -"configure" command. You must do this if you are compiling PCRE to run on a -system that uses EBCDIC code.
- -There is a discussion about character tables in the README file. The default is -not to use dftables so that there is no problem when cross-compiling. - - -Release 7.0 19-Dec-06 ---------------------- - -This release has a new major number because there have been some internal -upheavals to facilitate the addition of new optimizations and other facilities, -and to make subsequent maintenance and extension easier. Compilation is likely -to be a bit slower, but there should be no major effect on runtime performance. -Previously compiled patterns are NOT upwards compatible with this release. If -you have saved compiled patterns from a previous release, you will have to -re-compile them. Important changes that are visible to users are: - -1. The Unicode property tables have been updated to Unicode 5.0.0, which adds - some more scripts. - -2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline - sequence as a newline. - -3. The \R escape matches a single Unicode newline sequence as a single unit. - -4. New features that will appear in Perl 5.10 are now in PCRE. These include - alternative Perl syntax for named parentheses, and Perl syntax for - recursion. - -5. The C++ wrapper interface has been extended by the addition of a - QuoteMeta function and the ability to allow copy construction and - assignment. - -For a complete list of changes, see the ChangeLog file. - - -Release 6.7 04-Jul-06 ---------------------- - -The main additions to this release are the ability to use the same name for -multiple sets of parentheses, and support for CRLF line endings in both the -library and pcregrep (and in pcretest for testing). - -Thanks to Ian Taylor, the stack usage for many kinds of pattern has been -significantly reduced for certain subject strings. - - -Release 6.5 01-Feb-06 ---------------------- - -Important changes in this release: - -1. A number of new features have been added to pcregrep. - -2.
The Unicode property tables have been updated to Unicode 4.1.0, and the - supported properties have been extended with script names such as "Arabic", - and the derived properties "Any" and "L&". This has necessitated a change to - the internal format of compiled patterns. Any saved compiled patterns that - use \p or \P must be recompiled. - -3. The specification of recursion in patterns has been changed so that all - recursive subpatterns are automatically treated as atomic groups. Thus, for - example, (?R) is treated as if it were (?>(?R)). This is necessary because - otherwise there are situations where recursion does not work. - -See the ChangeLog for a complete list of changes, which include a number of bug -fixes and tidies. - - -Release 6.0 07-Jun-05 ---------------------- - -The release number has been increased to 6.0 because of the addition of several -major new pieces of functionality. - -A new function, pcre_dfa_exec(), which implements pattern matching using a DFA -algorithm, has been added. This has a number of advantages for certain cases, -though it does run more slowly, and lacks the ability to capture substrings. On -the other hand, it does find all matches, not just the first, and it works -better for partial matching. The pcrematching man page discusses the -differences. - -The pcretest program has been enhanced so that it can make use of the new -pcre_dfa_exec() matching function and the extra features it provides. - -The distribution now includes a C++ wrapper library. This is built -automatically if a C++ compiler is found. The pcrecpp man page discusses this -interface. - -The code itself has been re-organized into many more files, one for each -function, so it no longer requires everything to be linked in when static -linkage is used. As a consequence, some internal functions have had to have -their names exposed. These functions all have names starting with _pcre_. They -are undocumented, and are not intended for use by outside callers.
- -The pcregrep program has been enhanced with new functionality such as -multiline-matching and options for outputting more matching context. See the -ChangeLog for a complete list of changes to the library and the utility -programs. - - -Release 5.0 13-Sep-04 ---------------------- - -The licence under which PCRE is released has been changed to the more -conventional "BSD" licence. - -In the code, some bugs have been fixed, and there are also some major changes -in this release (which is why I've increased the number to 5.0). Some changes -are internal rearrangements, and some provide a number of new facilities. The -new features are: - -1. There's an "automatic callout" feature that inserts callouts before every - item in the regex, and there's a new callout field that gives the position - in the pattern - useful for debugging and tracing. - -2. The extra_data structure can now be used to pass in a set of character - tables at exec time. This is useful if compiled regex are saved and re-used - at a later time when the tables may not be at the same address. If the - default internal tables are used, the pointer saved with the compiled - pattern is now set to NULL, which means that you don't need to do anything - special unless you are using custom tables. - -3. It is possible, with some restrictions on the content of the regex, to - request "partial" matching. A special return code is given if all of the - subject string matched part of the regex. This could be useful for testing - an input field as it is being typed. - -4. There is now some optional support for Unicode character properties, which - means that pattern items such as \p{Lu} and \X can now be used. Only - the general category properties are supported. If PCRE is compiled with this - support, an additional 90K data structure is included, which increases the - size of the library dramatically. - -5. There is support for saving compiled patterns and re-using them later. - -6.
There is support for running regular expressions that were compiled on a - different host with the opposite endianness. - -7. The pcretest program has been extended to accommodate the new features. - -The main internal rearrangement is that sequences of literal characters are no -longer handled as strings. Instead, each character is handled on its own. This -makes some UTF-8 handling easier, and makes the support of partial matching -possible. Compiled patterns containing long literal strings will be larger as a -result of this change; I hope that performance will not be much affected. - - -Release 4.5 01-Dec-03 ---------------------- - -Again mainly a bug-fix and tidying release, with only a couple of new features: - -1. It's possible now to compile PCRE so that it does not use recursive -function calls when matching. Instead it gets memory from the heap. This slows -things down, but may be necessary on systems with limited stacks. - -2. UTF-8 string checking has been tightened to reject overlong sequences and to -check that a starting offset points to the start of a character. Failure of the -latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET. - -3. PCRE can now be compiled for systems that use EBCDIC code. - - -Release 4.4 21-Aug-03 ---------------------- - -This is mainly a bug-fix and tidying release. The only new feature is that PCRE -checks UTF-8 strings for validity by default. There is an option to suppress -this, just in case anybody wants that teeny extra bit of performance. - - -Releases 4.1 - 4.3 ------------------- - -Sorry, I forgot about updating the NEWS file for these releases. Please take a -look at ChangeLog. - - -Release 4.0 17-Feb-03 ---------------------- - -There have been a lot of changes for the 4.0 release, adding additional -functionality and mending bugs. Below is a list of the highlights of the new -functionality. For full details of these features, please consult the -documentation. 
For a complete list of changes, see the ChangeLog file. - -1. Support for Perl's \Q...\E escapes. - -2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java -package. They provide some syntactic sugar for simple cases of "atomic -grouping". - -3. Support for the \G assertion. It is true when the current matching position -is at the start point of the match. - -4. A new feature that provides some of the functionality that Perl provides -with (?{...}). The facility is termed a "callout". The way it is done in PCRE -is for the caller to provide an optional function, by setting pcre_callout to -its entry point. To get the function called, the regex must include (?C) at -appropriate points. - -5. Support for recursive calls to individual subpatterns. This makes it really -easy to get totally confused. - -6. Support for named subpatterns. The Python syntax (?P<name>...) is used to -name a group. - -7. Several extensions to UTF-8 support; it is now fairly complete. There is an -option for pcregrep to make it operate in UTF-8 mode. - -8. The single man page has been split into a number of separate man pages. -These also give rise to individual HTML pages which are put in a separate -directory. There is an index.html page that lists them all. Some hyperlinking -between the pages has been installed. - - -Release 3.5 15-Aug-01 ---------------------- - -1. The configuring system has been upgraded to use later versions of autoconf -and libtool. By default it builds both a shared and a static library if the OS -supports it. You can use --disable-shared or --disable-static on the configure -command if you want only one of them. - -2. The pcretest utility is now installed along with pcregrep because it is -useful for users (to test regexs) and by doing this, it automatically gets -relinked by libtool. The documentation has been turned into a man page, so -there are now .1, .txt, and .html versions in /doc. - -3.
Upgrades to pcregrep: - (i) Added long-form option names like gnu grep. - (ii) Added --help to list all options with an explanatory phrase. - (iii) Added -r, --recursive to recurse into sub-directories. - (iv) Added -f, --file to read patterns from a file. - -4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure -script, to force use of CR or LF instead of \n in the source. On non-Unix -systems, the value can be set in config.h. - -5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an -absolute limit. Changed the text of the error message to make this clear, and -likewise updated the man page. - -6. The limit of 99 on the number of capturing subpatterns has been removed. -The new limit is 65535, which I hope will not be a "real" limit. - - -Release 3.3 01-Aug-00 ---------------------- - -There is some support for UTF-8 character strings. This is incomplete and -experimental. The documentation describes what is and what is not implemented. -Otherwise, this is just a bug-fixing release. - - -Release 3.0 01-Feb-00 ---------------------- - -1. A "configure" script is now used to configure PCRE for Unix systems. It -builds a Makefile, a config.h file, and the pcre-config script. - -2. PCRE is built as a shared library by default. - -3. There is support for POSIX classes such as [:alpha:]. - -5. There is an experimental recursion feature. - ----------------------------------------------------------------------------- - IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00 - -Please note that there has been a change in the API such that a larger -ovector is required at matching time, to provide some additional workspace. -The new man page has details. This change was necessary in order to support -some of the new functionality in Perl 5.005. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00 - -Another (I hope this is the last!) change has been made to the API for the -pcre_compile() function. 
An additional argument has been added to make it -possible to pass over a pointer to character tables built in the current -locale by pcre_maketables(). To use the default tables, this new argument -should be passed as NULL. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05 - -Yet another (and again I hope this really is the last) change has been made -to the API for the pcre_exec() function. An additional argument has been -added to make it possible to start the match other than at the start of the -subject string. This is important if there are lookbehinds. The new man -page has the details, but if you just want to convert existing programs, all -you need to do is to stick in a new fifth argument to pcre_exec(), with a -value of zero. For example, change - - pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize) -to - pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize) - -**** diff --git a/deps/libmagic/pcre/NON-AUTOTOOLS-BUILD b/deps/libmagic/pcre/NON-AUTOTOOLS-BUILD deleted file mode 100644 index a4e2e46..0000000 --- a/deps/libmagic/pcre/NON-AUTOTOOLS-BUILD +++ /dev/null @@ -1,639 +0,0 @@ -Building PCRE without using autotools -------------------------------------- - -This document contains the following sections: - - General - Generic instructions for the PCRE C library - The C++ wrapper functions - Building for virtual Pascal - Stack size in Windows environments - Linking programs in Windows environments - Comments about Win32 builds - Building PCRE on Windows with CMake - Use of relative paths with CMake on Windows - Testing with RunTest.bat - Building under Windows with BCC5.5 - Building PCRE on OpenVMS - Building PCRE on Stratus OpenVOS - Building PCRE on native z/OS and z/VM - - -GENERAL - -I (Philip Hazel) have no experience of Windows or VMS systems and how their -libraries work. The items in the PCRE distribution and Makefile that relate to -anything other than Linux systems are untested by me.
- -There are some other comments and files (including some documentation in CHM -format) in the Contrib directory on the FTP site: - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib - -The basic PCRE library consists entirely of code written in Standard C, and so -should compile successfully on any system that has a Standard C compiler and -library. The C++ wrapper functions are a separate issue (see below). - -The PCRE distribution includes a "configure" file for use by the configure/make -(autotools) build system, as found in many Unix-like environments. The README -file contains information about the options for "configure". - -There is also support for CMake, which some users prefer, especially in Windows -environments, though it can also be run in Unix-like environments. See the -section entitled "Building PCRE on Windows with CMake" below. - -Versions of config.h and pcre.h are distributed in the PCRE tarballs under the -names config.h.generic and pcre.h.generic. These are provided for those who -build PCRE without using "configure" or CMake. If you use "configure" or CMake, -the .generic versions are not used. - - -GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY - -The following are generic instructions for building the PCRE C library "by -hand". If you are going to use CMake, this section does not apply to you; you -can skip ahead to the CMake section. - - (1) Copy or rename the file config.h.generic as config.h, and edit the macro - settings that it contains to whatever is appropriate for your environment. - - In particular, you can alter the definition of the NEWLINE macro to - specify what character(s) you want to be interpreted as line terminators. - In an EBCDIC environment, you MUST change NEWLINE, because its default - value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15, - NL), though in some cases it may be 37 (0x25). 
- - When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H - to your compiler so that config.h is included in the sources. - - An alternative approach is not to edit config.h, but to use -D on the - compiler command line to make any changes that you need to the - configuration options. In this case -DHAVE_CONFIG_H must not be set. - - NOTE: There have been occasions when the way in which certain parameters - in config.h are used has changed between releases. (In the configure/make - world, this is handled automatically.) When upgrading to a new release, - you are strongly advised to review config.h.generic before re-using what - you had previously. - - (2) Copy or rename the file pcre.h.generic as pcre.h. - - (3) EITHER: - Copy or rename file pcre_chartables.c.dist as pcre_chartables.c. - - OR: - Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if - you have set up config.h), and then run it with the single argument - "pcre_chartables.c". This generates a set of standard character tables - and writes them to that file. The tables are generated using the default - C locale for your system. If you want to use a locale that is specified - by LC_xxx environment variables, add the -L option to the dftables - command. You must use this method if you are building on a system that - uses EBCDIC code. - - The tables in pcre_chartables.c are defaults. The caller of PCRE can - specify alternative tables at run time. - - (4) Ensure that you have the following header files: - - pcre_internal.h - ucp.h - - (5) For an 8-bit library, compile the following source files, setting - -DHAVE_CONFIG_H as a compiler option if you have set up config.h with your - configuration, or else use other -D settings to change the configuration - as required. 
- - pcre_byte_order.c - pcre_chartables.c - pcre_compile.c - pcre_config.c - pcre_dfa_exec.c - pcre_exec.c - pcre_fullinfo.c - pcre_get.c - pcre_globals.c - pcre_jit_compile.c - pcre_maketables.c - pcre_newline.c - pcre_ord2utf8.c - pcre_refcount.c - pcre_string_utils.c - pcre_study.c - pcre_tables.c - pcre_ucd.c - pcre_valid_utf8.c - pcre_version.c - pcre_xclass.c - - Make sure that you include -I. in the compiler command (or equivalent for - an unusual compiler) so that all included PCRE header files are first - sought in the current directory. Otherwise you run the risk of picking up - a previously-installed file from somewhere else. - - Note that you must still compile pcre_jit_compile.c, even if you have not - defined SUPPORT_JIT in config.h, because when JIT support is not - configured, dummy functions are compiled. When JIT support IS configured, - pcre_jit_compile.c #includes sources from the sljit subdirectory, where - there should be 16 files, all of whose names begin with "sljit". - - (6) Now link all the compiled code into an object library in whichever form - your system keeps such libraries. This is the basic PCRE C 8-bit library. - If your system has static and shared libraries, you may have to do this - once for each type. 
- - (7) If you want to build a 16-bit library (as well as, or instead of the 8-bit - or 32-bit libraries) repeat steps 5-6 with the following files: - - pcre16_byte_order.c - pcre16_chartables.c - pcre16_compile.c - pcre16_config.c - pcre16_dfa_exec.c - pcre16_exec.c - pcre16_fullinfo.c - pcre16_get.c - pcre16_globals.c - pcre16_jit_compile.c - pcre16_maketables.c - pcre16_newline.c - pcre16_ord2utf16.c - pcre16_refcount.c - pcre16_string_utils.c - pcre16_study.c - pcre16_tables.c - pcre16_ucd.c - pcre16_utf16_utils.c - pcre16_valid_utf16.c - pcre16_version.c - pcre16_xclass.c - - (7') If you want to build a 32-bit library (as well as, or instead of the 8-bit - or 16-bit libraries) repeat steps 5-6 with the following files: - - pcre32_byte_order.c - pcre32_chartables.c - pcre32_compile.c - pcre32_config.c - pcre32_dfa_exec.c - pcre32_exec.c - pcre32_fullinfo.c - pcre32_get.c - pcre32_globals.c - pcre32_jit_compile.c - pcre32_maketables.c - pcre32_newline.c - pcre32_ord2utf32.c - pcre32_refcount.c - pcre32_string_utils.c - pcre32_study.c - pcre32_tables.c - pcre32_ucd.c - pcre32_utf32_utils.c - pcre32_valid_utf32.c - pcre32_version.c - pcre32_xclass.c - - (8) If you want to build the POSIX wrapper functions (which apply only to the - 8-bit library), ensure that you have the pcreposix.h file and then compile - pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result - (on its own) as the pcreposix library. - - (9) The pcretest program can be linked with any combination of the 8-bit, 16-bit - and 32-bit libraries (depending on what you selected in config.h). Compile - pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and - link them together with the appropriate library/ies. If you compiled an - 8-bit library, pcretest also needs the pcreposix wrapper library unless - you compiled it with -DNOPOSIX.
- -(10) Run pcretest on the testinput files in the testdata directory, and check - that the output matches the corresponding testoutput files. There are - comments about what each test does in the section entitled "Testing PCRE" - in the README file. If you compiled more than one of the 8-bit, 16-bit and - 32-bit libraries, you need to run pcretest with the -16 option to do 16-bit - tests and with the -32 option to do 32-bit tests. - - Some tests are relevant only when certain build-time options are selected. - For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if - you have built PCRE without it. See the comments at the start of each - testinput file. If you have a suitable Unix-like shell, the RunTest script - will run the appropriate tests for you. - - Note that the supplied files are in Unix format, with just LF characters - as line terminators. You may need to edit them to change this if your - system uses a different convention. If you are using Windows, you probably - should use the wintestinput3 file instead of testinput3 (and the - corresponding output file). This is a locale test; wintestinput3 sets the - locale to "french" rather than "fr_FR", and there are some minor output - differences. - -(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested - by the testdata files. However, you might also like to build and run - the JIT test program, pcre_jit_test.c. - -(12) If you want to use the pcregrep command, compile and link pcregrep.c; it - uses only the basic 8-bit PCRE library (it does not need the pcreposix - library). - - -THE C++ WRAPPER FUNCTIONS - -The PCRE distribution also contains some C++ wrapper functions and tests, -applicable to the 8-bit library, which were contributed by Google Inc. On a -system that can use "configure" and "make", the functions are automatically -built into a library called pcrecpp. It should be straightforward to compile -the .cc files manually on other systems.
The files called xxx_unittest.cc are -test programs for each of the corresponding xxx.cc files. - - -BUILDING FOR VIRTUAL PASCAL - -A script for building PCRE using Borland's C++ compiler for use with VPASCAL -was contributed by Alexander Tokarev. Stefan Weber updated the script and added -additional files. The following files in the distribution are for building PCRE -for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas. - - -STACK SIZE IN WINDOWS ENVIRONMENTS - -The default processor stack size of 1Mb in some Windows environments is too -small for matching patterns that need much recursion. In particular, test 2 may -fail because of this. Normally, running out of stack causes a crash, but there -have been cases where the test program has just died silently. See your linker -documentation for how to increase stack size if you experience problems. The -Linux default of 8Mb is a reasonable choice for the stack, though even that can -be too small for some pattern/subject combinations. - -PCRE has a compile configuration option to disable the use of stack for -recursion so that heap is used instead. However, pattern matching is -significantly slower when this is done. There is more about stack usage in the -"pcrestack" documentation. - - -LINKING PROGRAMS IN WINDOWS ENVIRONMENTS - -If you want to statically link a program against a PCRE library in the form of -a non-dll .a file, you must define PCRE_STATIC before including pcre.h or -pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will -be declared __declspec(dllimport), with unwanted results. - - -CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS - -It is possible to compile programs to use different calling conventions using -MSVC. Search the web for "calling conventions" for more information. To make it -easier to change the calling convention for the exported functions in the -PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external -definitions. 
It can be set externally when compiling (e.g. in CFLAGS). If it is -not set, it defaults to empty; the default calling convention is then used -(which is what is wanted most of the time). - - -COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE") - -There are two ways of building PCRE using the "configure, make, make install" -paradigm on Windows systems: using MinGW or using Cygwin. These are not at all -the same thing; they are completely different from each other. There is also -support for building using CMake, which some users find a more straightforward -way of building PCRE under Windows. - -The MinGW home page (http://www.mingw.org/) says this: - - MinGW: A collection of freely available and freely distributable Windows - specific header files and import libraries combined with GNU toolsets that - allow one to produce native Windows programs that do not rely on any - 3rd-party C runtime DLLs. - -The Cygwin home page (http://www.cygwin.com/) says this: - - Cygwin is a Linux-like environment for Windows. It consists of two parts: - - . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing - substantial Linux API functionality - - . A collection of tools which provide Linux look and feel. - - The Cygwin DLL currently works with all recent, commercially released x86 32 - bit and 64 bit versions of Windows, with the exception of Windows CE. - -On both MinGW and Cygwin, PCRE should build correctly using: - - ./configure && make && make install - -This should create two libraries called libpcre and libpcreposix, and, if you -have enabled building the C++ wrapper, a third one called libpcrecpp. These are -independent libraries: when you link with libpcreposix or libpcrecpp you must -also link with libpcre, which contains the basic functions. (Some earlier -releases of PCRE included the basic libpcre functions in libpcreposix. This no -longer happens.) 
- -A user submitted a special-purpose patch that makes it easy to create -"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll" -as a special target. If you use this target, no other files are built, and in -particular, the pcretest and pcregrep programs are not built. An example of how -this might be used is: - - ./configure --enable-utf --disable-cpp CFLAGS="-03 -s"; make pcre.dll - -Using Cygwin's compiler generates libraries and executables that depend on -cygwin1.dll. If a library that is generated this way is distributed, -cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL -licence, this forces not only PCRE to be under the GPL, but also the entire -application. A distributor who wants to keep their own code proprietary must -purchase an appropriate Cygwin licence. - -MinGW has no such restrictions. The MinGW compiler generates a library or -executable that can run standalone on Windows without any third party dll or -licensing issues. - -But there is more complication: - -If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is -to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a -front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's -gcc and MinGW's gcc). So, a user can: - -. Build native binaries by using MinGW or by getting Cygwin and using - -mno-cygwin. - -. Build binaries that depend on cygwin1.dll by using Cygwin with the normal - compiler flags. - -The test files that are supplied with PCRE are in UNIX format, with LF -characters as line terminators. Unless your PCRE library uses a default newline -option that includes LF as a valid newline, it may be necessary to change the -line terminators in the test files to get some of the tests to work. - - -BUILDING PCRE ON WINDOWS WITH CMAKE - -CMake is an alternative configuration facility that can be used instead of -"configure". 
CMake creates project files (make files, solution files, etc.) -tailored to numerous development environments, including Visual Studio, -Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no -spaces in the names for your CMake installation and your PCRE source and build -directories. - -The following instructions were contributed by a PCRE user. If they are not -followed exactly, errors may occur. In the event that errors do occur, it is -recommended that you delete the CMake cache before attempting to repeat the -CMake build process. In the CMake GUI, the cache can be deleted by selecting -"File > Delete Cache". - -1. Install the latest CMake version available from http://www.cmake.org/, and - ensure that cmake\bin is on your path. - -2. Unzip (retaining folder structure) the PCRE source tree into a source - directory such as C:\pcre. You should ensure your local date and time - is not earlier than the file dates in your source dir if the release is - very new. - -3. Create a new, empty build directory, preferably a subdirectory of the - source dir. For example, C:\pcre\pcre-xx\build. - -4. Run cmake-gui from the Shell environment of your build tool, for example, - Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try - to start CMake from the Windows Start menu, as this can lead to errors. - -5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build - directories, respectively. - -6. Hit the "Configure" button. - -7. Select the particular IDE / build tool that you are using (Visual - Studio, MSYS makefiles, MinGW makefiles, etc.) - -8. The GUI will then list several configuration options. This is where - you can enable UTF-8 support or other PCRE optional features. - -9. Hit "Configure" again. The adjacent "Generate" button should now be - active. - -10. Hit "Generate". - -11. 
The build directory should now contain a usable build system, be it a - solution file for Visual Studio, makefiles for MinGW, etc. Exit from - cmake-gui and use the generated build system with your compiler or IDE. - E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE - solution, select the desired configuration (Debug, or Release, etc.) and - build the ALL_BUILD project. - -12. If during configuration with cmake-gui you've elected to build the test - programs, you can execute them by building the test project. E.g., for - MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The - most recent build configuration is targeted by the tests. A summary of - test results is presented. Complete test output is subsequently - available for review in Testing\Temporary under your build dir. - - -USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS - -A PCRE user comments as follows: - -I thought that others may want to know the current state of -CMAKE_USE_RELATIVE_PATHS support on Windows. - -Here it is: --- AdditionalIncludeDirectories is only partially modified (only the -first path - see below) --- Only some of the contained file paths are modified - shown below for -pcre.vcproj --- It properly modifies - -I am sure CMake people can fix that if they want to. Until then one will -need to replace existing absolute paths in project files with relative -paths manually (e.g. from VS) - relative to project file location. I did -just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big -deal. 
- -AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;" -AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;" - -RelativePath="pcre.h"> -RelativePath="pcre_chartables.c"> -RelativePath="pcre_chartables.c.rule"> - - -TESTING WITH RUNTEST.BAT - -If configured with CMake, building the test project ("make test" or building -ALL_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending -on your configuration options, possibly other test programs) in the build -directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths. - -For manual testing with RunTest.bat, provided the build dir is a subdirectory -of the source directory: Open command shell window. Chdir to the location -of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with -"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. - -To run only a particular test with RunTest.Bat provide a test number argument. - -Otherwise: - -1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe - have been created. - -2. Edit RunTest.bat to identify the full or relative location of - the pcre source (in which the testdata folder resides), e.g.: - - set srcdir=C:\pcre\pcre-8.20 - -3. In a Windows command environment, chdir to the location of your bat and - exe programs. - -4. Run RunTest.bat. Test outputs will automatically be compared to expected - results, and discrepancies will be identified in the console output. - -To independently test the just-in-time compiler, run pcre_jit_test.exe. -To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and -pcre_scanner_unittest.exe. - - -BUILDING UNDER WINDOWS WITH BCC5.5 - -Michael Roy sent these comments about building PCRE under Windows with BCC5.5: - - Some of the core BCC libraries have a version of PCRE from 1998 built in, - which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a - version mismatch. 
I'm including an easy workaround below, if you'd like to - include it in the non-unix instructions: - - When linking a project with BCC5.5, pcre.lib must be included before any of - the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command - line. - - -BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x - -Vincent Richomme sent a zip archive of files to help with this process. They -can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP -site. - - -BUILDING PCRE ON OPENVMS - -Dan Mooney sent the following comments about building PCRE on OpenVMS. They -relate to an older version of PCRE that used fewer source files, so the exact -commands will need changing. See the current list of source files above. - -"It was quite easy to compile and link the library. I don't have a formal -make file but the attached file [reproduced below] contains the OpenVMS DCL -commands I used to build the library. I had to add #define -POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere. - -The library was built on: -O/S: HP OpenVMS v7.3-1 -Compiler: Compaq C v6.5-001-48BCD -Linker: vA13-01 - -The test results did not match 100% due to the issues you mention in your -documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I -modified some of the character tables temporarily and was able to get the -results to match. Tests using the fr locale did not match since I don't have -that locale loaded. The study size was always reported to be 3 less than the -value in the standard test output files." - -========================= -$! This DCL procedure builds PCRE on OpenVMS -$! -$! I followed the instructions in the non-unix-use file in the distribution. -$! -$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES -$ COMPILE DFTABLES.C -$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ -$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C -$ COMPILE MAKETABLES.C -$ COMPILE GET.C -$ COMPILE STUDY.C -$! 
I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol -$! did not seem to be defined anywhere. -$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support. -$ COMPILE PCRE.C -$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ -$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol -$! did not seem to be defined anywhere. -$ COMPILE PCREPOSIX.C -$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ -$ COMPILE PCRETEST.C -$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB -$! C programs that want access to command line arguments must be -$! defined as a symbol -$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE" -$! Arguments must be enclosed in quotes. -$ PCRETEST "-C" -$! Test results: -$! -$! The test results did not match 100%. The functions isprint(), iscntrl(), -$! isgraph() and ispunct() on OpenVMS must not produce the same results -$! as the system that built the test output files provided with the -$! distribution. -$! -$! The study size did not match and was always 3 less on OpenVMS. -$! -$! Locale could not be set to fr -$! -========================= - - -BUILDING PCRE ON STRATUS OPENVOS - -These notes on the port of PCRE to VOS (lightly edited) were supplied by -Ashutosh Warikoo, whose email address has the local part awarikoo and the -domain nse.co.in. The port was for version 7.9 in August 2009. - -1. Building PCRE - -I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any -problems. I used the following packages to build PCRE: - - ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz - -Please read and follow the instructions that come with these packages. To start -the build of pcre, from the root of the package type: - - ./build.sh - -2. 
Installing PCRE - -Once you have successfully built PCRE, login to the SysAdmin group, switch to -the root user, and type - - [ !create_dir (master_disk)>usr --if needed ] - [ !create_dir (master_disk)>usr>local --if needed ] - !gmake install - -This installs PCRE and its man pages into /usr/local. You can add -(master_disk)>usr>local>bin to your command search paths, or if you are in -BASH, add /usr/local/bin to the PATH environment variable. - -4. Restrictions - -This port requires readline library optionally. However during the build I -faced some yet unexplored errors while linking with readline. As it was an -optional component I chose to disable it. - -5. Known Problems - -I ran the test suite, but you will have to be your own judge of whether this -command, and this port, suits your purposes. If you find any problems that -appear to be related to the port itself, please let me know. Please see the -build.log file in the root of the package also. - - -BUILDING PCRE ON NATIVE Z/OS AND Z/VM - -z/OS and z/VM are operating systems for mainframe computers, produced by IBM. -The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and -applications can be supported through UNIX System Services, and in such an -environment PCRE can be built in the same way as in other systems. However, in -native z/OS (without UNIX System Services) and in z/VM, special ports are -required. For details, please see this web site: - - http://www.zaconsultants.net - -There is also a mirror here: - - http://www.vsoft-software.com/downloads.html - -========================== -Last Updated: 21 November 2012 diff --git a/deps/libmagic/pcre/NON-UNIX-USE b/deps/libmagic/pcre/NON-UNIX-USE deleted file mode 100644 index a25546b..0000000 --- a/deps/libmagic/pcre/NON-UNIX-USE +++ /dev/null @@ -1,7 +0,0 @@ -Compiling PCRE on non-Unix systems ----------------------------------- - -This has been renamed to better reflect its contents. 
Please see the file -NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools. - -#### diff --git a/deps/libmagic/pcre/README b/deps/libmagic/pcre/README deleted file mode 100644 index a2c3d9b..0000000 --- a/deps/libmagic/pcre/README +++ /dev/null @@ -1,935 +0,0 @@ -README file for PCRE (Perl-compatible regular expression library) ------------------------------------------------------------------ - -The latest release of PCRE is always available in three alternative formats -from: - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2 - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip - -There is a mailing list for discussion about the development of PCRE at - - pcre-dev@exim.org - -Please read the NEWS file if you are upgrading from a previous release. -The contents of this README file are: - - The PCRE APIs - Documentation for PCRE - Contributions by users of PCRE - Building PCRE on non-Unix-like systems - Building PCRE without using autotools - Building PCRE using autotools - Retrieving configuration information - Shared libraries - Cross-compiling using autotools - Using HP's ANSI C++ compiler (aCC) - Using PCRE from MySQL - Making new tarballs - Testing PCRE - Character tables - File manifest - - -The PCRE APIs -------------- - -PCRE is written in C, and it has its own API. There are three sets of functions, -one for the 8-bit library, which processes strings of bytes, one for the -16-bit library, which processes strings of 16-bit values, and one for the 32-bit -library, which processes strings of 32-bit values. The distribution also -includes a set of C++ wrapper functions (see the pcrecpp man page for details), -courtesy of Google Inc., which can be used to call the 8-bit PCRE library from -C++. 
- -In addition, there is a set of C wrapper functions (again, just for the 8-bit -library) that are based on the POSIX regular expression API (see the pcreposix -man page). These end up in the library called libpcreposix. Note that this just -provides a POSIX calling interface to PCRE; the regular expressions themselves -still follow Perl syntax and semantics. The POSIX API is restricted, and does -not give full access to all of PCRE's facilities. - -The header file for the POSIX-style functions is called pcreposix.h. The -official POSIX name is regex.h, but I did not want to risk possible problems -with existing files of that name by distributing it that way. To use PCRE with -an existing program that uses the POSIX API, pcreposix.h will have to be -renamed or pointed at by a link. - -If you are using the POSIX interface to PCRE and there is already a POSIX regex -library installed on your system, as well as worrying about the regex.h header -file (as mentioned above), you must also take care when linking programs to -ensure that they link with PCRE's libpcreposix library. Otherwise they may pick -up the POSIX functions of the same name from the other library. - -One way of avoiding this confusion is to compile PCRE with the addition of --Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the -compiler flags (CFLAGS if you are using "configure" -- see below). This has the -effect of renaming the functions so that the names no longer clash. Of course, -you have to do the same thing for your applications, or write them using the -new names. - - -Documentation for PCRE ----------------------- - -If you install PCRE in the normal way on a Unix-like system, you will end up -with a set of man pages whose names all start with "pcre". The one that is just -called "pcre" lists all the others. In addition to these man pages, the PCRE -documentation is supplied in two other forms: - - 1. 
There are files called doc/pcre.txt, doc/pcregrep.txt, and - doc/pcretest.txt in the source distribution. The first of these is a - concatenation of the text forms of all the section 3 man pages except - those that summarize individual functions. The other two are the text - forms of the section 1 man pages for the pcregrep and pcretest commands. - These text forms are provided for ease of scanning with text editors or - similar tools. They are installed in <prefix>/share/doc/pcre, where - <prefix> is the installation prefix (defaulting to /usr/local). - - 2. A set of files containing all the documentation in HTML form, hyperlinked - in various ways, and rooted in a file called index.html, is distributed in - doc/html and installed in <prefix>/share/doc/pcre/html. - -Users of PCRE have contributed files containing the documentation for various -releases in CHM format. These can be found in the Contrib directory of the FTP -site (see next section). - - -Contributions by users of PCRE ------------------------------- - -You can find contributions from PCRE users in the directory - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib - -There is a README file giving brief descriptions of what they are. Some are -complete in themselves; others are pointers to URLs containing relevant files. -Some of this material is likely to be well out-of-date. Several of the earlier -contributions provided support for compiling PCRE on various flavours of -Windows (I myself do not use Windows). Nowadays there is more Windows support -in the standard distribution, so these contributions have been archived. - - -Building PCRE on non-Unix-like systems --------------------------------------- - -For a non-Unix-like system, please read the comments in the file -NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and -"make" you may be able to build PCRE using autotools in the same way as for -many Unix-like systems. 
- -PCRE can also be configured using the GUI facility provided by CMake's -cmake-gui command. This creates Makefiles, solution files, etc. The file -NON-AUTOTOOLS-BUILD has information about CMake. - -PCRE has been compiled on many different operating systems. It should be -straightforward to build PCRE on any system that has a Standard C compiler and -library, because it uses only Standard C functions. - - -Building PCRE without using autotools -------------------------------------- - -The use of autotools (in particular, libtool) is problematic in some -environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD -file for ways of building PCRE without using autotools. - - -Building PCRE using autotools ------------------------------ - -If you are using HP's ANSI C++ compiler (aCC), please see the special note -in the section entitled "Using HP's ANSI C++ compiler (aCC)" below. - -The following instructions assume the use of the widely used "configure; make; -make install" (autotools) process. - -To build PCRE on a system that supports autotools, first run the "configure" -command from the PCRE distribution directory, with your current directory set -to the directory where you want the files to be created. This command is a -standard GNU "autoconf" configuration script, for which generic instructions -are supplied in the file INSTALL. - -Most commonly, people build PCRE within its own distribution directory, and in -this case, on many systems, just running "./configure" is sufficient. However, -the usual methods of changing standard defaults are available. For example: - -CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local - -This command specifies that the C compiler should be run with the flags '-O2 --Wall' instead of the default, and that "make install" should install PCRE -under /opt/local instead of the default /usr/local. - -If you want to build in a different directory, just run "configure" with that -directory as current. 
For example, suppose you have unpacked the PCRE source -into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx: - -cd /build/pcre/pcre-xxx -/source/pcre/pcre-xxx/configure - -PCRE is written in C and is normally compiled as a C library. However, it is -possible to build it as a C++ library, though the provided building apparatus -does not have any features to support this. - -There are some optional features that can be included or omitted from the PCRE -library. They are also documented in the pcrebuild man page. - -. By default, both shared and static libraries are built. You can change this - by adding one of these options to the "configure" command: - - --disable-shared - --disable-static - - (See also "Shared libraries on Unix-like systems" below.) - -. By default, only the 8-bit library is built. If you add --enable-pcre16 to - the "configure" command, the 16-bit library is also built. If you add - --enable-pcre32 to the "configure" command, the 32-bit library is also built. - If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable - building the 8-bit library. - -. If you are building the 8-bit library and want to suppress the building of - the C++ wrapper library, you can add --disable-cpp to the "configure" - command. Otherwise, when "configure" is run without --disable-pcre8, it will - try to find a C++ compiler and C++ header files, and if it succeeds, it will - try to build the C++ wrapper. - -. If you want to include support for just-in-time compiling, which can give - large performance improvements on certain platforms, add --enable-jit to the - "configure" command. This support is available only for certain hardware - architectures. If you try to enable it on an unsupported architecture, there - will be a compile time error. - -. When JIT support is enabled, pcregrep automatically makes use of it, unless - you add --disable-pcregrep-jit to the "configure" command. - -. 
If you want to make use of the support for UTF-8 Unicode character strings in - the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library, - or UTF-32 Unicode character strings in the 32-bit library, you must add - --enable-utf to the "configure" command. Without it, the code for handling - UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even - when --enable-utf is included, the use of a UTF encoding still has to be - enabled by an option at run time. When PCRE is compiled with this option, its - input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC - platforms. It is not possible to use both --enable-utf and --enable-ebcdic at - the same time. - -. There are no separate options for enabling UTF-8, UTF-16 and UTF-32 - independently because that would allow ridiculous settings such as requesting - UTF-16 support while building only the 8-bit library. However, the option - --enable-utf8 is retained for backwards compatibility with earlier releases - that did not support 16-bit or 32-bit character strings. It is synonymous with - --enable-utf. It is not possible to configure one library with UTF support - and the other without in the same configuration. - -. If, in addition to support for UTF-8/16/32 character strings, you want to - include support for the \P, \p, and \X sequences that recognize Unicode - character properties, you must add --enable-unicode-properties to the - "configure" command. This adds about 30K to the size of the library (in the - form of a property table); only the basic two-letter properties such as Lu - are supported. - -. You can build PCRE to recognize either CR or LF or the sequence CRLF or any - of the preceding, or any of the Unicode newline sequences as indicating the - end of a line. Whatever you specify at build time is the default; the caller - of PCRE can change the selection at run time. The default newline indicator - is a single LF character (the Unix standard). 
You can specify the default - newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf - or --enable-newline-is-crlf or --enable-newline-is-anycrlf or - --enable-newline-is-any to the "configure" command, respectively. - - If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of - the standard tests will fail, because the lines in the test files end with - LF. Even if the files are edited to change the line endings, there are likely - to be some failures. With --enable-newline-is-anycrlf or - --enable-newline-is-any, many tests should succeed, but there may be some - failures. - -. By default, the sequence \R in a pattern matches any Unicode line ending - sequence. This is independent of the option specifying what PCRE considers to - be the end of a line (see above). However, the caller of PCRE can restrict \R - to match only CR, LF, or CRLF. You can make this the default by adding - --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). - -. When called via the POSIX interface, PCRE uses malloc() to get additional - storage for processing capturing parentheses if there are more than 10 of - them in a pattern. You can increase this threshold by setting, for example, - - --with-posix-malloc-threshold=20 - - on the "configure" command. - -. PCRE has a counter that can be set to limit the amount of resources it uses. - If the limit is exceeded during a match, the match fails. The default is ten - million. You can change the default by setting, for example, - - --with-match-limit=500000 - - on the "configure" command. This is just the default; individual calls to - pcre_exec() can supply their own value. There is more discussion on the - pcreapi man page. - -. There is a separate counter that limits the depth of recursive function calls - during a matching process. This also has a default of ten million, which is - essentially "unlimited". 
You can change the default by setting, for example, - - --with-match-limit-recursion=500000 - - Recursive function calls use up the runtime stack; running out of stack can - cause programs to crash in strange ways. There is a discussion about stack - sizes in the pcrestack man page. - -. The default maximum compiled pattern size is around 64K. You can increase - this by adding --with-link-size=3 to the "configure" command. In the 8-bit - library, PCRE then uses three bytes instead of two for offsets to different - parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is - the same as --with-link-size=4, which (in both libraries) uses four-byte - offsets. Increasing the internal link size reduces performance. In the 32-bit - library, the only supported link size is 4. - -. You can build PCRE so that its internal match() function that is called from - pcre_exec() does not call itself recursively. Instead, it uses memory blocks - obtained from the heap via the special functions pcre_stack_malloc() and - pcre_stack_free() to save data that would otherwise be saved on the stack. To - build PCRE like this, use - - --disable-stack-for-recursion - - on the "configure" command. PCRE runs more slowly in this mode, but it may be - necessary in environments with limited stack sizes. This applies only to the - normal execution of the pcre_exec() function; if JIT support is being - successfully used, it is not relevant. Equally, it does not apply to - pcre_dfa_exec(), which does not use deeply nested recursion. There is a - discussion about stack sizes in the pcrestack man page. - -. For speed, PCRE uses four tables for manipulating and identifying characters - whose code point values are less than 256. By default, it uses a set of - tables for ASCII encoding that is part of the distribution. If you specify - - --enable-rebuild-chartables - - a program called dftables is compiled and run in the default C locale when - you obey "make". 
It builds a source file called pcre_chartables.c. If you do - not specify this option, pcre_chartables.c is created as a copy of - pcre_chartables.c.dist. See "Character tables" below for further information. - -. It is possible to compile PCRE for use on systems that use EBCDIC as their - character code (as opposed to ASCII/Unicode) by specifying - - --enable-ebcdic - - This automatically implies --enable-rebuild-chartables (see above). However, - when PCRE is built this way, it always operates in EBCDIC. It cannot support - both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25, - which specifies that the code value for the EBCDIC NL character is 0x25 - instead of the default 0x15. - -. In environments where valgrind is installed, if you specify - - --enable-valgrind - - PCRE will use valgrind annotations to mark certain memory regions as - unaddressable. This allows it to detect invalid memory accesses, and is - mostly useful for debugging PCRE itself. - -. In environments where the gcc compiler is used and lcov version 1.6 or above - is installed, if you specify - - --enable-coverage - - the build process implements a code coverage report for the test suite. The - report is generated by running "make coverage". If ccache is installed on - your system, it must be disabled when building PCRE for coverage reporting. - You can do this by setting the environment variable CCACHE_DISABLE=1 before - running "make" to build PCRE. - -. The pcregrep program currently supports only 8-bit data files, and so - requires the 8-bit PCRE library. It is possible to compile pcregrep to use - libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by - specifying one or both of - - --enable-pcregrep-libz - --enable-pcregrep-libbz2 - - Of course, the relevant libraries must be installed on your system. - -. 
The default size of internal buffer used by pcregrep can be set by, for - example: - - --with-pcregrep-bufsize=50K - - The default value is 20K. - -. It is possible to compile pcretest so that it links with the libreadline - or libedit libraries, by specifying, respectively, - - --enable-pcretest-libreadline or --enable-pcretest-libedit - - If this is done, when pcretest's input is from a terminal, it reads it using - the readline() function. This provides line-editing and history facilities. - Note that libreadline is GPL-licenced, so if you distribute a binary of - pcretest linked in this way, there may be licensing issues. These can be - avoided by linking with libedit (which has a BSD licence) instead. - - Enabling libreadline causes the -lreadline option to be added to the pcretest - build. In many operating environments with a system-installed readline - library this is sufficient. However, in some environments (e.g. if an - unmodified distribution version of readline is in use), it may be necessary - to specify something like LIBS="-lncurses" as well. This is because, to quote - the readline INSTALL, "Readline uses the termcap functions, but does not link - with the termcap or curses library itself, allowing applications which link - with readline the to choose an appropriate library." If you get error - messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto, - this is the problem, and linking with the ncurses library should fix it. - -The "configure" script builds the following files for the basic C library: - -. Makefile the makefile that builds the library -. config.h build-time configuration options for the library -. pcre.h the public PCRE header file -. pcre-config script that shows the building settings such as CFLAGS - that were set for "configure" -. libpcre.pc ) data for the pkg-config command -. libpcre16.pc ) -. libpcre32.pc ) -. libpcreposix.pc ) -. 
libtool script that builds shared and/or static libraries - -Versions of config.h and pcre.h are distributed in the PCRE tarballs under the -names config.h.generic and pcre.h.generic. These are provided for those who -have to build PCRE without using "configure" or CMake. If you use "configure" -or CMake, the .generic versions are not used. - -When building the 8-bit library, if a C++ compiler is found, the following -files are also built: - -. libpcrecpp.pc data for the pkg-config command -. pcrecpparg.h header file for calling PCRE via the C++ wrapper -. pcre_stringpiece.h header for the C++ "stringpiece" functions - -The "configure" script also creates config.status, which is an executable -script that can be run to recreate the configuration, and config.log, which -contains compiler output from tests that "configure" runs. - -Once "configure" has run, you can run "make". This builds the libraries -libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you -enabled JIT support with --enable-jit, a test program called pcre_jit_test is -built as well. - -If the 8-bit library is built, libpcreposix and the pcregrep command are also -built, and if a C++ compiler was found on your system, and you did not disable -it with --disable-cpp, "make" builds the C++ wrapper library, which is called -libpcrecpp, as well as some test programs called pcrecpp_unittest, -pcre_scanner_unittest, and pcre_stringpiece_unittest. - -The command "make check" runs all the appropriate tests. Details of the PCRE -tests are given below in a separate section of this document. - -You can use "make install" to install PCRE into live directories on your -system. 
The following are installed (file names are all relative to the -<prefix> that is set when "configure" is run): - - Commands (bin): - pcretest - pcregrep (if 8-bit support is enabled) - pcre-config - - Libraries (lib): - libpcre16 (if 16-bit support is enabled) - libpcre32 (if 32-bit support is enabled) - libpcre (if 8-bit support is enabled) - libpcreposix (if 8-bit support is enabled) - libpcrecpp (if 8-bit and C++ support is enabled) - - Configuration information (lib/pkgconfig): - libpcre16.pc - libpcre32.pc - libpcre.pc - libpcreposix.pc - libpcrecpp.pc (if C++ support is enabled) - - Header files (include): - pcre.h - pcreposix.h - pcre_scanner.h ) - pcre_stringpiece.h ) if C++ support is enabled - pcrecpp.h ) - pcrecpparg.h ) - - Man pages (share/man/man{1,3}): - pcregrep.1 - pcretest.1 - pcre-config.1 - pcre.3 - pcre*.3 (lots more pages, all starting "pcre") - - HTML documentation (share/doc/pcre/html): - index.html - *.html (lots more pages, hyperlinked from index.html) - - Text file documentation (share/doc/pcre): - AUTHORS - COPYING - ChangeLog - LICENCE - NEWS - README - pcre.txt (a concatenation of the man(3) pages) - pcretest.txt the pcretest man page - pcregrep.txt the pcregrep man page - pcre-config.txt the pcre-config man page - -If you want to remove PCRE from your system, you can run "make uninstall". -This removes all the files that "make install" installed. However, it does not -remove any directories, because these are often shared with other programs. - - -Retrieving configuration information ------------------------------------- - -Running "make install" installs the command pcre-config, which can be used to -recall information about the PCRE configuration and installation. For example: - - pcre-config --version - -prints the version number, and - - pcre-config --libs - -outputs information about where the library is installed. 
This command can be -included in makefiles for programs that use PCRE, saving the programmer from -having to remember too many details. - -The pkg-config command is another system for saving and retrieving information -about installed libraries. Instead of separate commands for each library, a -single command is used. For example: - - pkg-config --cflags pcre - -The data is held in *.pc files that are installed in a directory called -<prefix>/lib/pkgconfig. - - -Shared libraries ----------------- - -The default distribution builds PCRE as shared libraries and static libraries, -as long as the operating system supports shared libraries. Shared library -support relies on the "libtool" script which is built as part of the -"configure" process. - -The libtool script is used to compile and link both shared and static -libraries. They are placed in a subdirectory called .libs when they are newly -built. The programs pcretest and pcregrep are built to use these uninstalled -libraries (by means of wrapper scripts in the case of shared libraries). When -you use "make install" to install shared libraries, pcregrep and pcretest are -automatically re-built to use the newly installed shared libraries before being -installed themselves. However, the versions left in the build directory still -use the uninstalled libraries. - -To build PCRE using static libraries only you must use --disable-shared when -configuring it. For example: - -./configure --prefix=/usr/gnu --disable-shared - -Then run "make" in the usual way. Similarly, you can use --disable-static to -build only shared libraries. - - -Cross-compiling using autotools -------------------------------- - -You can specify CC and CFLAGS in the normal way to the "configure" command, in -order to cross-compile PCRE for some other host. 
However, you should NOT -specify --enable-rebuild-chartables, because if you do, the dftables.c source -file is compiled and run on the local host, in order to generate the inbuilt -character tables (the pcre_chartables.c file). This will probably not work, -because dftables.c needs to be compiled with the local compiler, not the cross -compiler. - -When --enable-rebuild-chartables is not specified, pcre_chartables.c is created -by making a copy of pcre_chartables.c.dist, which is a default set of tables -that assumes ASCII code. Cross-compiling with the default tables should not be -a problem. - -If you need to modify the character tables when cross-compiling, you should -move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and -run it on the local host to make a new version of pcre_chartables.c.dist. -Then when you cross-compile PCRE this new version of the tables will be used. - - -Using HP's ANSI C++ compiler (aCC) ----------------------------------- - -Unless C++ support is disabled by specifying the "--disable-cpp" option of the -"configure" script, you must include the "-AA" option in the CXXFLAGS -environment variable in order for the C++ components to compile correctly. - -Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby -needed libraries fail to get included when specifying the "-AA" compiler -option. 
If you experience unresolved symbols when linking the C++ programs, -use the workaround of specifying the following environment variable prior to -running the "configure" script: - - CXXLDFLAGS="-lstd_v2 -lCsup_v2" - - -Using Sun's compilers for Solaris ---------------------------------- - -A user reports that the following configurations work on Solaris 9 sparcv9 and -Solaris 9 x86 (32-bit): - - Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g" - Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g" - - -Using PCRE from MySQL ---------------------- - -On systems where both PCRE and MySQL are installed, it is possible to make use -of PCRE from within MySQL, as an alternative to the built-in pattern matching. -There is a web page that tells you how to do this: - - http://www.mysqludf.org/lib_mysqludf_preg/index.php - - -Making new tarballs -------------------- - -The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and -zip formats. The command "make distcheck" does the same, but then does a trial -build of the new distribution to ensure that it works. - -If you have modified any of the man page sources in the doc directory, you -should first run the PrepareRelease script before making a distribution. This -script creates the .txt and HTML forms of the documentation from the man pages. - - -Testing PCRE ------------- - -To test the basic PCRE library on a Unix-like system, run the RunTest script. -There is another script called RunGrepTest that tests the options of the -pcregrep command. If the C++ wrapper library is built, three test programs -called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest -are also built. When JIT support is enabled, another test program called -pcre_jit_test is built. - -Both the scripts and all the program tests are run if you obey "make check" or -"make test". For other environments, see the instructions in -NON-AUTOTOOLS-BUILD. 
- -The RunTest script runs the pcretest test program (which is documented in its -own man page) on each of the relevant testinput files in the testdata -directory, and compares the output with the contents of the corresponding -testoutput files. Some tests are relevant only when certain build-time options -were selected. For example, the tests for UTF-8/16/32 support are run only if ---enable-utf was used. RunTest outputs a comment when it skips a test. - -Many of the tests that are not skipped are run up to three times. The second -run forces pcre_study() to be called for all patterns except for a few in some -tests that are marked "never study" (see the pcretest program for how this is -done). If JIT support is available, the non-DFA tests are run a third time, -this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option. - -The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit -libraries that are enabled. If you want to run just one set of tests, call -RunTest with either the -8, -16 or -32 option. - -RunTest uses a file called testtry to hold the main output from pcretest. -Other files whose names begin with "test" are used as working files in some -tests. To run pcretest on just one or more specific test files, give their -numbers as arguments to RunTest, for example: - - RunTest 2 7 11 - -You can also call RunTest with the single argument "list" to cause it to output -a list of tests. - -The first test file can be fed directly into the perltest.pl script to check -that Perl gives the same results. The only difference you should see is in the -first few lines, where the Perl version is given instead of the PCRE version. - -The second set of tests check pcre_fullinfo(), pcre_study(), -pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error -detection, and run-time flags that are specific to PCRE, as well as the POSIX -wrapper API. 
It also uses the debugging flags to check some of the internals of -pcre_compile(). - -If you build PCRE with a locale setting that is not the standard C locale, the -character tables may be different (see next paragraph). In some cases, this may -cause failures in the second set of tests. For example, in a locale where the -isprint() function yields TRUE for characters in the range 128-255, the use of -[:isascii:] inside a character class defines a different set of characters, and -this shows up in this test as a difference in the compiled code, which is being -listed for checking. Where the comparison test output contains [\x00-\x7f] the -test will contain [\x00-\xff], and similarly in some other cases. This is not a -bug in PCRE. - -The third set of tests checks pcre_maketables(), the facility for building a -set of character tables for a specific locale and using them instead of the -default tables. The tests make use of the "fr_FR" (French) locale. Before -running the test, the script checks for the presence of this locale by running -the "locale" command. If that command fails, or if it doesn't include "fr_FR" -in the list of available locales, the third test cannot be run, and a comment -is output to say why. If running this test produces instances of the error - - ** Failed to set locale "fr_FR" - -in the comparison output, it means that locale is not available on your system, -despite being listed by "locale". This does not mean that PCRE is broken. - -[If you are trying to run this test on Windows, you may be able to get it to -work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use -RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses -Windows versions of test 2. More info on using RunTest.bat is included in the -document entitled NON-UNIX-USE.] - -The fourth and fifth tests check the UTF-8/16/32 support and error handling and -internal UTF features of PCRE that are not relevant to Perl, respectively. 
The -sixth and seventh tests do the same for Unicode character properties support. - -The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative -matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32 -mode with Unicode property support, respectively. - -The eleventh test checks some internal offsets and code size features; it is -run only when the default "link size" of 2 is set (in other cases the sizes -change) and when Unicode property support is enabled. - -The twelfth test is run only when JIT support is available, and the thirteenth -test is run only when JIT support is not available. They test some JIT-specific -features such as information output from pcretest about JIT compilation. - -The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and -the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode. -These are tests that generate different output in the two modes. They are for -general cases, UTF-8/16/32 support, and Unicode property support, respectively. - -The twentieth test is run only in 16/32-bit mode. It tests some specific -16/32-bit features of the DFA matching engine. - -The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the -link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns. - -The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for -general cases, and UTF-16 support, respectively. - -The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for -general cases, and UTF-32 support, respectively. - -Character tables ----------------- - -For speed, PCRE uses four tables for manipulating and identifying characters -whose code point values are less than 256. The final argument of the -pcre_compile() function is a pointer to a block of memory containing the -concatenated tables. 
A call to pcre_maketables() can be used to generate a set -of tables in the current locale. If the final argument for pcre_compile() is -passed as NULL, a set of default tables that is built into the binary is used. - -The source file called pcre_chartables.c contains the default set of tables. By -default, this is created as a copy of pcre_chartables.c.dist, which contains -tables for ASCII coding. However, if --enable-rebuild-chartables is specified -for ./configure, a different version of pcre_chartables.c is built by the -program dftables (compiled from dftables.c), which uses the ANSI C character -handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to -build the table sources. This means that the default C locale which is set for -your system will control the contents of these default tables. You can change -the default tables by editing pcre_chartables.c and then re-building PCRE. If -you do this, you should take care to ensure that the file does not get -automatically re-generated. The best way to do this is to move -pcre_chartables.c.dist out of the way and replace it with your customized -tables. - -When the dftables program is run as a result of --enable-rebuild-chartables, -it uses the default C locale that is set on your system. It does not pay -attention to the LC_xxx environment variables. In other words, it uses the -system's default locale rather than whatever the compiling user happens to have -set. If you really do want to build a source set of character tables in a -locale that is specified by the LC_xxx variables, you can run the dftables -program by hand with the -L option. For example: - - ./dftables -L pcre_chartables.c.special - -The first two 256-byte tables provide lower casing and case flipping functions, -respectively. The next table consists of three 32-byte bit maps which identify -digits, "word" characters, and white space, respectively. 
These are used when -building 32-byte bit maps that represent character classes for code points less -than 256. - -The final 256-byte table has bits indicating various character types, as -follows: - - 1 white space character - 2 letter - 4 decimal digit - 8 hexadecimal digit - 16 alphanumeric or '_' - 128 regular expression metacharacter or binary zero - -You should not alter the set of characters that contain the 128 bit, as that -will cause PCRE to malfunction. - - -File manifest -------------- - -The distribution should contain the files listed below. Where a file name is -given as pcre[16|32]_xxx it means that there are three files, one with the name -pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx. - -(A) Source files of the PCRE library functions and their headers: - - dftables.c auxiliary program for building pcre_chartables.c - when --enable-rebuild-chartables is specified - - pcre_chartables.c.dist a default set of character tables that assume ASCII - coding; used, unless --enable-rebuild-chartables is - specified, by copying to pcre[16]_chartables.c - - pcreposix.c ) - pcre[16|32]_byte_order.c ) - pcre[16|32]_compile.c ) - pcre[16|32]_config.c ) - pcre[16|32]_dfa_exec.c ) - pcre[16|32]_exec.c ) - pcre[16|32]_fullinfo.c ) - pcre[16|32]_get.c ) sources for the functions in the library, - pcre[16|32]_globals.c ) and some internal functions that they use - pcre[16|32]_jit_compile.c ) - pcre[16|32]_maketables.c ) - pcre[16|32]_newline.c ) - pcre[16|32]_refcount.c ) - pcre[16|32]_string_utils.c ) - pcre[16|32]_study.c ) - pcre[16|32]_tables.c ) - pcre[16|32]_ucd.c ) - pcre[16|32]_version.c ) - pcre[16|32]_xclass.c ) - pcre_ord2utf8.c ) - pcre_valid_utf8.c ) - pcre16_ord2utf16.c ) - pcre16_utf16_utils.c ) - pcre16_valid_utf16.c ) - pcre32_utf32_utils.c ) - pcre32_valid_utf32.c ) - - pcre[16|32]_printint.c ) debugging function that is used by pcretest, - ) and can also be #included in pcre_compile() - - pcre.h.in template for pcre.h 
when built by "configure" - pcreposix.h header for the external POSIX wrapper API - pcre_internal.h header for internal use - sljit/* 16 files that make up the JIT compiler - ucp.h header for Unicode property handling - - config.h.in template for config.h, which is built by "configure" - - pcrecpp.h public header file for the C++ wrapper - pcrecpparg.h.in template for another C++ header file - pcre_scanner.h public header file for C++ scanner functions - pcrecpp.cc ) - pcre_scanner.cc ) source for the C++ wrapper library - - pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the - C++ stringpiece functions - pcre_stringpiece.cc source for the C++ stringpiece functions - -(B) Source files for programs that use PCRE: - - pcredemo.c simple demonstration of coding calls to PCRE - pcregrep.c source of a grep utility that uses PCRE - pcretest.c comprehensive test program - -(C) Auxiliary files: - - 132html script to turn "man" pages into HTML - AUTHORS information about the author of PCRE - ChangeLog log of changes to the code - CleanTxt script to clean nroff output for txt man pages - Detrail script to remove trailing spaces - HACKING some notes about the internals of PCRE - INSTALL generic installation instructions - LICENCE conditions for the use of PCRE - COPYING the same, using GNU's standard name - Makefile.in ) template for Unix Makefile, which is built by - ) "configure" - Makefile.am ) the automake input that was used to create - ) Makefile.in - NEWS important changes in this release - NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD - NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools - PrepareRelease script to make preparations for "make dist" - README this file - RunTest a Unix shell script for running tests - RunGrepTest a Unix shell script for pcregrep tests - aclocal.m4 m4 macros (generated by "aclocal") - config.guess ) files used by libtool, - config.sub ) used only when building a shared library - configure a 
configuring shell script (built by autoconf) - configure.ac ) the autoconf input that was used to build - ) "configure" and config.h - depcomp ) script to find program dependencies, generated by - ) automake - doc/*.3 man page sources for PCRE - doc/*.1 man page sources for pcregrep and pcretest - doc/index.html.src the base HTML page - doc/html/* HTML documentation - doc/pcre.txt plain text version of the man pages - doc/pcretest.txt plain text documentation of test program - doc/perltest.txt plain text documentation of Perl test program - install-sh a shell script for installing files - libpcre16.pc.in template for libpcre16.pc for pkg-config - libpcre32.pc.in template for libpcre32.pc for pkg-config - libpcre.pc.in template for libpcre.pc for pkg-config - libpcreposix.pc.in template for libpcreposix.pc for pkg-config - libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config - ltmain.sh file used to build a libtool script - missing ) common stub for a few missing GNU programs while - ) installing, generated by automake - mkinstalldirs script for making install directories - perltest.pl Perl test program - pcre-config.in source of script which retains PCRE information - pcre_jit_test.c test program for the JIT compiler - pcrecpp_unittest.cc ) - pcre_scanner_unittest.cc ) test programs for the C++ wrapper - pcre_stringpiece_unittest.cc ) - testdata/testinput* test data for main library tests - testdata/testoutput* expected test results - testdata/grep* input and output for pcregrep tests - testdata/* other supporting test files - -(D) Auxiliary files for cmake support - - cmake/COPYING-CMAKE-SCRIPTS - cmake/FindPackageHandleStandardArgs.cmake - cmake/FindEditline.cmake - cmake/FindReadline.cmake - CMakeLists.txt - config-cmake.h.in - -(E) Auxiliary files for VPASCAL - - makevp.bat - makevp_c.txt - makevp_l.txt - pcregexp.pas - -(F) Auxiliary files for building PCRE "by hand" - - pcre.h.generic ) a version of the public PCRE header file - ) for use in 
non-"configure" environments - config.h.generic ) a version of config.h for use in non-"configure" - ) environments - -(F) Miscellaneous - - RunTest.bat a script for running tests under Windows - -Philip Hazel -Email local part: ph10 -Email domain: cam.ac.uk -Last updated: 27 October 2012 diff --git a/deps/libmagic/pcre/config/freebsd/config.h b/deps/libmagic/pcre/config/freebsd/config.h deleted file mode 100644 index 4b19479..0000000 --- a/deps/libmagic/pcre/config/freebsd/config.h +++ /dev/null @@ -1,344 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. - -In environments that support the facilities, config.h.in is converted by -"configure", or config-cmake.h.in is converted by CMake, into config.h. If you -are going to build PCRE "by hand" without using "configure" or CMake, you -should copy the distributed config.h.generic to config.h, and then edit the -macro definitions to be the way you need them. You must then add --DHAVE_CONFIG_H to all of your compile commands, so that config.h is included -at the start of every source. - -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. - -PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if -HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set -them both to 0; an emulation function will be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE at runtime. 
*/ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. You must also edit the - NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). - On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is - automatically adjusted. When EBCDIC is set, PCRE assumes that all input - strings are in EBCDIC. If you do not define this macro, PCRE will assume - input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build - a version of PCRE that supports both EBCDIC and UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. The value must also be set in the - NEWLINE macro below. On systems that can use "configure" or CMake to set - EBCDIC_NL25, the adjustment of NEWLINE is automatic. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_BZLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* Have PTHREAD_PRIO_INHERIT. */ -/* #undef HAVE_PTHREAD_PRIO_INHERIT */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_HISTORY_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have `strtoimax'. */ -/* #undef HAVE_STRTOIMAX */ - -/* Define to 1 if you have `strtoll'. */ -/* #undef HAVE_STRTOLL */ - -/* Define to 1 if you have `strtoq'. */ -#define HAVE_STRTOQ 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned long long'. */ -#define HAVE_UNSIGNED_LONG_LONG 1 - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_ZLIB_H 1 - -/* Define to 1 if you have `_strtoi64'. */ -/* #undef HAVE__STRTOI64 */ - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. 
The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows - for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. 
*/ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE determines the default newline character sequence. - PCRE client programs can override this by selecting other values at run - time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 - (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or - 3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and - 0x25) that are used as the NL line terminator that is equivalent to ASCII - LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), - or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using pcre_recurse_malloc() to obtain memory from the heap. For more - detail, see the comments and other stuff just above the match() function. - */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.32" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "8.32" - -/* to make a symbol visible */ -#define PCRECPP_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRECPP_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* The value of PCREGREP_BUFSIZE determines the size of buffer used by - pcregrep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcregrep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCREGREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DATA_DEFN __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. It does not appear on functions that are "external" - in the C sense, but which are internal to the library. 
*/ -#define PCRE_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -#define PCRE_STATIC 1 - -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer - used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to allow pcretest and pcregrep to be linked with gcov, so that they - are able to generate code coverage reports. */ -/* #undef SUPPORT_GCOV */ - -/* Define to any value to enable support for Just-In-Time compiling. */ -#define SUPPORT_JIT /**/ - -/* Define to any value to allow pcregrep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -/* #undef SUPPORT_LIBBZ2 */ - -/* Define to any value to allow pcretest to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcretest to be linked with libreadline. */ -/* #undef SUPPORT_LIBREADLINE */ - -/* Define to any value to allow pcregrep to be linked with libz, so that it is - able to handle .gz files. */ -/* #undef SUPPORT_LIBZ */ - -/* Define to any value to enable the 16 bit PCRE library. */ -/* #undef SUPPORT_PCRE16 */ - -/* Define to any value to enable the 32 bit PCRE library. */ -/* #undef SUPPORT_PCRE32 */ - -/* Define to any value to enable the 8 bit PCRE library. 
*/ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable JIT support in pcregrep. */ -#define SUPPORT_PCREGREP_JIT /**/ - -/* Define to any value to enable support for Unicode properties. */ -#define SUPPORT_UCP /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Valgrind support to find invalid memory reads. */ -/* #undef SUPPORT_VALGRIND */ - -/* Version number of package */ -#define VERSION "8.32" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff --git a/deps/libmagic/pcre/config/linux/config.h b/deps/libmagic/pcre/config/linux/config.h deleted file mode 100644 index 4b19479..0000000 --- a/deps/libmagic/pcre/config/linux/config.h +++ /dev/null @@ -1,344 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. - -In environments that support the facilities, config.h.in is converted by -"configure", or config-cmake.h.in is converted by CMake, into config.h. If you -are going to build PCRE "by hand" without using "configure" or CMake, you -should copy the distributed config.h.generic to config.h, and then edit the -macro definitions to be the way you need them. You must then add --DHAVE_CONFIG_H to all of your compile commands, so that config.h is included -at the start of every source. 
- -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. - -PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if -HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set -them both to 0; an emulation function will be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE at runtime. */ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. You must also edit the - NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). - On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is - automatically adjusted. When EBCDIC is set, PCRE assumes that all input - strings are in EBCDIC. If you do not define this macro, PCRE will assume - input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build - a version of PCRE that supports both EBCDIC and UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. The value must also be set in the - NEWLINE macro below. On systems that can use "configure" or CMake to set - EBCDIC_NL25, the adjustment of NEWLINE is automatic. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_BZLIB_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* Have PTHREAD_PRIO_INHERIT. */ -/* #undef HAVE_PTHREAD_PRIO_INHERIT */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_HISTORY_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have `strtoimax'. */ -/* #undef HAVE_STRTOIMAX */ - -/* Define to 1 if you have `strtoll'. */ -/* #undef HAVE_STRTOLL */ - -/* Define to 1 if you have `strtoq'. */ -#define HAVE_STRTOQ 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned long long'. */ -#define HAVE_UNSIGNED_LONG_LONG 1 - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_ZLIB_H 1 - -/* Define to 1 if you have `_strtoi64'. */ -/* #undef HAVE__STRTOI64 */ - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows - for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. 
There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE determines the default newline character sequence. - PCRE client programs can override this by selecting other values at run - time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 - (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or - 3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and - 0x25) that are used as the NL line terminator that is equivalent to ASCII - LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), - or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using pcre_recurse_malloc() to obtain memory from the heap. For more - detail, see the comments and other stuff just above the match() function. - */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. 
*/ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.32" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "8.32" - -/* to make a symbol visible */ -#define PCRECPP_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRECPP_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* The value of PCREGREP_BUFSIZE determines the size of buffer used by - pcregrep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcregrep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCREGREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DATA_DEFN __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. It does not appear on functions that are "external" - in the C sense, but which are internal to the library. 
*/ -#define PCRE_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -#define PCRE_STATIC 1 - -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer - used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to allow pcretest and pcregrep to be linked with gcov, so that they - are able to generate code coverage reports. */ -/* #undef SUPPORT_GCOV */ - -/* Define to any value to enable support for Just-In-Time compiling. */ -#define SUPPORT_JIT /**/ - -/* Define to any value to allow pcregrep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -/* #undef SUPPORT_LIBBZ2 */ - -/* Define to any value to allow pcretest to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcretest to be linked with libreadline. */ -/* #undef SUPPORT_LIBREADLINE */ - -/* Define to any value to allow pcregrep to be linked with libz, so that it is - able to handle .gz files. */ -/* #undef SUPPORT_LIBZ */ - -/* Define to any value to enable the 16 bit PCRE library. */ -/* #undef SUPPORT_PCRE16 */ - -/* Define to any value to enable the 32 bit PCRE library. */ -/* #undef SUPPORT_PCRE32 */ - -/* Define to any value to enable the 8 bit PCRE library. 
*/ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable JIT support in pcregrep. */ -#define SUPPORT_PCREGREP_JIT /**/ - -/* Define to any value to enable support for Unicode properties. */ -#define SUPPORT_UCP /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Valgrind support to find invalid memory reads. */ -/* #undef SUPPORT_VALGRIND */ - -/* Version number of package */ -#define VERSION "8.32" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff --git a/deps/libmagic/pcre/config/mac/config.h b/deps/libmagic/pcre/config/mac/config.h deleted file mode 100644 index f9c97fd..0000000 --- a/deps/libmagic/pcre/config/mac/config.h +++ /dev/null @@ -1,344 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. - -In environments that support the facilities, config.h.in is converted by -"configure", or config-cmake.h.in is converted by CMake, into config.h. If you -are going to build PCRE "by hand" without using "configure" or CMake, you -should copy the distributed config.h.generic to config.h, and then edit the -macro definitions to be the way you need them. You must then add --DHAVE_CONFIG_H to all of your compile commands, so that config.h is included -at the start of every source. 
- -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. - -PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if -HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set -them both to 0; an emulation function will be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE at runtime. */ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. You must also edit the - NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). - On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is - automatically adjusted. When EBCDIC is set, PCRE assumes that all input - strings are in EBCDIC. If you do not define this macro, PCRE will assume - input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build - a version of PCRE that supports both EBCDIC and UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. The value must also be set in the - NEWLINE macro below. On systems that can use "configure" or CMake to set - EBCDIC_NL25, the adjustment of NEWLINE is automatic. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_BZLIB_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ -/* #undef HAVE_LONG_LONG */ - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* Have PTHREAD_PRIO_INHERIT. */ -#define HAVE_PTHREAD_PRIO_INHERIT 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_HISTORY_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_STRING */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have `strtoimax'. */ -/* #undef HAVE_STRTOIMAX */ - -/* Define to 1 if you have `strtoll'. */ -/* #undef HAVE_STRTOLL */ - -/* Define to 1 if you have `strtoq'. */ -/* #undef HAVE_STRTOQ */ - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned long long'. */ -/* #undef HAVE_UNSIGNED_LONG_LONG */ - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_ZLIB_H 1 - -/* Define to 1 if you have `_strtoi64'. */ -/* #undef HAVE__STRTOI64 */ - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows - for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. 
There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE determines the default newline character sequence. - PCRE client programs can override this by selecting other values at run - time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 - (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or - 3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and - 0x25) that are used as the NL line terminator that is equivalent to ASCII - LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), - or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using pcre_recurse_malloc() to obtain memory from the heap. For more - detail, see the comments and other stuff just above the match() function. - */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. 
*/ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.32" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "8.32" - -/* to make a symbol visible */ -#define PCRECPP_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRECPP_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* The value of PCREGREP_BUFSIZE determines the size of buffer used by - pcregrep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcregrep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCREGREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DATA_DEFN __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. It does not appear on functions that are "external" - in the C sense, but which are internal to the library. 
*/ -#define PCRE_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -#define PCRE_STATIC 1 - -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer - used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to allow pcretest and pcregrep to be linked with gcov, so that they - are able to generate code coverage reports. */ -/* #undef SUPPORT_GCOV */ - -/* Define to any value to enable support for Just-In-Time compiling. */ -#define SUPPORT_JIT /**/ - -/* Define to any value to allow pcregrep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -/* #undef SUPPORT_LIBBZ2 */ - -/* Define to any value to allow pcretest to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcretest to be linked with libreadline. */ -/* #undef SUPPORT_LIBREADLINE */ - -/* Define to any value to allow pcregrep to be linked with libz, so that it is - able to handle .gz files. */ -/* #undef SUPPORT_LIBZ */ - -/* Define to any value to enable the 16 bit PCRE library. */ -/* #undef SUPPORT_PCRE16 */ - -/* Define to any value to enable the 32 bit PCRE library. */ -/* #undef SUPPORT_PCRE32 */ - -/* Define to any value to enable the 8 bit PCRE library. 
*/ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable JIT support in pcregrep. */ -#define SUPPORT_PCREGREP_JIT /**/ - -/* Define to any value to enable support for Unicode properties. */ -#define SUPPORT_UCP /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Valgrind support to find invalid memory reads. */ -/* #undef SUPPORT_VALGRIND */ - -/* Version number of package */ -#define VERSION "8.32" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff --git a/deps/libmagic/pcre/config/openbsd/config.h b/deps/libmagic/pcre/config/openbsd/config.h deleted file mode 100644 index 15e4ff7..0000000 --- a/deps/libmagic/pcre/config/openbsd/config.h +++ /dev/null @@ -1,344 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. - -In environments that support the facilities, config.h.in is converted by -"configure", or config-cmake.h.in is converted by CMake, into config.h. If you -are going to build PCRE "by hand" without using "configure" or CMake, you -should copy the distributed config.h.generic to config.h, and then edit the -macro definitions to be the way you need them. 
You must then add --DHAVE_CONFIG_H to all of your compile commands, so that config.h is included -at the start of every source. - -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. - -PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if -HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set -them both to 0; an emulation function will be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE at runtime. */ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. You must also edit the - NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). - On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is - automatically adjusted. When EBCDIC is set, PCRE assumes that all input - strings are in EBCDIC. If you do not define this macro, PCRE will assume - input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build - a version of PCRE that supports both EBCDIC and UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. The value must also be set in the - NEWLINE macro below. On systems that can use "configure" or CMake to set - EBCDIC_NL25, the adjustment of NEWLINE is automatic. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. 
*/ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_BZLIB_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* Have PTHREAD_PRIO_INHERIT. */ -#define HAVE_PTHREAD_PRIO_INHERIT 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_HISTORY_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have `strtoimax'. */ -/* #undef HAVE_STRTOIMAX */ - -/* Define to 1 if you have `strtoll'. */ -/* #undef HAVE_STRTOLL */ - -/* Define to 1 if you have `strtoq'. */ -#define HAVE_STRTOQ 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned long long'. */ -#define HAVE_UNSIGNED_LONG_LONG 1 - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_ZLIB_H 1 - -/* Define to 1 if you have `_strtoi64'. */ -/* #undef HAVE__STRTOI64 */ - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows - for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). 
To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE determines the default newline character sequence. - PCRE client programs can override this by selecting other values at run - time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 - (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or - 3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and - 0x25) that are used as the NL line terminator that is equivalent to ASCII - LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), - or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using pcre_recurse_malloc() to obtain memory from the heap. For more - detail, see the comments and other stuff just above the match() function. - */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. 
*/ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.32" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "8.32" - -/* to make a symbol visible */ -#define PCRECPP_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRECPP_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* The value of PCREGREP_BUFSIZE determines the size of buffer used by - pcregrep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcregrep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCREGREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DATA_DEFN __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. 
It does not appear on functions that are "external" - in the C sense, but which are internal to the library. */ -#define PCRE_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -#define PCRE_STATIC 1 - -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer - used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to allow pcretest and pcregrep to be linked with gcov, so that they - are able to generate code coverage reports. */ -/* #undef SUPPORT_GCOV */ - -/* Define to any value to enable support for Just-In-Time compiling. */ -#define SUPPORT_JIT /**/ - -/* Define to any value to allow pcregrep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -/* #undef SUPPORT_LIBBZ2 */ - -/* Define to any value to allow pcretest to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcretest to be linked with libreadline. */ -/* #undef SUPPORT_LIBREADLINE */ - -/* Define to any value to allow pcregrep to be linked with libz, so that it is - able to handle .gz files. */ -/* #undef SUPPORT_LIBZ */ - -/* Define to any value to enable the 16 bit PCRE library. */ -/* #undef SUPPORT_PCRE16 */ - -/* Define to any value to enable the 32 bit PCRE library. 
*/ -/* #undef SUPPORT_PCRE32 */ - -/* Define to any value to enable the 8 bit PCRE library. */ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable JIT support in pcregrep. */ -#define SUPPORT_PCREGREP_JIT /**/ - -/* Define to any value to enable support for Unicode properties. */ -#define SUPPORT_UCP /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Valgrind support to find invalid memory reads. */ -/* #undef SUPPORT_VALGRIND */ - -/* Version number of package */ -#define VERSION "8.32" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff --git a/deps/libmagic/pcre/config/sunos/config.h b/deps/libmagic/pcre/config/sunos/config.h deleted file mode 100644 index 4fdca2f..0000000 --- a/deps/libmagic/pcre/config/sunos/config.h +++ /dev/null @@ -1,344 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. - -In environments that support the facilities, config.h.in is converted by -"configure", or config-cmake.h.in is converted by CMake, into config.h. If you -are going to build PCRE "by hand" without using "configure" or CMake, you -should copy the distributed config.h.generic to config.h, and then edit the -macro definitions to be the way you need them. 
You must then add --DHAVE_CONFIG_H to all of your compile commands, so that config.h is included -at the start of every source. - -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. - -PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if -HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set -them both to 0; an emulation function will be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE at runtime. */ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. You must also edit the - NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). - On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is - automatically adjusted. When EBCDIC is set, PCRE assumes that all input - strings are in EBCDIC. If you do not define this macro, PCRE will assume - input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build - a version of PCRE that supports both EBCDIC and UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. The value must also be set in the - NEWLINE macro below. On systems that can use "configure" or CMake to set - EBCDIC_NL25, the adjustment of NEWLINE is automatic. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. 
*/ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_BZLIB_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -#define HAVE_PTHREAD 1 - -/* Have PTHREAD_PRIO_INHERIT. */ -#define HAVE_PTHREAD_PRIO_INHERIT 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_HISTORY_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_READLINE_READLINE_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have `strtoimax'. */ -/* #undef HAVE_STRTOIMAX */ - -/* Define to 1 if you have `strtoll'. */ -#define HAVE_STRTOLL 1 - -/* Define to 1 if you have `strtoq'. */ -/* #undef HAVE_STRTOQ */ - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_TYPE_TRAITS_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `unsigned long long'. */ -#define HAVE_UNSIGNED_LONG_LONG 1 - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_ZLIB_H */ - -/* Define to 1 if you have `_strtoi64'. */ -/* #undef HAVE__STRTOI64 */ - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows - for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre_exec(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). 
To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE determines the default newline character sequence. - PCRE client programs can override this by selecting other values at run - time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 - (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or - 3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and - 0x25) that are used as the NL line terminator that is equivalent to ASCII - LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY), - or -2 (ANYCRLF). */ -#define NEWLINE 10 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* PCRE uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using pcre_recurse_malloc() to obtain memory from the heap. For more - detail, see the comments and other stuff just above the match() function. - */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre" - -/* Define to the address where bug reports for this package should be sent. 
*/ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "PCRE" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.32" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "8.32" - -/* to make a symbol visible */ -#define PCRECPP_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRECPP_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* The value of PCREGREP_BUFSIZE determines the size of buffer used by - pcregrep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcregrep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCREGREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCREPOSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DATA_DEFN __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. 
It does not appear on functions that are "external" - in the C sense, but which are internal to the library. */ -#define PCRE_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -/* #undef PCRE_STATIC */ - -/* When calling PCRE via the POSIX interface, additional working storage is - required for holding the pointers to capturing substrings because PCRE - requires three integers per substring, whereas the POSIX interface provides - only two. If the number of expected substrings is small, the wrapper - function uses space on the stack, because this is faster than using - malloc() for each call. The threshold above which the stack is no longer - used is defined by POSIX_MALLOC_THRESHOLD. */ -#define POSIX_MALLOC_THRESHOLD 10 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to allow pcretest and pcregrep to be linked with gcov, so that they - are able to generate code coverage reports. */ -/* #undef SUPPORT_GCOV */ - -/* Define to any value to enable support for Just-In-Time compiling. */ -#define SUPPORT_JIT /**/ - -/* Define to any value to allow pcregrep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -/* #undef SUPPORT_LIBBZ2 */ - -/* Define to any value to allow pcretest to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcretest to be linked with libreadline. */ -/* #undef SUPPORT_LIBREADLINE */ - -/* Define to any value to allow pcregrep to be linked with libz, so that it is - able to handle .gz files. */ -/* #undef SUPPORT_LIBZ */ - -/* Define to any value to enable the 16 bit PCRE library. */ -/* #undef SUPPORT_PCRE16 */ - -/* Define to any value to enable the 32 bit PCRE library. 
*/ -/* #undef SUPPORT_PCRE32 */ - -/* Define to any value to enable the 8 bit PCRE library. */ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable JIT support in pcregrep. */ -#define SUPPORT_PCREGREP_JIT /**/ - -/* Define to any value to enable support for Unicode properties. */ -#define SUPPORT_UCP /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Valgrind support to find invalid memory reads. */ -/* #undef SUPPORT_VALGRIND */ - -/* Version number of package */ -#define VERSION "8.32" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if does not define. 
*/ -/* #undef size_t */ diff --git a/deps/libmagic/pcre/config/win/config.h b/deps/libmagic/pcre/config/win/config.h deleted file mode 100644 index b30fc8a..0000000 --- a/deps/libmagic/pcre/config/win/config.h +++ /dev/null @@ -1,54 +0,0 @@ -/* config.h for CMake builds */ - -/* #undef HAVE_DIRENT_H */ -#define HAVE_SYS_STAT_H 1 -#define HAVE_SYS_TYPES_H 1 -/* #undef HAVE_UNISTD_H */ -#define HAVE_WINDOWS_H 1 - -/* #undef HAVE_TYPE_TRAITS_H */ -/* #undef HAVE_BITS_TYPE_TRAITS_H */ - -/* #undef HAVE_BCOPY */ -#define HAVE_MEMMOVE 1 -#define HAVE_STRERROR 1 -/* #undef HAVE_STRTOLL */ -/* #undef HAVE_STRTOQ */ -#define HAVE__STRTOI64 1 - -#define PCRE_STATIC 1 - -#define SUPPORT_PCRE8 1 -/* #undef SUPPORT_PCRE16 */ -/* #undef SUPPORT_PCRE32 */ -#define SUPPORT_JIT 1 -/* #undef SUPPORT_PCREGREP_JIT */ -#define SUPPORT_UTF 1 -#define SUPPORT_UCP 1 -/* #undef EBCDIC */ -/* #undef EBCDIC_NL25 */ -/* #undef BSR_ANYCRLF */ -/* #undef NO_RECURSE */ - -#define HAVE_LONG_LONG 1 -#define HAVE_UNSIGNED_LONG_LONG 1 - -/* #undef SUPPORT_LIBBZ2 */ -/* #undef SUPPORT_LIBZ */ -/* #undef SUPPORT_LIBEDIT */ -/* #undef SUPPORT_LIBREADLINE */ - -/* #undef SUPPORT_VALGRIND */ -/* #undef SUPPORT_GCOV */ - -#define NEWLINE 10 -#define POSIX_MALLOC_THRESHOLD 10 -#define LINK_SIZE 2 -#define MATCH_LIMIT 10000000 -#define MATCH_LIMIT_RECURSION MATCH_LIMIT -#define PCREGREP_BUFSIZE 20480 - -#define MAX_NAME_SIZE 32 -#define MAX_NAME_COUNT 10000 - -/* end config.h for CMake builds */ diff --git a/deps/libmagic/pcre/pcre.gyp b/deps/libmagic/pcre/pcre.gyp deleted file mode 100644 index cd3737a..0000000 --- a/deps/libmagic/pcre/pcre.gyp +++ /dev/null @@ -1,84 +0,0 @@ -{ - 'target_defaults': { - 'include_dirs': [ - '.', - ], - 'defines': [ - 'LINK_SIZE=2', - 'PCRE_STATIC', - 'HAVE_CONFIG_H', - '_CRT_SECURE_NO_WARNINGS', - ], - }, - 'targets': [ - { - 'target_name': 'libpcre', - 'type': 'static_library', - 'sources': [ - # C sources - 'pcre_byte_order.c', - 'pcre_chartables.c', - 'pcre_compile.c', - 
'pcre_config.c', - 'pcre_dfa_exec.c', - 'pcre_exec.c', - 'pcre_fullinfo.c', - 'pcre_get.c', - 'pcre_globals.c', - 'pcre_jit_compile.c', - 'pcre_maketables.c', - 'pcre_newline.c', - 'pcre_ord2utf8.c', - 'pcre_refcount.c', - 'pcre_string_utils.c', - 'pcre_study.c', - 'pcre_tables.c', - 'pcre_ucd.c', - 'pcre_valid_utf8.c', - 'pcre_version.c', - 'pcre_xclass.c', - 'pcreposix.c', - ], - 'cflags!': [ '-O2' ], - 'cflags+': [ '-O3' ], - 'cflags_cc!': [ '-O2' ], - 'cflags_cc+': [ '-O3' ], - 'cflags_c!': [ '-O2' ], - 'cflags_c+': [ '-O3' ], - 'msvs_settings': { - 'VCCLCompilerTool': { - 'AdditionalOptions': ['/wd4018', '/wd4996'], - }, - }, - 'conditions': [ - [ 'OS=="win"', { - 'include_dirs': [ 'config/win' ], - }], - [ 'OS=="linux"', { - 'include_dirs': [ 'config/linux' ], - }], - [ 'OS=="mac"', { - 'include_dirs': [ 'config/mac' ], - }], - [ 'OS=="freebsd"', { - 'include_dirs': [ 'config/freebsd' ], - }], - [ 'OS=="openbsd"', { - 'include_dirs': [ 'config/openbsd' ], - }], - [ 'OS=="solaris"', { - 'include_dirs': [ 'config/sunos' ], - }], - ], - 'all_dependent_settings': { - 'defines': [ - 'LINK_SIZE=2', - 'PCRE_STATIC', - ], - 'include_dirs': [ - '.', - ], - }, - }, - ] -} \ No newline at end of file diff --git a/deps/libmagic/pcre/pcre.h b/deps/libmagic/pcre/pcre.h deleted file mode 100644 index a6aa4e9..0000000 --- a/deps/libmagic/pcre/pcre.h +++ /dev/null @@ -1,653 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This is the public header file for the PCRE library, to be #included by -applications that call the PCRE functions. 
- - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -#ifndef _PCRE_H -#define _PCRE_H - -/* The current PCRE version information. */ - -#define PCRE_MAJOR 8 -#define PCRE_MINOR 32 -#define PCRE_PRERELEASE -#define PCRE_DATE 2012-11-30 - -/* When an application links to a PCRE DLL in Windows, the symbols that are -imported have to be identified as such. 
When building PCRE, the appropriate -export setting is defined in pcre_internal.h, which includes this file. So we -don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */ - -#if defined(_WIN32) && !defined(PCRE_STATIC) -# ifndef PCRE_EXP_DECL -# define PCRE_EXP_DECL extern __declspec(dllimport) -# endif -# ifdef __cplusplus -# ifndef PCRECPP_EXP_DECL -# define PCRECPP_EXP_DECL extern __declspec(dllimport) -# endif -# ifndef PCRECPP_EXP_DEFN -# define PCRECPP_EXP_DEFN __declspec(dllimport) -# endif -# endif -#endif - -/* By default, we use the standard "extern" declarations. */ - -#ifndef PCRE_EXP_DECL -# ifdef __cplusplus -# define PCRE_EXP_DECL extern "C" -# else -# define PCRE_EXP_DECL extern -# endif -#endif - -#ifdef __cplusplus -# ifndef PCRECPP_EXP_DECL -# define PCRECPP_EXP_DECL extern -# endif -# ifndef PCRECPP_EXP_DEFN -# define PCRECPP_EXP_DEFN -# endif -#endif - -/* Have to include stdlib.h in order to ensure that size_t is defined; -it is needed here for malloc. */ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Public options. Some are compile-time only, some are run-time only, and some -are both, so we keep them all distinct. However, almost all the bits in the -options word are now used. In the long run, we may have to re-use some of the -compile-time only bits for runtime options, or vice versa. Any of the -compile-time options may be inspected during studying (and therefore JIT -compiling). - -Some options for pcre_compile() change its behaviour but do not affect the -behaviour of the execution functions. Other options are passed through to the -execution functions and affect their behaviour, with or without affecting the -behaviour of pcre_compile(). 
- -Options that can be passed to pcre_compile() are tagged Cx below, with these -variants: - -C1 Affects compile only -C2 Does not affect compile; affects exec, dfa_exec -C3 Affects compile, exec, dfa_exec -C4 Affects compile, exec, dfa_exec, study -C5 Affects compile, exec, study - -Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with -E and D, respectively. They take precedence over C3, C4, and C5 settings passed -from pcre_compile(). Those that are compatible with JIT execution are flagged -with J. */ - -#define PCRE_CASELESS 0x00000001 /* C1 */ -#define PCRE_MULTILINE 0x00000002 /* C1 */ -#define PCRE_DOTALL 0x00000004 /* C1 */ -#define PCRE_EXTENDED 0x00000008 /* C1 */ -#define PCRE_ANCHORED 0x00000010 /* C4 E D */ -#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */ -#define PCRE_EXTRA 0x00000040 /* C1 */ -#define PCRE_NOTBOL 0x00000080 /* E D J */ -#define PCRE_NOTEOL 0x00000100 /* E D J */ -#define PCRE_UNGREEDY 0x00000200 /* C1 */ -#define PCRE_NOTEMPTY 0x00000400 /* E D J */ -#define PCRE_UTF8 0x00000800 /* C4 ) */ -#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */ -#define PCRE_UTF32 0x00000800 /* C4 ) */ -#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */ -#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */ -#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */ -#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */ -#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */ -#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */ -#define PCRE_PARTIAL 0x00008000 /* E D J ) */ -#define PCRE_DFA_SHORTEST 0x00010000 /* D */ -#define PCRE_DFA_RESTART 0x00020000 /* D */ -#define PCRE_FIRSTLINE 0x00040000 /* C3 */ -#define PCRE_DUPNAMES 0x00080000 /* C1 */ -#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */ -#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */ -#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */ -#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */ -#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */ -#define PCRE_BSR_ANYCRLF 
0x00800000 /* C3 E D */ -#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */ -#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */ -#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */ -#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */ -#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */ -#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */ -#define PCRE_UCP 0x20000000 /* C3 */ - -/* Exec-time and get/set-time error codes */ - -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_OPCODE (-5) -#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */ -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) -#define PCRE_ERROR_MATCHLIMIT (-8) -#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ -#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */ -#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */ -#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */ -#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */ -#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */ -#define PCRE_ERROR_PARTIAL (-12) -#define PCRE_ERROR_BADPARTIAL (-13) -#define PCRE_ERROR_INTERNAL (-14) -#define PCRE_ERROR_BADCOUNT (-15) -#define PCRE_ERROR_DFA_UITEM (-16) -#define PCRE_ERROR_DFA_UCOND (-17) -#define PCRE_ERROR_DFA_UMLIMIT (-18) -#define PCRE_ERROR_DFA_WSSIZE (-19) -#define PCRE_ERROR_DFA_RECURSE (-20) -#define PCRE_ERROR_RECURSIONLIMIT (-21) -#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */ -#define PCRE_ERROR_BADNEWLINE (-23) -#define PCRE_ERROR_BADOFFSET (-24) -#define PCRE_ERROR_SHORTUTF8 (-25) -#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */ -#define PCRE_ERROR_RECURSELOOP (-26) -#define PCRE_ERROR_JIT_STACKLIMIT (-27) -#define PCRE_ERROR_BADMODE (-28) -#define PCRE_ERROR_BADENDIANNESS (-29) -#define PCRE_ERROR_DFA_BADRESTART (-30) -#define PCRE_ERROR_JIT_BADOPTION (-31) -#define 
PCRE_ERROR_BADLENGTH (-32) - -/* Specific error codes for UTF-8 validity checks */ - -#define PCRE_UTF8_ERR0 0 -#define PCRE_UTF8_ERR1 1 -#define PCRE_UTF8_ERR2 2 -#define PCRE_UTF8_ERR3 3 -#define PCRE_UTF8_ERR4 4 -#define PCRE_UTF8_ERR5 5 -#define PCRE_UTF8_ERR6 6 -#define PCRE_UTF8_ERR7 7 -#define PCRE_UTF8_ERR8 8 -#define PCRE_UTF8_ERR9 9 -#define PCRE_UTF8_ERR10 10 -#define PCRE_UTF8_ERR11 11 -#define PCRE_UTF8_ERR12 12 -#define PCRE_UTF8_ERR13 13 -#define PCRE_UTF8_ERR14 14 -#define PCRE_UTF8_ERR15 15 -#define PCRE_UTF8_ERR16 16 -#define PCRE_UTF8_ERR17 17 -#define PCRE_UTF8_ERR18 18 -#define PCRE_UTF8_ERR19 19 -#define PCRE_UTF8_ERR20 20 -#define PCRE_UTF8_ERR21 21 -#define PCRE_UTF8_ERR22 22 - -/* Specific error codes for UTF-16 validity checks */ - -#define PCRE_UTF16_ERR0 0 -#define PCRE_UTF16_ERR1 1 -#define PCRE_UTF16_ERR2 2 -#define PCRE_UTF16_ERR3 3 -#define PCRE_UTF16_ERR4 4 - -/* Specific error codes for UTF-32 validity checks */ - -#define PCRE_UTF32_ERR0 0 -#define PCRE_UTF32_ERR1 1 -#define PCRE_UTF32_ERR2 2 -#define PCRE_UTF32_ERR3 3 - -/* Request types for pcre_fullinfo() */ - -#define PCRE_INFO_OPTIONS 0 -#define PCRE_INFO_SIZE 1 -#define PCRE_INFO_CAPTURECOUNT 2 -#define PCRE_INFO_BACKREFMAX 3 -#define PCRE_INFO_FIRSTBYTE 4 -#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ -#define PCRE_INFO_FIRSTTABLE 5 -#define PCRE_INFO_LASTLITERAL 6 -#define PCRE_INFO_NAMEENTRYSIZE 7 -#define PCRE_INFO_NAMECOUNT 8 -#define PCRE_INFO_NAMETABLE 9 -#define PCRE_INFO_STUDYSIZE 10 -#define PCRE_INFO_DEFAULT_TABLES 11 -#define PCRE_INFO_OKPARTIAL 12 -#define PCRE_INFO_JCHANGED 13 -#define PCRE_INFO_HASCRORLF 14 -#define PCRE_INFO_MINLENGTH 15 -#define PCRE_INFO_JIT 16 -#define PCRE_INFO_JITSIZE 17 -#define PCRE_INFO_MAXLOOKBEHIND 18 -#define PCRE_INFO_FIRSTCHARACTER 19 -#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 -#define PCRE_INFO_REQUIREDCHAR 21 -#define PCRE_INFO_REQUIREDCHARFLAGS 22 - -/* Request types for pcre_config(). 
Do not re-arrange, in order to remain -compatible. */ - -#define PCRE_CONFIG_UTF8 0 -#define PCRE_CONFIG_NEWLINE 1 -#define PCRE_CONFIG_LINK_SIZE 2 -#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 -#define PCRE_CONFIG_MATCH_LIMIT 4 -#define PCRE_CONFIG_STACKRECURSE 5 -#define PCRE_CONFIG_UNICODE_PROPERTIES 6 -#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 -#define PCRE_CONFIG_BSR 8 -#define PCRE_CONFIG_JIT 9 -#define PCRE_CONFIG_UTF16 10 -#define PCRE_CONFIG_JITTARGET 11 -#define PCRE_CONFIG_UTF32 12 - -/* Request types for pcre_study(). Do not re-arrange, in order to remain -compatible. */ - -#define PCRE_STUDY_JIT_COMPILE 0x0001 -#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002 -#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004 -#define PCRE_STUDY_EXTRA_NEEDED 0x0008 - -/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine -these bits, just add new ones on the end, in order to remain compatible. */ - -#define PCRE_EXTRA_STUDY_DATA 0x0001 -#define PCRE_EXTRA_MATCH_LIMIT 0x0002 -#define PCRE_EXTRA_CALLOUT_DATA 0x0004 -#define PCRE_EXTRA_TABLES 0x0008 -#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010 -#define PCRE_EXTRA_MARK 0x0020 -#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040 - -/* Types */ - -struct real_pcre; /* declaration; the definition is private */ -typedef struct real_pcre pcre; - -struct real_pcre16; /* declaration; the definition is private */ -typedef struct real_pcre16 pcre16; - -struct real_pcre32; /* declaration; the definition is private */ -typedef struct real_pcre32 pcre32; - -struct real_pcre_jit_stack; /* declaration; the definition is private */ -typedef struct real_pcre_jit_stack pcre_jit_stack; - -struct real_pcre16_jit_stack; /* declaration; the definition is private */ -typedef struct real_pcre16_jit_stack pcre16_jit_stack; - -struct real_pcre32_jit_stack; /* declaration; the definition is private */ -typedef struct real_pcre32_jit_stack pcre32_jit_stack; - -/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 
must contain -a 16 bit wide signed data type. Otherwise it can be a dummy data type since -pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */ -#ifndef PCRE_UCHAR16 -#define PCRE_UCHAR16 unsigned short -#endif - -#ifndef PCRE_SPTR16 -#define PCRE_SPTR16 const PCRE_UCHAR16 * -#endif - -/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain -a 32 bit wide signed data type. Otherwise it can be a dummy data type since -pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */ -#ifndef PCRE_UCHAR32 -#define PCRE_UCHAR32 unsigned int -#endif - -#ifndef PCRE_SPTR32 -#define PCRE_SPTR32 const PCRE_UCHAR32 * -#endif - -/* When PCRE is compiled as a C++ library, the subject pointer type can be -replaced with a custom type. For conventional use, the public interface is a -const char *. */ - -#ifndef PCRE_SPTR -#define PCRE_SPTR const char * -#endif - -/* The structure for passing additional data to pcre_exec(). This is defined in -such as way as to be extensible. Always add new fields at the end, in order to -remain compatible. */ - -typedef struct pcre_extra { - unsigned long int flags; /* Bits for which fields are set */ - void *study_data; /* Opaque data from pcre_study() */ - unsigned long int match_limit; /* Maximum number of calls to match() */ - void *callout_data; /* Data passed back in callouts */ - const unsigned char *tables; /* Pointer to character tables */ - unsigned long int match_limit_recursion; /* Max recursive calls to match() */ - unsigned char **mark; /* For passing back a mark pointer */ - void *executable_jit; /* Contains a pointer to a compiled jit code */ -} pcre_extra; - -/* Same structure as above, but with 16 bit char pointers. 
*/ - -typedef struct pcre16_extra { - unsigned long int flags; /* Bits for which fields are set */ - void *study_data; /* Opaque data from pcre_study() */ - unsigned long int match_limit; /* Maximum number of calls to match() */ - void *callout_data; /* Data passed back in callouts */ - const unsigned char *tables; /* Pointer to character tables */ - unsigned long int match_limit_recursion; /* Max recursive calls to match() */ - PCRE_UCHAR16 **mark; /* For passing back a mark pointer */ - void *executable_jit; /* Contains a pointer to a compiled jit code */ -} pcre16_extra; - -/* Same structure as above, but with 32 bit char pointers. */ - -typedef struct pcre32_extra { - unsigned long int flags; /* Bits for which fields are set */ - void *study_data; /* Opaque data from pcre_study() */ - unsigned long int match_limit; /* Maximum number of calls to match() */ - void *callout_data; /* Data passed back in callouts */ - const unsigned char *tables; /* Pointer to character tables */ - unsigned long int match_limit_recursion; /* Max recursive calls to match() */ - PCRE_UCHAR32 **mark; /* For passing back a mark pointer */ - void *executable_jit; /* Contains a pointer to a compiled jit code */ -} pcre32_extra; - -/* The structure for passing out data via the pcre_callout_function. We use a -structure so that new fields can be added on the end in future versions, -without changing the API of the function, thereby allowing old clients to work -without modification. 
*/ - -typedef struct pcre_callout_block { - int version; /* Identifies version of block */ - /* ------------------------ Version 0 ------------------------------- */ - int callout_number; /* Number compiled into pattern */ - int *offset_vector; /* The offset vector */ - PCRE_SPTR subject; /* The subject being matched */ - int subject_length; /* The length of the subject */ - int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are in the subject */ - int capture_top; /* Max current capture */ - int capture_last; /* Most recently closed capture */ - void *callout_data; /* Data passed in with the call */ - /* ------------------- Added for Version 1 -------------------------- */ - int pattern_position; /* Offset to next item in the pattern */ - int next_item_length; /* Length of next item in the pattern */ - /* ------------------- Added for Version 2 -------------------------- */ - const unsigned char *mark; /* Pointer to current mark or NULL */ - /* ------------------------------------------------------------------ */ -} pcre_callout_block; - -/* Same structure as above, but with 16 bit char pointers. 
*/ - -typedef struct pcre16_callout_block { - int version; /* Identifies version of block */ - /* ------------------------ Version 0 ------------------------------- */ - int callout_number; /* Number compiled into pattern */ - int *offset_vector; /* The offset vector */ - PCRE_SPTR16 subject; /* The subject being matched */ - int subject_length; /* The length of the subject */ - int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are in the subject */ - int capture_top; /* Max current capture */ - int capture_last; /* Most recently closed capture */ - void *callout_data; /* Data passed in with the call */ - /* ------------------- Added for Version 1 -------------------------- */ - int pattern_position; /* Offset to next item in the pattern */ - int next_item_length; /* Length of next item in the pattern */ - /* ------------------- Added for Version 2 -------------------------- */ - const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */ - /* ------------------------------------------------------------------ */ -} pcre16_callout_block; - -/* Same structure as above, but with 32 bit char pointers. 
*/ - -typedef struct pcre32_callout_block { - int version; /* Identifies version of block */ - /* ------------------------ Version 0 ------------------------------- */ - int callout_number; /* Number compiled into pattern */ - int *offset_vector; /* The offset vector */ - PCRE_SPTR32 subject; /* The subject being matched */ - int subject_length; /* The length of the subject */ - int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are in the subject */ - int capture_top; /* Max current capture */ - int capture_last; /* Most recently closed capture */ - void *callout_data; /* Data passed in with the call */ - /* ------------------- Added for Version 1 -------------------------- */ - int pattern_position; /* Offset to next item in the pattern */ - int next_item_length; /* Length of next item in the pattern */ - /* ------------------- Added for Version 2 -------------------------- */ - const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */ - /* ------------------------------------------------------------------ */ -} pcre32_callout_block; - -/* Indirection for store get and free functions. These can be set to -alternative malloc/free functions if required. Special ones are used in the -non-recursive case for "frames". There is also an optional callout function -that is triggered by the (?) regex item. For Virtual Pascal, these definitions -have to take another form. 
*/ - -#ifndef VPCOMPAT -PCRE_EXP_DECL void *(*pcre_malloc)(size_t); -PCRE_EXP_DECL void (*pcre_free)(void *); -PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); -PCRE_EXP_DECL void (*pcre_stack_free)(void *); -PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); - -PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); -PCRE_EXP_DECL void (*pcre16_free)(void *); -PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t); -PCRE_EXP_DECL void (*pcre16_stack_free)(void *); -PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *); - -PCRE_EXP_DECL void *(*pcre32_malloc)(size_t); -PCRE_EXP_DECL void (*pcre32_free)(void *); -PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t); -PCRE_EXP_DECL void (*pcre32_stack_free)(void *); -PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *); -#else /* VPCOMPAT */ -PCRE_EXP_DECL void *pcre_malloc(size_t); -PCRE_EXP_DECL void pcre_free(void *); -PCRE_EXP_DECL void *pcre_stack_malloc(size_t); -PCRE_EXP_DECL void pcre_stack_free(void *); -PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); - -PCRE_EXP_DECL void *pcre16_malloc(size_t); -PCRE_EXP_DECL void pcre16_free(void *); -PCRE_EXP_DECL void *pcre16_stack_malloc(size_t); -PCRE_EXP_DECL void pcre16_stack_free(void *); -PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *); - -PCRE_EXP_DECL void *pcre32_malloc(size_t); -PCRE_EXP_DECL void pcre32_free(void *); -PCRE_EXP_DECL void *pcre32_stack_malloc(size_t); -PCRE_EXP_DECL void pcre32_stack_free(void *); -PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *); -#endif /* VPCOMPAT */ - -/* User defined callback which provides a stack just before the match starts. 
*/ - -typedef pcre_jit_stack *(*pcre_jit_callback)(void *); -typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *); -typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *); - -/* Exported PCRE functions */ - -PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, - const unsigned char *); -PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *, - const unsigned char *); -PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *, - const unsigned char *); -PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, - int *, const unsigned char *); -PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, - int *, const unsigned char *); -PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **, - int *, const unsigned char *); -PCRE_EXP_DECL int pcre_config(int, void *); -PCRE_EXP_DECL int pcre16_config(int, void *); -PCRE_EXP_DECL int pcre32_config(int, void *); -PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, - int *, int, const char *, char *, int); -PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16, - int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int); -PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32, - int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int); -PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, - char *, int); -PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int, - PCRE_UCHAR16 *, int); -PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int, - PCRE_UCHAR32 *, int); -PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, - const char *, int, int, int, int *, int , int *, int); -PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, - PCRE_SPTR16, int, int, int, int *, int , int *, int); -PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *, - PCRE_SPTR32, int, int, int, int *, 
int , int *, int); -PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, - int, int, int, int *, int); -PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *, - PCRE_SPTR16, int, int, int, int *, int); -PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *, - PCRE_SPTR32, int, int, int, int *, int); -PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *, - PCRE_SPTR, int, int, int, int *, int, - pcre_jit_stack *); -PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *, - PCRE_SPTR16, int, int, int, int *, int, - pcre16_jit_stack *); -PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *, - PCRE_SPTR32, int, int, int, int *, int, - pcre32_jit_stack *); -PCRE_EXP_DECL void pcre_free_substring(const char *); -PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16); -PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32); -PCRE_EXP_DECL void pcre_free_substring_list(const char **); -PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *); -PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, - void *); -PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int, - void *); -PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int, - void *); -PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, - int *, int, const char *, const char **); -PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16, - int *, int, PCRE_SPTR16, PCRE_SPTR16 *); -PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32, - int *, int, PCRE_SPTR32, PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); -PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16); -PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32); -PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, 
const char *, - char **, char **); -PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16, - PCRE_UCHAR16 **, PCRE_UCHAR16 **); -PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32, - PCRE_UCHAR32 **, PCRE_UCHAR32 **); -PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, - const char **); -PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int, - PCRE_SPTR16 *); -PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int, - PCRE_SPTR32 *); -PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, - const char ***); -PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int, - PCRE_SPTR16 **); -PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int, - PCRE_SPTR32 **); -PCRE_EXP_DECL const unsigned char *pcre_maketables(void); -PCRE_EXP_DECL const unsigned char *pcre16_maketables(void); -PCRE_EXP_DECL const unsigned char *pcre32_maketables(void); -PCRE_EXP_DECL int pcre_refcount(pcre *, int); -PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int); -PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int); -PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); -PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **); -PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **); -PCRE_EXP_DECL void pcre_free_study(pcre_extra *); -PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *); -PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *); -PCRE_EXP_DECL const char *pcre_version(void); -PCRE_EXP_DECL const char *pcre16_version(void); -PCRE_EXP_DECL const char *pcre32_version(void); - -/* Utility functions for byte order swaps. 
*/ -PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, - const unsigned char *); -PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *, - const unsigned char *); -PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *, - const unsigned char *); -PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *, - PCRE_SPTR16, int, int *, int); -PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *, - PCRE_SPTR32, int, int *, int); - -/* JIT compiler related functions. */ - -PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int); -PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int); -PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int); -PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *); -PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *); -PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *); -PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, - pcre_jit_callback, void *); -PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *, - pcre16_jit_callback, void *); -PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *, - pcre32_jit_callback, void *); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre.h */ diff --git a/deps/libmagic/pcre/pcre_byte_order.c b/deps/libmagic/pcre/pcre_byte_order.c deleted file mode 100644 index 472eb38..0000000 --- a/deps/libmagic/pcre/pcre_byte_order.c +++ /dev/null @@ -1,318 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that tests a compiled pattern to -see if it was compiled with the opposite endianness. If so, it uses an -auxiliary local function to flip the appropriate bytes. 
*/ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Swap byte functions * -*************************************************/ - -/* The following functions swap the bytes of a pcre_uint16 -and pcre_uint32 value. - -Arguments: - value any number - -Returns: the byte swapped value -*/ - -static pcre_uint32 -swap_uint32(pcre_uint32 value) -{ -return ((value & 0x000000ff) << 24) | - ((value & 0x0000ff00) << 8) | - ((value & 0x00ff0000) >> 8) | - (value >> 24); -} - -static pcre_uint16 -swap_uint16(pcre_uint16 value) -{ -return (value >> 8) | (value << 8); -} - - -/************************************************* -* Test for a byte-flipped compiled regex * -*************************************************/ - -/* This function swaps the bytes of a compiled pattern usually -loaded form the disk. It also sets the tables pointer, which -is likely an invalid pointer after reload. - -Arguments: - argument_re points to the compiled expression - extra_data points to extra data or is NULL - tables points to the character tables or NULL - -Returns: 0 if the swap is successful, negative on error -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re, - pcre_extra *extra_data, const unsigned char *tables) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re, - pcre16_extra *extra_data, const unsigned char *tables) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *argument_re, - pcre32_extra *extra_data, const unsigned char *tables) -#endif -{ -REAL_PCRE *re = (REAL_PCRE *)argument_re; -pcre_study_data *study; -#ifndef COMPILE_PCRE8 -pcre_uchar *ptr; -int length; -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 -BOOL utf; -BOOL utf16_char; -#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ -#endif /* !COMPILE_PCRE8 */ - -if (re == NULL) return 
PCRE_ERROR_NULL; -if (re->magic_number == MAGIC_NUMBER) - { - if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - re->tables = tables; - return 0; - } - -if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; -if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - -re->magic_number = MAGIC_NUMBER; -re->size = swap_uint32(re->size); -re->options = swap_uint32(re->options); -re->flags = swap_uint16(re->flags); -re->top_bracket = swap_uint16(re->top_bracket); -re->top_backref = swap_uint16(re->top_backref); -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 -re->first_char = swap_uint16(re->first_char); -re->req_char = swap_uint16(re->req_char); -#elif defined COMPILE_PCRE32 -re->first_char = swap_uint32(re->first_char); -re->req_char = swap_uint32(re->req_char); -#endif -re->name_table_offset = swap_uint16(re->name_table_offset); -re->name_entry_size = swap_uint16(re->name_entry_size); -re->name_count = swap_uint16(re->name_count); -re->ref_count = swap_uint16(re->ref_count); -re->tables = tables; -#ifdef COMPILE_PCRE32 -re->dummy1 = swap_uint16(re->dummy1); -re->dummy2 = swap_uint16(re->dummy2); -#endif - -if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) - { - study = (pcre_study_data *)extra_data->study_data; - study->size = swap_uint32(study->size); - study->flags = swap_uint32(study->flags); - study->minlength = swap_uint32(study->minlength); - } - -#ifndef COMPILE_PCRE8 -ptr = (pcre_uchar *)re + re->name_table_offset; -length = re->name_count * re->name_entry_size; -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 -utf = (re->options & PCRE_UTF16) != 0; -utf16_char = FALSE; -#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ - -while(TRUE) - { - /* Swap previous characters. 
*/ - while (length-- > 0) - { -#if defined COMPILE_PCRE16 - *ptr = swap_uint16(*ptr); -#elif defined COMPILE_PCRE32 - *ptr = swap_uint32(*ptr); -#endif - ptr++; - } -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 - if (utf16_char) - { - if (HAS_EXTRALEN(ptr[-1])) - { - /* We know that there is only one extra character in UTF-16. */ - *ptr = swap_uint16(*ptr); - ptr++; - } - } - utf16_char = FALSE; -#endif /* SUPPORT_UTF */ - - /* Get next opcode. */ - length = 0; -#if defined COMPILE_PCRE16 - *ptr = swap_uint16(*ptr); -#elif defined COMPILE_PCRE32 - *ptr = swap_uint32(*ptr); -#endif - switch (*ptr) - { - case OP_END: - return 0; - -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_UPTO: - case OP_MINUPTO: - case OP_EXACT: - case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSQUERY: - case OP_POSUPTO: - case OP_STARI: - case OP_MINSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_EXACTI: - case OP_POSSTARI: - case OP_POSPLUSI: - case OP_POSQUERYI: - case OP_POSUPTOI: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTEXACT: - case OP_NOTPOSSTAR: - case OP_NOTPOSPLUS: - case OP_NOTPOSQUERY: - case OP_NOTPOSUPTO: - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTEXACTI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERYI: - case OP_NOTPOSUPTOI: - if (utf) utf16_char = TRUE; -#endif - /* Fall through. */ - - default: - length = PRIV(OP_lengths)[*ptr] - 1; - break; - - case OP_CLASS: - case OP_NCLASS: - /* Skip the character bit map. 
*/ - ptr += 32/sizeof(pcre_uchar); - length = 0; - break; - - case OP_XCLASS: - /* Reverse the size of the XCLASS instance. */ - ptr++; -#if defined COMPILE_PCRE16 - *ptr = swap_uint16(*ptr); -#elif defined COMPILE_PCRE32 - *ptr = swap_uint32(*ptr); -#endif -#ifndef COMPILE_PCRE32 - if (LINK_SIZE > 1) - { - /* LINK_SIZE can be 1 or 2 in 16 bit mode. */ - ptr++; - *ptr = swap_uint16(*ptr); - } -#endif - ptr++; - length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1); -#if defined COMPILE_PCRE16 - *ptr = swap_uint16(*ptr); -#elif defined COMPILE_PCRE32 - *ptr = swap_uint32(*ptr); -#endif - if ((*ptr & XCL_MAP) != 0) - { - /* Skip the character bit map. */ - ptr += 32/sizeof(pcre_uchar); - length -= 32/sizeof(pcre_uchar); - } - break; - } - ptr++; - } -/* Control should never reach here in 16/32 bit mode. */ -#endif /* !COMPILE_PCRE8 */ - -return 0; -} - -/* End of pcre_byte_order.c */ diff --git a/deps/libmagic/pcre/pcre_chartables.c b/deps/libmagic/pcre/pcre_chartables.c deleted file mode 100644 index 2a39e9f..0000000 --- a/deps/libmagic/pcre/pcre_chartables.c +++ /dev/null @@ -1,198 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This file contains character tables that are used when no external tables -are passed to PCRE by the application that calls it. The tables are used only -for characters whose code values are less than 256. - -This is a default version of the tables that assumes ASCII encoding. A program -called dftables (which is distributed with PCRE) can be used to build -alternative versions of this file. This is necessary if you are running in an -EBCDIC environment, or if you want to default to a different encoding, for -example ISO-8859-1. When dftables is run, it creates these tables in the -current locale. If PCRE is configured with --enable-rebuild-chartables, this -happens automatically. 
- -The following #includes are present because without them gcc 4.x may remove the -array definition from the final binary if PCRE is built into a static library -and dead code stripping is activated. This leads to link errors. Pulling in the -header ensures that the array gets flagged as "someone outside this compilation -unit might reference this" and so it will always be supplied to the linker. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -const pcre_uint8 PRIV(default_tables)[] = { - -/* This table is a lower casing table. */ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table is a case flipping table. 
*/ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table contains bit maps for various character classes. Each map is 32 -bytes long and the bits run from the least significant end of each byte. The -classes that have their own maps are: space, xdigit, digit, upper, lower, word, -graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ - - 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, - 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - -/* This table identifies various classes of character by individual bits: - 0x01 white space character - 0x02 letter - 0x04 decimal digit - 0x08 hexadecimal digit - 0x10 alphanumeric or '_' - 0x80 regular expression metacharacter or binary zero -*/ - - 
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ - 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ - 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ - 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - -/* End of pcre_chartables.c */ diff --git a/deps/libmagic/pcre/pcre_compile.c b/deps/libmagic/pcre/pcre_compile.c deleted file mode 100644 index 5f0c8ed..0000000 --- a/deps/libmagic/pcre/pcre_compile.c +++ 
/dev/null @@ -1,8386 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_compile(), along with -supporting internal functions that are not used by other modules. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define NLBLOCK cd /* Block containing newline information */ -#define PSSTART start_pattern /* Field containing processed string start */ -#define PSEND end_pattern /* Field containing processed string end */ - -#include "pcre_internal.h" - - -/* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which -is also used by pcretest. PCRE_DEBUG is not defined when building a production -library. We do not need to select pcre16_printint.c specially, because the -COMPILE_PCREx macro will already be appropriately set. */ - -#ifdef PCRE_DEBUG -/* pcre_printint.c should not include any headers */ -#define PCRE_INCLUDED -#include "pcre_printint.c" -#undef PCRE_INCLUDED -#endif - - -/* Macro for setting individual bits in class bitmaps. */ - -#define SETBIT(a,b) a[(b)/8] |= (1 << ((b)&7)) - -/* Maximum length value to check against when making sure that the integer that -holds the compiled pattern length does not overflow. We make it a bit less than -INT_MAX to allow for adding in group terminating bytes, so that we don't have -to check them every time. 
*/ - -#define OFLOW_MAX (INT_MAX - 20) - -/* Definitions to allow mutual recursion */ - -static int - add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *, - const pcre_uint32 *, unsigned int); - -static BOOL - compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int, - pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *, - compile_data *, int *); - - - -/************************************************* -* Code parameters and static tables * -*************************************************/ - -/* This value specifies the size of stack workspace that is used during the -first pre-compile phase that determines how much memory is required. The regex -is partly compiled into this space, but the compiled parts are discarded as -soon as they can be, so that hopefully there will never be an overrun. The code -does, however, check for an overrun. The largest amount I've seen used is 218, -so this number is very generous. - -The same workspace is used during the second, actual compile phase for -remembering forward references to groups so that they can be filled in at the -end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE -is 4 there is plenty of room for most patterns. However, the memory can get -filled up by repetitions of forward references, for example patterns like -/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so -that the workspace is expanded using malloc() in this situation. The value -below is therefore a minimum, and we put a maximum on it for safety. The -minimum is now also defined in terms of LINK_SIZE so that the use of malloc() -kicks in at the same number of forward references in all cases. 
*/ - -#define COMPILE_WORK_SIZE (2048*LINK_SIZE) -#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) - -/* The overrun tests check for a slightly smaller size so that they detect the -overrun before it actually does run off the end of the data block. */ - -#define WORK_SIZE_SAFETY_MARGIN (100) - -/* Private flags added to firstchar and reqchar. */ - -#define REQ_CASELESS (1 << 0) /* Indicates caselessness */ -#define REQ_VARY (1 << 1) /* Reqchar followed non-literal item */ -/* Negative values for the firstchar and reqchar flags */ -#define REQ_UNSET (-2) -#define REQ_NONE (-1) - -/* Repeated character flags. */ - -#define UTF_LENGTH 0x10000000l /* The char contains its length. */ - -/* Table for handling escaped characters in the range '0'-'z'. Positive returns -are simple data values; negative values are for special things like \d and so -on. Zero means further processing is needed (for things like \x), or the escape -is invalid. */ - -#ifndef EBCDIC - -/* This is the "normal" table for ASCII systems or for EBCDIC systems running -in UTF-8 mode. */ - -static const short int escapes[] = { - 0, 0, - 0, 0, - 0, 0, - 0, 0, - 0, 0, - CHAR_COLON, CHAR_SEMICOLON, - CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, - CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, - CHAR_COMMERCIAL_AT, -ESC_A, - -ESC_B, -ESC_C, - -ESC_D, -ESC_E, - 0, -ESC_G, - -ESC_H, 0, - 0, -ESC_K, - 0, 0, - -ESC_N, 0, - -ESC_P, -ESC_Q, - -ESC_R, -ESC_S, - 0, 0, - -ESC_V, -ESC_W, - -ESC_X, 0, - -ESC_Z, CHAR_LEFT_SQUARE_BRACKET, - CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, - CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, - CHAR_GRAVE_ACCENT, 7, - -ESC_b, 0, - -ESC_d, ESC_e, - ESC_f, 0, - -ESC_h, 0, - 0, -ESC_k, - 0, 0, - ESC_n, 0, - -ESC_p, 0, - ESC_r, -ESC_s, - ESC_tee, 0, - -ESC_v, -ESC_w, - 0, 0, - -ESC_z -}; - -#else - -/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. 
*/ - -static const short int escapes[] = { -/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', -/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, -/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', -/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, -/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', -/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', -/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, -/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, -/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p, -/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, -/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, -/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, -/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', -/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, -/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, -/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, -/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, -/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, -/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, -/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, -/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 -}; -#endif - - -/* Table of special "verbs" like (*PRUNE). This is a short table, so it is -searched linearly. Put all the names into a single string, in order to reduce -the number of relocations when a shared library is dynamically linked. The -string is built from string macros so that it works in UTF-8 mode on EBCDIC -platforms. 
*/ - -typedef struct verbitem { - int len; /* Length of verb name */ - int op; /* Op when no arg, or -1 if arg mandatory */ - int op_arg; /* Op when arg present, or -1 if not allowed */ -} verbitem; - -static const char verbnames[] = - "\0" /* Empty name is a shorthand for MARK */ - STRING_MARK0 - STRING_ACCEPT0 - STRING_COMMIT0 - STRING_F0 - STRING_FAIL0 - STRING_PRUNE0 - STRING_SKIP0 - STRING_THEN; - -static const verbitem verbs[] = { - { 0, -1, OP_MARK }, - { 4, -1, OP_MARK }, - { 6, OP_ACCEPT, -1 }, - { 6, OP_COMMIT, -1 }, - { 1, OP_FAIL, -1 }, - { 4, OP_FAIL, -1 }, - { 5, OP_PRUNE, OP_PRUNE_ARG }, - { 4, OP_SKIP, OP_SKIP_ARG }, - { 4, OP_THEN, OP_THEN_ARG } -}; - -static const int verbcount = sizeof(verbs)/sizeof(verbitem); - - -/* Tables of names of POSIX character classes and their lengths. The names are -now all in a single string, to reduce the number of relocations when a shared -library is dynamically loaded. The list of lengths is terminated by a zero -length entry. The first three must be alpha, lower, upper, as this is assumed -for handling case independence. */ - -static const char posix_names[] = - STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 - STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 - STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 - STRING_word0 STRING_xdigit; - -static const pcre_uint8 posix_name_lengths[] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; - -/* Table of class bit maps for each POSIX class. Each class is formed from a -base map, with an optional addition or removal of another map. Then, for some -classes, there is some additional tweaking: for [:blank:] the vertical space -characters are removed, and for [:alpha:] and [:alnum:] the underscore -character is removed. The triples in the table consist of the base map offset, -second map offset or -1 if no second map, and a non-negative value for map -addition or a negative value for map subtraction (if there are two maps). 
The -absolute value of the third field has these meanings: 0 => no tweaking, 1 => -remove vertical space characters, 2 => remove underscore. */ - -static const int posix_class_maps[] = { - cbit_word, cbit_digit, -2, /* alpha */ - cbit_lower, -1, 0, /* lower */ - cbit_upper, -1, 0, /* upper */ - cbit_word, -1, 2, /* alnum - word without underscore */ - cbit_print, cbit_cntrl, 0, /* ascii */ - cbit_space, -1, 1, /* blank - a GNU extension */ - cbit_cntrl, -1, 0, /* cntrl */ - cbit_digit, -1, 0, /* digit */ - cbit_graph, -1, 0, /* graph */ - cbit_print, -1, 0, /* print */ - cbit_punct, -1, 0, /* punct */ - cbit_space, -1, 0, /* space */ - cbit_word, -1, 0, /* word - a Perl extension */ - cbit_xdigit,-1, 0 /* xdigit */ -}; - -/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class -substitutes must be in the order of the names, defined above, and there are -both positive and negative cases. NULL means no substitute. */ - -#ifdef SUPPORT_UCP -static const pcre_uchar string_PNd[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pNd[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PXsp[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pXsp[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PXwd[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pXwd[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; - -static const pcre_uchar *substitutes[] = { - string_PNd, /* \D */ - string_pNd, /* \d */ - string_PXsp, /* \S */ /* NOTE: 
Xsp is Perl space */ - string_pXsp, /* \s */ - string_PXwd, /* \W */ - string_pXwd /* \w */ -}; - -static const pcre_uchar string_pL[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pLl[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pLu[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_pXan[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_h[] = { - CHAR_BACKSLASH, CHAR_h, '\0' }; -static const pcre_uchar string_pXps[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PL[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PLl[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PLu[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_PXan[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const pcre_uchar string_H[] = { - CHAR_BACKSLASH, CHAR_H, '\0' }; -static const pcre_uchar string_PXps[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; - -static const pcre_uchar *posix_substitutes[] = { - string_pL, /* alpha */ - string_pLl, /* lower */ - string_pLu, /* upper */ - string_pXan, /* alnum */ - NULL, /* ascii */ - string_h, /* blank */ - NULL, /* cntrl */ - string_pNd, /* digit */ - NULL, /* graph */ - NULL, /* print */ 
- NULL, /* punct */ - string_pXps, /* space */ /* NOTE: Xps is POSIX space */ - string_pXwd, /* word */ - NULL, /* xdigit */ - /* Negated cases */ - string_PL, /* ^alpha */ - string_PLl, /* ^lower */ - string_PLu, /* ^upper */ - string_PXan, /* ^alnum */ - NULL, /* ^ascii */ - string_H, /* ^blank */ - NULL, /* ^cntrl */ - string_PNd, /* ^digit */ - NULL, /* ^graph */ - NULL, /* ^print */ - NULL, /* ^punct */ - string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */ - string_PXwd, /* ^word */ - NULL /* ^xdigit */ -}; -#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) -#endif - -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -/* The texts of compile-time error messages. These are "char *" because they -are passed to the outside world. Do not ever re-use any error number, because -they are documented. Always add a new error instead. Messages marked DEAD below -are no longer used. This used to be a table of strings, but in order to reduce -the number of relocations needed when a shared library is loaded dynamically, -it is now one long string. We cannot use a table of offsets, because the -lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we -simply count through to the one we want - this isn't a performance issue -because these strings are used only when there is a compilation error. - -Each substring ends with \0 to insert a null character. This includes the final -substring, so that the whole string ends with \0\0, which can be detected when -counting through. 
*/ - -static const char error_texts[] = - "no error\0" - "\\ at end of pattern\0" - "\\c at end of pattern\0" - "unrecognized character follows \\\0" - "numbers out of order in {} quantifier\0" - /* 5 */ - "number too big in {} quantifier\0" - "missing terminating ] for character class\0" - "invalid escape sequence in character class\0" - "range out of order in character class\0" - "nothing to repeat\0" - /* 10 */ - "operand of unlimited repeat could match the empty string\0" /** DEAD **/ - "internal error: unexpected repeat\0" - "unrecognized character after (? or (?-\0" - "POSIX named classes are supported only within a class\0" - "missing )\0" - /* 15 */ - "reference to non-existent subpattern\0" - "erroffset passed as NULL\0" - "unknown option bit(s) set\0" - "missing ) after comment\0" - "parentheses nested too deeply\0" /** DEAD **/ - /* 20 */ - "regular expression is too large\0" - "failed to get memory\0" - "unmatched parentheses\0" - "internal error: code overflow\0" - "unrecognized character after (?<\0" - /* 25 */ - "lookbehind assertion is not fixed length\0" - "malformed number or name after (?(\0" - "conditional group contains more than two branches\0" - "assertion expected after (?(\0" - "(?R or (?[+-]digits must be followed by )\0" - /* 30 */ - "unknown POSIX class name\0" - "POSIX collating elements are not supported\0" - "this version of PCRE is compiled without UTF support\0" - "spare error\0" /** DEAD **/ - "character value in \\x{...} sequence is too large\0" - /* 35 */ - "invalid condition (?(0)\0" - "\\C not allowed in lookbehind assertion\0" - "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" - "number after (?C is > 255\0" - "closing ) for (?C expected\0" - /* 40 */ - "recursive call could loop indefinitely\0" - "unrecognized character after (?P\0" - "syntax error in subpattern name (missing terminator)\0" - "two named subpatterns have the same name\0" - "invalid UTF-8 string\0" - /* 45 */ - "support for \\P, \\p, and \\X has not 
been compiled\0" - "malformed \\P or \\p sequence\0" - "unknown property name after \\P or \\p\0" - "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0" - "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" - /* 50 */ - "repeated subpattern is too long\0" /** DEAD **/ - "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" - "internal error: overran compiling workspace\0" - "internal error: previously-checked referenced subpattern not found\0" - "DEFINE group contains more than one branch\0" - /* 55 */ - "repeating a DEFINE group is not allowed\0" /** DEAD **/ - "inconsistent NEWLINE options\0" - "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" - "a numbered reference must not be zero\0" - "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" - /* 60 */ - "(*VERB) not recognized\0" - "number is too big\0" - "subpattern name expected\0" - "digit expected after (?+\0" - "] is an invalid data character in JavaScript compatibility mode\0" - /* 65 */ - "different names for subpatterns of the same number are not allowed\0" - "(*MARK) must have an argument\0" - "this version of PCRE is not compiled with Unicode property support\0" - "\\c must be followed by an ASCII character\0" - "\\k is not followed by a braced, angle-bracketed, or quoted name\0" - /* 70 */ - "internal error: unknown opcode in find_fixedlength()\0" - "\\N is not supported in a class\0" - "too many forward references\0" - "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" - "invalid UTF-16 string\0" - /* 75 */ - "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" - "character value in \\u.... sequence is too large\0" - "invalid UTF-32 string\0" - ; - -/* Table to identify digits and hex digits. This is used when compiling -patterns. 
Note that the tables in chartables are dependent on the locale, and -may mark arbitrary characters as digits - but the PCRE compiling code expects -to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have -a private table here. It costs 256 bytes, but it is a lot faster than doing -character value tests (at least in some simple cases I timed), and in some -applications one wants PCRE to compile efficiently as well as match -efficiently. - -For convenience, we use the same bit definitions as in chartables: - - 0x04 decimal digit - 0x08 hexadecimal digit - -Then we can use ctype_digit and ctype_xdigit in the code. */ - -/* Using a simple comparison for decimal numbers rather than a memory read -is much faster, and the resulting code is simpler (the compiler turns it -into a subtraction and unsigned comparison). */ - -#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9) - -#ifndef EBCDIC - -/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in -UTF-8 mode. */ - -static const pcre_uint8 digitab[] = - { - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ - 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ - 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? 
*/ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - -#else - -/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. 
*/ - -static const pcre_uint8 digitab[] = - { - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ - 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ - 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ - -static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */ - 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 
0- 7 */ - 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ - 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ - 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ - 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ - 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ - 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ - 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ - 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ - 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ - 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ - 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ - 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ - 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ -#endif - - - -/************************************************* -* Find an error text * -*************************************************/ - -/* The error texts are now all in one long string, to save on relocations. 
As -some of the text is of unknown length, we can't use a table of offsets. -Instead, just count through the strings. This is not a performance issue -because it happens only when there has been a compilation error. - -Argument: the error number -Returns: pointer to the error string -*/ - -static const char * -find_error_text(int n) -{ -const char *s = error_texts; -for (; n > 0; n--) - { - while (*s++ != CHAR_NULL) {}; - if (*s == CHAR_NULL) return "Error text not found (please report)"; - } -return s; -} - - -/************************************************* -* Expand the workspace * -*************************************************/ - -/* This function is called during the second compiling phase, if the number of -forward references fills the existing workspace, which is originally a block on -the stack. A larger block is obtained from malloc() unless the ultimate limit -has been reached or the increase will be rather small. - -Argument: pointer to the compile data block -Returns: 0 if all went well, else an error number -*/ - -static int -expand_workspace(compile_data *cd) -{ -pcre_uchar *newspace; -int newsize = cd->workspace_size * 2; - -if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX; -if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX || - newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN) - return ERR72; - -newspace = (PUBL(malloc))(IN_UCHARS(newsize)); -if (newspace == NULL) return ERR21; -memcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar)); -cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace); -if (cd->workspace_size > COMPILE_WORK_SIZE) - (PUBL(free))((void *)cd->start_workspace); -cd->start_workspace = newspace; -cd->workspace_size = newsize; -return 0; -} - - - -/************************************************* -* Check for counted repeat * -*************************************************/ - -/* This function is called when a '{' is encountered in a place where it might -start a 
quantifier. It looks ahead to see if it really is a quantifier or not. -It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} -where the ddds are digits. - -Arguments: - p pointer to the first char after '{' - -Returns: TRUE or FALSE -*/ - -static BOOL -is_counted_repeat(const pcre_uchar *p) -{ -if (!IS_DIGIT(*p)) return FALSE; -p++; -while (IS_DIGIT(*p)) p++; -if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; - -if (*p++ != CHAR_COMMA) return FALSE; -if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; - -if (!IS_DIGIT(*p)) return FALSE; -p++; -while (IS_DIGIT(*p)) p++; - -return (*p == CHAR_RIGHT_CURLY_BRACKET); -} - - - -/************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a -positive value for a simple escape such as \n, or 0 for a data character -which will be placed in chptr. A backreference to group n is returned as -negative n. When UTF-8 is enabled, a positive value greater than 255 may -be returned in chptr. -On entry,ptr is pointing at the \. On exit, it is on the final character of the -escape sequence. - -Arguments: - ptrptr points to the pattern position pointer - chptr points to the data character - errorcodeptr points to the errorcode variable - bracount number of previous extracting brackets - options the options bits - isclass TRUE if inside a character class - -Returns: zero => a data character - positive => a special escape sequence - negative => a back reference - on error, errorcodeptr is set -*/ - -static int -check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr, - int bracount, int options, BOOL isclass) -{ -/* PCRE_UTF16 has the same value as PCRE_UTF8. 
*/ -BOOL utf = (options & PCRE_UTF8) != 0; -const pcre_uchar *ptr = *ptrptr + 1; -pcre_uint32 c; -int escape = 0; -int i; - -GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ -ptr--; /* Set pointer back to the last byte */ - -/* If backslash is at the end of the pattern, it's an error. */ - -if (c == CHAR_NULL) *errorcodeptr = ERR1; - -/* Non-alphanumerics are literals. For digits or letters, do an initial lookup -in a table. A non-zero result is something that can be returned immediately. -Otherwise further processing may be required. */ - -#ifndef EBCDIC /* ASCII/UTF-8 coding */ -/* Not alphanumeric */ -else if (c < CHAR_0 || c > CHAR_z) {} -else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } - -#else /* EBCDIC coding */ -/* Not alphanumeric */ -else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} -else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } -#endif - -/* Escapes that need further processing, or are illegal. */ - -else - { - const pcre_uchar *oldptr; - BOOL braced, negated, overflow; - int s; - - switch (c) - { - /* A number of Perl escapes are not handled by PCRE. We give an explicit - error. */ - - case CHAR_l: - case CHAR_L: - *errorcodeptr = ERR37; - break; - - case CHAR_u: - if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) - { - /* In JavaScript, \u must be followed by four hexadecimal numbers. - Otherwise it is a lowercase u letter. */ - if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 - && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0 - && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0 - && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0) - { - c = 0; - for (i = 0; i < 4; ++i) - { - register pcre_uint32 cc = *(++ptr); -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ - c = (c << 4) + cc - ((cc < CHAR_A)? 
CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ - if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ - c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif - } - -#if defined COMPILE_PCRE8 - if (c > (utf ? 0x10ffff : 0xff)) -#elif defined COMPILE_PCRE16 - if (c > (utf ? 0x10ffff : 0xffff)) -#elif defined COMPILE_PCRE32 - if (utf && c > 0x10ffff) -#endif - { - *errorcodeptr = ERR76; - } - else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; - } - } - else - *errorcodeptr = ERR37; - break; - - case CHAR_U: - /* In JavaScript, \U is an uppercase U letter. */ - if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37; - break; - - /* In a character class, \g is just a literal "g". Outside a character - class, \g must be followed by one of a number of specific things: - - (1) A number, either plain or braced. If positive, it is an absolute - backreference. If negative, it is a relative backreference. This is a Perl - 5.10 feature. - - (2) Perl 5.10 also supports \g{name} as a reference to a named group. This - is part of Perl's movement towards a unified syntax for back references. As - this is synonymous with \k{name}, we fudge it up by pretending it really - was \k. - - (3) For Oniguruma compatibility we also support \g followed by a name or a - number either in angle brackets or in single quotes. However, these are - (possibly recursive) subroutine calls, _not_ backreferences. Just return - the ESC_g code (cf \k). 
*/ - - case CHAR_g: - if (isclass) break; - if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE) - { - escape = ESC_g; - break; - } - - /* Handle the Perl-compatible cases */ - - if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) - { - const pcre_uchar *p; - for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) - if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; - if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) - { - escape = ESC_k; - break; - } - braced = TRUE; - ptr++; - } - else braced = FALSE; - - if (ptr[1] == CHAR_MINUS) - { - negated = TRUE; - ptr++; - } - else negated = FALSE; - - /* The integer range is limited by the machine's int representation. */ - s = 0; - overflow = FALSE; - while (IS_DIGIT(ptr[1])) - { - if (s > INT_MAX / 10 - 1) /* Integer overflow */ - { - overflow = TRUE; - break; - } - s = s * 10 + (int)(*(++ptr) - CHAR_0); - } - if (overflow) /* Integer overflow */ - { - while (IS_DIGIT(ptr[1])) - ptr++; - *errorcodeptr = ERR61; - break; - } - - if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET) - { - *errorcodeptr = ERR57; - break; - } - - if (s == 0) - { - *errorcodeptr = ERR58; - break; - } - - if (negated) - { - if (s > bracount) - { - *errorcodeptr = ERR15; - break; - } - s = bracount - (s - 1); - } - - escape = -s; - break; - - /* The handling of escape sequences consisting of a string of digits - starting with one that is not zero is not straightforward. By experiment, - the way Perl works seems to be as follows: - - Outside a character class, the digits are read as a decimal number. If the - number is less than 10, or if there are that many previous extracting - left brackets, then it is a back reference. Otherwise, up to three octal - digits are read to form an escaped byte. Thus \123 is likely to be octal - 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal - value is greater than 377, the least significant 8 bits are taken. 
Inside a - character class, \ followed by a digit is always an octal number. */ - - case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: - case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - - if (!isclass) - { - oldptr = ptr; - /* The integer range is limited by the machine's int representation. */ - s = (int)(c -CHAR_0); - overflow = FALSE; - while (IS_DIGIT(ptr[1])) - { - if (s > INT_MAX / 10 - 1) /* Integer overflow */ - { - overflow = TRUE; - break; - } - s = s * 10 + (int)(*(++ptr) - CHAR_0); - } - if (overflow) /* Integer overflow */ - { - while (IS_DIGIT(ptr[1])) - ptr++; - *errorcodeptr = ERR61; - break; - } - if (s < 10 || s <= bracount) - { - escape = -s; - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - - /* Handle an octal number following \. If the first digit is 8 or 9, Perl - generates a binary zero byte and treats the digit as a following literal. - Thus we have to pull back the pointer by one. */ - - if ((c = *ptr) >= CHAR_8) - { - ptr--; - c = 0; - break; - } - - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. The original code used just to take the least - significant 8 bits of octal numbers (I think this is what early Perls used - to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, - but no more than 3 octal digits. */ - - case CHAR_0: - c -= CHAR_0; - while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) - c = c * 8 + *(++ptr) - CHAR_0; -#ifdef COMPILE_PCRE8 - if (!utf && c > 0xff) *errorcodeptr = ERR51; -#endif - break; - - /* \x is complicated. \x{ddd} is a character number which can be greater - than 0xff in utf or non-8bit mode, but only if the ddd are hex digits. - If not, { is treated as a data character. */ - - case CHAR_x: - if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) - { - /* In JavaScript, \x must be followed by two hexadecimal numbers. - Otherwise it is a lowercase x letter. 
*/ - if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 - && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0) - { - c = 0; - for (i = 0; i < 2; ++i) - { - register pcre_uint32 cc = *(++ptr); -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ - c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ - if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ - c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif - } - } - break; - } - - if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) - { - const pcre_uchar *pt = ptr + 2; - - c = 0; - overflow = FALSE; - while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) - { - register pcre_uint32 cc = *pt++; - if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ - -#ifdef COMPILE_PCRE32 - if (c >= 0x10000000l) { overflow = TRUE; break; } -#endif - -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ - c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ - if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ - c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif - -#if defined COMPILE_PCRE8 - if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; } -#elif defined COMPILE_PCRE16 - if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; } -#elif defined COMPILE_PCRE32 - if (utf && c > 0x10ffff) { overflow = TRUE; break; } -#endif - } - - if (overflow) - { - while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++; - *errorcodeptr = ERR34; - } - - if (*pt == CHAR_RIGHT_CURLY_BRACKET) - { - if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; - ptr = pt; - break; - } - - /* If the sequence of hex digits does not end with '}', then we don't - recognize this construct; fall through to the normal \x handling. 
*/ - } - - /* Read just a single-byte hex-defined char */ - - c = 0; - while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0) - { - pcre_uint32 cc; /* Some compilers don't like */ - cc = *(++ptr); /* ++ in initializers */ -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ - c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); -#else /* EBCDIC coding */ - if (cc <= CHAR_z) cc += 64; /* Convert to upper case */ - c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); -#endif - } - break; - - /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. - An error is given if the byte following \c is not an ASCII character. This - coding is ASCII-specific, but then the whole concept of \cx is - ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ - - case CHAR_c: - c = *(++ptr); - if (c == CHAR_NULL) - { - *errorcodeptr = ERR2; - break; - } -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (c > 127) /* Excludes all non-ASCII in either mode */ - { - *errorcodeptr = ERR68; - break; - } - if (c >= CHAR_a && c <= CHAR_z) c -= 32; - c ^= 0x40; -#else /* EBCDIC coding */ - if (c >= CHAR_a && c <= CHAR_z) c += 64; - c ^= 0xC0; -#endif - break; - - /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any - other alphanumeric following \ is an error if PCRE_EXTRA was set; - otherwise, for Perl compatibility, it is a literal. This code looks a bit - odd, but there used to be some cases other than the default, and there may - be again in future, so I haven't "optimized" it. */ - - default: - if ((options & PCRE_EXTRA) != 0) switch(c) - { - default: - *errorcodeptr = ERR3; - break; - } - break; - } - } - -/* Perl supports \N{name} for character names, as well as plain \N for "not -newline". PCRE does not support \N{name}. However, it does support -quantification such as \N{2,3}. 
*/ - -if (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET && - !is_counted_repeat(ptr+2)) - *errorcodeptr = ERR37; - -/* If PCRE_UCP is set, we change the values for \d etc. */ - -if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w) - escape += (ESC_DU - ESC_D); - -/* Set the pointer to the final character before returning. */ - -*ptrptr = ptr; -*chptr = c; -return escape; -} - -#ifdef SUPPORT_UCP -/************************************************* -* Handle \P and \p * -*************************************************/ - -/* This function is called after \P or \p has been encountered, provided that -PCRE is compiled with support for Unicode properties. On entry, ptrptr is -pointing at the P or p. On exit, it is pointing at the final character of the -escape sequence. - -Argument: - ptrptr points to the pattern position pointer - negptr points to a boolean that is set TRUE for negation else FALSE - ptypeptr points to an unsigned int that is set to the type value - pdataptr points to an unsigned int that is set to the detailed property value - errorcodeptr points to the error code variable - -Returns: TRUE if the type value was found, or FALSE for an invalid type -*/ - -static BOOL -get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr, - unsigned int *pdataptr, int *errorcodeptr) -{ -pcre_uchar c; -int i, bot, top; -const pcre_uchar *ptr = *ptrptr; -pcre_uchar name[32]; - -c = *(++ptr); -if (c == CHAR_NULL) goto ERROR_RETURN; - -*negptr = FALSE; - -/* \P or \p can be followed by a name in {}, optionally preceded by ^ for -negation. 
*/ - -if (c == CHAR_LEFT_CURLY_BRACKET) - { - if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT) - { - *negptr = TRUE; - ptr++; - } - for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) - { - c = *(++ptr); - if (c == CHAR_NULL) goto ERROR_RETURN; - if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; - } - if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; - name[i] = 0; - } - -/* Otherwise there is just one following character */ - -else - { - name[0] = c; - name[1] = 0; - } - -*ptrptr = ptr; - -/* Search for a recognized property name using binary chop */ - -bot = 0; -top = PRIV(utt_size); - -while (bot < top) - { - int r; - i = (bot + top) >> 1; - r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); - if (r == 0) - { - *ptypeptr = PRIV(utt)[i].type; - *pdataptr = PRIV(utt)[i].value; - return TRUE; - } - if (r > 0) bot = i + 1; else top = i; - } - -*errorcodeptr = ERR47; -*ptrptr = ptr; -return FALSE; - -ERROR_RETURN: -*errorcodeptr = ERR46; -*ptrptr = ptr; -return FALSE; -} -#endif - - - - -/************************************************* -* Read repeat counts * -*************************************************/ - -/* Read an item of the form {n,m} and return the values. This is called only -after is_counted_repeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. - -Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorcodeptr points to error code variable - -Returns: pointer to '}' on success; - current ptr on error, with errorcodeptr set non-zero -*/ - -static const pcre_uchar * -read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr) -{ -int min = 0; -int max = -1; - -/* Read the minimum value and do a paranoid check: a negative value indicates -an integer overflow. 
*/ - -while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0); -if (min < 0 || min > 65535) - { - *errorcodeptr = ERR5; - return p; - } - -/* Read the maximum value if there is one, and again do a paranoid on its size. -Also, max must not be less than min. */ - -if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else - { - if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) - { - max = 0; - while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0); - if (max < 0 || max > 65535) - { - *errorcodeptr = ERR5; - return p; - } - if (max < min) - { - *errorcodeptr = ERR4; - return p; - } - } - } - -/* Fill in the required variables, and pass back the pointer to the terminating -'}'. */ - -*minp = min; -*maxp = max; -return p; -} - - - -/************************************************* -* Subroutine for finding forward reference * -*************************************************/ - -/* This recursive function is called only from find_parens() below. The -top-level call starts at the beginning of the pattern. All other calls must -start at a parenthesis. It scans along a pattern's text looking for capturing -subpatterns, and counting them. If it finds a named pattern that matches the -name it is given, it returns its number. Alternatively, if the name is NULL, it -returns when it reaches a given numbered subpattern. Recursion is used to keep -track of subpatterns that reset the capturing group numbers - the (?| feature. - -This function was originally called only from the second pass, in which we know -that if (?< or (?' or (?P< is encountered, the name will be correctly -terminated because that is checked in the first pass. There is now one call to -this function in the first pass, to check for a recursive back reference by -name (so that we can make the whole group atomic). In this case, we need check -only up to the current position in the pattern, and that is still OK because -and previous occurrences will have been checked. 
To make this work, the test -for "end of pattern" is a check against cd->end_pattern in the main loop, -instead of looking for a binary zero. This means that the special first-pass -call can adjust cd->end_pattern temporarily. (Checks for binary zero while -processing items within the loop are OK, because afterwards the main loop will -terminate.) - -Arguments: - ptrptr address of the current character pointer (updated) - cd compile background data - name name to seek, or NULL if seeking a numbered subpattern - lorn name length, or subpattern number if name is NULL - xmode TRUE if we are in /x mode - utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode - count pointer to the current capturing subpattern number (updated) - -Returns: the number of the named subpattern, or -1 if not found -*/ - -static int -find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn, - BOOL xmode, BOOL utf, int *count) -{ -pcre_uchar *ptr = *ptrptr; -int start_count = *count; -int hwm_count = start_count; -BOOL dup_parens = FALSE; - -/* If the first character is a parenthesis, check on the type of group we are -dealing with. The very first call may not start with a parenthesis. */ - -if (ptr[0] == CHAR_LEFT_PARENTHESIS) - { - /* Handle specials such as (*SKIP) or (*UTF8) etc. */ - - if (ptr[1] == CHAR_ASTERISK) ptr += 2; - - /* Handle a normal, unnamed capturing parenthesis. */ - - else if (ptr[1] != CHAR_QUESTION_MARK) - { - *count += 1; - if (name == NULL && *count == lorn) return *count; - ptr++; - } - - /* All cases now have (? at the start. Remember when we are in a group - where the parenthesis numbers are duplicated. */ - - else if (ptr[2] == CHAR_VERTICAL_LINE) - { - ptr += 3; - dup_parens = TRUE; - } - - /* Handle comments; all characters are allowed until a ket is reached. 
*/ - - else if (ptr[2] == CHAR_NUMBER_SIGN) - { - for (ptr += 3; *ptr != CHAR_NULL; ptr++) - if (*ptr == CHAR_RIGHT_PARENTHESIS) break; - goto FAIL_EXIT; - } - - /* Handle a condition. If it is an assertion, just carry on so that it - is processed as normal. If not, skip to the closing parenthesis of the - condition (there can't be any nested parens). */ - - else if (ptr[2] == CHAR_LEFT_PARENTHESIS) - { - ptr += 2; - if (ptr[1] != CHAR_QUESTION_MARK) - { - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr != CHAR_NULL) ptr++; - } - } - - /* Start with (? but not a condition. */ - - else - { - ptr += 2; - if (*ptr == CHAR_P) ptr++; /* Allow optional P */ - - /* We have to disambiguate (? for named groups */ - - if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && - ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) - { - pcre_uchar term; - const pcre_uchar *thisname; - *count += 1; - if (name == NULL && *count == lorn) return *count; - term = *ptr++; - if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; - thisname = ptr; - while (*ptr != term) ptr++; - if (name != NULL && lorn == (int)(ptr - thisname) && - STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0) - return *count; - term++; - } - } - } - -/* Past any initial parenthesis handling, scan for parentheses or vertical -bars. Stop if we get to cd->end_pattern. Note that this is important for the -first-pass call when this value is temporarily adjusted to stop at the current -position. So DO NOT change this to a test for binary zero. 
*/ - -for (; ptr < cd->end_pattern; ptr++) - { - /* Skip over backslashed characters and also entire \Q...\E */ - - if (*ptr == CHAR_BACKSLASH) - { - if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; - if (*ptr == CHAR_Q) for (;;) - { - while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; - if (*ptr == CHAR_NULL) goto FAIL_EXIT; - if (*(++ptr) == CHAR_E) break; - } - continue; - } - - /* Skip over character classes; this logic must be similar to the way they - are handled for real. If the first character is '^', skip it. Also, if the - first few characters (either before or after ^) are \Q\E or \E we skip them - too. This makes for compatibility with Perl. Note the use of STR macros to - encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */ - - if (*ptr == CHAR_LEFT_SQUARE_BRACKET) - { - BOOL negate_class = FALSE; - for (;;) - { - if (ptr[1] == CHAR_BACKSLASH) - { - if (ptr[2] == CHAR_E) - ptr+= 2; - else if (STRNCMP_UC_C8(ptr + 2, - STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 4; - else - break; - } - else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT) - { - negate_class = TRUE; - ptr++; - } - else break; - } - - /* If the next character is ']', it is a data character that must be - skipped, except in JavaScript compatibility mode. 
*/ - - if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET && - (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) - ptr++; - - while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET) - { - if (*ptr == CHAR_NULL) return -1; - if (*ptr == CHAR_BACKSLASH) - { - if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT; - if (*ptr == CHAR_Q) for (;;) - { - while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {}; - if (*ptr == CHAR_NULL) goto FAIL_EXIT; - if (*(++ptr) == CHAR_E) break; - } - continue; - } - } - continue; - } - - /* Skip comments in /x mode */ - - if (xmode && *ptr == CHAR_NUMBER_SIGN) - { - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } - ptr++; -#ifdef SUPPORT_UTF - if (utf) FORWARDCHAR(ptr); -#endif - } - if (*ptr == CHAR_NULL) goto FAIL_EXIT; - continue; - } - - /* Check for the special metacharacters */ - - if (*ptr == CHAR_LEFT_PARENTHESIS) - { - int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count); - if (rc > 0) return rc; - if (*ptr == CHAR_NULL) goto FAIL_EXIT; - } - - else if (*ptr == CHAR_RIGHT_PARENTHESIS) - { - if (dup_parens && *count < hwm_count) *count = hwm_count; - goto FAIL_EXIT; - } - - else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) - { - if (*count > hwm_count) hwm_count = *count; - *count = start_count; - } - } - -FAIL_EXIT: -*ptrptr = ptr; -return -1; -} - - - - -/************************************************* -* Find forward referenced subpattern * -*************************************************/ - -/* This function scans along a pattern's text looking for capturing -subpatterns, and counting them. If it finds a named pattern that matches the -name it is given, it returns its number. Alternatively, if the name is NULL, it -returns when it reaches a given numbered subpattern. This is used for forward -references to subpatterns. 
We used to be able to start this scan from the -current compiling point, using the current count value from cd->bracount, and -do it all in a single loop, but the addition of the possibility of duplicate -subpattern numbers means that we have to scan from the very start, in order to -take account of such duplicates, and to use a recursive function to keep track -of the different types of group. - -Arguments: - cd compile background data - name name to seek, or NULL if seeking a numbered subpattern - lorn name length, or subpattern number if name is NULL - xmode TRUE if we are in /x mode - utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode - -Returns: the number of the found subpattern, or -1 if not found -*/ - -static int -find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode, - BOOL utf) -{ -pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern; -int count = 0; -int rc; - -/* If the pattern does not start with an opening parenthesis, the first call -to find_parens_sub() will scan right to the end (if necessary). However, if it -does start with a parenthesis, find_parens_sub() will return when it hits the -matching closing parens. That is why we have to have a loop. */ - -for (;;) - { - rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count); - if (rc > 0 || *ptr++ == CHAR_NULL) break; - } - -return rc; -} - - - - -/************************************************* -* Find first significant op code * -*************************************************/ - -/* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things -that do not influence this. For some calls, it makes sense to skip negative -forward and all backward assertions, and also the \b assertion; for others it -does not. 
- -Arguments: - code pointer to the start of the group - skipassert TRUE if certain assertions are to be skipped - -Returns: pointer to the first significant opcode -*/ - -static const pcre_uchar* -first_significant_code(const pcre_uchar *code, BOOL skipassert) -{ -for (;;) - { - switch ((int)*code) - { - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - if (!skipassert) return code; - do code += GET(code, 1); while (*code == OP_ALT); - code += PRIV(OP_lengths)[*code]; - break; - - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - if (!skipassert) return code; - /* Fall through */ - - case OP_CALLOUT: - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_DEF: - code += PRIV(OP_lengths)[*code]; - break; - - default: - return code; - } - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Find the fixed length of a branch * -*************************************************/ - -/* Scan a branch and compute the fixed length of subject that will match it, -if the length is fixed. This is needed for dealing with backward assertions. -In UTF8 mode, the result is in characters rather than bytes. The branch is -temporarily terminated with OP_END when this function is called. - -This function is called when a backward assertion is encountered, so that if it -fails, the error message can point to the correct place in the pattern. -However, we cannot do this when the assertion contains subroutine calls, -because they can be forward references. We solve this by remembering this case -and doing the check at the end; a flag specifies which mode we are running in. 
- -Arguments: - code points to the start of the pattern (the bracket) - utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - atend TRUE if called when the pattern is complete - cd the "compile data" structure - -Returns: the fixed length, - or -1 if there is no fixed length, - or -2 if \C was encountered (in UTF-8 mode only) - or -3 if an OP_RECURSE item was encountered and atend is FALSE - or -4 if an unknown opcode was encountered (internal error) -*/ - -static int -find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd) -{ -int length = -1; - -register int branchlength = 0; -register pcre_uchar *cc = code + 1 + LINK_SIZE; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) - { - int d; - pcre_uchar *ce, *cs; - register pcre_uchar op = *cc; - - switch (op) - { - /* We only need to continue for OP_CBRA (normal capturing bracket) and - OP_BRA (normal non-capturing bracket) because the other variants of these - opcodes are all concerned with unlimited repeated groups, which of course - are not of fixed length. */ - - case OP_CBRA: - case OP_BRA: - case OP_ONCE: - case OP_ONCE_NC: - case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd); - if (d < 0) return d; - branchlength += d; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; - - /* Reached end of a branch; if it's a ket it is the end of a nested call. - If it's ALT it is an alternation in a nested call. An ACCEPT is effectively - an ALT. If it is END it's the end of the outer call. All can be handled by - the same code. Note that we must not include the OP_KETRxxx opcodes here, - because they all imply an unlimited repeat. 
*/ - - case OP_ALT: - case OP_KET: - case OP_END: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - if (length < 0) length = branchlength; - else if (length != branchlength) return -1; - if (*cc != OP_ALT) return length; - cc += 1 + LINK_SIZE; - branchlength = 0; - break; - - /* A true recursion implies not fixed length, but a subroutine call may - be OK. If the subroutine is a forward reference, we can't deal with - it until the end of the pattern, so return -3. */ - - case OP_RECURSE: - if (!atend) return -3; - cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1); /* Start subpattern */ - do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ - if (cc > cs && cc < ce) return -1; /* Recursion */ - d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd); - if (d < 0) return d; - branchlength += d; - cc += 1 + LINK_SIZE; - break; - - /* Skip over assertive subpatterns */ - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += PRIV(OP_lengths)[*cc]; - break; - - /* Skip over things that don't match chars */ - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - cc += cc[1] + PRIV(OP_lengths)[*cc]; - break; - - case OP_CALLOUT: - case OP_CIRC: - case OP_CIRCM: - case OP_CLOSE: - case OP_COMMIT: - case OP_CREF: - case OP_DEF: - case OP_DOLL: - case OP_DOLLM: - case OP_EOD: - case OP_EODN: - case OP_FAIL: - case OP_NCREF: - case OP_NRREF: - case OP_NOT_WORD_BOUNDARY: - case OP_PRUNE: - case OP_REVERSE: - case OP_RREF: - case OP_SET_SOM: - case OP_SKIP: - case OP_SOD: - case OP_SOM: - case OP_THEN: - case OP_WORD_BOUNDARY: - cc += PRIV(OP_lengths)[*cc]; - break; - - /* Handle literal characters */ - - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - branchlength++; - cc += 2; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - /* Handle exact repetitions. 
The count is already in characters, but we - need to skip over a multibyte character in UTF8 mode. */ - - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - branchlength += (int)GET2(cc,1); - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - case OP_TYPEEXACT: - branchlength += GET2(cc,1); - if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) - cc += 2; - cc += 1 + IMM2_SIZE + 1; - break; - - /* Handle single-char matchers */ - - case OP_PROP: - case OP_NOTPROP: - cc += 2; - /* Fall through */ - - case OP_HSPACE: - case OP_VSPACE: - case OP_NOT_HSPACE: - case OP_NOT_VSPACE: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - branchlength++; - cc++; - break; - - /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; - otherwise \C is coded as OP_ALLANY. */ - - case OP_ANYBYTE: - return -2; - - /* Check a class for variable quantification */ - - case OP_CLASS: - case OP_NCLASS: -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case OP_XCLASS: - /* The original code caused an unsigned overflow in 64 bit systems, - so now we use a conditional statement. 
*/ - if (op == OP_XCLASS) - cc += GET(cc, 1); - else - cc += PRIV(OP_lengths)[OP_CLASS]; -#else - cc += PRIV(OP_lengths)[OP_CLASS]; -#endif - - switch (*cc) - { - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - return -1; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; - branchlength += (int)GET2(cc,1); - cc += 1 + 2 * IMM2_SIZE; - break; - - default: - branchlength++; - } - break; - - /* Anything else is variable length */ - - case OP_ANYNL: - case OP_BRAMINZERO: - case OP_BRAPOS: - case OP_BRAPOSZERO: - case OP_BRAZERO: - case OP_CBRAPOS: - case OP_EXTUNI: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_PLUS: - case OP_PLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_QUERY: - case OP_QUERYI: - case OP_REF: - case OP_REFI: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - case OP_SCOND: - case OP_SKIPZERO: - case OP_STAR: - case OP_STARI: - case OP_TYPEMINPLUS: - case OP_TYPEMINQUERY: - case OP_TYPEMINSTAR: - case OP_TYPEMINUPTO: - case OP_TYPEPLUS: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - case 
OP_TYPEPOSSTAR: - case OP_TYPEPOSUPTO: - case OP_TYPEQUERY: - case OP_TYPESTAR: - case OP_TYPEUPTO: - case OP_UPTO: - case OP_UPTOI: - return -1; - - /* Catch unrecognized opcodes so that when new ones are added they - are not forgotten, as has happened in the past. */ - - default: - return -4; - } - } -/* Control never gets here */ -} - - - - -/************************************************* -* Scan compiled regex for specific bracket * -*************************************************/ - -/* This little function scans through a compiled pattern until it finds a -capturing bracket with the given number, or, if the number is negative, an -instance of OP_REVERSE for a lookbehind. The function is global in the C sense -so that it can be called from pcre_study() when finding the minimum matching -length. - -Arguments: - code points to start of expression - utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - number the required bracket number or negative to find a lookbehind - -Returns: pointer to the opcode for the bracket, or NULL if not found -*/ - -const pcre_uchar * -PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number) -{ -for (;;) - { - register pcre_uchar c = *code; - - if (c == OP_END) return NULL; - - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. 
*/ - - if (c == OP_XCLASS) code += GET(code, 1); - - /* Handle recursion */ - - else if (c == OP_REVERSE) - { - if (number < 0) return (pcre_uchar *)code; - code += PRIV(OP_lengths)[c]; - } - - /* Handle capturing bracket */ - - else if (c == OP_CBRA || c == OP_SCBRA || - c == OP_CBRAPOS || c == OP_SCBRAPOS) - { - int n = (int)GET2(code, 1+LINK_SIZE); - if (n == number) return (pcre_uchar *)code; - code += PRIV(OP_lengths)[c]; - } - - /* Otherwise, we can get the item's length from the table, except that for - repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we - must add in its length. */ - - else - { - switch(c) - { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSUPTO: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - code += 2; - break; - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - code += code[1]; - break; - - case OP_THEN_ARG: - code += code[1]; - break; - } - - /* Add in the fixed length from the table */ - - code += PRIV(OP_lengths)[c]; - - /* In UTF-8 mode, opcodes that are followed by a character may be followed by - a multi-byte character. The length in the table is a minimum, so we have to - arrange to skip the extra bytes. 
*/ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) switch(c) - { - case OP_CHAR: - case OP_CHARI: - case OP_EXACT: - case OP_EXACTI: - case OP_UPTO: - case OP_UPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_STAR: - case OP_STARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_PLUS: - case OP_PLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_QUERY: - case OP_QUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_POSQUERY: - case OP_POSQUERYI: - if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); - break; - } -#else - (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif - } - } -} - - - -/************************************************* -* Scan compiled regex for recursion reference * -*************************************************/ - -/* This little function scans through a compiled pattern until it finds an -instance of OP_RECURSE. - -Arguments: - code points to start of expression - utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - -Returns: pointer to the opcode for OP_RECURSE, or NULL if not found -*/ - -static const pcre_uchar * -find_recurse(const pcre_uchar *code, BOOL utf) -{ -for (;;) - { - register pcre_uchar c = *code; - if (c == OP_END) return NULL; - if (c == OP_RECURSE) return code; - - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. */ - - if (c == OP_XCLASS) code += GET(code, 1); - - /* Otherwise, we can get the item's length from the table, except that for - repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we - must add in its length. 
*/ - - else - { - switch(c) - { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - case OP_TYPEPOSUPTO: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - code += 2; - break; - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - code += code[1]; - break; - - case OP_THEN_ARG: - code += code[1]; - break; - } - - /* Add in the fixed length from the table */ - - code += PRIV(OP_lengths)[c]; - - /* In UTF-8 mode, opcodes that are followed by a character may be followed - by a multi-byte character. The length in the table is a minimum, so we have - to arrange to skip the extra bytes. */ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) switch(c) - { - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - case OP_UPTO: - case OP_UPTOI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - case OP_STAR: - case OP_STARI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - case OP_PLUS: - case OP_PLUSI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - case OP_QUERY: - case OP_QUERYI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_NOTMINQUERY: - case 
OP_NOTMINQUERYI: - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); - break; - } -#else - (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif - } - } -} - - - -/************************************************* -* Scan compiled branch for non-emptiness * -*************************************************/ - -/* This function scans through a branch of a compiled pattern to see whether it -can match the empty string or not. It is called from could_be_empty() -below and from compile_branch() when checking for an unlimited repeat of a -group that can match nothing. Note that first_significant_code() skips over -backward and negative forward assertions when its final argument is TRUE. If we -hit an unclosed bracket, we return "empty" - this means we've struck an inner -bracket whose current branch will already have been scanned. - -Arguments: - code points to start of search - endcode points to where to stop - utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode - cd contains pointers to tables etc. - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, - BOOL utf, compile_data *cd) -{ -register pcre_uchar c; -for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); - code < endcode; - code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) - { - const pcre_uchar *ccode; - - c = *code; - - /* Skip over forward assertions; the other assertions are skipped by - first_significant_code() with a TRUE final argument. */ - - if (c == OP_ASSERT) - { - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* For a recursion/subroutine call, if its end has been reached, which - implies a backward reference subroutine call, we can scan it. If it's a - forward reference subroutine call, we can't. 
To detect forward reference - we have to scan up the list that is kept in the workspace. This function is - called only when doing the real compile, not during the pre-compile that - measures the size of the compiled pattern. */ - - if (c == OP_RECURSE) - { - const pcre_uchar *scode; - BOOL empty_branch; - - /* Test for forward reference */ - - for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE) - if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; - - /* Not a forward reference, test for completed backward reference */ - - empty_branch = FALSE; - scode = cd->start_code + GET(code, 1); - if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ - - /* Completed backwards reference */ - - do - { - if (could_be_empty_branch(scode, endcode, utf, cd)) - { - empty_branch = TRUE; - break; - } - scode += GET(scode, 1); - } - while (*scode == OP_ALT); - - if (!empty_branch) return FALSE; /* All branches are non-empty */ - continue; - } - - /* Groups with zero repeats can of course be empty; skip them. */ - - if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO || - c == OP_BRAPOSZERO) - { - code += PRIV(OP_lengths)[c]; - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* A nested group that is already marked as "could be empty" can just be - skipped. */ - - if (c == OP_SBRA || c == OP_SBRAPOS || - c == OP_SCBRA || c == OP_SCBRAPOS) - { - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* For other groups, scan the branches. */ - - if (c == OP_BRA || c == OP_BRAPOS || - c == OP_CBRA || c == OP_CBRAPOS || - c == OP_ONCE || c == OP_ONCE_NC || - c == OP_COND) - { - BOOL empty_branch; - if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ - - /* If a conditional group has only one branch, there is a second, implied, - empty branch, so just skip over the conditional, because it could be empty. - Otherwise, scan the individual branches of the group. 
*/ - - if (c == OP_COND && code[GET(code, 1)] != OP_ALT) - code += GET(code, 1); - else - { - empty_branch = FALSE; - do - { - if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd)) - empty_branch = TRUE; - code += GET(code, 1); - } - while (*code == OP_ALT); - if (!empty_branch) return FALSE; /* All branches are non-empty */ - } - - c = *code; - continue; - } - - /* Handle the other opcodes */ - - switch (c) - { - /* Check for quantifiers after a class. XCLASS is used for classes that - cannot be represented just by a bit map. This includes negated single - high-valued characters. The length in PRIV(OP_lengths)[] is zero; the - actual length is stored in the compiled code, so we must update "code" - here. */ - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - ccode = code += GET(code, 1); - goto CHECK_CLASS_REPEAT; -#endif - - case OP_CLASS: - case OP_NCLASS: - ccode = code + PRIV(OP_lengths)[OP_CLASS]; - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - CHECK_CLASS_REPEAT: -#endif - - switch (*ccode) - { - case OP_CRSTAR: /* These could be empty; continue */ - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - break; - - default: /* Non-repeat => class must match */ - case OP_CRPLUS: /* These repeats aren't empty */ - case OP_CRMINPLUS: - return FALSE; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ - break; - } - break; - - /* Opcodes that must match a character */ - - case OP_PROP: - case OP_NOTPROP: - case OP_EXTUNI: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - case OP_EXACT: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTPOSPLUS: - case OP_NOTEXACT: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case 
OP_TYPEPOSPLUS: - case OP_TYPEEXACT: - return FALSE; - - /* These are going to continue, as they may be empty, but we have to - fudge the length for the \p and \P cases. */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - /* Same for these */ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - code += 2; - break; - - /* End of branch */ - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_ALT: - return TRUE; - - /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, - MINUPTO, and POSUPTO may be followed by a multibyte character */ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - case OP_STAR: - case OP_STARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_QUERY: - case OP_QUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_POSQUERY: - case OP_POSQUERYI: - if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); - break; - - case OP_UPTO: - case OP_UPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_POSUPTO: - case OP_POSUPTOI: - if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); - break; -#endif - - /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument - string. */ - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - code += code[1]; - break; - - case OP_THEN_ARG: - code += code[1]; - break; - - /* None of the remaining opcodes are required to match a character. */ - - default: - break; - } - } - -return TRUE; -} - - - -/************************************************* -* Scan compiled regex for non-emptiness * -*************************************************/ - -/* This function is called to check for left recursive calls. 
We want to check -the current branch of the current pattern to see if it could match the empty -string. If it could, we must look outwards for branches at other levels, -stopping when we pass beyond the bracket which is the subject of the recursion. -This function is called only during the real compile, not during the -pre-compile. - -Arguments: - code points to start of the recursion - endcode points to where to stop (current RECURSE item) - bcptr points to the chain of current (unclosed) branch starts - utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode - cd pointers to tables etc - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, - branch_chain *bcptr, BOOL utf, compile_data *cd) -{ -while (bcptr != NULL && bcptr->current_branch >= code) - { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd)) - return FALSE; - bcptr = bcptr->outer; - } -return TRUE; -} - - - -/************************************************* -* Check for POSIX class syntax * -*************************************************/ - -/* This function is called when the sequence "[:" or "[." or "[=" is -encountered in a character class. It checks whether this is followed by a -sequence of characters terminated by a matching ":]" or ".]" or "=]". If we -reach an unescaped ']' without the special preceding character, return FALSE. - -Originally, this function only recognized a sequence of letters between the -terminators, but it seems that Perl recognizes any sequence of characters, -though of course unknown POSIX names are subsequently rejected. Perl gives an -"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE -didn't consider this to be a POSIX class. Likewise for [:1234:]. - -The problem in trying to be exactly like Perl is in the handling of escapes. 
We -have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX -class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code -below handles the special case of \], but does not try to do any other escape -processing. This makes it different from Perl for cases such as [:l\ower:] -where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize -"l\ower". This is a lesser evil that not diagnosing bad classes when Perl does, -I think. - -A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. -It seems that the appearance of a nested POSIX class supersedes an apparent -external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or -a digit. - -In Perl, unescaped square brackets may also appear as part of class names. For -example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for -[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not -seem right at all. PCRE does not allow closing square brackets in POSIX class -names. - -Arguments: - ptr pointer to the initial [ - endptr where to return the end pointer - -Returns: TRUE or FALSE -*/ - -static BOOL -check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) -{ -pcre_uchar terminator; /* Don't combine these lines; the Solaris cc */ -terminator = *(++ptr); /* compiler warns about "non-constant" initializer. 
*/ -for (++ptr; *ptr != CHAR_NULL; ptr++) - { - if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - ptr++; - else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; - else - { - if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { - *endptr = ptr; - return TRUE; - } - if (*ptr == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, endptr)) - return FALSE; - } - } -return FALSE; -} - - - - -/************************************************* -* Check POSIX class name * -*************************************************/ - -/* This function is called to check the name given in a POSIX-style class entry -such as [:alnum:]. - -Arguments: - ptr points to the first letter - len the length of the name - -Returns: a value representing the name, or -1 if unknown -*/ - -static int -check_posix_name(const pcre_uchar *ptr, int len) -{ -const char *pn = posix_names; -register int yield = 0; -while (posix_name_lengths[yield] != 0) - { - if (len == posix_name_lengths[yield] && - STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield; - pn += posix_name_lengths[yield] + 1; - yield++; - } -return -1; -} - - -/************************************************* -* Adjust OP_RECURSE items in repeated group * -*************************************************/ - -/* OP_RECURSE items contain an offset from the start of the regex to the group -that is referenced. This means that groups can be replicated for fixed -repetition simply by copying (because the recursion is allowed to refer to -earlier groups that are outside the current group). However, when a group is -optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is -inserted before it, after it has been compiled. This means that any OP_RECURSE -items within it that refer to the group itself or any contained groups have to -have their offsets adjusted. That one of the jobs of this function. 
Before it -is called, the partially compiled regex must be temporarily terminated with -OP_END. - -This function has been extended with the possibility of forward references for -recursions and subroutine calls. It must also check the list of such references -for the group we are dealing with. If it finds that one of the recursions in -the current group is on this list, it adjusts the offset in the list, not the -value in the reference (which is a group number). - -Arguments: - group points to the start of the group - adjust the amount by which the group is to be moved - utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - cd contains pointers to tables etc. - save_hwm the hwm forward reference pointer at the start of the group - -Returns: nothing -*/ - -static void -adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, - pcre_uchar *save_hwm) -{ -pcre_uchar *ptr = group; - -while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) - { - int offset; - pcre_uchar *hc; - - /* See if this recursion is on the forward reference list. If so, adjust the - reference. */ - - for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) - { - offset = (int)GET(hc, 0); - if (cd->start_code + offset == ptr + 1) - { - PUT(hc, 0, offset + adjust); - break; - } - } - - /* Otherwise, adjust the recursion offset if it's after the start of this - group. */ - - if (hc >= cd->hwm) - { - offset = (int)GET(ptr, 1); - if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); - } - - ptr += 1 + LINK_SIZE; - } -} - - - -/************************************************* -* Insert an automatic callout point * -*************************************************/ - -/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert -callout points before each pattern item. 
- -Arguments: - code current code pointer - ptr current pattern pointer - cd pointers to tables etc - -Returns: new code pointer -*/ - -static pcre_uchar * -auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd) -{ -*code++ = OP_CALLOUT; -*code++ = 255; -PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */ -PUT(code, LINK_SIZE, 0); /* Default length */ -return code + 2 * LINK_SIZE; -} - - - -/************************************************* -* Complete a callout item * -*************************************************/ - -/* A callout item contains the length of the next item in the pattern, which -we can't fill in till after we have reached the relevant point. This is used -for both automatic and manual callouts. - -Arguments: - previous_callout points to previous callout item - ptr current pattern pointer - cd pointers to tables etc - -Returns: nothing -*/ - -static void -complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd) -{ -int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2)); -PUT(previous_callout, 2 + LINK_SIZE, length); -} - - - -#ifdef SUPPORT_UCP -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range, in UTF-8 mode -with UCP support. It searches up the characters, looking for ranges of -characters in the "other" case. Each call returns the next one, updating the -start address. A character with multiple other cases is returned on its own -with a special return value. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - -Yield: -1 when no more - 0 when a range is returned - >0 the CASESET offset for char with multiple other cases - in this case, ocptr contains the original -*/ - -static int -get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr, - pcre_uint32 *odptr) -{ -pcre_uint32 c, othercase, next; -unsigned int co; - -/* Find the first character that has an other case. If it has multiple other -cases, return its case offset value. */ - -for (c = *cptr; c <= d; c++) - { - if ((co = UCD_CASESET(c)) != 0) - { - *ocptr = c++; /* Character that has the set */ - *cptr = c; /* Rest of input range */ - return (int)co; - } - if ((othercase = UCD_OTHERCASE(c)) != c) break; - } - -if (c > d) return -1; /* Reached end of range */ - -*ocptr = othercase; -next = othercase + 1; - -for (++c; c <= d; c++) - { - if (UCD_OTHERCASE(c) != next) break; - next++; - } - -*odptr = next - 1; /* End of othercase range */ -*cptr = c; /* Rest of input range */ -return 0; -} - - - -/************************************************* -* Check a character and a property * -*************************************************/ - -/* This function is called by check_auto_possessive() when a property item -is adjacent to a fixed character. 
- -Arguments: - c the character - ptype the property type - pdata the data for the type - negated TRUE if it's a negated property (\P or \p{^) - -Returns: TRUE if auto-possessifying is OK -*/ - -static BOOL -check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, BOOL negated) -{ -#ifdef SUPPORT_UCP -const pcre_uint32 *p; -#endif - -const ucd_record *prop = GET_UCD(c); - -switch(ptype) - { - case PT_LAMP: - return (prop->chartype == ucp_Lu || - prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == negated; - - case PT_GC: - return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; - - case PT_PC: - return (pdata == prop->chartype) == negated; - - case PT_SC: - return (pdata == prop->script) == negated; - - /* These are specials */ - - case PT_ALNUM: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; - - case PT_SPACE: /* Perl space */ - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) - == negated; - - case PT_PXSPACE: /* POSIX space */ - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR) - == negated; - - case PT_WORD: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE) == negated; - -#ifdef SUPPORT_UCP - case PT_CLIST: - p = PRIV(ucd_caseless_sets) + prop->caseset; - for (;;) - { - if (c < *p) return !negated; - if (c == *p++) return negated; - } - break; /* Control never reaches here */ -#endif - } - -return FALSE; -} -#endif /* SUPPORT_UCP */ - - - -/************************************************* -* Check if auto-possessifying is possible * -*************************************************/ - -/* This function is called for unlimited repeats of certain items, to see -whether the next thing could possibly match the repeated item. 
If not, it makes -sense to automatically possessify the repeated item. - -Arguments: - previous pointer to the repeated opcode - utf TRUE in UTF-8 / UTF-16 / UTF-32 mode - ptr next character in pattern - options options bits - cd contains pointers to tables etc. - -Returns: TRUE if possessifying is wanted -*/ - -static BOOL -check_auto_possessive(const pcre_uchar *previous, BOOL utf, - const pcre_uchar *ptr, int options, compile_data *cd) -{ -pcre_uint32 c = NOTACHAR; -pcre_uint32 next; -int escape; -pcre_uchar op_code = *previous++; - -/* Skip whitespace and comments in extended mode */ - -if ((options & PCRE_EXTENDED) != 0) - { - for (;;) - { - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; - if (*ptr == CHAR_NUMBER_SIGN) - { - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } - ptr++; -#ifdef SUPPORT_UTF - if (utf) FORWARDCHAR(ptr); -#endif - } - } - else break; - } - } - -/* If the next item is one that we can handle, get its value. A non-negative -value is a character, a negative value is an escape value. */ - -if (*ptr == CHAR_BACKSLASH) - { - int temperrorcode = 0; - escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE); - if (temperrorcode != 0) return FALSE; - ptr++; /* Point after the escape sequence */ - } -else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0) - { - escape = 0; -#ifdef SUPPORT_UTF - if (utf) { GETCHARINC(next, ptr); } else -#endif - next = *ptr++; - } -else return FALSE; - -/* Skip whitespace and comments in extended mode */ - -if ((options & PCRE_EXTENDED) != 0) - { - for (;;) - { - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; - if (*ptr == CHAR_NUMBER_SIGN) - { - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } - ptr++; -#ifdef SUPPORT_UTF - if (utf) FORWARDCHAR(ptr); -#endif - } - } - else break; - } - } - -/* If the next thing is itself optional, we have to give up. 
*/ - -if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || - STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) - return FALSE; - -/* If the previous item is a character, get its value. */ - -if (op_code == OP_CHAR || op_code == OP_CHARI || - op_code == OP_NOT || op_code == OP_NOTI) - { -#ifdef SUPPORT_UTF - GETCHARTEST(c, previous); -#else - c = *previous; -#endif - } - -/* Now compare the next item with the previous opcode. First, handle cases when -the next item is a character. */ - -if (escape == 0) - { - /* For a caseless UTF match, the next character may have more than one other - case, which maps to the special PT_CLIST property. Check this first. */ - -#ifdef SUPPORT_UCP - if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0) - { - unsigned int ocs = UCD_CASESET(next); - if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT); - } -#endif - - switch(op_code) - { - case OP_CHAR: - return c != next; - - /* For CHARI (caseless character) we must check the other case. If we have - Unicode property support, we can use it to test the other case of - high-valued characters. We know that next can have only one other case, - because multi-other-case characters are dealt with above. 
*/ - - case OP_CHARI: - if (c == next) return FALSE; -#ifdef SUPPORT_UTF - if (utf) - { - pcre_uint32 othercase; - if (next < 128) othercase = cd->fcc[next]; else -#ifdef SUPPORT_UCP - othercase = UCD_OTHERCASE(next); -#else - othercase = NOTACHAR; -#endif - return c != othercase; - } - else -#endif /* SUPPORT_UTF */ - return (c != TABLE_GET(next, cd->fcc, next)); /* Not UTF */ - - case OP_NOT: - return c == next; - - case OP_NOTI: - if (c == next) return TRUE; -#ifdef SUPPORT_UTF - if (utf) - { - pcre_uint32 othercase; - if (next < 128) othercase = cd->fcc[next]; else -#ifdef SUPPORT_UCP - othercase = UCD_OTHERCASE(next); -#else - othercase = NOTACHAR; -#endif - return c == othercase; - } - else -#endif /* SUPPORT_UTF */ - return (c == TABLE_GET(next, cd->fcc, next)); /* Not UTF */ - - /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. - When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ - - case OP_DIGIT: - return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; - - case OP_NOT_DIGIT: - return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; - - case OP_WHITESPACE: - return next > 255 || (cd->ctypes[next] & ctype_space) == 0; - - case OP_NOT_WHITESPACE: - return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; - - case OP_WORDCHAR: - return next > 255 || (cd->ctypes[next] & ctype_word) == 0; - - case OP_NOT_WORDCHAR: - return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; - - case OP_HSPACE: - case OP_NOT_HSPACE: - switch(next) - { - HSPACE_CASES: - return op_code == OP_NOT_HSPACE; - - default: - return op_code != OP_NOT_HSPACE; - } - - case OP_ANYNL: - case OP_VSPACE: - case OP_NOT_VSPACE: - switch(next) - { - VSPACE_CASES: - return op_code == OP_NOT_VSPACE; - - default: - return op_code != OP_NOT_VSPACE; - } - -#ifdef SUPPORT_UCP - case OP_PROP: - return check_char_prop(next, previous[0], previous[1], FALSE); - - case OP_NOTPROP: - return check_char_prop(next, previous[0], previous[1], TRUE); -#endif - 
- default: - return FALSE; - } - } - -/* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP -is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are -generated only when PCRE_UCP is *not* set, that is, when only ASCII -characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are -replaced by OP_PROP codes when PCRE_UCP is set. */ - -switch(op_code) - { - case OP_CHAR: - case OP_CHARI: - switch(escape) - { - case ESC_d: - return c > 255 || (cd->ctypes[c] & ctype_digit) == 0; - - case ESC_D: - return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; - - case ESC_s: - return c > 255 || (cd->ctypes[c] & ctype_space) == 0; - - case ESC_S: - return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; - - case ESC_w: - return c > 255 || (cd->ctypes[c] & ctype_word) == 0; - - case ESC_W: - return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; - - case ESC_h: - case ESC_H: - switch(c) - { - HSPACE_CASES: - return escape != ESC_h; - - default: - return escape == ESC_h; - } - - case ESC_v: - case ESC_V: - switch(c) - { - VSPACE_CASES: - return escape != ESC_v; - - default: - return escape == ESC_v; - } - - /* When PCRE_UCP is set, these values get generated for \d etc. Find - their substitutions and process them. The result will always be either - ESC_p or ESC_P. Then fall through to process those values. 
*/ - -#ifdef SUPPORT_UCP - case ESC_du: - case ESC_DU: - case ESC_wu: - case ESC_WU: - case ESC_su: - case ESC_SU: - { - int temperrorcode = 0; - ptr = substitutes[escape - ESC_DU]; - escape = check_escape(&ptr, &next, &temperrorcode, 0, options, FALSE); - if (temperrorcode != 0) return FALSE; - ptr++; /* For compatibility */ - } - /* Fall through */ - - case ESC_p: - case ESC_P: - { - unsigned int ptype = 0, pdata = 0; - int errorcodeptr; - BOOL negated; - - ptr--; /* Make ptr point at the p or P */ - if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcodeptr)) - return FALSE; - ptr++; /* Point past the final curly ket */ - - /* If the property item is optional, we have to give up. (When generated - from \d etc by PCRE_UCP, this test will have been applied much earlier, - to the original \d etc. At this point, ptr will point to a zero byte. */ - - if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || - STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) - return FALSE; - - /* Do the property check. */ - - return check_char_prop(c, ptype, pdata, (escape == ESC_P) != negated); - } -#endif - - default: - return FALSE; - } - - /* In principle, support for Unicode properties should be integrated here as - well. It means re-organizing the above code so as to get hold of the property - values before switching on the op-code. However, I wonder how many patterns - combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set, - these op-codes are never generated.) 
*/ - - case OP_DIGIT: - return escape == ESC_D || escape == ESC_s || escape == ESC_W || - escape == ESC_h || escape == ESC_v || escape == ESC_R; - - case OP_NOT_DIGIT: - return escape == ESC_d; - - case OP_WHITESPACE: - return escape == ESC_S || escape == ESC_d || escape == ESC_w; - - case OP_NOT_WHITESPACE: - return escape == ESC_s || escape == ESC_h || escape == ESC_v || escape == ESC_R; - - case OP_HSPACE: - return escape == ESC_S || escape == ESC_H || escape == ESC_d || - escape == ESC_w || escape == ESC_v || escape == ESC_R; - - case OP_NOT_HSPACE: - return escape == ESC_h; - - /* Can't have \S in here because VT matches \S (Perl anomaly) */ - case OP_ANYNL: - case OP_VSPACE: - return escape == ESC_V || escape == ESC_d || escape == ESC_w; - - case OP_NOT_VSPACE: - return escape == ESC_v || escape == ESC_R; - - case OP_WORDCHAR: - return escape == ESC_W || escape == ESC_s || escape == ESC_h || - escape == ESC_v || escape == ESC_R; - - case OP_NOT_WORDCHAR: - return escape == ESC_w || escape == ESC_d; - - default: - return FALSE; - } - -/* Control does not reach here */ -} - - - -/************************************************* -* Add a character or range to a class * -*************************************************/ - -/* This function packages up the logic of adding a character or range of -characters to a class. The character values in the arguments will be within the -valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is -mutually recursive with the function immediately below. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cd contains pointers to tables etc. 
- start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, - compile_data *cd, pcre_uint32 start, pcre_uint32 end) -{ -pcre_uint32 c; -int n8 = 0; - -/* If caseless matching is required, scan the range and process alternate -cases. In Unicode, there are 8-bit characters that have alternate cases that -are greater than 255 and vice-versa. Sometimes we can just extend the original -range. */ - -if ((options & PCRE_CASELESS) != 0) - { -#ifdef SUPPORT_UCP - if ((options & PCRE_UTF8) != 0) - { - int rc; - pcre_uint32 oc, od; - - options &= ~PCRE_CASELESS; /* Remove for recursive calls */ - c = start; - - while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) - { - /* Handle a single character that has more than one other case. */ - - if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd, - PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - - else if (oc >= start && od <= end) continue; - - /* Extend the original range if there is overlap, noting that if oc < c, we - can't have od > end because a subrange is always shorter than the basic - range. Otherwise, use a recursive call to add the additional range. */ - - else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) end = od; /* Extend upwards */ - else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od); - } - } - else -#endif /* SUPPORT_UCP */ - - /* Not UTF-mode, or no UCP */ - - for (c = start; c <= end && c < 256; c++) - { - SETBIT(classbits, cd->fcc[c]); - n8++; - } - } - -/* Now handle the original range. Adjust the final value according to the bit -length - this means that the same lists of (e.g.) horizontal spaces can be used -in all cases. 
*/ - -#if defined COMPILE_PCRE8 -#ifdef SUPPORT_UTF - if ((options & PCRE_UTF8) == 0) -#endif - if (end > 0xff) end = 0xff; - -#elif defined COMPILE_PCRE16 -#ifdef SUPPORT_UTF - if ((options & PCRE_UTF16) == 0) -#endif - if (end > 0xffff) end = 0xffff; - -#endif /* COMPILE_PCRE[8|16] */ - -/* If all characters are less than 256, use the bit map. Otherwise use extra -data. */ - -if (end < 0x100) - { - for (c = start; c <= end; c++) - { - n8++; - SETBIT(classbits, c); - } - } - -else - { - pcre_uchar *uchardata = *uchardptr; - -#ifdef SUPPORT_UTF - if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */ - { - if (start < end) - { - *uchardata++ = XCL_RANGE; - uchardata += PRIV(ord2utf)(start, uchardata); - uchardata += PRIV(ord2utf)(end, uchardata); - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - uchardata += PRIV(ord2utf)(start, uchardata); - } - } - else -#endif /* SUPPORT_UTF */ - - /* Without UTF support, character values are constrained by the bit length, - and can only be > 256 for 16-bit and 32-bit libraries. */ - -#ifdef COMPILE_PCRE8 - {} -#else - if (start < end) - { - *uchardata++ = XCL_RANGE; - *uchardata++ = start; - *uchardata++ = end; - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - *uchardata++ = start; - } -#endif - - *uchardptr = uchardata; /* Updata extra data pointer */ - } - -return n8; /* Number of 8-bit characters */ -} - - - - -/************************************************* -* Add a list of characters to a class * -*************************************************/ - -/* This function is used for adding a list of case-equivalent characters to a -class, and also for adding a list of horizontal or vertical whitespace. If the -list is in order (which it should be), ranges of characters are detected and -handled appropriately. This function is mutually recursive with the function -above. 
- -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cd contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, - compile_data *cd, const pcre_uint32 *p, unsigned int except) -{ -int n8 = 0; -while (p[0] < NOTACHAR) - { - int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]); - } - p += n + 1; - } -return n8; -} - - - -/************************************************* -* Add characters not in a list to a class * -*************************************************/ - -/* This function is used for adding the complement of a list of horizontal or -vertical whitespace to a class. The list must be in order. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cd contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, - int options, compile_data *cd, const pcre_uint32 *p) -{ -BOOL utf = (options & PCRE_UTF8) != 0; -int n8 = 0; -if (p[0] > 0) - n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1); -while (p[0] < NOTACHAR) - { - while (p[1] == p[0] + 1) p++; - n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1, - (p[1] == NOTACHAR) ? (utf ? 
0x10ffffu : 0xffffffffu) : p[1] - 1); - p++; - } -return n8; -} - - - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the a vector. If the options are -changed during the branch, the pointer is used to change the external options -bits. This function is used during the pre-compile phase when we are trying -to find out the amount of memory needed, as well as during the real compile -phase. The value of lengthptr distinguishes the two phases. - -Arguments: - optionsptr pointer to the option bits - codeptr points to the pointer to the current code point - ptrptr points to the current pattern pointer - errorcodeptr points to error code variable - firstcharptr place to put the first required character - firstcharflagsptr place to put the first character flags, or a negative number - reqcharptr place to put the last required character - reqcharflagsptr place to put the last required character flags, or a negative number - bcptr points to current branch chain - cond_depth conditional nesting depth - cd contains pointers to tables etc. 
- lengthptr NULL during the real compile phase - points to length accumulator during pre-compile phase - -Returns: TRUE on success - FALSE, with *errorcodeptr set non-zero on error -*/ - -static BOOL -compile_branch(int *optionsptr, pcre_uchar **codeptr, - const pcre_uchar **ptrptr, int *errorcodeptr, - pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, - pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, - branch_chain *bcptr, int cond_depth, - compile_data *cd, int *lengthptr) -{ -int repeat_type, op_type; -int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ -int bravalue = 0; -int greedy_default, greedy_non_default; -pcre_uint32 firstchar, reqchar; -pcre_int32 firstcharflags, reqcharflags; -pcre_uint32 zeroreqchar, zerofirstchar; -pcre_int32 zeroreqcharflags, zerofirstcharflags; -pcre_int32 req_caseopt, reqvary, tempreqvary; -int options = *optionsptr; /* May change dynamically */ -int after_manual_callout = 0; -int length_prevgroup = 0; -register pcre_uint32 c; -int escape; -register pcre_uchar *code = *codeptr; -pcre_uchar *last_code = code; -pcre_uchar *orig_code = code; -pcre_uchar *tempcode; -BOOL inescq = FALSE; -BOOL groupsetfirstchar = FALSE; -const pcre_uchar *ptr = *ptrptr; -const pcre_uchar *tempptr; -const pcre_uchar *nestptr = NULL; -pcre_uchar *previous = NULL; -pcre_uchar *previous_callout = NULL; -pcre_uchar *save_hwm = NULL; -pcre_uint8 classbits[32]; - -/* We can fish out the UTF-8 setting once and for all into a BOOL, but we -must not do this for other options (e.g. PCRE_EXTENDED) because they may change -dynamically as we process the pattern. */ - -#ifdef SUPPORT_UTF -/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ -BOOL utf = (options & PCRE_UTF8) != 0; -#ifndef COMPILE_PCRE32 -pcre_uchar utf_chars[6]; -#endif -#else -BOOL utf = FALSE; -#endif - -/* Helper variables for OP_XCLASS opcode (for characters > 255). 
We define -class_uchardata always so that it can be passed to add_to_class() always, -though it will not be used in non-UTF 8-bit cases. This avoids having to supply -alternative calls for the different cases. */ - -pcre_uchar *class_uchardata; -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 -BOOL xclass; -pcre_uchar *class_uchardata_base; -#endif - -#ifdef PCRE_DEBUG -if (lengthptr != NULL) DPRINTF((">> start branch\n")); -#endif - -/* Set up the default and non-default settings for greediness */ - -greedy_default = ((options & PCRE_UNGREEDY) != 0); -greedy_non_default = greedy_default ^ 1; - -/* Initialize no first byte, no required byte. REQ_UNSET means "no char -matching encountered yet". It gets changed to REQ_NONE if we hit something that -matches a non-fixed char first char; reqchar just remains unset if we never -find one. - -When we hit a repeat whose minimum is zero, we may have to adjust these values -to take the zero repeat into account. This is implemented by setting them to -zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual -item types that can be repeated set these backoff variables appropriately. */ - -firstchar = reqchar = zerofirstchar = zeroreqchar = 0; -firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET; - -/* The variable req_caseopt contains either the REQ_CASELESS value -or zero, according to the current setting of the caseless flag. The -REQ_CASELESS leaves the lower 28 bit empty. It is added into the -firstchar or reqchar variables to record the case status of the -value. This is used only for ASCII characters. */ - -req_caseopt = ((options & PCRE_CASELESS) != 0)? 
REQ_CASELESS:0; - -/* Switch on next character until the end of the branch */ - -for (;; ptr++) - { - BOOL negate_class; - BOOL should_flip_negation; - BOOL possessive_quantifier; - BOOL is_quantifier; - BOOL is_recurse; - BOOL reset_bracount; - int class_has_8bitchar; - int class_one_char; - int newoptions; - int recno; - int refsign; - int skipbytes; - pcre_uint32 subreqchar, subfirstchar; - pcre_int32 subreqcharflags, subfirstcharflags; - int terminator; - unsigned int mclength; - unsigned int tempbracount; - pcre_uint32 ec; - pcre_uchar mcbuffer[8]; - - /* Get next character in the pattern */ - - c = *ptr; - - /* If we are at the end of a nested substitution, revert to the outer level - string. Nesting only happens one level deep. */ - - if (c == CHAR_NULL && nestptr != NULL) - { - ptr = nestptr; - nestptr = NULL; - c = *ptr; - } - - /* If we are in the pre-compile phase, accumulate the length used for the - previous cycle of this loop. */ - - if (lengthptr != NULL) - { -#ifdef PCRE_DEBUG - if (code > cd->hwm) cd->hwm = code; /* High water info */ -#endif - if (code > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ - { - *errorcodeptr = ERR52; - goto FAILED; - } - - /* There is at least one situation where code goes backwards: this is the - case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, - the class is simply eliminated. However, it is created first, so we have to - allow memory for it. Therefore, don't ever reduce the length at this point. 
- */ - - if (code < last_code) code = last_code; - - /* Paranoid check for integer overflow */ - - if (OFLOW_MAX - *lengthptr < code - last_code) - { - *errorcodeptr = ERR20; - goto FAILED; - } - - *lengthptr += (int)(code - last_code); - DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr, - (int)(code - last_code), c, c)); - - /* If "previous" is set and it is not at the start of the work space, move - it back to there, in order to avoid filling up the work space. Otherwise, - if "previous" is NULL, reset the current code pointer to the start. */ - - if (previous != NULL) - { - if (previous > orig_code) - { - memmove(orig_code, previous, IN_UCHARS(code - previous)); - code -= previous - orig_code; - previous = orig_code; - } - } - else code = orig_code; - - /* Remember where this code item starts so we can pick up the length - next time round. */ - - last_code = code; - } - - /* In the real compile phase, just check the workspace used by the forward - reference list. */ - - else if (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN) - { - *errorcodeptr = ERR52; - goto FAILED; - } - - /* If in \Q...\E, check for the end; if not, we have a literal */ - - if (inescq && c != CHAR_NULL) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { - inescq = FALSE; - ptr++; - continue; - } - else - { - if (previous_callout != NULL) - { - if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ - complete_callout(previous_callout, ptr, cd); - previous_callout = NULL; - } - if ((options & PCRE_AUTO_CALLOUT) != 0) - { - previous_callout = code; - code = auto_callout(code, ptr, cd); - } - goto NORMAL_CHAR; - } - } - - /* Fill in length of a previous callout, except when the next thing is - a quantifier. 
*/ - - is_quantifier = - c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || - (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); - - if (!is_quantifier && previous_callout != NULL && - after_manual_callout-- <= 0) - { - if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ - complete_callout(previous_callout, ptr, cd); - previous_callout = NULL; - } - - /* In extended mode, skip white space and comments. */ - - if ((options & PCRE_EXTENDED) != 0) - { - if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue; - if (c == CHAR_NUMBER_SIGN) - { - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } - ptr++; -#ifdef SUPPORT_UTF - if (utf) FORWARDCHAR(ptr); -#endif - } - if (*ptr != CHAR_NULL) continue; - - /* Else fall through to handle end of string */ - c = 0; - } - } - - /* No auto callout for quantifiers. */ - - if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier) - { - previous_callout = code; - code = auto_callout(code, ptr, cd); - } - - switch(c) - { - /* ===================================================================*/ - case 0: /* The branch terminates at string end */ - case CHAR_VERTICAL_LINE: /* or | or ) */ - case CHAR_RIGHT_PARENTHESIS: - *firstcharptr = firstchar; - *firstcharflagsptr = firstcharflags; - *reqcharptr = reqchar; - *reqcharflagsptr = reqcharflags; - *codeptr = code; - *ptrptr = ptr; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < code - last_code) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += (int)(code - last_code); /* To include callout length */ - DPRINTF((">> end branch\n")); - } - return TRUE; - - - /* ===================================================================*/ - /* Handle single-character metacharacters. In multiline mode, ^ disables - the setting of any following char as a first character. 
*/ - - case CHAR_CIRCUMFLEX_ACCENT: - previous = NULL; - if ((options & PCRE_MULTILINE) != 0) - { - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - *code++ = OP_CIRCM; - } - else *code++ = OP_CIRC; - break; - - case CHAR_DOLLAR_SIGN: - previous = NULL; - *code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; - break; - - /* There can never be a first char if '.' is first, whatever happens about - repeats. The value of reqchar doesn't change either. */ - - case CHAR_DOT: - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - previous = code; - *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; - break; - - - /* ===================================================================*/ - /* Character classes. If the included characters are all < 256, we build a - 32-byte bitmap of the permitted characters, except in the special case - where there is only one such character. For negated classes, we build the - map as usual, then invert it at the end. However, we use a different opcode - so that data characters > 255 can be handled correctly. - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells - whether the bitmap is present, and whether this is a negated class or not. - - In JavaScript compatibility mode, an isolated ']' causes an error. In - default (Perl) mode, it is treated as a data character. */ - - case CHAR_RIGHT_SQUARE_BRACKET: - if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) - { - *errorcodeptr = ERR64; - goto FAILED; - } - goto NORMAL_CHAR; - - case CHAR_LEFT_SQUARE_BRACKET: - previous = code; - - /* PCRE supports POSIX class stuff inside a class. 
Perl gives an error if - they are encountered at the top level, so we'll do that too. */ - - if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, &tempptr)) - { - *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31; - goto FAILED; - } - - /* If the first character is '^', set the negation flag and skip it. Also, - if the first few characters (either before or after ^) are \Q\E or \E we - skip them too. This makes for compatibility with Perl. */ - - negate_class = FALSE; - for (;;) - { - c = *(++ptr); - if (c == CHAR_BACKSLASH) - { - if (ptr[1] == CHAR_E) - ptr++; - else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 3; - else - break; - } - else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } - - /* Empty classes are allowed in JavaScript compatibility mode. Otherwise, - an initial ']' is taken as a data character -- the code below handles - that. In JS mode, [] must always fail, so generate OP_FAIL, whereas - [^] must match any character, so generate OP_ALLANY. */ - - if (c == CHAR_RIGHT_SQUARE_BRACKET && - (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) - { - *code++ = negate_class? OP_ALLANY : OP_FAIL; - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - break; - } - - /* If a class contains a negative special such as \S, we need to flip the - negation flag at the end, so that support for characters > 255 works - correctly (they are all included in the class). */ - - should_flip_negation = FALSE; - - /* For optimization purposes, we track some properties of the class: - class_has_8bitchar will be non-zero if the class contains at least one < - 256 character; class_one_char will be 1 if the class contains just one - character. */ - - class_has_8bitchar = 0; - class_one_char = 0; - - /* Initialize the 32-char bit map to all zeros. 
We build the map in a - temporary bit of memory, in case the class contains fewer than two - 8-bit characters because in that case the compiled code doesn't use the bit - map. */ - - memset(classbits, 0, 32 * sizeof(pcre_uint8)); - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - xclass = FALSE; - class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ - class_uchardata_base = class_uchardata; /* Save the start */ -#endif - - /* Process characters until ] is reached. By writing this as a "do" it - means that an initial ] is taken as a data character. At the start of the - loop, c contains the first byte of the character. */ - - if (c != CHAR_NULL) do - { - const pcre_uchar *oldptr; - -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(c)) - { /* Braces are required because the */ - GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ - } -#endif - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - /* In the pre-compile phase, accumulate the length of any extra - data and reset the pointer. This is so that very large classes that - contain a zillion > 255 characters no longer overwrite the work space - (which is on the stack). We have to remember that there was XCLASS data, - however. */ - - if (lengthptr != NULL && class_uchardata > class_uchardata_base) - { - xclass = TRUE; - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; - } -#endif - - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ - { - inescq = FALSE; /* Reset literal state */ - ptr++; /* Skip the 'E' */ - continue; /* Carry on with next */ - } - goto CHECK_RANGE; /* Could be range if \E follows */ - } - - /* Handle POSIX class names. Perl allows a negation extension of the - form [:^name:]. A square bracket that doesn't match the syntax is - treated as a literal. We also recognize the POSIX constructions - [.ch.] 
and [=ch=] ("collating elements") and fault them, as Perl - 5.6 and 5.8 do. */ - - if (c == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) - { - BOOL local_negate = FALSE; - int posix_class, taboffset, tabopt; - register const pcre_uint8 *cbits = cd->cbits; - pcre_uint8 pbits[32]; - - if (ptr[1] != CHAR_COLON) - { - *errorcodeptr = ERR31; - goto FAILED; - } - - ptr += 2; - if (*ptr == CHAR_CIRCUMFLEX_ACCENT) - { - local_negate = TRUE; - should_flip_negation = TRUE; /* Note negative special */ - ptr++; - } - - posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); - if (posix_class < 0) - { - *errorcodeptr = ERR30; - goto FAILED; - } - - /* If matching is caseless, upper and lower are converted to - alpha. This relies on the fact that the class table starts with - alpha, lower, upper as the first 3 entries. */ - - if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* When PCRE_UCP is set, some of the POSIX classes are converted to - different escape sequences that use Unicode properties. */ - -#ifdef SUPPORT_UCP - if ((options & PCRE_UCP) != 0) - { - int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); - if (posix_substitutes[pc] != NULL) - { - nestptr = tempptr + 1; - ptr = posix_substitutes[pc] - 1; - continue; - } - } -#endif - /* In the non-UCP case, we build the bit map for the POSIX class in a - chunk of local store because we may be adding and subtracting from it, - and we don't want to subtract bits that may be in the main map already. - At the end we or the result into the bit map that is being built. */ - - posix_class *= 3; - - /* Copy in the first table (always present) */ - - memcpy(pbits, cbits + posix_class_maps[posix_class], - 32 * sizeof(pcre_uint8)); - - /* If there is a second table, add or remove it as required. 
*/ - - taboffset = posix_class_maps[posix_class + 1]; - tabopt = posix_class_maps[posix_class + 2]; - - if (taboffset >= 0) - { - if (tabopt >= 0) - for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset]; - else - for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset]; - } - - /* Now see if we need to remove any special characters. An option - value of 1 removes vertical space and 2 removes underscore. */ - - if (tabopt < 0) tabopt = -tabopt; - if (tabopt == 1) pbits[1] &= ~0x3c; - else if (tabopt == 2) pbits[11] &= 0x7f; - - /* Add the POSIX table or its complement into the main table that is - being built and we are done. */ - - if (local_negate) - for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; - else - for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; - - ptr = tempptr + 1; - /* Every class contains at least one < 256 character. */ - class_has_8bitchar = 1; - /* Every class contains at least two characters. */ - class_one_char = 2; - continue; /* End of POSIX syntax handling */ - } - - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. The sequence \b is a special - case. Inside a class (and only there) it is treated as backspace. We - assume that other escapes have more than one character in them, so - speculatively set both class_has_8bitchar and class_one_char bigger - than one. Unrecognized escapes fall through and are either treated - as literal characters (by default), or are faulted if - PCRE_EXTRA is set. 
*/ - - if (c == CHAR_BACKSLASH) - { - escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE); - - if (*errorcodeptr != 0) goto FAILED; - - if (escape == 0) - c = ec; - else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ - else if (escape == ESC_N) /* \N is not supported in a class */ - { - *errorcodeptr = ERR71; - goto FAILED; - } - else if (escape == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - ptr += 2; /* avoid empty string */ - } - else inescq = TRUE; - continue; - } - else if (escape == ESC_E) continue; /* Ignore orphan \E */ - - else - { - register const pcre_uint8 *cbits = cd->cbits; - /* Every class contains at least two < 256 characters. */ - class_has_8bitchar++; - /* Every class contains at least two characters. */ - class_one_char += 2; - - switch (escape) - { -#ifdef SUPPORT_UCP - case ESC_du: /* These are the values given for \d etc */ - case ESC_DU: /* when PCRE_UCP is set. We replace the */ - case ESC_wu: /* escape sequence with an appropriate \p */ - case ESC_WU: /* or \P to test Unicode properties instead */ - case ESC_su: /* of the default ASCII testing. */ - case ESC_SU: - nestptr = ptr; - ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ - class_has_8bitchar--; /* Undo! */ - continue; -#endif - case ESC_d: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; - continue; - - case ESC_D: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; - continue; - - case ESC_w: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; - continue; - - case ESC_W: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; - continue; - - /* Perl 5.004 onwards omits VT from \s, but we must preserve it - if it was previously set by something earlier in the character - class. 
Luckily, the value of CHAR_VT is 0x0b in both ASCII and - EBCDIC, so we lazily just adjust the appropriate bit. */ - - case ESC_s: - classbits[0] |= cbits[cbit_space]; - classbits[1] |= cbits[cbit_space+1] & ~0x08; - for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; - continue; - - case ESC_S: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; - classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ - continue; - - /* The rest apply in both UCP and non-UCP cases. */ - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, options, cd, - PRIV(hspace_list), NOTACHAR); - continue; - - case ESC_H: - (void)add_not_list_to_class(classbits, &class_uchardata, options, - cd, PRIV(hspace_list)); - continue; - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, options, cd, - PRIV(vspace_list), NOTACHAR); - continue; - - case ESC_V: - (void)add_not_list_to_class(classbits, &class_uchardata, options, - cd, PRIV(vspace_list)); - continue; - -#ifdef SUPPORT_UCP - case ESC_p: - case ESC_P: - { - BOOL negated; - unsigned int ptype = 0, pdata = 0; - if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) - goto FAILED; - *class_uchardata++ = ((escape == ESC_p) != negated)? - XCL_PROP : XCL_NOTPROP; - *class_uchardata++ = ptype; - *class_uchardata++ = pdata; - class_has_8bitchar--; /* Undo! */ - continue; - } -#endif - /* Unrecognized escapes are faulted if PCRE is running in its - strict mode. By default, for compatibility with Perl, they are - treated as literals. */ - - default: - if ((options & PCRE_EXTRA) != 0) - { - *errorcodeptr = ERR7; - goto FAILED; - } - class_has_8bitchar--; /* Undo the speculative increase. */ - class_one_char -= 2; /* Undo the speculative increase. */ - c = *ptr; /* Get the final character and fall through */ - break; - } - } - - /* Fall through if the escape just defined a single character (c >= 0). - This may be greater than 256. 
*/ - - escape = 0; - - } /* End of backslash handling */ - - /* A character may be followed by '-' to form a range. However, Perl does - not permit ']' to be the end of the range. A '-' character at the end is - treated as a literal. Perl ignores orphaned \E sequences entirely. The - code for handling \Q and \E is messy. */ - - CHECK_RANGE: - while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - inescq = FALSE; - ptr += 2; - } - oldptr = ptr; - - /* Remember if \r or \n were explicitly used */ - - if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; - - /* Check for range */ - - if (!inescq && ptr[1] == CHAR_MINUS) - { - pcre_uint32 d; - ptr += 2; - while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; - - /* If we hit \Q (not followed by \E) at this point, go into escaped - mode. */ - - while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) - { - ptr += 2; - if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { ptr += 2; continue; } - inescq = TRUE; - break; - } - - /* Minus (hyphen) at the end of a class is treated as a literal, so put - back the pointer and jump to handle the character that preceded it. */ - - if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) - { - ptr = oldptr; - goto CLASS_SINGLE_CHARACTER; - } - - /* Otherwise, we have a potential range; pick up the next character */ - -#ifdef SUPPORT_UTF - if (utf) - { /* Braces are required because the */ - GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ - } - else -#endif - d = *ptr; /* Not UTF-8 mode */ - - /* The second part of a range can be a single-character escape, but - not any of the other escapes. Perl 5.6 treats a hyphen as a literal - in such circumstances. */ - - if (!inescq && d == CHAR_BACKSLASH) - { - int descape; - descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE); - if (*errorcodeptr != 0) goto FAILED; - - /* \b is backspace; any other special means the '-' was literal. 
*/ - - if (descape != 0) - { - if (descape == ESC_b) d = CHAR_BS; else - { - ptr = oldptr; - goto CLASS_SINGLE_CHARACTER; /* A few lines below */ - } - } - } - - /* Check that the two values are in the correct order. Optimize - one-character ranges. */ - - if (d < c) - { - *errorcodeptr = ERR8; - goto FAILED; - } - if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */ - - /* We have found a character range, so single character optimizations - cannot be done anymore. Any value greater than 1 indicates that there - is more than one character. */ - - class_one_char = 2; - - /* Remember an explicit \r or \n, and add the range to the class. */ - - if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, cd, c, d); - - continue; /* Go get the next char in the class */ - } - - /* Handle a single character - we can get here for a normal non-escape - char, or after \ that introduces a single character or for an apparent - range that isn't. Only the value 1 matters for class_one_char, so don't - increase it if it is already 2 or more ... just in case there's a class - with a zillion characters in it. */ - - CLASS_SINGLE_CHARACTER: - if (class_one_char < 2) class_one_char++; - - /* If class_one_char is 1, we have the first single character in the - class, and there have been no prior ranges, or XCLASS items generated by - escapes. If this is the final character in the class, we can optimize by - turning the item into a 1-character OP_CHAR[I] if it's positive, or - OP_NOT[I] if it's negative. In the positive case, it can cause firstchar - to be set. Otherwise, there can be no first char if this item is first, - whatever repeat count may follow. In the case of reqchar, save the - previous value for reinstating. 
*/ - - if (class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { - ptr++; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - - if (negate_class) - { -#ifdef SUPPORT_UCP - int d; -#endif - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - - /* For caseless UTF-8 mode when UCP support is available, check - whether this character has more than one other case. If so, generate - a special OP_NOTPROP item instead of OP_NOTI. */ - -#ifdef SUPPORT_UCP - if (utf && (options & PCRE_CASELESS) != 0 && - (d = UCD_CASESET(c)) != 0) - { - *code++ = OP_NOTPROP; - *code++ = PT_CLIST; - *code++ = d; - } - else -#endif - /* Char has only one other case, or UCP not available */ - - { - *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) - code += PRIV(ord2utf)(c, code); - else -#endif - *code++ = c; - } - - /* We are finished with this character class */ - - goto END_CLASS; - } - - /* For a single, positive character, get the value into mcbuffer, and - then we can handle this with the normal one-character code. */ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) - mclength = PRIV(ord2utf)(c, mcbuffer); - else -#endif - { - mcbuffer[0] = c; - mclength = 1; - } - goto ONE_CHAR; - } /* End of 1-char optimization */ - - /* There is more than one character in the class, or an XCLASS item - has been generated. Add this character to the class. */ - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, cd, c, c); - } - - /* Loop until ']' reached. This "while" is the end of the "do" far above. - If we are at the end of an internal nested string, revert to the outer - string. 
*/ - - while (((c = *(++ptr)) != CHAR_NULL || - (nestptr != NULL && - (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) && - (c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); - - /* Check for missing terminating ']' */ - - if (c == CHAR_NULL) - { - *errorcodeptr = ERR6; - goto FAILED; - } - - /* We will need an XCLASS if data has been placed in class_uchardata. In - the second phase this is a sufficient test. However, in the pre-compile - phase, class_uchardata gets emptied to prevent workspace overflow, so it - only if the very last character in the class needs XCLASS will it contain - anything at this point. For this reason, xclass gets set TRUE above when - uchar_classdata is emptied, and that's why this code is the way it is here - instead of just doing a test on class_uchardata below. */ - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - if (class_uchardata > class_uchardata_base) xclass = TRUE; -#endif - - /* If this is the first thing in the branch, there can be no first char - setting, whatever the repeat count. Any reqchar setting must remain - unchanged after any kind of repeat. */ - - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - - /* If there are characters with values > 255, we have to compile an - extended class, with its own opcode, unless there was a negated special - such as \S in the class, and PCRE_UCP is not set, because in that case all - characters > 255 are in the class, so any that were explicitly given as - well can be ignored. If (when there are explicit characters > 255 that must - be listed) there are no characters < 256, we can omit the bitmap in the - actual compiled code. 
*/ - -#ifdef SUPPORT_UTF - if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0)) -#elif !defined COMPILE_PCRE8 - if (xclass && !should_flip_negation) -#endif -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - { - *class_uchardata++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negate_class? XCL_NOT:0; - - /* If the map is required, move up the extra data to make room for it; - otherwise just move the code pointer to the end of the extra data. */ - - if (class_has_8bitchar > 0) - { - *code++ |= XCL_MAP; - memmove(code + (32 / sizeof(pcre_uchar)), code, - IN_UCHARS(class_uchardata - code)); - memcpy(code, classbits, 32); - code = class_uchardata + (32 / sizeof(pcre_uchar)); - } - else code = class_uchardata; - - /* Now fill in the complete length of the item */ - - PUT(previous, 1, (int)(code - previous)); - break; /* End of class handling */ - } -#endif - - /* If there are no characters > 255, or they are all to be included or - excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the - whole class was negated and whether there were negative specials such as \S - (non-UCP) in the class. Then copy the 32-byte map into the code vector, - negating it if necessary. */ - - *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; - if (lengthptr == NULL) /* Save time in the pre-compile phase */ - { - if (negate_class) - for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; - memcpy(code, classbits, 32); - } - code += 32 / sizeof(pcre_uchar); - - END_CLASS: - break; - - - /* ===================================================================*/ - /* Various kinds of repeat; '{' is not necessarily a quantifier, but this - has been tested above. 
*/ - - case CHAR_LEFT_CURLY_BRACKET: - if (!is_quantifier) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); - if (*errorcodeptr != 0) goto FAILED; - goto REPEAT; - - case CHAR_ASTERISK: - repeat_min = 0; - repeat_max = -1; - goto REPEAT; - - case CHAR_PLUS: - repeat_min = 1; - repeat_max = -1; - goto REPEAT; - - case CHAR_QUESTION_MARK: - repeat_min = 0; - repeat_max = 1; - - REPEAT: - if (previous == NULL) - { - *errorcodeptr = ERR9; - goto FAILED; - } - - if (repeat_min == 0) - { - firstchar = zerofirstchar; /* Adjust for zero repeat */ - firstcharflags = zerofirstcharflags; - reqchar = zeroreqchar; /* Ditto */ - reqcharflags = zeroreqcharflags; - } - - /* Remember whether this is a variable length repeat */ - - reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - - op_type = 0; /* Default single-char op codes */ - possessive_quantifier = FALSE; /* Default not possessive quantifier */ - - /* Save start of previous item, in case we have to move it up in order to - insert something before it. */ - - tempcode = previous; - - /* If the next character is '+', we have a possessive quantifier. This - implies greediness, whatever the setting of the PCRE_UNGREEDY option. - If the next character is '?' this is a minimizing repeat, by default, - but if PCRE_UNGREEDY is set, it works the other way round. We change the - repeat type to the non-default. */ - - if (ptr[1] == CHAR_PLUS) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - ptr++; - } - else if (ptr[1] == CHAR_QUESTION_MARK) - { - repeat_type = greedy_non_default; - ptr++; - } - else repeat_type = greedy_default; - - /* If previous was a recursion call, wrap it in atomic brackets so that - previous becomes the atomic group. All recursions were so wrapped in the - past, but it no longer happens for non-repeated recursions. 
In fact, the - repeated ones could be re-implemented independently so as not to need this, - but for the moment we rely on the code for repeating groups. */ - - if (*previous == OP_RECURSE) - { - memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE)); - *previous = OP_ONCE; - PUT(previous, 1, 2 + 2*LINK_SIZE); - previous[2 + 2*LINK_SIZE] = OP_KET; - PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); - code += 2 + 2 * LINK_SIZE; - length_prevgroup = 3 + 3*LINK_SIZE; - - /* When actually compiling, we need to check whether this was a forward - reference, and if so, adjust the offset. */ - - if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE) - { - int offset = GET(cd->hwm, -LINK_SIZE); - if (offset == previous + 1 - cd->start_code) - PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); - } - } - - /* Now handle repetition for the different types of item. */ - - /* If previous was a character or negated character match, abolish the item - and generate a repeat item instead. If a char item has a minimum of more - than one, ensure that it is set in reqchar - it might not be if a sequence - such as x{3} is the first thing in a branch because the x will have gone - into firstchar instead. */ - - if (*previous == OP_CHAR || *previous == OP_CHARI - || *previous == OP_NOT || *previous == OP_NOTI) - { - switch (*previous) - { - default: /* Make compiler happy. */ - case OP_CHAR: op_type = OP_STAR - OP_STAR; break; - case OP_CHARI: op_type = OP_STARI - OP_STAR; break; - case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; - case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; - } - - /* Deal with UTF characters that take up more than one character. It's - easier to write this out separately than try to macrify it. Use c to - hold the length of the character in bytes, plus UTF_LENGTH to flag that - it's a length rather than a small character. 
*/ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && NOT_FIRSTCHAR(code[-1])) - { - pcre_uchar *lastchar = code - 1; - BACKCHAR(lastchar); - c = (int)(code - lastchar); /* Length of UTF-8 character */ - memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */ - c |= UTF_LENGTH; /* Flag c as a length */ - } - else -#endif /* SUPPORT_UTF */ - - /* Handle the case of a single charater - either with no UTF support, or - with UTF disabled, or for a single character UTF character. */ - { - c = code[-1]; - if (*previous <= OP_CHARI && repeat_min > 1) - { - reqchar = c; - reqcharflags = req_caseopt | cd->req_varyopt; - } - } - - /* If the repetition is unlimited, it pays to see if the next thing on - the line is something that cannot possibly match this character. If so, - automatically possessifying this item gains some performance in the case - where the match fails. */ - - if (!possessive_quantifier && - repeat_max < 0 && - check_auto_possessive(previous, utf, ptr + 1, options, cd)) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - } - - goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. The code is shared with single-character - repeats by setting op_type to add a suitable offset into repeat_type. Note - the the Unicode property types will be present only when SUPPORT_UCP is - defined, but we don't wrap the little bits of code here because it just - makes it horribly messy. 
*/ - - else if (*previous < OP_EODN) - { - pcre_uchar *oldcode; - int prop_type, prop_value; - op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; - - if (!possessive_quantifier && - repeat_max < 0 && - check_auto_possessive(previous, utf, ptr + 1, options, cd)) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - } - - OUTPUT_SINGLE_REPEAT: - if (*previous == OP_PROP || *previous == OP_NOTPROP) - { - prop_type = previous[1]; - prop_value = previous[2]; - } - else prop_type = prop_value = -1; - - oldcode = code; - code = previous; /* Usually overwrite previous item */ - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) goto END_REPEAT; - - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - - /* All real repeats make it impossible to handle partial matching (maybe - one day we will be able to remove this restriction). */ - - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ - - /* Combine the op_type with the repeat_type */ - - repeat_type += op_type; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. */ - - if (repeat_min == 0) - { - if (repeat_max == -1) *code++ = OP_STAR + repeat_type; - else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - - /* A repeat minimum of 1 is optimized into some special cases. If the - maximum is unlimited, we use OP_PLUS. Otherwise, the original item is - left in place and, if the maximum is greater than 1, we use OP_UPTO with - one less than the maximum. 
*/ - - else if (repeat_min == 1) - { - if (repeat_max == -1) - *code++ = OP_PLUS + repeat_type; - else - { - code = oldcode; /* leave previous item in place */ - if (repeat_max == 1) goto END_REPEAT; - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max - 1); - } - } - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO. */ - - else - { - *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ - PUT2INC(code, 0, repeat_min); - - /* If the maximum is unlimited, insert an OP_STAR. Before doing so, - we have to insert the character for the previous code. For a repeated - Unicode property match, there are two extra bytes that define the - required property. In UTF-8 mode, long characters have their length in - c, with the UTF_LENGTH bit as a flag. */ - - if (repeat_max < 0) - { -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && (c & UTF_LENGTH) != 0) - { - memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - { - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - } - *code++ = OP_STAR + repeat_type; - } - - /* Else insert an UPTO if the max is greater than the min, again - preceded by the character, for the previously inserted code. If the - UPTO is just for 1 instance, we can use QUERY instead. */ - - else if (repeat_max != repeat_min) - { -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && (c & UTF_LENGTH) != 0) - { - memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - repeat_max -= repeat_min; - - if (repeat_max == 1) - { - *code++ = OP_QUERY + repeat_type; - } - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - } - - /* The character or character type itself comes last in all cases. 
*/ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && (c & UTF_LENGTH) != 0) - { - memcpy(code, utf_chars, IN_UCHARS(c & 7)); - code += c & 7; - } - else -#endif - *code++ = c; - - /* For a repeated Unicode property match, there are two extra bytes that - define the required property. */ - -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } -#endif - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. */ - - else if (*previous == OP_CLASS || - *previous == OP_NCLASS || -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - *previous == OP_XCLASS || -#endif - *previous == OP_REF || - *previous == OP_REFI) - { - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - - /* All real repeats make it impossible to handle partial matching (maybe - one day we will be able to remove this restriction). */ - - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ - - if (repeat_min == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeat_type; - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeat_type; - else if (repeat_min == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeat_type; - else - { - *code++ = OP_CRRANGE + repeat_type; - PUT2INC(code, 0, repeat_min); - if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ - PUT2INC(code, 0, repeat_max); - } - } - - /* If previous was a bracket group, we may have to replicate it in certain - cases. 
Note that at this point we can encounter only the "basic" bracket - opcodes such as BRA and CBRA, as this is the place where they get converted - into the more special varieties such as BRAPOS and SBRA. A test for >= - OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK, - ASSERTBACK_NOT, ONCE, BRA, CBRA, and COND. Originally, PCRE did not allow - repetition of assertions, but now it does, for Perl compatibility. */ - - else if (*previous >= OP_ASSERT && *previous <= OP_COND) - { - register int i; - int len = (int)(code - previous); - pcre_uchar *bralink = NULL; - pcre_uchar *brazeroptr = NULL; - - /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so - we just ignore the repeat. */ - - if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF) - goto END_REPEAT; - - /* There is no sense in actually repeating assertions. The only potential - use of repetition is in cases when the assertion is optional. Therefore, - if the minimum is greater than zero, just ignore the repeat. If the - maximum is not not zero or one, set it to 1. */ - - if (*previous < OP_ONCE) /* Assertion */ - { - if (repeat_min > 0) goto END_REPEAT; - if (repeat_max < 0 || repeat_max > 1) repeat_max = 1; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too messy. There are several special subcases when the - minimum is zero. 
*/ - - if (repeat_min == 0) - { - /* If the maximum is also zero, we used to just omit the group from the - output altogether, like this: - - ** if (repeat_max == 0) - ** { - ** code = previous; - ** goto END_REPEAT; - ** } - - However, that fails when a group or a subgroup within it is referenced - as a subroutine from elsewhere in the pattern, so now we stick in - OP_SKIPZERO in front of it so that it is skipped on execution. As we - don't have a list of which groups are referenced, we cannot do this - selectively. - - If the maximum is 1 or unlimited, we just have to stick in the BRAZERO - and do no more at this point. However, we do need to adjust any - OP_RECURSE calls inside the group that refer to the group itself or any - internal or forward referenced group, because the offset is from the - start of the whole regex. Temporarily terminate the pattern while doing - this. */ - - if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ - { - *code = OP_END; - adjust_recurse(previous, 1, utf, cd, save_hwm); - memmove(previous + 1, previous, IN_UCHARS(len)); - code++; - if (repeat_max == 0) - { - *previous++ = OP_SKIPZERO; - goto END_REPEAT; - } - brazeroptr = previous; /* Save for possessive optimizing */ - *previous++ = OP_BRAZERO + repeat_type; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. The remainder can be handled by code - that is common with the non-zero minimum case below. We have to - adjust the value or repeat_max, since one less copy is required. Once - again, we may have to adjust any OP_RECURSE calls inside the group. 
*/ - - else - { - int offset; - *code = OP_END; - adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm); - memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); - code += 2 + LINK_SIZE; - *previous++ = OP_BRAZERO + repeat_type; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - - offset = (bralink == NULL)? 0 : (int)(previous - bralink); - bralink = previous; - PUTINC(previous, 0, offset); - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. If we set a first char from the group, and didn't - set a required char, copy the latter from the former. If there are any - forward reference subroutine calls in the group, there will be entries on - the workspace list; replicate these with an appropriate increment. */ - - else - { - if (repeat_min > 1) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. Do some paranoid checks for - potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit - integer type when available, otherwise double. */ - - if (lengthptr != NULL) - { - int delta = (repeat_min - 1)*length_prevgroup; - if ((INT64_OR_DOUBLE)(repeat_min - 1)* - (INT64_OR_DOUBLE)length_prevgroup > - (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - - /* This is compiling for real. If there is a set first byte for - the group, and we have not yet set a "required byte", set it. Make - sure there is enough workspace for copying forward references before - doing the copy. 
*/ - - else - { - if (groupsetfirstchar && reqcharflags < 0) - { - reqchar = firstchar; - reqcharflags = firstcharflags; - } - - for (i = 1; i < repeat_min; i++) - { - pcre_uchar *hc; - pcre_uchar *this_hwm = cd->hwm; - memcpy(code, previous, IN_UCHARS(len)); - - while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) - { - int save_offset = save_hwm - cd->start_workspace; - int this_offset = this_hwm - cd->start_workspace; - *errorcodeptr = expand_workspace(cd); - if (*errorcodeptr != 0) goto FAILED; - save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; - this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; - } - - for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) - { - PUT(cd->hwm, 0, GET(hc, 0) + len); - cd->hwm += LINK_SIZE; - } - save_hwm = this_hwm; - code += len; - } - } - } - - if (repeat_max > 0) repeat_max -= repeat_min; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. Again, we must remember to - replicate entries on the forward reference list. */ - - if (repeat_max >= 0) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. For each repetition we must add 1 - to the length for BRAZERO and for all but the last repetition we must - add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some - paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is - a 64-bit integer type when available, otherwise double. 
*/ - - if (lengthptr != NULL && repeat_max > 0) - { - int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ - if ((INT64_OR_DOUBLE)repeat_max * - (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - > (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - - /* This is compiling for real */ - - else for (i = repeat_max - 1; i >= 0; i--) - { - pcre_uchar *hc; - pcre_uchar *this_hwm = cd->hwm; - - *code++ = OP_BRAZERO + repeat_type; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) - { - int offset; - *code++ = OP_BRA; - offset = (bralink == NULL)? 0 : (int)(code - bralink); - bralink = code; - PUTINC(code, 0, offset); - } - - memcpy(code, previous, IN_UCHARS(len)); - - /* Ensure there is enough workspace for forward references before - copying them. */ - - while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) - { - int save_offset = save_hwm - cd->start_workspace; - int this_offset = this_hwm - cd->start_workspace; - *errorcodeptr = expand_workspace(cd); - if (*errorcodeptr != 0) goto FAILED; - save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; - this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; - } - - for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) - { - PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); - cd->hwm += LINK_SIZE; - } - save_hwm = this_hwm; - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). */ - - while (bralink != NULL) - { - int oldlinkoffset; - int offset = (int)(code - bralink + 1); - pcre_uchar *bra = code - offset; - oldlinkoffset = GET(bra, 1); - bralink = (oldlinkoffset == 0)? 
NULL : bralink - oldlinkoffset; - *code++ = OP_KET; - PUTINC(code, 0, offset); - PUT(bra, 1, offset); - } - } - - /* If the maximum is unlimited, set a repeater in the final copy. For - ONCE brackets, that's all we need to do. However, possessively repeated - ONCE brackets can be converted into non-capturing brackets, as the - behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to - deal with possessive ONCEs specially. - - Otherwise, when we are doing the actual compile phase, check to see - whether this group is one that could match an empty string. If so, - convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so - that runtime checking can be done. [This check is also applied to ONCE - groups at runtime, but in a different way.] - - Then, if the quantifier was possessive and the bracket is not a - conditional, we convert the BRA code to the POS form, and the KET code to - KETRPOS. (It turns out to be convenient at runtime to detect this kind of - subpattern at both the start and at the end.) The use of special opcodes - makes it possible to reduce greatly the stack usage in pcre_exec(). If - the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. - - Then, if the minimum number of matches is 1 or 0, cancel the possessive - flag so that the default action below, of wrapping everything inside - atomic brackets, does not happen. When the minimum is greater than 1, - there will be earlier copies of the group, and so we still have to wrap - the whole thing. */ - - else - { - pcre_uchar *ketcode = code - 1 - LINK_SIZE; - pcre_uchar *bracode = ketcode - GET(ketcode, 1); - - /* Convert possessive ONCE brackets to non-capturing */ - - if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && - possessive_quantifier) *bracode = OP_BRA; - - /* For non-possessive ONCE brackets, all we need to do is to - set the KET. 
*/ - - if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) - *ketcode = OP_KETRMAX + repeat_type; - - /* Handle non-ONCE brackets and possessive ONCEs (which have been - converted to non-capturing above). */ - - else - { - /* In the compile phase, check for empty string matching. */ - - if (lengthptr == NULL) - { - pcre_uchar *scode = bracode; - do - { - if (could_be_empty_branch(scode, ketcode, utf, cd)) - { - *bracode += OP_SBRA - OP_BRA; - break; - } - scode += GET(scode, 1); - } - while (*scode == OP_ALT); - } - - /* Handle possessive quantifiers. */ - - if (possessive_quantifier) - { - /* For COND brackets, we wrap the whole thing in a possessively - repeated non-capturing bracket, because we have not invented POS - versions of the COND opcodes. Because we are moving code along, we - must ensure that any pending recursive references are updated. */ - - if (*bracode == OP_COND || *bracode == OP_SCOND) - { - int nlen = (int)(code - bracode); - *code = OP_END; - adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm); - memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); - code += 1 + LINK_SIZE; - nlen += 1 + LINK_SIZE; - *bracode = OP_BRAPOS; - *code++ = OP_KETRPOS; - PUTINC(code, 0, nlen); - PUT(bracode, 1, nlen); - } - - /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ - - else - { - *bracode += 1; /* Switch to xxxPOS opcodes */ - *ketcode = OP_KETRPOS; - } - - /* If the minimum is zero, mark it as possessive, then unset the - possessive flag when the minimum is 0 or 1. */ - - if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; - if (repeat_min < 2) possessive_quantifier = FALSE; - } - - /* Non-possessive quantifier */ - - else *ketcode = OP_KETRMAX + repeat_type; - } - } - } - - /* If previous is OP_FAIL, it was generated by an empty class [] in - JavaScript mode. The other ways in which OP_FAIL can be generated, that is - by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat" - error above. 
We can just ignore the repeat in JS case. */ - - else if (*previous == OP_FAIL) goto END_REPEAT; - - /* Else there's some kind of shambles */ - - else - { - *errorcodeptr = ERR11; - goto FAILED; - } - - /* If the character following a repeat is '+', or if certain optimization - tests above succeeded, possessive_quantifier is TRUE. For some opcodes, - there are special alternative opcodes for this case. For anything else, we - wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+' - notation is just syntactic sugar, taken from Sun's Java package, but the - special opcodes can optimize it. - - Some (but not all) possessively repeated subpatterns have already been - completely handled in the code just above. For them, possessive_quantifier - is always FALSE at this stage. - - Note that the repeated item starts at tempcode, not at previous, which - might be the first part of a string whose (former) last char we repeated. - - Possessifying an 'exact' quantifier has no effect, so we can ignore it. But - an 'upto' may follow. We skip over an 'exact' item, and then test the - length of what remains before proceeding. */ - - if (possessive_quantifier) - { - int len; - - if (*tempcode == OP_TYPEEXACT) - tempcode += PRIV(OP_lengths)[*tempcode] + - ((tempcode[1 + IMM2_SIZE] == OP_PROP - || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 
2 : 0); - - else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) - { - tempcode += PRIV(OP_lengths)[*tempcode]; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(tempcode[-1])) - tempcode += GET_EXTRALEN(tempcode[-1]); -#endif - } - - len = (int)(code - tempcode); - if (len > 0) switch (*tempcode) - { - case OP_STAR: *tempcode = OP_POSSTAR; break; - case OP_PLUS: *tempcode = OP_POSPLUS; break; - case OP_QUERY: *tempcode = OP_POSQUERY; break; - case OP_UPTO: *tempcode = OP_POSUPTO; break; - - case OP_STARI: *tempcode = OP_POSSTARI; break; - case OP_PLUSI: *tempcode = OP_POSPLUSI; break; - case OP_QUERYI: *tempcode = OP_POSQUERYI; break; - case OP_UPTOI: *tempcode = OP_POSUPTOI; break; - - case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; - case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; - case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; - case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; - - case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break; - case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break; - case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break; - case OP_NOTUPTOI: *tempcode = OP_NOTPOSUPTOI; break; - - case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; - case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; - case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; - case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; - - /* Because we are moving code along, we must ensure that any - pending recursive references are updated. */ - - default: - *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); - memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); - code += 1 + LINK_SIZE; - len += 1 + LINK_SIZE; - tempcode[0] = OP_ONCE; - *code++ = OP_KET; - PUTINC(code, 0, len); - PUT(tempcode, 1, len); - break; - } - } - - /* In all case we no longer have a previous item. 
We also set the - "follows varying string" flag for subsequently encountered reqchars if - it isn't already set and we have just passed a varying length item. */ - - END_REPEAT: - previous = NULL; - cd->req_varyopt |= reqvary; - break; - - - /* ===================================================================*/ - /* Start of nested parenthesized sub-expression, or comment or lookahead or - lookbehind or option setting or condition or all the other extended - parenthesis forms. */ - - case CHAR_LEFT_PARENTHESIS: - newoptions = options; - skipbytes = 0; - bravalue = OP_CBRA; - save_hwm = cd->hwm; - reset_bracount = FALSE; - - /* First deal with various "verbs" that can be introduced by '*'. */ - - ptr++; - if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' - || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) - { - int i, namelen; - int arglen = 0; - const char *vn = verbnames; - const pcre_uchar *name = ptr + 1; - const pcre_uchar *arg = NULL; - previous = NULL; - ptr++; - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; - namelen = (int)(ptr - name); - - /* It appears that Perl allows any characters whatsoever, other than - a closing parenthesis, to appear in arguments, so we no longer insist on - letters, digits, and underscores. */ - - if (*ptr == CHAR_COLON) - { - arg = ++ptr; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - arglen = (int)(ptr - arg); - if ((unsigned int)arglen > MAX_MARK) - { - *errorcodeptr = ERR75; - goto FAILED; - } - } - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR60; - goto FAILED; - } - - /* Scan the table of verb names */ - - for (i = 0; i < verbcount; i++) - { - if (namelen == verbs[i].len && - STRNCMP_UC_C8(name, vn, namelen) == 0) - { - int setverb; - - /* Check for open captures before ACCEPT and convert it to - ASSERT_ACCEPT if in an assertion. 
*/ - - if (verbs[i].op == OP_ACCEPT) - { - open_capitem *oc; - if (arglen != 0) - { - *errorcodeptr = ERR59; - goto FAILED; - } - cd->had_accept = TRUE; - for (oc = cd->open_caps; oc != NULL; oc = oc->next) - { - *code++ = OP_CLOSE; - PUT2INC(code, 0, oc->number); - } - setverb = *code++ = - (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; - - /* Do not set firstchar after *ACCEPT */ - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - } - - /* Handle other cases with/without an argument */ - - else if (arglen == 0) - { - if (verbs[i].op < 0) /* Argument is mandatory */ - { - *errorcodeptr = ERR66; - goto FAILED; - } - setverb = *code++ = verbs[i].op; - } - - else - { - if (verbs[i].op_arg < 0) /* Argument is forbidden */ - { - *errorcodeptr = ERR59; - goto FAILED; - } - setverb = *code++ = verbs[i].op_arg; - *code++ = arglen; - memcpy(code, arg, IN_UCHARS(arglen)); - code += arglen; - *code++ = 0; - } - - switch (setverb) - { - case OP_THEN: - case OP_THEN_ARG: - cd->external_flags |= PCRE_HASTHEN; - break; - - case OP_PRUNE: - case OP_PRUNE_ARG: - case OP_SKIP: - case OP_SKIP_ARG: - cd->had_pruneorskip = TRUE; - break; - } - - break; /* Found verb, exit loop */ - } - - vn += verbs[i].len + 1; - } - - if (i < verbcount) continue; /* Successfully handled a verb */ - *errorcodeptr = ERR60; /* Verb not recognized */ - goto FAILED; - } - - /* Deal with the extended parentheses; all are introduced by '?', and the - appearance of any of them means that this is not a capturing group. 
*/ - - else if (*ptr == CHAR_QUESTION_MARK) - { - int i, set, unset, namelen; - int *optset; - const pcre_uchar *name; - pcre_uchar *slot; - - switch (*(++ptr)) - { - case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ - ptr++; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr == CHAR_NULL) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - - - /* ------------------------------------------------------------ */ - case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ - reset_bracount = TRUE; - /* Fall through */ - - /* ------------------------------------------------------------ */ - case CHAR_COLON: /* Non-capturing bracket */ - bravalue = OP_BRA; - ptr++; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_LEFT_PARENTHESIS: - bravalue = OP_COND; /* Conditional group */ - - /* A condition can be an assertion, a number (referring to a numbered - group), a name (referring to a named group), or 'R', referring to - recursion. R and R&name are also permitted for recursion tests. - - There are several syntaxes for testing a named group: (?(name)) is used - by Python; Perl 5.10 onwards uses (?() or (?('name')). - - There are two unfortunate ambiguities, caused by history. (a) 'R' can - be the recursive thing or the name 'R' (and similarly for 'R' followed - by digits), and (b) a number could be a name that consists of digits. - In both cases, we look for a name first; if not found, we try the other - cases. */ - - /* For conditions that are assertions, check the syntax, and then exit - the switch. This will take control down to where bracketed groups, - including assertions, are processed. */ - - if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN || - ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN)) - break; - - /* Most other conditions use OP_CREF (a couple change to OP_RREF - below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. 
*/ - - code[1+LINK_SIZE] = OP_CREF; - skipbytes = 1+IMM2_SIZE; - refsign = -1; - - /* Check for a test for recursion in a named group. */ - - if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND) - { - terminator = -1; - ptr += 2; - code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ - } - - /* Check for a test for a named group's having been set, using the Perl - syntax (?() or (?('name') */ - - else if (ptr[1] == CHAR_LESS_THAN_SIGN) - { - terminator = CHAR_GREATER_THAN_SIGN; - ptr++; - } - else if (ptr[1] == CHAR_APOSTROPHE) - { - terminator = CHAR_APOSTROPHE; - ptr++; - } - else - { - terminator = CHAR_NULL; - if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr); - } - - /* We now expect to read a name; any thing else is an error */ - - if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0) - { - ptr += 1; /* To get the right offset */ - *errorcodeptr = ERR28; - goto FAILED; - } - - /* Read the name, but also get it as a number if it's all digits */ - - recno = 0; - name = ++ptr; - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) - { - if (recno >= 0) - recno = (IS_DIGIT(*ptr))? recno * 10 + (int)(*ptr - CHAR_0) : -1; - ptr++; - } - namelen = (int)(ptr - name); - - if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) || - *ptr++ != CHAR_RIGHT_PARENTHESIS) - { - ptr--; /* Error offset */ - *errorcodeptr = ERR26; - goto FAILED; - } - - /* Do no further checking in the pre-compile phase. */ - - if (lengthptr != NULL) break; - - /* In the real compile we do the work of looking for the actual - reference. If the string started with "+" or "-" we require the rest to - be digits, in which case recno will be set. */ - - if (refsign > 0) - { - if (recno <= 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno = (refsign == CHAR_MINUS)? 
- cd->bracount - recno + 1 : recno +cd->bracount; - if (recno <= 0 || recno > cd->final_bracount) - { - *errorcodeptr = ERR15; - goto FAILED; - } - PUT2(code, 2+LINK_SIZE, recno); - break; - } - - /* Otherwise (did not start with "+" or "-"), start by looking for the - name. If we find a name, add one to the opcode to change OP_CREF or - OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same, - except they record that the reference was originally to a name. The - information is used to check duplicate names. */ - - slot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { - if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break; - slot += cd->name_entry_size; - } - - /* Found a previous named subpattern */ - - if (i < cd->names_found) - { - recno = GET2(slot, 0); - PUT2(code, 2+LINK_SIZE, recno); - code[1+LINK_SIZE]++; - } - - /* Search the pattern for a forward reference */ - - else if ((i = find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf)) > 0) - { - PUT2(code, 2+LINK_SIZE, i); - code[1+LINK_SIZE]++; - } - - /* If terminator == CHAR_NULL it means that the name followed directly - after the opening parenthesis [e.g. (?(abc)...] and in this case there - are some further alternatives to try. For the cases where terminator != - 0 [things like (?(... or (?('name')... or (?(R&name)... ] we have - now checked all the possibilities, so give an error. */ - - else if (terminator != CHAR_NULL) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* Check for (?(R) for recursion. Allow digits after R to specify a - specific group number. 
*/ - - else if (*name == CHAR_R) - { - recno = 0; - for (i = 1; i < namelen; i++) - { - if (!IS_DIGIT(name[i])) - { - *errorcodeptr = ERR15; - goto FAILED; - } - recno = recno * 10 + name[i] - CHAR_0; - } - if (recno == 0) recno = RREF_ANY; - code[1+LINK_SIZE] = OP_RREF; /* Change test type */ - PUT2(code, 2+LINK_SIZE, recno); - } - - /* Similarly, check for the (?(DEFINE) "condition", which is always - false. */ - - else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0) - { - code[1+LINK_SIZE] = OP_DEF; - skipbytes = 1; - } - - /* Check for the "name" actually being a subpattern number. We are - in the second pass here, so final_bracount is set. */ - - else if (recno > 0 && recno <= cd->final_bracount) - { - PUT2(code, 2+LINK_SIZE, recno); - } - - /* Either an unidentified subpattern, or a reference to (?(0) */ - - else - { - *errorcodeptr = (recno == 0)? ERR35: ERR15; - goto FAILED; - } - break; - - - /* ------------------------------------------------------------ */ - case CHAR_EQUALS_SIGN: /* Positive lookahead */ - bravalue = OP_ASSERT; - cd->assert_depth += 1; - ptr++; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_EXCLAMATION_MARK: /* Negative lookahead */ - ptr++; - if (*ptr == CHAR_RIGHT_PARENTHESIS) /* Optimize (?!) 
*/ - { - *code++ = OP_FAIL; - previous = NULL; - continue; - } - bravalue = OP_ASSERT_NOT; - cd->assert_depth += 1; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */ - switch (ptr[1]) - { - case CHAR_EQUALS_SIGN: /* Positive lookbehind */ - bravalue = OP_ASSERTBACK; - cd->assert_depth += 1; - ptr += 2; - break; - - case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ - bravalue = OP_ASSERTBACK_NOT; - cd->assert_depth += 1; - ptr += 2; - break; - - default: /* Could be name define, else bad */ - if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0) - goto DEFINE_NAME; - ptr++; /* Correct offset for error */ - *errorcodeptr = ERR24; - goto FAILED; - } - break; - - - /* ------------------------------------------------------------ */ - case CHAR_GREATER_THAN_SIGN: /* One-time brackets */ - bravalue = OP_ONCE; - ptr++; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_C: /* Callout - may be followed by digits; */ - previous_callout = code; /* Save for later completion */ - after_manual_callout = 1; /* Skip one item before completing */ - *code++ = OP_CALLOUT; - { - int n = 0; - ptr++; - while(IS_DIGIT(*ptr)) - n = n * 10 + *ptr++ - CHAR_0; - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR39; - goto FAILED; - } - if (n > 255) - { - *errorcodeptr = ERR38; - goto FAILED; - } - *code++ = n; - PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */ - PUT(code, LINK_SIZE, 0); /* Default length */ - code += 2 * LINK_SIZE; - } - previous = NULL; - continue; - - - /* ------------------------------------------------------------ */ - case CHAR_P: /* Python-style named subpattern handling */ - if (*(++ptr) == CHAR_EQUALS_SIGN || - *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ - { - is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; - terminator = CHAR_RIGHT_PARENTHESIS; - goto NAMED_REF_OR_RECURSE; - 
} - else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */ - { - *errorcodeptr = ERR41; - goto FAILED; - } - /* Fall through to handle (?P< as (?< is handled */ - - - /* ------------------------------------------------------------ */ - DEFINE_NAME: /* Come here from (?< handling */ - case CHAR_APOSTROPHE: - { - terminator = (*ptr == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; - name = ++ptr; - - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = (int)(ptr - name); - - /* In the pre-compile phase, just do a syntax check. */ - - if (lengthptr != NULL) - { - if (*ptr != (pcre_uchar)terminator) - { - *errorcodeptr = ERR42; - goto FAILED; - } - if (cd->names_found >= MAX_NAME_COUNT) - { - *errorcodeptr = ERR49; - goto FAILED; - } - if (namelen + IMM2_SIZE + 1 > cd->name_entry_size) - { - cd->name_entry_size = namelen + IMM2_SIZE + 1; - if (namelen > MAX_NAME_SIZE) - { - *errorcodeptr = ERR48; - goto FAILED; - } - } - } - - /* In the real compile, create the entry in the table, maintaining - alphabetical order. Duplicate names for different numbers are - permitted only if PCRE_DUPNAMES is set. Duplicate names for the same - number are always OK. (An existing number can be re-used if (?| - appears in the pattern.) In either event, a duplicate name results in - a duplicate entry in the table, even if the number is the same. This - is because the number of names, and hence the table size, is computed - in the pre-compile, and it affects various numbers and pointers which - would all have to be modified, and the compiled code moved down, if - duplicates with the same number were omitted from the table. This - doesn't seem worth the hassle. However, *different* names for the - same number are not permitted. 
*/ - - else - { - BOOL dupname = FALSE; - slot = cd->name_table; - - for (i = 0; i < cd->names_found; i++) - { - int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen)); - if (crc == 0) - { - if (slot[IMM2_SIZE+namelen] == 0) - { - if (GET2(slot, 0) != cd->bracount + 1 && - (options & PCRE_DUPNAMES) == 0) - { - *errorcodeptr = ERR43; - goto FAILED; - } - else dupname = TRUE; - } - else crc = -1; /* Current name is a substring */ - } - - /* Make space in the table and break the loop for an earlier - name. For a duplicate or later name, carry on. We do this for - duplicates so that in the simple case (when ?(| is not used) they - are in order of their numbers. */ - - if (crc < 0) - { - memmove(slot + cd->name_entry_size, slot, - IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); - break; - } - - /* Continue the loop for a later or duplicate name */ - - slot += cd->name_entry_size; - } - - /* For non-duplicate names, check for a duplicate number before - adding the new name. */ - - if (!dupname) - { - pcre_uchar *cslot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { - if (cslot != slot) - { - if (GET2(cslot, 0) == cd->bracount + 1) - { - *errorcodeptr = ERR65; - goto FAILED; - } - } - else i--; - cslot += cd->name_entry_size; - } - } - - PUT2(slot, 0, cd->bracount + 1); - memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen)); - slot[IMM2_SIZE + namelen] = 0; - } - } - - /* In both pre-compile and compile, count the number of names we've - encountered. */ - - cd->names_found++; - ptr++; /* Move past > or ' */ - goto NUMBERED_GROUP; - - - /* ------------------------------------------------------------ */ - case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */ - terminator = CHAR_RIGHT_PARENTHESIS; - is_recurse = TRUE; - /* Fall through */ - - /* We come here from the Python syntax above that handles both - references (?P=name) and recursion (?P>name), as well as falling - through from the Perl recursion syntax (?&name). 
We also come here from - the Perl \k or \k'name' back reference syntax and the \k{name} - .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */ - - NAMED_REF_OR_RECURSE: - name = ++ptr; - while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = (int)(ptr - name); - - /* In the pre-compile phase, do a syntax check. We used to just set - a dummy reference number, because it was not used in the first pass. - However, with the change of recursive back references to be atomic, - we have to look for the number so that this state can be identified, as - otherwise the incorrect length is computed. If it's not a backwards - reference, the dummy number will do. */ - - if (lengthptr != NULL) - { - const pcre_uchar *temp; - - if (namelen == 0) - { - *errorcodeptr = ERR62; - goto FAILED; - } - if (*ptr != (pcre_uchar)terminator) - { - *errorcodeptr = ERR42; - goto FAILED; - } - if (namelen > MAX_NAME_SIZE) - { - *errorcodeptr = ERR48; - goto FAILED; - } - - /* The name table does not exist in the first pass, so we cannot - do a simple search as in the code below. Instead, we have to scan the - pattern to find the number. It is important that we scan it only as - far as we have got because the syntax of named subpatterns has not - been checked for the rest of the pattern, and find_parens() assumes - correct syntax. In any case, it's a waste of resources to scan - further. We stop the scan at the current point by temporarily - adjusting the value of cd->endpattern. */ - - temp = cd->end_pattern; - cd->end_pattern = ptr; - recno = find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf); - cd->end_pattern = temp; - if (recno < 0) recno = 0; /* Forward ref; set dummy number */ - } - - /* In the real compile, seek the name in the table. We check the name - first, and then check that we have reached the end of the name in the - table. 
That way, if the name that is longer than any in the table, - the comparison will fail without reading beyond the table entry. */ - - else - { - slot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { - if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && - slot[IMM2_SIZE+namelen] == 0) - break; - slot += cd->name_entry_size; - } - - if (i < cd->names_found) /* Back reference */ - { - recno = GET2(slot, 0); - } - else if ((recno = /* Forward back reference */ - find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf)) <= 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - } - - /* In both phases, we can now go to the code than handles numerical - recursion or backreferences. */ - - if (is_recurse) goto HANDLE_RECURSION; - else goto HANDLE_REFERENCE; - - - /* ------------------------------------------------------------ */ - case CHAR_R: /* Recursion */ - ptr++; /* Same as (?0) */ - /* Fall through */ - - - /* ------------------------------------------------------------ */ - case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */ - case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: - case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - { - const pcre_uchar *called; - terminator = CHAR_RIGHT_PARENTHESIS; - - /* Come here from the \g<...> and \g'...' code (Oniguruma - compatibility). However, the syntax has been checked to ensure that - the ... are a (signed) number, so that neither ERR63 nor ERR29 will - be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY - ever be taken. 
*/ - - HANDLE_NUMERICAL_RECURSION: - - if ((refsign = *ptr) == CHAR_PLUS) - { - ptr++; - if (!IS_DIGIT(*ptr)) - { - *errorcodeptr = ERR63; - goto FAILED; - } - } - else if (refsign == CHAR_MINUS) - { - if (!IS_DIGIT(ptr[1])) - goto OTHER_CHAR_AFTER_QUERY; - ptr++; - } - - recno = 0; - while(IS_DIGIT(*ptr)) - recno = recno * 10 + *ptr++ - CHAR_0; - - if (*ptr != (pcre_uchar)terminator) - { - *errorcodeptr = ERR29; - goto FAILED; - } - - if (refsign == CHAR_MINUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno = cd->bracount - recno + 1; - if (recno <= 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - } - else if (refsign == CHAR_PLUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno += cd->bracount; - } - - /* Come here from code above that handles a named recursion */ - - HANDLE_RECURSION: - - previous = code; - called = cd->start_code; - - /* When we are actually compiling, find the bracket that is being - referenced. Temporarily end the regex in case it doesn't exist before - this point. If we end up with a forward reference, first check that - the bracket does occur later so we can give the error (and position) - now. Then remember this forward reference in the workspace so it can - be filled in at the end. */ - - if (lengthptr == NULL) - { - *code = OP_END; - if (recno != 0) - called = PRIV(find_bracket)(cd->start_code, utf, recno); - - /* Forward reference */ - - if (called == NULL) - { - if (find_parens(cd, NULL, recno, - (options & PCRE_EXTENDED) != 0, utf) < 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* Fudge the value of "called" so that when it is inserted as an - offset below, what it actually inserted is the reference number - of the group. Then remember the forward reference. 
*/ - - called = cd->start_code + recno; - if (cd->hwm >= cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN) - { - *errorcodeptr = expand_workspace(cd); - if (*errorcodeptr != 0) goto FAILED; - } - PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); - } - - /* If not a forward reference, and the subpattern is still open, - this is a recursive call. We check to see if this is a left - recursion that could loop for ever, and diagnose that case. We - must not, however, do this check if we are in a conditional - subpattern because the condition might be testing for recursion in - a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid. - Forever loops are also detected at runtime, so those that occur in - conditional subpatterns will be picked up then. */ - - else if (GET(called, 1) == 0 && cond_depth <= 0 && - could_be_empty(called, code, bcptr, utf, cd)) - { - *errorcodeptr = ERR40; - goto FAILED; - } - } - - /* Insert the recursion/subroutine item. It does not have a set first - character (relevant if it is repeated, because it will then be - wrapped with ONCE brackets). 
*/ - - *code = OP_RECURSE; - PUT(code, 1, (int)(called - cd->start_code)); - code += 1 + LINK_SIZE; - groupsetfirstchar = FALSE; - } - - /* Can't determine a first byte now */ - - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - continue; - - - /* ------------------------------------------------------------ */ - default: /* Other characters: check option setting */ - OTHER_CHAR_AFTER_QUERY: - set = unset = 0; - optset = &set; - - while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) - { - switch (*ptr++) - { - case CHAR_MINUS: optset = &unset; break; - - case CHAR_J: /* Record that it changed in the external options */ - *optset |= PCRE_DUPNAMES; - cd->external_flags |= PCRE_JCHANGED; - break; - - case CHAR_i: *optset |= PCRE_CASELESS; break; - case CHAR_m: *optset |= PCRE_MULTILINE; break; - case CHAR_s: *optset |= PCRE_DOTALL; break; - case CHAR_x: *optset |= PCRE_EXTENDED; break; - case CHAR_U: *optset |= PCRE_UNGREEDY; break; - case CHAR_X: *optset |= PCRE_EXTRA; break; - - default: *errorcodeptr = ERR12; - ptr--; /* Correct the offset */ - goto FAILED; - } - } - - /* Set up the changed option bits, but don't change anything yet. */ - - newoptions = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. If this - item is right at the start of the pattern, the options can be - abstracted and made external in the pre-compile phase, and ignored in - the compile phase. This can be helpful when matching -- for instance in - caseless checking of required bytes. - - If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are - definitely *not* at the start of the pattern because something has been - compiled. In the pre-compile phase, however, the code pointer can have - that value after the start, because it gets reset as code is discarded - during the pre-compile. 
However, this can happen only at top level - if - we are within parentheses, the starting BRA will still be present. At - any parenthesis level, the length value can be used to test if anything - has been compiled at that level. Thus, a test for both these conditions - is necessary to ensure we correctly detect the start of the pattern in - both phases. - - If we are not at the pattern start, reset the greedy defaults and the - case value for firstchar and reqchar. */ - - if (*ptr == CHAR_RIGHT_PARENTHESIS) - { - if (code == cd->start_code + 1 + LINK_SIZE && - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) - { - cd->external_options = newoptions; - } - else - { - greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; - } - - /* Change options at this level, and pass them back for use - in subsequent branches. */ - - *optionsptr = options = newoptions; - previous = NULL; /* This item can't be repeated */ - continue; /* It is complete */ - } - - /* If the options ended with ':' we are heading into a nested group - with possible change of options. Such groups are non-capturing and are - not assertions of any kind. All we need to do is skip over the ':'; - the newoptions value is handled below. */ - - bravalue = OP_BRA; - ptr++; - } /* End of switch for character following (? */ - } /* End of (? handling */ - - /* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE - is set, all unadorned brackets become non-capturing and behave like (?:...) - brackets. */ - - else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) - { - bravalue = OP_BRA; - } - - /* Else we have a capturing group. */ - - else - { - NUMBERED_GROUP: - cd->bracount += 1; - PUT2(code, 1+LINK_SIZE, cd->bracount); - skipbytes = IMM2_SIZE; - } - - /* Process nested bracketed regex. 
Assertions used not to be repeatable, - but this was changed for Perl compatibility, so all kinds can now be - repeated. We copy code into a non-register variable (tempcode) in order to - be able to pass its address because some compilers complain otherwise. */ - - previous = code; /* For handling repetition */ - *code = bravalue; - tempcode = code; - tempreqvary = cd->req_varyopt; /* Save value before bracket */ - tempbracount = cd->bracount; /* Save value before bracket */ - length_prevgroup = 0; /* Initialize for pre-compile phase */ - - if (!compile_regex( - newoptions, /* The complete new option state */ - &tempcode, /* Where to put code (updated) */ - &ptr, /* Input pointer (updated) */ - errorcodeptr, /* Where to put an error message */ - (bravalue == OP_ASSERTBACK || - bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ - reset_bracount, /* True if (?| group */ - skipbytes, /* Skip over bracket number */ - cond_depth + - ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ - &subfirstchar, /* For possible first char */ - &subfirstcharflags, - &subreqchar, /* For possible last char */ - &subreqcharflags, - bcptr, /* Current branch chain */ - cd, /* Tables block */ - (lengthptr == NULL)? NULL : /* Actual compile phase */ - &length_prevgroup /* Pre-compile phase */ - )) - goto FAILED; - - /* If this was an atomic group and there are no capturing groups within it, - generate OP_ONCE_NC instead of OP_ONCE. */ - - if (bravalue == OP_ONCE && cd->bracount <= tempbracount) - *code = OP_ONCE_NC; - - if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) - cd->assert_depth -= 1; - - /* At the end of compiling, code is still pointing to the start of the - group, while tempcode has been updated to point past the end of the group. - The pattern pointer (ptr) is on the bracket. - - If this is a conditional bracket, check that there are no more than - two branches in the group, or just one if it's a DEFINE group. 
We do this - in the real compile phase, not in the pre-pass, where the whole group may - not be available. */ - - if (bravalue == OP_COND && lengthptr == NULL) - { - pcre_uchar *tc = code; - int condcount = 0; - - do { - condcount++; - tc += GET(tc,1); - } - while (*tc != OP_KET); - - /* A DEFINE group is never obeyed inline (the "condition" is always - false). It must have only one branch. */ - - if (code[LINK_SIZE+1] == OP_DEF) - { - if (condcount > 1) - { - *errorcodeptr = ERR54; - goto FAILED; - } - bravalue = OP_DEF; /* Just a flag to suppress char handling below */ - } - - /* A "normal" conditional group. If there is just one branch, we must not - make use of its firstchar or reqchar, because this is equivalent to an - empty second branch. */ - - else - { - if (condcount > 2) - { - *errorcodeptr = ERR27; - goto FAILED; - } - if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE; - } - } - - /* Error if hit end of pattern */ - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR14; - goto FAILED; - } - - /* In the pre-compile phase, update the length by the length of the group, - less the brackets at either end. Then reduce the compiled code to just a - set of non-capturing brackets so that it doesn't use much memory if it is - duplicated by a quantifier.*/ - - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; - code++; /* This already contains bravalue */ - PUTINC(code, 0, 1 + LINK_SIZE); - *code++ = OP_KET; - PUTINC(code, 0, 1 + LINK_SIZE); - break; /* No need to waste time with special character handling */ - } - - /* Otherwise update the main code pointer to the end of the group. */ - - code = tempcode; - - /* For a DEFINE group, required and first character settings are not - relevant. 
*/ - - if (bravalue == OP_DEF) break; - - /* Handle updating of the required and first characters for other types of - group. Update for normal brackets of all kinds, and conditions with two - branches (see code above). If the bracket is followed by a quantifier with - zero repeat, we have to back off. Hence the definition of zeroreqchar and - zerofirstchar outside the main loop so that they can be accessed for the - back off. */ - - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - groupsetfirstchar = FALSE; - - if (bravalue >= OP_ONCE) - { - /* If we have not yet set a firstchar in this branch, take it from the - subpattern, remembering that it was set here so that a repeat of more - than one can replicate it as reqchar if necessary. If the subpattern has - no firstchar, set "none" for the whole branch. In both cases, a zero - repeat forces firstchar to "none". */ - - if (firstcharflags == REQ_UNSET) - { - if (subfirstcharflags >= 0) - { - firstchar = subfirstchar; - firstcharflags = subfirstcharflags; - groupsetfirstchar = TRUE; - } - else firstcharflags = REQ_NONE; - zerofirstcharflags = REQ_NONE; - } - - /* If firstchar was previously set, convert the subpattern's firstchar - into reqchar if there wasn't one, using the vary flag that was in - existence beforehand. */ - - else if (subfirstcharflags >= 0 && subreqcharflags < 0) - { - subreqchar = subfirstchar; - subreqcharflags = subfirstcharflags | tempreqvary; - } - - /* If the subpattern set a required byte (or set a first byte that isn't - really the first byte - see above), set it. */ - - if (subreqcharflags >= 0) - { - reqchar = subreqchar; - reqcharflags = subreqcharflags; - } - } - - /* For a forward assertion, we take the reqchar, if set. This can be - helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. 
We can't set firstchar - for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead - of a firstchar. This is overcome by a scan at the end if there's no - firstchar, looking for an asserted first char. */ - - else if (bravalue == OP_ASSERT && subreqcharflags >= 0) - { - reqchar = subreqchar; - reqcharflags = subreqcharflags; - } - break; /* End of processing '(' */ - - - /* ===================================================================*/ - /* Handle metasequences introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values in the - default case when PCRE_UCP is not set. For the back references, the values - are negative the reference number. Only back references and those types - that consume a character may be repeated. We can test for values between - ESC_b and ESC_Z for the latter; this may have to change if any new ones are - ever created. */ - - case CHAR_BACKSLASH: - tempptr = ptr; - escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE); - - if (*errorcodeptr != 0) goto FAILED; - - if (escape == 0) - c = ec; - else - { - if (escape == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - ptr += 2; /* avoid empty string */ - else inescq = TRUE; - continue; - } - - if (escape == ESC_E) continue; /* Perl ignores an orphan \E */ - - /* For metasequences that actually match a character, we disable the - setting of a first character if it hasn't already been set. */ - - if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) - firstcharflags = REQ_NONE; - - /* Set values to reset to if this is followed by a zero repeat. 
*/ - - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - - /* \g or \g'name' is a subroutine call by name and \g or \g'n' - is a subroutine call by number (Oniguruma syntax). In fact, the value - ESC_g is returned only for these cases. So we don't need to check for < - or ' if the value is ESC_g. For the Perl syntax \g{n} the value is - -n, and for the Perl syntax \g{name} the result is ESC_k (as - that is a synonym for a named back reference). */ - - if (escape == ESC_g) - { - const pcre_uchar *p; - save_hwm = cd->hwm; /* Normally this is set when '(' is read */ - terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; - - /* These two statements stop the compiler for warning about possibly - unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In - fact, because we actually check for a number below, the paths that - would actually be in error are never taken. */ - - skipbytes = 0; - reset_bracount = FALSE; - - /* Test for a name */ - - if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS) - { - BOOL is_a_number = TRUE; - for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++) - { - if (!MAX_255(*p)) { is_a_number = FALSE; break; } - if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE; - if ((cd->ctypes[*p] & ctype_word) == 0) break; - } - if (*p != (pcre_uchar)terminator) - { - *errorcodeptr = ERR57; - break; - } - if (is_a_number) - { - ptr++; - goto HANDLE_NUMERICAL_RECURSION; - } - is_recurse = TRUE; - goto NAMED_REF_OR_RECURSE; - } - - /* Test a signed number in angle brackets or quotes. */ - - p = ptr + 2; - while (IS_DIGIT(*p)) p++; - if (*p != (pcre_uchar)terminator) - { - *errorcodeptr = ERR57; - break; - } - ptr++; - goto HANDLE_NUMERICAL_RECURSION; - } - - /* \k or \k'name' is a back reference by name (Perl syntax). - We also support \k{name} (.NET syntax). 
*/ - - if (escape == ESC_k) - { - if ((ptr[1] != CHAR_LESS_THAN_SIGN && - ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) - { - *errorcodeptr = ERR69; - break; - } - is_recurse = FALSE; - terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? - CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; - goto NAMED_REF_OR_RECURSE; - } - - /* Back references are handled specially; must disable firstchar if - not set to cope with cases like (?=(\w+))\1: which would otherwise set - ':' later. */ - - if (escape < 0) - { - open_capitem *oc; - recno = -escape; - - HANDLE_REFERENCE: /* Come here from named backref handling */ - if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; - previous = code; - *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; - PUT2INC(code, 0, recno); - cd->backref_map |= (recno < 32)? (1 << recno) : 1; - if (recno > cd->top_backref) cd->top_backref = recno; - - /* Check to see if this back reference is recursive, that it, it - is inside the group that it references. A flag is set so that the - group can be made atomic. */ - - for (oc = cd->open_caps; oc != NULL; oc = oc->next) - { - if (oc->number == recno) - { - oc->flag = TRUE; - break; - } - } - } - - /* So are Unicode property matches, if supported. */ - -#ifdef SUPPORT_UCP - else if (escape == ESC_P || escape == ESC_p) - { - BOOL negated; - unsigned int ptype = 0, pdata = 0; - if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) - goto FAILED; - previous = code; - *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; - *code++ = ptype; - *code++ = pdata; - } -#else - - /* If Unicode properties are not supported, \X, \P, and \p are not - allowed. 
*/ - - else if (escape == ESC_X || escape == ESC_P || escape == ESC_p) - { - *errorcodeptr = ERR45; - goto FAILED; - } -#endif - - /* For the rest (including \X when Unicode properties are supported), we - can obtain the OP value by negating the escape value in the default - situation when PCRE_UCP is not set. When it *is* set, we substitute - Unicode property tests. Note that \b and \B do a one-character - lookbehind. */ - - else - { - if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0) - cd->max_lookbehind = 1; -#ifdef SUPPORT_UCP - if (escape >= ESC_DU && escape <= ESC_wu) - { - nestptr = ptr + 1; /* Where to resume */ - ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ - } - else -#endif - /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE - so that it works in DFA mode and in lookbehinds. */ - - { - previous = (escape > ESC_b && escape < ESC_Z)? code : NULL; - *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape; - } - } - continue; - } - - /* We have a data character whose value is in c. In UTF-8 mode it may have - a value > 127. We set its representation in the length/buffer, and then - handle it as a data character. */ - -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) - mclength = PRIV(ord2utf)(c, mcbuffer); - else -#endif - - { - mcbuffer[0] = c; - mclength = 1; - } - goto ONE_CHAR; - - - /* ===================================================================*/ - /* Handle a literal character. It is guaranteed not to be whitespace or # - when the extended flag is set. If we are in UTF-8 mode, it may be a - multi-byte literal character. */ - - default: - NORMAL_CHAR: - mclength = 1; - mcbuffer[0] = c; - -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(c)) - ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); -#endif - - /* At this point we have the character's bytes in mcbuffer, and the length - in mclength. 
When not in UTF-8 mode, the length is always 1. */ - - ONE_CHAR: - previous = code; - - /* For caseless UTF-8 mode when UCP support is available, check whether - this character has more than one other case. If so, generate a special - OP_PROP item instead of OP_CHARI. */ - -#ifdef SUPPORT_UCP - if (utf && (options & PCRE_CASELESS) != 0) - { - GETCHAR(c, mcbuffer); - if ((c = UCD_CASESET(c)) != 0) - { - *code++ = OP_PROP; - *code++ = PT_CLIST; - *code++ = c; - if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE; - break; - } - } -#endif - - /* Caseful matches, or not one of the multicase characters. */ - - *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR; - for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; - - /* Remember if \r or \n were seen */ - - if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) - cd->external_flags |= PCRE_HASCRORLF; - - /* Set the first and required bytes appropriately. If no previous first - byte, set it from this character, but revert to none on a zero repeat. - Otherwise, leave the firstchar value alone, and don't change it on a zero - repeat. */ - - if (firstcharflags == REQ_UNSET) - { - zerofirstcharflags = REQ_NONE; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - - /* If the character is more than one byte long, we can set firstchar - only if it is not to be matched caselessly. */ - - if (mclength == 1 || req_caseopt == 0) - { - firstchar = mcbuffer[0] | req_caseopt; - firstchar = mcbuffer[0]; - firstcharflags = req_caseopt; - - if (mclength != 1) - { - reqchar = code[-1]; - reqcharflags = cd->req_varyopt; - } - } - else firstcharflags = reqcharflags = REQ_NONE; - } - - /* firstchar was previously set; we can set reqchar only if the length is - 1 or the matching is caseful. 
*/ - - else - { - zerofirstchar = firstchar; - zerofirstcharflags = firstcharflags; - zeroreqchar = reqchar; - zeroreqcharflags = reqcharflags; - if (mclength == 1 || req_caseopt == 0) - { - reqchar = code[-1]; - reqcharflags = req_caseopt | cd->req_varyopt; - } - } - - break; /* End of literal character handling */ - } - } /* end of big loop */ - - -/* Control never reaches here by falling through, only by a goto for all the -error states. Pass back the position in the pattern so that it can be displayed -to the user for diagnosing the error. */ - -FAILED: -*ptrptr = ptr; -return FALSE; -} - - - -/************************************************* -* Compile sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return it -points to the closing bracket, or vertical bar, or end of string. The code -variable is pointing at the byte into which the BRA operator has been stored. -This function is used during the pre-compile phase when we are trying to find -out the amount of memory needed, as well as during the real compile phase. The -value of lengthptr distinguishes the two phases. 
- -Arguments: - options option bits, including any changes for this subpattern - codeptr -> the address of the current code pointer - ptrptr -> the address of the current pattern pointer - errorcodeptr -> pointer to error code variable - lookbehind TRUE if this is a lookbehind assertion - reset_bracount TRUE to reset the count for each branch - skipbytes skip this many bytes at start (for brackets and OP_COND) - cond_depth depth of nesting for conditional subpatterns - firstcharptr place to put the first required character - firstcharflagsptr place to put the first character flags, or a negative number - reqcharptr place to put the last required character - reqcharflagsptr place to put the last required character flags, or a negative number - bcptr pointer to the chain of currently open branches - cd points to the data block with tables pointers etc. - lengthptr NULL during the real compile phase - points to length accumulator during pre-compile phase - -Returns: TRUE on success -*/ - -static BOOL -compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, - int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, - int cond_depth, - pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, - pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, - branch_chain *bcptr, compile_data *cd, int *lengthptr) -{ -const pcre_uchar *ptr = *ptrptr; -pcre_uchar *code = *codeptr; -pcre_uchar *last_branch = code; -pcre_uchar *start_bracket = code; -pcre_uchar *reverse_count = NULL; -open_capitem capitem; -int capnumber = 0; -pcre_uint32 firstchar, reqchar; -pcre_int32 firstcharflags, reqcharflags; -pcre_uint32 branchfirstchar, branchreqchar; -pcre_int32 branchfirstcharflags, branchreqcharflags; -int length; -unsigned int orig_bracount; -unsigned int max_bracount; -branch_chain bc; - -bc.outer = bcptr; -bc.current_branch = code; - -firstchar = reqchar = 0; -firstcharflags = reqcharflags = REQ_UNSET; - -/* Accumulate the length for use in the 
pre-compile phase. Start with the -length of the BRA and KET and any extra bytes that are required at the -beginning. We accumulate in a local variable to save frequent testing of -lenthptr for NULL. We cannot do this by looking at the value of code at the -start and end of each alternative, because compiled items are discarded during -the pre-compile phase so that the work space is not exceeded. */ - -length = 2 + 2*LINK_SIZE + skipbytes; - -/* WARNING: If the above line is changed for any reason, you must also change -the code that abstracts option settings at the start of the pattern and makes -them global. It tests the value of length for (2 + 2*LINK_SIZE) in the -pre-compile phase to find out whether anything has yet been compiled or not. */ - -/* If this is a capturing subpattern, add to the chain of open capturing items -so that we can detect them if (*ACCEPT) is encountered. This is also used to -detect groups that contain recursive back references to themselves. Note that -only OP_CBRA need be tested here; changing this opcode to one of its variants, -e.g. OP_SCBRAPOS, happens later, after the group has been compiled. */ - -if (*code == OP_CBRA) - { - capnumber = GET2(code, 1 + LINK_SIZE); - capitem.number = capnumber; - capitem.next = cd->open_caps; - capitem.flag = FALSE; - cd->open_caps = &capitem; - } - -/* Offset is set zero to mark that this bracket is still open */ - -PUT(code, 1, 0); -code += 1 + LINK_SIZE + skipbytes; - -/* Loop for each alternative branch */ - -orig_bracount = max_bracount = cd->bracount; -for (;;) - { - /* For a (?| group, reset the capturing bracket count so that each branch - uses the same numbers. 
*/ - - if (reset_bracount) cd->bracount = orig_bracount; - - /* Set up dummy OP_REVERSE if lookbehind assertion */ - - if (lookbehind) - { - *code++ = OP_REVERSE; - reverse_count = code; - PUTINC(code, 0, 0); - length += 1 + LINK_SIZE; - } - - /* Now compile the branch; in the pre-compile phase its length gets added - into the length. */ - - if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, - &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc, - cond_depth, cd, (lengthptr == NULL)? NULL : &length)) - { - *ptrptr = ptr; - return FALSE; - } - - /* Keep the highest bracket count in case (?| was used and some branch - has fewer than the rest. */ - - if (cd->bracount > max_bracount) max_bracount = cd->bracount; - - /* In the real compile phase, there is some post-processing to be done. */ - - if (lengthptr == NULL) - { - /* If this is the first branch, the firstchar and reqchar values for the - branch become the values for the regex. */ - - if (*last_branch != OP_ALT) - { - firstchar = branchfirstchar; - firstcharflags = branchfirstcharflags; - reqchar = branchreqchar; - reqcharflags = branchreqcharflags; - } - - /* If this is not the first branch, the first char and reqchar have to - match the values from all the previous branches, except that if the - previous value for reqchar didn't have REQ_VARY set, it can still match, - and we set REQ_VARY for the regex. */ - - else - { - /* If we previously had a firstchar, but it doesn't match the new branch, - we have to abandon the firstchar for the regex, but if there was - previously no reqchar, it takes on the value of the old firstchar. 
*/ - - if (firstcharflags >= 0 && - (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar)) - { - if (reqcharflags < 0) - { - reqchar = firstchar; - reqcharflags = firstcharflags; - } - firstcharflags = REQ_NONE; - } - - /* If we (now or from before) have no firstchar, a firstchar from the - branch becomes a reqchar if there isn't a branch reqchar. */ - - if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0) - { - branchreqchar = branchfirstchar; - branchreqcharflags = branchfirstcharflags; - } - - /* Now ensure that the reqchars match */ - - if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) || - reqchar != branchreqchar) - reqcharflags = REQ_NONE; - else - { - reqchar = branchreqchar; - reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */ - } - } - - /* If lookbehind, check that this branch matches a fixed-length string, and - put the length into the OP_REVERSE item. Temporarily mark the end of the - branch with OP_END. If the branch contains OP_RECURSE, the result is -3 - because there may be forward references that we can't check here. Set a - flag to cause another lookbehind check at the end. Why not do it all at the - end? Because common, erroneous checks are picked up here and the offset of - the problem can be shown. */ - - if (lookbehind) - { - int fixed_length; - *code = OP_END; - fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0, - FALSE, cd); - DPRINTF(("fixed length = %d\n", fixed_length)); - if (fixed_length == -3) - { - cd->check_lookbehind = TRUE; - } - else if (fixed_length < 0) - { - *errorcodeptr = (fixed_length == -2)? ERR36 : - (fixed_length == -4)? ERR70: ERR25; - *ptrptr = ptr; - return FALSE; - } - else - { - if (fixed_length > cd->max_lookbehind) - cd->max_lookbehind = fixed_length; - PUT(reverse_count, 0, fixed_length); - } - } - } - - /* Reached end of expression, either ')' or end of pattern. 
In the real - compile phase, go back through the alternative branches and reverse the chain - of offsets, with the field in the BRA item now becoming an offset to the - first alternative. If there are no alternatives, it points to the end of the - group. The length in the terminating ket is always the length of the whole - bracketed item. Return leaving the pointer at the terminating char. */ - - if (*ptr != CHAR_VERTICAL_LINE) - { - if (lengthptr == NULL) - { - int branch_length = (int)(code - last_branch); - do - { - int prev_length = GET(last_branch, 1); - PUT(last_branch, 1, branch_length); - branch_length = prev_length; - last_branch -= branch_length; - } - while (branch_length > 0); - } - - /* Fill in the ket */ - - *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); - code += 1 + LINK_SIZE; - - /* If it was a capturing subpattern, check to see if it contained any - recursive back references. If so, we must wrap it in atomic brackets. - In any event, remove the block from the chain. */ - - if (capnumber > 0) - { - if (cd->open_caps->flag) - { - memmove(start_bracket + 1 + LINK_SIZE, start_bracket, - IN_UCHARS(code - start_bracket)); - *start_bracket = OP_ONCE; - code += 1 + LINK_SIZE; - PUT(start_bracket, 1, (int)(code - start_bracket)); - *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); - code += 1 + LINK_SIZE; - length += 2 + 2*LINK_SIZE; - } - cd->open_caps = cd->open_caps->next; - } - - /* Retain the highest bracket number, in case resetting was used. */ - - cd->bracount = max_bracount; - - /* Set values to pass back */ - - *codeptr = code; - *ptrptr = ptr; - *firstcharptr = firstchar; - *firstcharflagsptr = firstcharflags; - *reqcharptr = reqchar; - *reqcharflagsptr = reqcharflags; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length) - { - *errorcodeptr = ERR20; - return FALSE; - } - *lengthptr += length; - } - return TRUE; - } - - /* Another branch follows. 
In the pre-compile phase, we can move the code - pointer back to where it was for the start of the first branch. (That is, - pretend that each branch is the only one.) - - In the real compile phase, insert an ALT node. Its length field points back - to the previous branch while the bracket remains open. At the end the chain - is reversed. It's done like this so that the start of the bracket has a - zero offset until it is closed, making it possible to detect recursion. */ - - if (lengthptr != NULL) - { - code = *codeptr + 1 + LINK_SIZE + skipbytes; - length += 1 + LINK_SIZE; - } - else - { - *code = OP_ALT; - PUT(code, 1, (int)(code - last_branch)); - bc.current_branch = last_branch = code; - code += 1 + LINK_SIZE; - } - - ptr++; - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Check for anchored expression * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket -all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then -it's anchored. However, if this is a multiline pattern, then only OP_SOD will -be found, because ^ generates OP_CIRCM in that mode. - -We can also consider a regex to be anchored if OP_SOM starts all its branches. -This is the code for \G, which means "match at start of match position, taking -into account the match offset". - -A branch is also implicitly anchored if it starts with .* and DOTALL is set, -because that will try the rest of the pattern at all possible matching points, -so there is no point trying again.... er .... - -.... except when the .* appears inside capturing parentheses, and there is a -subsequent back reference to those parentheses. We haven't enough information -to catch that case precisely. 
- -At first, the best we could do was to detect when .* was in capturing brackets -and the highest back reference was greater than or equal to that level. -However, by keeping a bitmap of the first 31 back references, we can catch some -of the more common cases more precisely. - -... A second exception is when the .* appears inside an atomic group, because -this prevents the number of characters it matches from being adjusted. - -Arguments: - code points to start of expression (the bracket) - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - cd points to the compile data block - atomcount atomic group level - -Returns: TRUE or FALSE -*/ - -static BOOL -is_anchored(register const pcre_uchar *code, unsigned int bracket_map, - compile_data *cd, int atomcount) -{ -do { - const pcre_uchar *scode = first_significant_code( - code + PRIV(OP_lengths)[*code], FALSE); - register int op = *scode; - - /* Non-capturing brackets */ - - if (op == OP_BRA || op == OP_BRAPOS || - op == OP_SBRA || op == OP_SBRAPOS) - { - if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; - } - - /* Capturing brackets */ - - else if (op == OP_CBRA || op == OP_CBRAPOS || - op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1 << n) : 1); - if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE; - } - - /* Positive forward assertions and conditions */ - - else if (op == OP_ASSERT || op == OP_COND) - { - if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; - } - - /* Atomic groups */ - - else if (op == OP_ONCE || op == OP_ONCE_NC) - { - if (!is_anchored(scode, bracket_map, cd, atomcount + 1)) - return FALSE; - } - - /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and - it isn't in brackets that are or may be referenced or inside an atomic - group. 
*/ - - else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || - op == OP_TYPEPOSSTAR)) - { - if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 || - atomcount > 0 || cd->had_pruneorskip) - return FALSE; - } - - /* Check for explicit anchoring */ - - else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; - - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n). As in the case of is_anchored() (see above), we -have to take account of back references to capturing brackets that contain .* -because in that case we can't make the assumption. Also, the appearance of .* -inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not -count, because once again the assumption no longer holds. - -Arguments: - code points to start of expression (the bracket) - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - cd points to the compile data - atomcount atomic group level - -Returns: TRUE or FALSE -*/ - -static BOOL -is_startline(const pcre_uchar *code, unsigned int bracket_map, - compile_data *cd, int atomcount) -{ -do { - const pcre_uchar *scode = first_significant_code( - code + PRIV(OP_lengths)[*code], FALSE); - register int op = *scode; - - /* If we are at the start of a conditional assertion group, *both* the - conditional assertion *and* what follows the condition must satisfy the test - for start of line. Other kinds of condition fail. 
Note that there may be an - auto-callout at the start of a condition. */ - - if (op == OP_COND) - { - scode += 1 + LINK_SIZE; - if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; - switch (*scode) - { - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_DEF: - return FALSE; - - default: /* Assertion */ - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; - do scode += GET(scode, 1); while (*scode == OP_ALT); - scode += 1 + LINK_SIZE; - break; - } - scode = first_significant_code(scode, FALSE); - op = *scode; - } - - /* Non-capturing brackets */ - - if (op == OP_BRA || op == OP_BRAPOS || - op == OP_SBRA || op == OP_SBRAPOS) - { - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; - } - - /* Capturing brackets */ - - else if (op == OP_CBRA || op == OP_CBRAPOS || - op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1 << n) : 1); - if (!is_startline(scode, new_map, cd, atomcount)) return FALSE; - } - - /* Positive forward assertions */ - - else if (op == OP_ASSERT) - { - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; - } - - /* Atomic brackets */ - - else if (op == OP_ONCE || op == OP_ONCE_NC) - { - if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE; - } - - /* .* means "start at start or after \n" if it isn't in atomic brackets or - brackets that may be referenced, as long as the pattern does not contain - *PRUNE or *SKIP, because these break the feature. Consider, for example, - /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the - start of a line. */ - - else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) - { - if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 || - atomcount > 0 || cd->had_pruneorskip) - return FALSE; - } - - /* Check for explicit circumflex; anything else gives a FALSE result. 
Note - in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC - because the number of characters matched by .* cannot be adjusted inside - them. */ - - else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; - - /* Move on to the next alternative */ - - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for asserted fixed first char * -*************************************************/ - -/* During compilation, the "first char" settings from forward assertions are -discarded, because they can cause conflicts with actual literals that follow. -However, if we end up without a first char setting for an unanchored pattern, -it is worth scanning the regex to see if there is an initial asserted first -char. If all branches start with the same asserted char, or with a bracket all -of whose alternatives start with the same asserted char (recurse ad lib), then -we return that char, otherwise -1. - -Arguments: - code points to start of expression (the bracket) - flags points to the first char flags, or to REQ_NONE - inassert TRUE if in an assertion - -Returns: the fixed first char, or 0 with REQ_NONE in flags -*/ - -static pcre_uint32 -find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags, - BOOL inassert) -{ -register pcre_uint32 c = 0; -int cflags = REQ_NONE; - -*flags = REQ_NONE; -do { - pcre_uint32 d; - int dflags; - int xl = (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 
IMM2_SIZE:0; - const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, - TRUE); - register pcre_uchar op = *scode; - - switch(op) - { - default: - return 0; - - case OP_BRA: - case OP_BRAPOS: - case OP_CBRA: - case OP_SCBRA: - case OP_CBRAPOS: - case OP_SCBRAPOS: - case OP_ASSERT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_COND: - d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT); - if (dflags < 0) - return 0; - if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0; - break; - - case OP_EXACT: - scode += IMM2_SIZE; - /* Fall through */ - - case OP_CHAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - if (!inassert) return 0; - if (cflags < 0) { c = scode[1]; cflags = 0; } - else if (c != scode[1]) return 0; - break; - - case OP_EXACTI: - scode += IMM2_SIZE; - /* Fall through */ - - case OP_CHARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - if (!inassert) return 0; - if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } - else if (c != scode[1]) return 0; - break; - } - - code += GET(code, 1); - } -while (*code == OP_ALT); - -*flags = cflags; -return c; -} - - - -/************************************************* -* Compile a Regular Expression * -*************************************************/ - -/* This function takes a string and returns a pointer to a block of store -holding a compiled version of the expression. The original API for this -function had no error code return variable; it is retained for backwards -compatibility. The new function is given a new name. 
- -Arguments: - pattern the regular expression - options various option bits - errorcodeptr pointer to error code variable (pcre_compile2() only) - can be NULL if you don't want a code value - errorptr pointer to pointer to error text - erroroffset ptr offset in pattern where error was detected - tables pointer to character tables or NULL - -Returns: pointer to compiled data block, or NULL on error, - with errorptr and erroroffset set -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION -pcre_compile(const char *pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION -pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION -pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) -#endif -{ -#if defined COMPILE_PCRE8 -return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); -#elif defined COMPILE_PCRE16 -return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); -#elif defined COMPILE_PCRE32 -return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables); -#endif -} - - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION -pcre_compile2(const char *pattern, int options, int *errorcodeptr, - const char **errorptr, int *erroroffset, const unsigned char *tables) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION -pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr, - const char **errorptr, int *erroroffset, const unsigned char *tables) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION -pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr, - const char **errorptr, int *erroroffset, 
const unsigned char *tables) -#endif -{ -REAL_PCRE *re; -int length = 1; /* For final END opcode */ -pcre_uint32 firstchar, reqchar; -pcre_int32 firstcharflags, reqcharflags; -int newline; -int errorcode = 0; -int skipatstart = 0; -BOOL utf; -size_t size; -pcre_uchar *code; -const pcre_uchar *codestart; -const pcre_uchar *ptr; -compile_data compile_block; -compile_data *cd = &compile_block; - -/* This space is used for "compiling" into during the first phase, when we are -computing the amount of memory that is needed. Compiled items are thrown away -as soon as possible, so that a fairly large buffer should be sufficient for -this purpose. The same space is used in the second phase for remembering where -to fill in forward references to subpatterns. That may overflow, in which case -new memory is obtained from malloc(). */ - -pcre_uchar cworkspace[COMPILE_WORK_SIZE]; - -/* Set this early so that early errors get offset 0. */ - -ptr = (const pcre_uchar *)pattern; - -/* We can't pass back an error message if errorptr is NULL; I guess the best we -can do is just return NULL, but we can set a code value if there is a code -pointer. 
*/ - -if (errorptr == NULL) - { - if (errorcodeptr != NULL) *errorcodeptr = 99; - return NULL; - } - -*errorptr = NULL; -if (errorcodeptr != NULL) *errorcodeptr = ERR0; - -/* However, we can give a message for this error */ - -if (erroroffset == NULL) - { - errorcode = ERR16; - goto PCRE_EARLY_ERROR_RETURN2; - } - -*erroroffset = 0; - -/* Set up pointers to the individual character tables */ - -if (tables == NULL) tables = PRIV(default_tables); -cd->lcc = tables + lcc_offset; -cd->fcc = tables + fcc_offset; -cd->cbits = tables + cbits_offset; -cd->ctypes = tables + ctypes_offset; - -/* Check that all undefined public option bits are zero */ - -if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) - { - errorcode = ERR17; - goto PCRE_EARLY_ERROR_RETURN; - } - -/* Check for global one-time settings at the start of the pattern, and remember -the offset for later. */ - -while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && - ptr[skipatstart+1] == CHAR_ASTERISK) - { - int newnl = 0; - int newbsr = 0; - -/* For completeness and backward compatibility, (*UTFn) is supported in the -relevant libraries, but (*UTF) is generic and always supported. Note that -PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. 
*/ - -#ifdef COMPILE_PCRE8 - if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0) - { skipatstart += 7; options |= PCRE_UTF8; continue; } -#endif -#ifdef COMPILE_PCRE16 - if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0) - { skipatstart += 8; options |= PCRE_UTF16; continue; } -#endif -#ifdef COMPILE_PCRE32 - if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0) - { skipatstart += 8; options |= PCRE_UTF32; continue; } -#endif - - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0) - { skipatstart += 6; options |= PCRE_UTF8; continue; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) - { skipatstart += 6; options |= PCRE_UCP; continue; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) - { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } - - if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) - { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) - { skipatstart += 5; newnl = PCRE_NEWLINE_LF; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) == 0) - { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0) - { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0) - { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } - - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0) - { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } - else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0) - { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } - - if (newnl != 0) - options = (options & ~PCRE_NEWLINE_BITS) | newnl; - else if (newbsr != 0) - options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; - else break; - } - -/* 
PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ -utf = (options & PCRE_UTF8) != 0; - -/* Can't support UTF unless PCRE has been compiled to include the code. The -return of an error code from PRIV(valid_utf)() is a new feature, introduced in -release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is -not used here. */ - -#ifdef SUPPORT_UTF -if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 && - (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0) - { -#if defined COMPILE_PCRE8 - errorcode = ERR44; -#elif defined COMPILE_PCRE16 - errorcode = ERR74; -#elif defined COMPILE_PCRE32 - errorcode = ERR77; -#endif - goto PCRE_EARLY_ERROR_RETURN2; - } -#else -if (utf) - { - errorcode = ERR32; - goto PCRE_EARLY_ERROR_RETURN; - } -#endif - -/* Can't support UCP unless PCRE has been compiled to include the code. */ - -#ifndef SUPPORT_UCP -if ((options & PCRE_UCP) != 0) - { - errorcode = ERR67; - goto PCRE_EARLY_ERROR_RETURN; - } -#endif - -/* Check validity of \R options. */ - -if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == - (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) - { - errorcode = ERR56; - goto PCRE_EARLY_ERROR_RETURN; - } - -/* Handle different types of newline. The three bits give seven cases. The -current code allows for fixed one- or two-byte sequences, plus "any" and -"anycrlf". 
*/ - -switch (options & PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Build-time default */ - case PCRE_NEWLINE_CR: newline = CHAR_CR; break; - case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ - PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; - } - -if (newline == -2) - { - cd->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - cd->nltype = NLTYPE_ANY; - } -else - { - cd->nltype = NLTYPE_FIXED; - if (newline > 255) - { - cd->nllen = 2; - cd->nl[0] = (newline >> 8) & 255; - cd->nl[1] = newline & 255; - } - else - { - cd->nllen = 1; - cd->nl[0] = newline; - } - } - -/* Maximum back reference and backref bitmap. The bitmap records up to 31 back -references to help in deciding whether (.*) can be treated as anchored or not. -*/ - -cd->top_backref = 0; -cd->backref_map = 0; - -/* Reflect pattern for debugging output */ - -DPRINTF(("------------------------------------------------------------------\n")); -#ifdef PCRE_DEBUG -print_puchar(stdout, (PCRE_PUCHAR)pattern); -#endif -DPRINTF(("\n")); - -/* Pretend to compile the pattern while actually just accumulating the length -of memory required. This behaviour is triggered by passing a non-NULL final -argument to compile_regex(). We pass a block of workspace (cworkspace) for it -to compile parts of the pattern into; the compiled code is discarded when it is -no longer needed, so hopefully this workspace will never overflow, though there -is a test for its doing so. 
*/ - -cd->bracount = cd->final_bracount = 0; -cd->names_found = 0; -cd->name_entry_size = 0; -cd->name_table = NULL; -cd->start_code = cworkspace; -cd->hwm = cworkspace; -cd->start_workspace = cworkspace; -cd->workspace_size = COMPILE_WORK_SIZE; -cd->start_pattern = (const pcre_uchar *)pattern; -cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); -cd->req_varyopt = 0; -cd->assert_depth = 0; -cd->max_lookbehind = 0; -cd->external_options = options; -cd->external_flags = 0; -cd->open_caps = NULL; - -/* Now do the pre-compile. On error, errorcode will be set non-zero, so we -don't need to look at the result of the function here. The initial options have -been put into the cd block so that they can be changed if an option setting is -found within the regex right at the beginning. Bringing initial option settings -outside can help speed up starting point checks. */ - -ptr += skipatstart; -code = cworkspace; -*code = OP_BRA; -(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, - FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, - cd, &length); -if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; - -DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, - (int)(cd->hwm - cworkspace))); - -if (length > MAX_PATTERN_SIZE) - { - errorcode = ERR20; - goto PCRE_EARLY_ERROR_RETURN; - } - -/* Compute the size of data block needed and get it, either from malloc or -externally provided function. Integer overflow should no longer be possible -because nowadays we limit the maximum value of cd->names_found and -cd->name_entry_size. */ - -size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar); -re = (REAL_PCRE *)(PUBL(malloc))(size); - -if (re == NULL) - { - errorcode = ERR21; - goto PCRE_EARLY_ERROR_RETURN; - } - -/* Put in the magic number, and save the sizes, initial options, internal -flags, and character table pointer. 
NULL is used for the default character -tables. The nullpad field is at the end; it's there to help in the case when a -regex compiled on a system with 4-byte pointers is run on another with 8-byte -pointers. */ - -re->magic_number = MAGIC_NUMBER; -re->size = (int)size; -re->options = cd->external_options; -re->flags = cd->external_flags; -re->first_char = 0; -re->req_char = 0; -re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); -re->name_entry_size = cd->name_entry_size; -re->name_count = cd->names_found; -re->ref_count = 0; -re->tables = (tables == PRIV(default_tables))? NULL : tables; -re->nullpad = NULL; -#ifdef COMPILE_PCRE32 -re->dummy1 = re->dummy2 = 0; -#endif - -/* The starting points of the name/number translation table and of the code are -passed around in the compile data block. The start/end pattern and initial -options are already set from the pre-compile phase, as is the name_entry_size -field. Reset the bracket count and the names_found field. Also reset the hwm -field; this time it's used for remembering forward references to subpatterns. -*/ - -cd->final_bracount = cd->bracount; /* Save for checking forward references */ -cd->assert_depth = 0; -cd->bracount = 0; -cd->max_lookbehind = 0; -cd->names_found = 0; -cd->name_table = (pcre_uchar *)re + re->name_table_offset; -codestart = cd->name_table + re->name_entry_size * re->name_count; -cd->start_code = codestart; -cd->hwm = (pcre_uchar *)(cd->start_workspace); -cd->req_varyopt = 0; -cd->had_accept = FALSE; -cd->had_pruneorskip = FALSE; -cd->check_lookbehind = FALSE; -cd->open_caps = NULL; - -/* Set up a starting, non-extracting bracket, then compile the expression. On -error, errorcode will be set non-zero, so we don't need to look at the result -of the function here. 
*/ - -ptr = (const pcre_uchar *)pattern + skipatstart; -code = (pcre_uchar *)codestart; -*code = OP_BRA; -(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, - &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL); -re->top_bracket = cd->bracount; -re->top_backref = cd->top_backref; -re->max_lookbehind = cd->max_lookbehind; -re->flags = cd->external_flags | PCRE_MODE; - -if (cd->had_accept) - { - reqchar = 0; /* Must disable after (*ACCEPT) */ - reqcharflags = REQ_NONE; - } - -/* If not reached end of pattern on success, there's an excess bracket. */ - -if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22; - -/* Fill in the terminating state and check for disastrous overflow, but -if debugging, leave the test till after things are printed out. */ - -*code++ = OP_END; - -#ifndef PCRE_DEBUG -if (code - codestart > length) errorcode = ERR23; -#endif - -#ifdef SUPPORT_VALGRIND -/* If the estimated length exceeds the really used length, mark the extra -allocated memory as unadressable, so that any out-of-bound reads can be -detected. */ -VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar)); -#endif - -/* Fill in any forward references that are required. There may be repeated -references; optimize for them, as searching a large regex takes time. */ - -if (cd->hwm > cd->start_workspace) - { - int prev_recno = -1; - const pcre_uchar *groupptr = NULL; - while (errorcode == 0 && cd->hwm > cd->start_workspace) - { - int offset, recno; - cd->hwm -= LINK_SIZE; - offset = GET(cd->hwm, 0); - recno = GET(codestart, offset); - if (recno != prev_recno) - { - groupptr = PRIV(find_bracket)(codestart, utf, recno); - prev_recno = recno; - } - if (groupptr == NULL) errorcode = ERR53; - else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart)); - } - } - -/* If the workspace had to be expanded, free the new memory. 
*/ - -if (cd->workspace_size > COMPILE_WORK_SIZE) - (PUBL(free))((void *)cd->start_workspace); - -/* Give an error if there's back reference to a non-existent capturing -subpattern. */ - -if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; - -/* If there were any lookbehind assertions that contained OP_RECURSE -(recursions or subroutine calls), a flag is set for them to be checked here, -because they may contain forward references. Actual recursions cannot be fixed -length, but subroutine calls can. It is done like this so that those without -OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The -exceptional ones forgo this. We scan the pattern to check that they are fixed -length, and set their lengths. */ - -if (cd->check_lookbehind) - { - pcre_uchar *cc = (pcre_uchar *)codestart; - - /* Loop, searching for OP_REVERSE items, and process those that do not have - their length set. (Actually, it will also re-process any that have a length - of zero, but that is a pathological case, and it does no harm.) When we find - one, we temporarily terminate the branch it is in while we scan it. */ - - for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1); - cc != NULL; - cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1)) - { - if (GET(cc, 1) == 0) - { - int fixed_length; - pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); - int end_op = *be; - *be = OP_END; - fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE, - cd); - *be = end_op; - DPRINTF(("fixed length = %d\n", fixed_length)); - if (fixed_length < 0) - { - errorcode = (fixed_length == -2)? ERR36 : - (fixed_length == -4)? 
ERR70 : ERR25; - break; - } - if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length; - PUT(cc, 1, fixed_length); - } - cc += 1 + LINK_SIZE; - } - } - -/* Failed to compile, or error while post-processing */ - -if (errorcode != 0) - { - (PUBL(free))(re); - PCRE_EARLY_ERROR_RETURN: - *erroroffset = (int)(ptr - (const pcre_uchar *)pattern); - PCRE_EARLY_ERROR_RETURN2: - *errorptr = find_error_text(errorcode); - if (errorcodeptr != NULL) *errorcodeptr = errorcode; - return NULL; - } - -/* If the anchored option was not passed, set the flag if we can determine that -the pattern is anchored by virtue of ^ characters or \A or anything else, such -as starting with non-atomic .* when DOTALL is set and there are no occurrences -of *PRUNE or *SKIP. - -Otherwise, if we know what the first byte has to be, save it, because that -speeds up unanchored matches no end. If not, see if we can set the -PCRE_STARTLINE flag. This is helpful for multiline matches when all branches -start with ^. and also when all branches start with non-atomic .* for -non-DOTALL matches when *PRUNE and SKIP are not present. */ - -if ((re->options & PCRE_ANCHORED) == 0) - { - if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED; - else - { - if (firstcharflags < 0) - firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE); - if (firstcharflags >= 0) /* Remove caseless flag for non-caseable chars */ - { -#if defined COMPILE_PCRE8 - re->first_char = firstchar & 0xff; -#elif defined COMPILE_PCRE16 - re->first_char = firstchar & 0xffff; -#elif defined COMPILE_PCRE32 - re->first_char = firstchar; -#endif - if ((firstcharflags & REQ_CASELESS) != 0) - { -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - /* We ignore non-ASCII first chars in 8 bit mode. 
*/ - if (utf) - { - if (re->first_char < 128) - { - if (cd->fcc[re->first_char] != re->first_char) - re->flags |= PCRE_FCH_CASELESS; - } - else if (UCD_OTHERCASE(re->first_char) != re->first_char) - re->flags |= PCRE_FCH_CASELESS; - } - else -#endif - if (MAX_255(re->first_char) - && cd->fcc[re->first_char] != re->first_char) - re->flags |= PCRE_FCH_CASELESS; - } - - re->flags |= PCRE_FIRSTSET; - } - - else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE; - } - } - -/* For an anchored pattern, we use the "required byte" only if it follows a -variable length item in the regex. Remove the caseless flag for non-caseable -bytes. */ - -if (reqcharflags >= 0 && - ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0)) - { -#if defined COMPILE_PCRE8 - re->req_char = reqchar & 0xff; -#elif defined COMPILE_PCRE16 - re->req_char = reqchar & 0xffff; -#elif defined COMPILE_PCRE32 - re->req_char = reqchar; -#endif - if ((reqcharflags & REQ_CASELESS) != 0) - { -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - /* We ignore non-ASCII first chars in 8 bit mode. */ - if (utf) - { - if (re->req_char < 128) - { - if (cd->fcc[re->req_char] != re->req_char) - re->flags |= PCRE_RCH_CASELESS; - } - else if (UCD_OTHERCASE(re->req_char) != re->req_char) - re->flags |= PCRE_RCH_CASELESS; - } - else -#endif - if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char) - re->flags |= PCRE_RCH_CASELESS; - } - - re->flags |= PCRE_REQCHSET; - } - -/* Print out the compiled data if debugging is enabled. This is never the -case when building a production library. */ - -#ifdef PCRE_DEBUG -printf("Length = %d top_bracket = %d top_backref = %d\n", - length, re->top_bracket, re->top_backref); - -printf("Options=%08x\n", re->options); - -if ((re->flags & PCRE_FIRSTSET) != 0) - { - pcre_uchar ch = re->first_char; - const char *caseless = - ((re->flags & PCRE_FCH_CASELESS) == 0)? 
"" : " (caseless)"; - if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless); - else printf("First char = \\x%02x%s\n", ch, caseless); - } - -if ((re->flags & PCRE_REQCHSET) != 0) - { - pcre_uchar ch = re->req_char; - const char *caseless = - ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)"; - if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless); - else printf("Req char = \\x%02x%s\n", ch, caseless); - } - -#if defined COMPILE_PCRE8 -pcre_printint((pcre *)re, stdout, TRUE); -#elif defined COMPILE_PCRE16 -pcre16_printint((pcre *)re, stdout, TRUE); -#elif defined COMPILE_PCRE32 -pcre32_printint((pcre *)re, stdout, TRUE); -#endif - -/* This check is done here in the debugging case so that the code that -was compiled can be seen. */ - -if (code - codestart > length) - { - (PUBL(free))(re); - *errorptr = find_error_text(ERR23); - *erroroffset = ptr - (pcre_uchar *)pattern; - if (errorcodeptr != NULL) *errorcodeptr = ERR23; - return NULL; - } -#endif /* PCRE_DEBUG */ - -#if defined COMPILE_PCRE8 -return (pcre *)re; -#elif defined COMPILE_PCRE16 -return (pcre16 *)re; -#elif defined COMPILE_PCRE32 -return (pcre32 *)re; -#endif -} - -/* End of pcre_compile.c */ diff --git a/deps/libmagic/pcre/pcre_config.c b/deps/libmagic/pcre/pcre_config.c deleted file mode 100644 index 3d5689f..0000000 --- a/deps/libmagic/pcre/pcre_config.c +++ /dev/null @@ -1,186 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_config(). */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Keep the original link size. 
*/ -static int real_link_size = LINK_SIZE; - -#include "pcre_internal.h" - - -/************************************************* -* Return info about what features are configured * -*************************************************/ - -/* This function has an extensible interface so that additional items can be -added compatibly. - -Arguments: - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_config(int what, void *where) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_config(int what, void *where) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_config(int what, void *where) -#endif -{ -switch (what) - { - case PCRE_CONFIG_UTF8: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - *((int *)where) = 0; - return PCRE_ERROR_BADOPTION; -#else -#if defined SUPPORT_UTF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; -#endif - - case PCRE_CONFIG_UTF16: -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32 - *((int *)where) = 0; - return PCRE_ERROR_BADOPTION; -#else -#if defined SUPPORT_UTF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; -#endif - - case PCRE_CONFIG_UTF32: -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - *((int *)where) = 0; - return PCRE_ERROR_BADOPTION; -#else -#if defined SUPPORT_UTF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; -#endif - - case PCRE_CONFIG_UNICODE_PROPERTIES: -#ifdef SUPPORT_UCP - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; - - case PCRE_CONFIG_JIT: -#ifdef SUPPORT_JIT - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; - - case PCRE_CONFIG_JITTARGET: -#ifdef SUPPORT_JIT - *((const char **)where) = PRIV(jit_get_target)(); -#else - *((const char **)where) = NULL; -#endif - break; - - case PCRE_CONFIG_NEWLINE: - 
*((int *)where) = NEWLINE; - break; - - case PCRE_CONFIG_BSR: -#ifdef BSR_ANYCRLF - *((int *)where) = 1; -#else - *((int *)where) = 0; -#endif - break; - - case PCRE_CONFIG_LINK_SIZE: - *((int *)where) = real_link_size; - break; - - case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: - *((int *)where) = POSIX_MALLOC_THRESHOLD; - break; - - case PCRE_CONFIG_MATCH_LIMIT: - *((unsigned long int *)where) = MATCH_LIMIT; - break; - - case PCRE_CONFIG_MATCH_LIMIT_RECURSION: - *((unsigned long int *)where) = MATCH_LIMIT_RECURSION; - break; - - case PCRE_CONFIG_STACKRECURSE: -#ifdef NO_RECURSE - *((int *)where) = 0; -#else - *((int *)where) = 1; -#endif - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - -/* End of pcre_config.c */ diff --git a/deps/libmagic/pcre/pcre_dfa_exec.c b/deps/libmagic/pcre/pcre_dfa_exec.c deleted file mode 100644 index 91fb730..0000000 --- a/deps/libmagic/pcre/pcre_dfa_exec.c +++ /dev/null @@ -1,3582 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language (but see -below for why this module is different). - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains the external function pcre_dfa_exec(), which is an -alternative matching function that uses a sort of DFA algorithm (not a true -FSM). This is NOT Perl-compatible, but it has advantages in certain -applications. */ - - -/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved -the performance of his patterns greatly. I could not use it as it stood, as it -was not thread safe, and made assumptions about pattern sizes. Also, it caused -test 7 to loop, and test 9 to crash with a segfault. - -The issue is the check for duplicate states, which is done by a simple linear -search up the state list. (Grep for "duplicate" below to find the code.) For -many patterns, there will never be many states active at one time, so a simple -linear search is fine. In patterns that have many active states, it might be a -bottleneck. 
The suggested code used an indexing scheme to remember which states -had previously been used for each character, and avoided the linear search when -it knew there was no chance of a duplicate. This was implemented when adding -states to the state lists. - -I wrote some thread-safe, not-limited code to try something similar at the time -of checking for duplicates (instead of when adding states), using index vectors -on the stack. It did give a 13% improvement with one specially constructed -pattern for certain subject strings, but on other strings and on many of the -simpler patterns in the test suite it did worse. The major problem, I think, -was the extra time to initialize the index. This had to be done for each call -of internal_dfa_exec(). (The supplied patch used a static vector, initialized -only once - I suspect this was the cause of the problems with the tests.) - -Overall, I concluded that the gains in some cases did not outweigh the losses -in others, so I abandoned this code. */ - - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define NLBLOCK md /* Block containing newline information */ -#define PSSTART start_subject /* Field containing processed string start */ -#define PSEND end_subject /* Field containing processed string end */ - -#include "pcre_internal.h" - - -/* For use to indent debugging output */ - -#define SP " " - - -/************************************************* -* Code parameters and static tables * -*************************************************/ - -/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes -into others, under special conditions. A gap of 20 between the blocks should be -enough. The resulting opcodes don't have to be less than 256 because they are -never stored, so we push them well clear of the normal opcodes. 
*/ - -#define OP_PROP_EXTRA 300 -#define OP_EXTUNI_EXTRA 320 -#define OP_ANYNL_EXTRA 340 -#define OP_HSPACE_EXTRA 360 -#define OP_VSPACE_EXTRA 380 - - -/* This table identifies those opcodes that are followed immediately by a -character that is to be tested in some way. This makes it possible to -centralize the loading of these characters. In the case of Type * etc, the -"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a -small value. Non-zero values in the table are the offsets from the opcode where -the character is to be found. ***NOTE*** If the start of this table is -modified, the three tables that follow must also be modified. */ - -static const pcre_uint8 coptable[] = { - 0, /* End */ - 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ - 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ - 0, 0, 0, /* Any, AllAny, Anybyte */ - 0, 0, /* \P, \p */ - 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ - 0, /* \X */ - 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */ - 1, /* Char */ - 1, /* Chari */ - 1, /* not */ - 1, /* noti */ - /* Positive single-char repeats */ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ - 1+IMM2_SIZE, /* exact */ - 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ - 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ - 1+IMM2_SIZE, /* exact I */ - 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ - /* Negative single-char repeats - only for chars < 256 */ - 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ - 1+IMM2_SIZE, /* NOT exact */ - 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ - 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ - 1+IMM2_SIZE, /* NOT exact I */ - 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ - /* Positive type repeats */ - 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? 
*/ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ - 1+IMM2_SIZE, /* Type exact */ - 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ - /* Character class & ref repeats */ - 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ - 0, 0, /* CRRANGE, CRMINRANGE */ - 0, /* CLASS */ - 0, /* NCLASS */ - 0, /* XCLASS - variable length */ - 0, /* REF */ - 0, /* REFI */ - 0, /* RECURSE */ - 0, /* CALLOUT */ - 0, /* Alt */ - 0, /* Ket */ - 0, /* KetRmax */ - 0, /* KetRmin */ - 0, /* KetRpos */ - 0, /* Reverse */ - 0, /* Assert */ - 0, /* Assert not */ - 0, /* Assert behind */ - 0, /* Assert behind not */ - 0, 0, /* ONCE, ONCE_NC */ - 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ - 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ - 0, 0, /* CREF, NCREF */ - 0, 0, /* RREF, NRREF */ - 0, /* DEF */ - 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ - 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ - 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ - 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ - 0, 0 /* CLOSE, SKIPZERO */ -}; - -/* This table identifies those opcodes that inspect a character. It is used to -remember the fact that a character could have been inspected when the end of -the subject is reached. ***NOTE*** If the start of this table is modified, the -two tables that follow must also be modified. */ - -static const pcre_uint8 poptable[] = { - 0, /* End */ - 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ - 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ - 1, 1, 1, /* Any, AllAny, Anybyte */ - 1, 1, /* \P, \p */ - 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ - 1, /* \X */ - 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */ - 1, /* Char */ - 1, /* Chari */ - 1, /* not */ - 1, /* noti */ - /* Positive single-char repeats */ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ - 1, 1, 1, /* upto, minupto, exact */ - 1, 1, 1, 1, /* *+, ++, ?+, upto+ */ - 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ - 1, 1, 1, /* upto I, minupto I, exact I */ - 1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */ - /* Negative single-char repeats - only for chars < 256 */ - 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ - 1, 1, 1, /* NOT upto, minupto, exact */ - 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ - 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ - 1, 1, 1, /* NOT upto I, minupto I, exact I */ - 1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */ - /* Positive type repeats */ - 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ - 1, 1, 1, /* Type upto, minupto, exact */ - 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */ - /* Character class & ref repeats */ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ - 1, 1, /* CRRANGE, CRMINRANGE */ - 1, /* CLASS */ - 1, /* NCLASS */ - 1, /* XCLASS - variable length */ - 0, /* REF */ - 0, /* REFI */ - 0, /* RECURSE */ - 0, /* CALLOUT */ - 0, /* Alt */ - 0, /* Ket */ - 0, /* KetRmax */ - 0, /* KetRmin */ - 0, /* KetRpos */ - 0, /* Reverse */ - 0, /* Assert */ - 0, /* Assert not */ - 0, /* Assert behind */ - 0, /* Assert behind not */ - 0, 0, /* ONCE, ONCE_NC */ - 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ - 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ - 0, 0, /* CREF, NCREF */ - 0, 0, /* RREF, NRREF */ - 0, /* DEF */ - 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ - 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ - 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ - 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ - 0, 0 /* CLOSE, SKIPZERO */ -}; - -/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, -and \w */ - -static const pcre_uint8 toptable1[] = { - 0, 0, 0, 0, 0, 0, - ctype_digit, ctype_digit, - ctype_space, ctype_space, - ctype_word, ctype_word, - 0, 0 /* OP_ANY, OP_ALLANY */ -}; - -static const pcre_uint8 toptable2[] = { - 0, 0, 0, 0, 0, 0, - ctype_digit, 0, - ctype_space, 0, - 
ctype_word, 0, - 1, 1 /* OP_ANY, OP_ALLANY */ -}; - - -/* Structure for holding data about a particular state, which is in effect the -current data for an active path through the match tree. It must consist -entirely of ints because the working vector we are passed, and which we put -these structures in, is a vector of ints. */ - -typedef struct stateblock { - int offset; /* Offset to opcode */ - int count; /* Count for repeats */ - int data; /* Some use extra data */ -} stateblock; - -#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) - - -#ifdef PCRE_DEBUG -/************************************************* -* Print character string * -*************************************************/ - -/* Character string printing function for debugging. - -Arguments: - p points to string - length number of bytes - f where to print - -Returns: nothing -*/ - -static void -pchars(const pcre_uchar *p, int length, FILE *f) -{ -pcre_uint32 c; -while (length-- > 0) - { - if (isprint(c = *(p++))) - fprintf(f, "%c", c); - else - fprintf(f, "\\x{%02x}", c); - } -} -#endif - - - -/************************************************* -* Execute a Regular Expression - DFA engine * -*************************************************/ - -/* This internal function applies a compiled pattern to a subject string, -starting at a given point, using a DFA engine. This function is called from the -external one, possibly multiple times if the pattern is not anchored. The -function calls itself recursively for some kinds of subpattern. 
- -Arguments: - md the match_data block with fixed information - this_start_code the opening bracket of this subexpression's code - current_subject where we currently are in the subject string - start_offset start offset in the subject string - offsets vector to contain the matching string offsets - offsetcount size of same - workspace vector of workspace - wscount size of same - rlevel function call recursion level - -Returns: > 0 => number of match offset pairs placed in offsets - = 0 => offsets overflowed; longest matches are present - -1 => failed to match - < -1 => some kind of unexpected problem - -The following macros are used for adding states to the two state vectors (one -for the current character, one for the following character). */ - -#define ADD_ACTIVE(x,y) \ - if (active_count++ < wscount) \ - { \ - next_active_state->offset = (x); \ - next_active_state->count = (y); \ - next_active_state++; \ - DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_ACTIVE_DATA(x,y,z) \ - if (active_count++ < wscount) \ - { \ - next_active_state->offset = (x); \ - next_active_state->count = (y); \ - next_active_state->data = (z); \ - next_active_state++; \ - DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_NEW(x,y) \ - if (new_count++ < wscount) \ - { \ - next_new_state->offset = (x); \ - next_new_state->count = (y); \ - next_new_state++; \ - DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -#define ADD_NEW_DATA(x,y,z) \ - if (new_count++ < wscount) \ - { \ - next_new_state->offset = (x); \ - next_new_state->count = (y); \ - next_new_state->data = (z); \ - next_new_state++; \ - DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \ - (x), (y), (z), __LINE__)); \ - } \ - else return PCRE_ERROR_DFA_WSSIZE - -/* And now, here is the code */ - 
-static int -internal_dfa_exec( - dfa_match_data *md, - const pcre_uchar *this_start_code, - const pcre_uchar *current_subject, - int start_offset, - int *offsets, - int offsetcount, - int *workspace, - int wscount, - int rlevel) -{ -stateblock *active_states, *new_states, *temp_states; -stateblock *next_active_state, *next_new_state; - -const pcre_uint8 *ctypes, *lcc, *fcc; -const pcre_uchar *ptr; -const pcre_uchar *end_code, *first_op; - -dfa_recursion_info new_recursive; - -int active_count, new_count, match_count; - -/* Some fields in the md block are frequently referenced, so we load them into -independent variables in the hope that this will perform better. */ - -const pcre_uchar *start_subject = md->start_subject; -const pcre_uchar *end_subject = md->end_subject; -const pcre_uchar *start_code = md->start_code; - -#ifdef SUPPORT_UTF -BOOL utf = (md->poptions & PCRE_UTF8) != 0; -#else -BOOL utf = FALSE; -#endif - -BOOL reset_could_continue = FALSE; - -rlevel++; -offsetcount &= (-2); - -wscount -= 2; -wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / - (2 * INTS_PER_STATEBLOCK); - -DPRINTF(("\n%.*s---------------------\n" - "%.*sCall to internal_dfa_exec f=%d\n", - rlevel*2-2, SP, rlevel*2-2, SP, rlevel)); - -ctypes = md->tables + ctypes_offset; -lcc = md->tables + lcc_offset; -fcc = md->tables + fcc_offset; - -match_count = PCRE_ERROR_NOMATCH; /* A negative number */ - -active_states = (stateblock *)(workspace + 2); -next_new_state = new_states = active_states + wscount; -new_count = 0; - -first_op = this_start_code + 1 + LINK_SIZE + - ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) - ? IMM2_SIZE:0); - -/* The first thing in any (sub) pattern is a bracket of some sort. Push all -the alternative states onto the list, and find out where the end is. 
This -makes is possible to use this function recursively, when we want to stop at a -matching internal ket rather than at the end. - -If the first opcode in the first alternative is OP_REVERSE, we are dealing with -a backward assertion. In that case, we have to find out the maximum amount to -move back, and set up each alternative appropriately. */ - -if (*first_op == OP_REVERSE) - { - int max_back = 0; - int gone_back; - - end_code = this_start_code; - do - { - int back = GET(end_code, 2+LINK_SIZE); - if (back > max_back) max_back = back; - end_code += GET(end_code, 1); - } - while (*end_code == OP_ALT); - - /* If we can't go back the amount required for the longest lookbehind - pattern, go back as far as we can; some alternatives may still be viable. */ - -#ifdef SUPPORT_UTF - /* In character mode we have to step back character by character */ - - if (utf) - { - for (gone_back = 0; gone_back < max_back; gone_back++) - { - if (current_subject <= start_subject) break; - current_subject--; - ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--); - } - } - else -#endif - - /* In byte-mode we can do this quickly. */ - - { - gone_back = (current_subject - max_back < start_subject)? - (int)(current_subject - start_subject) : max_back; - current_subject -= gone_back; - } - - /* Save the earliest consulted character */ - - if (current_subject < md->start_used_ptr) - md->start_used_ptr = current_subject; - - /* Now we can process the individual branches. */ - - end_code = this_start_code; - do - { - int back = GET(end_code, 2+LINK_SIZE); - if (back <= gone_back) - { - int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); - ADD_NEW_DATA(-bstate, 0, gone_back - back); - } - end_code += GET(end_code, 1); - } - while (*end_code == OP_ALT); - } - -/* This is the code for a "normal" subpattern (not a backward assertion). The -start of a whole pattern is always one of these. 
If we are at the top level, -we may be asked to restart matching from the same point that we reached for a -previous partial match. We still have to scan through the top-level branches to -find the end state. */ - -else - { - end_code = this_start_code; - - /* Restarting */ - - if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0) - { - do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); - new_count = workspace[1]; - if (!workspace[0]) - memcpy(new_states, active_states, new_count * sizeof(stateblock)); - } - - /* Not restarting */ - - else - { - int length = 1 + LINK_SIZE + - ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) - ? IMM2_SIZE:0); - do - { - ADD_NEW((int)(end_code - start_code + length), 0); - end_code += GET(end_code, 1); - length = 1 + LINK_SIZE; - } - while (*end_code == OP_ALT); - } - } - -workspace[0] = 0; /* Bit indicating which vector is current */ - -DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code))); - -/* Loop for scanning the subject */ - -ptr = current_subject; -for (;;) - { - int i, j; - int clen, dlen; - pcre_uint32 c, d; - int forced_fail = 0; - BOOL partial_newline = FALSE; - BOOL could_continue = reset_could_continue; - reset_could_continue = FALSE; - - /* Make the new state list into the active state list and empty the - new state list. 
*/ - - temp_states = active_states; - active_states = new_states; - new_states = temp_states; - active_count = new_count; - new_count = 0; - - workspace[0] ^= 1; /* Remember for the restarting feature */ - workspace[1] = active_count; - -#ifdef PCRE_DEBUG - printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); - pchars(ptr, STRLEN_UC(ptr), stdout); - printf("\"\n"); - - printf("%.*sActive states: ", rlevel*2-2, SP); - for (i = 0; i < active_count; i++) - printf("%d/%d ", active_states[i].offset, active_states[i].count); - printf("\n"); -#endif - - /* Set the pointers for adding new states */ - - next_active_state = active_states + active_count; - next_new_state = new_states; - - /* Load the current character from the subject outside the loop, as many - different states may want to look at it, and we assume that at least one - will. */ - - if (ptr < end_subject) - { - clen = 1; /* Number of data items in the character */ -#ifdef SUPPORT_UTF - GETCHARLENTEST(c, ptr, clen); -#else - c = *ptr; -#endif /* SUPPORT_UTF */ - } - else - { - clen = 0; /* This indicates the end of the subject */ - c = NOTACHAR; /* This value should never actually be used */ - } - - /* Scan up the active states and act on each one. The result of an action - may be to add more states to the currently active list (e.g. on hitting a - parenthesis) or it may be to put states on the new list, for considering - when we move the character pointer on. 
*/ - - for (i = 0; i < active_count; i++) - { - stateblock *current_state = active_states + i; - BOOL caseless = FALSE; - const pcre_uchar *code; - int state_offset = current_state->offset; - int codevalue, rrc; - unsigned int count; - -#ifdef PCRE_DEBUG - printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); - if (clen == 0) printf("EOL\n"); - else if (c > 32 && c < 127) printf("'%c'\n", c); - else printf("0x%02x\n", c); -#endif - - /* A negative offset is a special case meaning "hold off going to this - (negated) state until the number of characters in the data field have - been skipped". If the could_continue flag was passed over from a previous - state, arrange for it to passed on. */ - - if (state_offset < 0) - { - if (current_state->data > 0) - { - DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); - ADD_NEW_DATA(state_offset, current_state->count, - current_state->data - 1); - if (could_continue) reset_could_continue = TRUE; - continue; - } - else - { - current_state->offset = state_offset = -state_offset; - } - } - - /* Check for a duplicate state with the same count, and skip if found. - See the note at the head of this module about the possibility of improving - performance here. */ - - for (j = 0; j < i; j++) - { - if (active_states[j].offset == state_offset && - active_states[j].count == current_state->count) - { - DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP)); - goto NEXT_ACTIVE_STATE; - } - } - - /* The state offset is the offset to the opcode */ - - code = start_code + state_offset; - codevalue = *code; - - /* If this opcode inspects a character, but we are at the end of the - subject, remember the fact for use when testing for a partial match. */ - - if (clen == 0 && poptable[codevalue] != 0) - could_continue = TRUE; - - /* If this opcode is followed by an inline character, load it. 
It is - tempting to test for the presence of a subject character here, but that - is wrong, because sometimes zero repetitions of the subject are - permitted. - - We also use this mechanism for opcodes such as OP_TYPEPLUS that take an - argument that is not a data character - but is always one byte long because - the values are small. We have to take special action to deal with \P, \p, - \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert - these ones to new opcodes. */ - - if (coptable[codevalue] > 0) - { - dlen = 1; -#ifdef SUPPORT_UTF - if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else -#endif /* SUPPORT_UTF */ - d = code[coptable[codevalue]]; - if (codevalue >= OP_TYPESTAR) - { - switch(d) - { - case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM; - case OP_NOTPROP: - case OP_PROP: codevalue += OP_PROP_EXTRA; break; - case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break; - case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break; - case OP_NOT_HSPACE: - case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; - case OP_NOT_VSPACE: - case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; - default: break; - } - } - } - else - { - dlen = 0; /* Not strictly necessary, but compilers moan */ - d = NOTACHAR; /* if these variables are not set. */ - } - - - /* Now process the individual opcodes */ - - switch (codevalue) - { -/* ========================================================================== */ - /* These cases are never obeyed. This is a fudge that causes a compile- - time error if the vectors coptable or poptable, which are indexed by - opcode, are not the correct length. It seems to be the only way to do - such a check at compile time, as the sizeof() operator does not work - in the C preprocessor. 
*/ - - case OP_TABLE_LENGTH: - case OP_TABLE_LENGTH + - ((sizeof(coptable) == OP_TABLE_LENGTH) && - (sizeof(poptable) == OP_TABLE_LENGTH)): - break; - -/* ========================================================================== */ - /* Reached a closing bracket. If not at the end of the pattern, carry - on with the next opcode. For repeating opcodes, also add the repeat - state. Note that KETRPOS will always be encountered at the end of the - subpattern, because the possessive subpattern repeats are always handled - using recursive calls. Thus, it never adds any new states. - - At the end of the (sub)pattern, unless we have an empty string and - PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the - start of the subject, save the match data, shifting up all previous - matches so we always have the longest first. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: - case OP_KETRPOS: - if (code != end_code) - { - ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); - if (codevalue != OP_KET) - { - ADD_ACTIVE(state_offset - GET(code, 1), 0); - } - } - else - { - if (ptr > current_subject || - ((md->moptions & PCRE_NOTEMPTY) == 0 && - ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 || - current_subject > start_subject + md->start_offset))) - { - if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0; - else if (match_count > 0 && ++match_count * 2 > offsetcount) - match_count = 0; - count = ((match_count == 0)? 
offsetcount : match_count * 2) - 2; - if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int)); - if (offsetcount >= 2) - { - offsets[0] = (int)(current_subject - start_subject); - offsets[1] = (int)(ptr - start_subject); - DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, - offsets[1] - offsets[0], (char *)current_subject)); - } - if ((md->moptions & PCRE_DFA_SHORTEST) != 0) - { - DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" - "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, - match_count, rlevel*2-2, SP)); - return match_count; - } - } - } - break; - -/* ========================================================================== */ - /* These opcodes add to the current list of states without looking - at the current character. */ - - /*-----------------------------------------------------------------*/ - case OP_ALT: - do { code += GET(code, 1); } while (*code == OP_ALT); - ADD_ACTIVE((int)(code - start_code), 0); - break; - - /*-----------------------------------------------------------------*/ - case OP_BRA: - case OP_SBRA: - do - { - ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - code += GET(code, 1); - } - while (*code == OP_ALT); - break; - - /*-----------------------------------------------------------------*/ - case OP_CBRA: - case OP_SCBRA: - ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); - code += GET(code, 1); - while (*code == OP_ALT) - { - ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - code += GET(code, 1); - } - break; - - /*-----------------------------------------------------------------*/ - case OP_BRAZERO: - case OP_BRAMINZERO: - ADD_ACTIVE(state_offset + 1, 0); - code += 1 + GET(code, 2); - while (*code == OP_ALT) code += GET(code, 1); - ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - break; - - /*-----------------------------------------------------------------*/ - case OP_SKIPZERO: - code += 1 + GET(code, 2); - while (*code == OP_ALT) code += 
GET(code, 1); - ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); - break; - - /*-----------------------------------------------------------------*/ - case OP_CIRC: - if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_CIRCM: - if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) || - (ptr != end_subject && WAS_NEWLINE(ptr))) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_EOD: - if (ptr >= end_subject) - { - if ((md->moptions & PCRE_PARTIAL_HARD) != 0) - could_continue = TRUE; - else { ADD_ACTIVE(state_offset + 1, 0); } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_SOD: - if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_SOM: - if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); } - break; - - -/* ========================================================================== */ - /* These opcodes inspect the next subject character, and sometimes - the previous one as well, but do not have an argument. The variable - clen contains the length of the current character and is zero if we are - at the end of the subject. 
*/ - - /*-----------------------------------------------------------------*/ - case OP_ANY: - if (clen > 0 && !IS_NEWLINE(ptr)) - { - if (ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else - { - ADD_NEW(state_offset + 1, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_ALLANY: - if (clen > 0) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_EODN: - if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) - could_continue = TRUE; - else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen)) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_DOLL: - if ((md->moptions & PCRE_NOTEOL) == 0) - { - if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) - could_continue = TRUE; - else if (clen == 0 || - ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) && - (ptr == end_subject - md->nllen) - )) - { ADD_ACTIVE(state_offset + 1, 0); } - else if (ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - if ((md->moptions & PCRE_PARTIAL_HARD) != 0) - { - reset_could_continue = TRUE; - ADD_NEW_DATA(-(state_offset + 1), 0, 1); - } - else could_continue = partial_newline = TRUE; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_DOLLM: - if ((md->moptions & PCRE_NOTEOL) == 0) - { - if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) - could_continue = TRUE; - else if (clen == 0 || - ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) - { ADD_ACTIVE(state_offset + 1, 0); } - else 
if (ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - if ((md->moptions & PCRE_PARTIAL_HARD) != 0) - { - reset_could_continue = TRUE; - ADD_NEW_DATA(-(state_offset + 1), 0, 1); - } - else could_continue = partial_newline = TRUE; - } - } - else if (IS_NEWLINE(ptr)) - { ADD_ACTIVE(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - - case OP_DIGIT: - case OP_WHITESPACE: - case OP_WORDCHAR: - if (clen > 0 && c < 256 && - ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_DIGIT: - case OP_NOT_WHITESPACE: - case OP_NOT_WORDCHAR: - if (clen > 0 && (c >= 256 || - ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)) - { ADD_NEW(state_offset + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - { - int left_word, right_word; - - if (ptr > start_subject) - { - const pcre_uchar *temp = ptr - 1; - if (temp < md->start_used_ptr) md->start_used_ptr = temp; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) { BACKCHAR(temp); } -#endif - GETCHARTEST(d, temp); -#ifdef SUPPORT_UCP - if ((md->poptions & PCRE_UCP) != 0) - { - if (d == '_') left_word = TRUE; else - { - int cat = UCD_CATEGORY(d); - left_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - left_word = d < 256 && (ctypes[d] & ctype_word) != 0; - } - else left_word = FALSE; - - if (clen > 0) - { -#ifdef SUPPORT_UCP - if ((md->poptions & PCRE_UCP) != 0) - { - if (c == '_') right_word = TRUE; else - { - int cat = UCD_CATEGORY(c); - right_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - right_word = c < 256 && (ctypes[c] & ctype_word) != 0; - } - 
else right_word = FALSE; - - if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY)) - { ADD_ACTIVE(state_offset + 1, 0); } - } - break; - - - /*-----------------------------------------------------------------*/ - /* Check the next character by Unicode property. We will get here only - if the support is in the binary; otherwise a compile-time error occurs. - */ - -#ifdef SUPPORT_UCP - case OP_PROP: - case OP_NOTPROP: - if (clen > 0) - { - BOOL OK; - const pcre_uint32 *cp; - const ucd_record * prop = GET_UCD(c); - switch(code[1]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; - break; - - case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; - break; - - case PT_PC: - OK = prop->chartype == code[2]; - break; - - case PT_SC: - OK = prop->script == code[2]; - break; - - /* These are specials for combination cases. */ - - case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; - break; - - case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; - break; - - case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR; - break; - - case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; - break; - - case PT_CLIST: - cp = PRIV(ucd_caseless_sets) + code[2]; - for (;;) - { - if (c < *cp) { OK = FALSE; break; } - if (c == *cp++) { OK = TRUE; break; } - } - break; - - /* Should never occur, but keep compilers from grumbling. 
*/ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); } - } - break; -#endif - - - -/* ========================================================================== */ - /* These opcodes likewise inspect the subject character, but have an - argument that is not a data character. It is one of these opcodes: - OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, - OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - if (d == OP_ANY && ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && - (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (count > 0 && codevalue == OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - if (d == OP_ANY && ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && - (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSQUERY) - { - 
active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + 2, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - if (d == OP_ANY && ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && - (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSSTAR) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEEXACT: - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - if (d == OP_ANY && ptr + 1 >= md->end_subject && - (md->moptions & (PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && - (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - if (d == OP_ANY && ptr + 1 >= md->end_subject && - (md->moptions & 
(PCRE_PARTIAL_HARD)) != 0 && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - could_continue = partial_newline = TRUE; - } - else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || - (c < 256 && - (d != OP_ANY || !IS_NEWLINE(ptr)) && - ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) - { - if (codevalue == OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - -/* ========================================================================== */ - /* These are virtual opcodes that are used when something like - OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its - argument. It keeps the code above fast for the other cases. The argument - is in the d variable. */ - -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEPLUS: - case OP_PROP_EXTRA + OP_TYPEMINPLUS: - case OP_PROP_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); } - if (clen > 0) - { - BOOL OK; - const pcre_uint32 *cp; - const ucd_record * prop = GET_UCD(c); - switch(code[2]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; - break; - - case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; - break; - - case PT_PC: - OK = prop->chartype == code[3]; - break; - - case PT_SC: - OK = prop->script == code[3]; - break; - - /* These are specials for combination cases. 
*/ - - case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; - break; - - case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; - break; - - case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR; - break; - - case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; - break; - - case PT_CLIST: - cp = PRIV(ucd_caseless_sets) + code[3]; - for (;;) - { - if (c < *cp) { OK = FALSE; break; } - if (c == *cp++) { OK = TRUE; break; } - } - break; - - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEPLUS: - case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS: - case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - int lgb, rgb; - const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { - dlen = 1; - if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } - rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; - lgb = rgb; - nptr += dlen; - } - count++; - 
ADD_NEW_DATA(-state_offset, count, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEPLUS: - case OP_ANYNL_EXTRA + OP_TYPEMINPLUS: - case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - int ncount = 0; - switch (c) - { - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL01; - - case CHAR_CR: - if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL01: - case CHAR_LF: - if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, ncount); - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEPLUS: - case OP_VSPACE_EXTRA + OP_TYPEMINPLUS: - case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if (clen > 0) - { - BOOL OK; - switch (c) - { - VSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_VSPACE)) - { - if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEPLUS: - case OP_HSPACE_EXTRA + OP_TYPEMINPLUS: - case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } - if 
(clen > 0) - { - BOOL OK; - switch (c) - { - HSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW_DATA(-state_offset, count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEQUERY: - case OP_PROP_EXTRA + OP_TYPEMINQUERY: - case OP_PROP_EXTRA + OP_TYPEPOSQUERY: - count = 4; - goto QS1; - - case OP_PROP_EXTRA + OP_TYPESTAR: - case OP_PROP_EXTRA + OP_TYPEMINSTAR: - case OP_PROP_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS1: - - ADD_ACTIVE(state_offset + 4, 0); - if (clen > 0) - { - BOOL OK; - const pcre_uint32 *cp; - const ucd_record * prop = GET_UCD(c); - switch(code[2]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; - break; - - case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; - break; - - case PT_PC: - OK = prop->chartype == code[3]; - break; - - case PT_SC: - OK = prop->script == code[3]; - break; - - /* These are specials for combination cases. 
*/ - - case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; - break; - - case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; - break; - - case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR; - break; - - case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; - break; - - case PT_CLIST: - cp = PRIV(ucd_caseless_sets) + code[3]; - for (;;) - { - if (c < *cp) { OK = FALSE; break; } - if (c == *cp++) { OK = TRUE; break; } - } - break; - - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + count, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEQUERY: - case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY: - case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS2; - - case OP_EXTUNI_EXTRA + OP_TYPESTAR: - case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR: - case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS2: - - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - int lgb, rgb; - const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { - dlen = 1; - if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); 
} - rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; - lgb = rgb; - nptr += dlen; - } - ADD_NEW_DATA(-(state_offset + count), 0, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEQUERY: - case OP_ANYNL_EXTRA + OP_TYPEMINQUERY: - case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS3; - - case OP_ANYNL_EXTRA + OP_TYPESTAR: - case OP_ANYNL_EXTRA + OP_TYPEMINSTAR: - case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS3: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - int ncount = 0; - switch (c) - { - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL02; - - case CHAR_CR: - if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL02: - case CHAR_LF: - if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW_DATA(-(state_offset + count), 0, ncount); - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEQUERY: - case OP_VSPACE_EXTRA + OP_TYPEMINQUERY: - case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS4; - - case OP_VSPACE_EXTRA + OP_TYPESTAR: - case OP_VSPACE_EXTRA + OP_TYPEMINSTAR: - case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS4: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - BOOL OK; - switch (c) - { - VSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - if (OK == (d == OP_VSPACE)) - { - if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - 
next_active_state--; - } - ADD_NEW_DATA(-(state_offset + count), 0, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEQUERY: - case OP_HSPACE_EXTRA + OP_TYPEMINQUERY: - case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY: - count = 2; - goto QS5; - - case OP_HSPACE_EXTRA + OP_TYPESTAR: - case OP_HSPACE_EXTRA + OP_TYPEMINSTAR: - case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR: - count = 0; - - QS5: - ADD_ACTIVE(state_offset + 2, 0); - if (clen > 0) - { - BOOL OK; - switch (c) - { - HSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR || - codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW_DATA(-(state_offset + count), 0, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ -#ifdef SUPPORT_UCP - case OP_PROP_EXTRA + OP_TYPEEXACT: - case OP_PROP_EXTRA + OP_TYPEUPTO: - case OP_PROP_EXTRA + OP_TYPEMINUPTO: - case OP_PROP_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - const pcre_uint32 *cp; - const ucd_record * prop = GET_UCD(c); - switch(code[1 + IMM2_SIZE + 1]) - { - case PT_ANY: - OK = TRUE; - break; - - case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; - break; - - case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; - break; - - case PT_PC: - OK = prop->chartype == code[1 + IMM2_SIZE + 2]; - break; - - case PT_SC: - OK = prop->script == code[1 + IMM2_SIZE + 2]; - break; - - /* These are specials for combination cases. 
*/ - - case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; - break; - - case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; - break; - - case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR; - break; - - case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; - break; - - case PT_CLIST: - cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; - for (;;) - { - if (c < *cp) { OK = FALSE; break; } - if (c == *cp++) { OK = TRUE; break; } - } - break; - - /* Should never occur, but keep compilers from grumbling. */ - - default: - OK = codevalue != OP_PROP; - break; - } - - if (OK == (d == OP_PROP)) - { - if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXTUNI_EXTRA + OP_TYPEEXACT: - case OP_EXTUNI_EXTRA + OP_TYPEUPTO: - case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: - case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - int lgb, rgb; - const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { - dlen = 1; - if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } 
- rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; - lgb = rgb; - nptr += dlen; - } - if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) - reset_could_continue = TRUE; - if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } - else - { ADD_NEW_DATA(-state_offset, count, ncount); } - } - break; -#endif - - /*-----------------------------------------------------------------*/ - case OP_ANYNL_EXTRA + OP_TYPEEXACT: - case OP_ANYNL_EXTRA + OP_TYPEUPTO: - case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: - case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - int ncount = 0; - switch (c) - { - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - goto ANYNL03; - - case CHAR_CR: - if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1; - /* Fall through */ - - ANYNL03: - case CHAR_LF: - if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } - else - { ADD_NEW_DATA(-state_offset, count, ncount); } - break; - - default: - break; - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE_EXTRA + OP_TYPEEXACT: - case OP_VSPACE_EXTRA + OP_TYPEUPTO: - case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: - case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - switch (c) - { - VSPACE_CASES: - OK = TRUE; - 
break; - - default: - OK = FALSE; - } - - if (OK == (d == OP_VSPACE)) - { - if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } - else - { ADD_NEW_DATA(-state_offset, count, 0); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE_EXTRA + OP_TYPEEXACT: - case OP_HSPACE_EXTRA + OP_TYPEUPTO: - case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: - case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: - if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - BOOL OK; - switch (c) - { - HSPACE_CASES: - OK = TRUE; - break; - - default: - OK = FALSE; - break; - } - - if (OK == (d == OP_HSPACE)) - { - if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } - else - { ADD_NEW_DATA(-state_offset, count, 0); } - } - } - break; - -/* ========================================================================== */ - /* These opcodes are followed by a character that is usually compared - to the current subject character; it is loaded into d. We still get - here even if there is no subject character, because in some cases zero - repetitions are permitted. 
*/ - - /*-----------------------------------------------------------------*/ - case OP_CHAR: - if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - case OP_CHARI: - if (clen == 0) break; - -#ifdef SUPPORT_UTF - if (utf) - { - if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else - { - unsigned int othercase; - if (c < 128) - othercase = fcc[c]; - else - /* If we have Unicode property support, we can use it to test the - other case of the character. */ -#ifdef SUPPORT_UCP - othercase = UCD_OTHERCASE(c); -#else - othercase = NOTACHAR; -#endif - - if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } - } - } - else -#endif /* SUPPORT_UTF */ - /* Not UTF mode */ - { - if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) - { ADD_NEW(state_offset + 2, 0); } - } - break; - - -#ifdef SUPPORT_UCP - /*-----------------------------------------------------------------*/ - /* This is a tricky one because it can match more than one character. - Find out how many characters to skip, and then set up a negative state - to wait for them to pass before continuing. */ - - case OP_EXTUNI: - if (clen > 0) - { - int lgb, rgb; - const pcre_uchar *nptr = ptr + clen; - int ncount = 0; - lgb = UCD_GRAPHBREAK(c); - while (nptr < end_subject) - { - dlen = 1; - if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } - rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - ncount++; - lgb = rgb; - nptr += dlen; - } - if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) - reset_could_continue = TRUE; - ADD_NEW_DATA(-(state_offset + 1), 0, ncount); - } - break; -#endif - - /*-----------------------------------------------------------------*/ - /* This is a tricky like EXTUNI because it too can match more than one - character (when CR is followed by LF). In this case, set up a negative - state to wait for one character to pass before continuing. 
*/ - - case OP_ANYNL: - if (clen > 0) switch(c) - { - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; - - case CHAR_LF: - ADD_NEW(state_offset + 1, 0); - break; - - case CHAR_CR: - if (ptr + 1 >= end_subject) - { - ADD_NEW(state_offset + 1, 0); - if ((md->moptions & PCRE_PARTIAL_HARD) != 0) - reset_could_continue = TRUE; - } - else if (RAWUCHARTEST(ptr + 1) == CHAR_LF) - { - ADD_NEW_DATA(-(state_offset + 1), 0, 1); - } - else - { - ADD_NEW(state_offset + 1, 0); - } - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_VSPACE: - if (clen > 0) switch(c) - { - VSPACE_CASES: - break; - - default: - ADD_NEW(state_offset + 1, 0); - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_VSPACE: - if (clen > 0) switch(c) - { - VSPACE_CASES: - ADD_NEW(state_offset + 1, 0); - break; - - default: - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_NOT_HSPACE: - if (clen > 0) switch(c) - { - HSPACE_CASES: - break; - - default: - ADD_NEW(state_offset + 1, 0); - break; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_HSPACE: - if (clen > 0) switch(c) - { - HSPACE_CASES: - ADD_NEW(state_offset + 1, 0); - break; - - default: - break; - } - break; - - /*-----------------------------------------------------------------*/ - /* Match a negated single character casefully. */ - - case OP_NOT: - if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } - break; - - /*-----------------------------------------------------------------*/ - /* Match a negated single character caselessly. 
*/ - - case OP_NOTI: - if (clen > 0) - { - unsigned int otherd; -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - if (c != d && c != otherd) - { ADD_NEW(state_offset + dlen + 1, 0); } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTPOSPLUSI: - caseless = TRUE; - codevalue -= OP_STARI - OP_STAR; - - /* Fall through */ - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTPOSPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } - if (clen > 0) - { - pcre_uint32 otherd = NOTACHAR; - if (caseless) - { -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (count > 0 && - (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS)) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - count++; - ADD_NEW(state_offset, count); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_QUERYI: - case OP_MINQUERYI: - case OP_POSQUERYI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTPOSQUERYI: - caseless = TRUE; - codevalue -= OP_STARI - OP_STAR; - /* Fall through */ - case OP_QUERY: - case OP_MINQUERY: - case OP_POSQUERY: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTPOSQUERY: - ADD_ACTIVE(state_offset + dlen + 1, 0); - if (clen > 0) - { - pcre_uint32 otherd = NOTACHAR; - if (caseless) - { -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = 
UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset + dlen + 1, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_STARI: - case OP_MINSTARI: - case OP_POSSTARI: - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPOSSTARI: - caseless = TRUE; - codevalue -= OP_STARI - OP_STAR; - /* Fall through */ - case OP_STAR: - case OP_MINSTAR: - case OP_POSSTAR: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPOSSTAR: - ADD_ACTIVE(state_offset + dlen + 1, 0); - if (clen > 0) - { - pcre_uint32 otherd = NOTACHAR; - if (caseless) - { -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - ADD_NEW(state_offset, 0); - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_EXACTI: - case OP_NOTEXACTI: - caseless = TRUE; - codevalue -= OP_STARI - OP_STAR; - /* Fall through */ - case OP_EXACT: - case OP_NOTEXACT: - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - pcre_uint32 otherd = NOTACHAR; - if (caseless) - { -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (++count >= GET2(code, 1)) - { 
ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_UPTOI: - case OP_MINUPTOI: - case OP_POSUPTOI: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTPOSUPTOI: - caseless = TRUE; - codevalue -= OP_STARI - OP_STAR; - /* Fall through */ - case OP_UPTO: - case OP_MINUPTO: - case OP_POSUPTO: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTPOSUPTO: - ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); - count = current_state->count; /* Number already matched */ - if (clen > 0) - { - pcre_uint32 otherd = NOTACHAR; - if (caseless) - { -#ifdef SUPPORT_UTF - if (utf && d >= 128) - { -#ifdef SUPPORT_UCP - otherd = UCD_OTHERCASE(d); -#endif /* SUPPORT_UCP */ - } - else -#endif /* SUPPORT_UTF */ - otherd = TABLE_GET(d, fcc, d); - } - if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) - { - if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO) - { - active_count--; /* Remove non-match possibility */ - next_active_state--; - } - if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - } - break; - - -/* ========================================================================== */ - /* These are the class-handling opcodes */ - - case OP_CLASS: - case OP_NCLASS: - case OP_XCLASS: - { - BOOL isinclass = FALSE; - int next_state_offset; - const pcre_uchar *ecode; - - /* For a simple class, there is always just a 32-byte table, and we - can set isinclass from it. */ - - if (codevalue != OP_XCLASS) - { - ecode = code + 1 + (32 / sizeof(pcre_uchar)); - if (clen > 0) - { - isinclass = (c > 255)? (codevalue == OP_NCLASS) : - ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0); - } - } - - /* An extended class may have a table or a list of single characters, - ranges, or both, and it may be positive or negative. 
There's a - function that sorts all this out. */ - - else - { - ecode = code + GET(code, 1); - if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); - } - - /* At this point, isinclass is set for all kinds of class, and ecode - points to the byte after the end of the class. If there is a - quantifier, this is where it will be. */ - - next_state_offset = (int)(ecode - start_code); - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - ADD_ACTIVE(next_state_offset + 1, 0); - if (isinclass) { ADD_NEW(state_offset, 0); } - break; - - case OP_CRPLUS: - case OP_CRMINPLUS: - count = current_state->count; /* Already matched */ - if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); } - if (isinclass) { count++; ADD_NEW(state_offset, count); } - break; - - case OP_CRQUERY: - case OP_CRMINQUERY: - ADD_ACTIVE(next_state_offset + 1, 0); - if (isinclass) { ADD_NEW(next_state_offset + 1, 0); } - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - count = current_state->count; /* Already matched */ - if (count >= GET2(ecode, 1)) - { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } - if (isinclass) - { - unsigned int max = GET2(ecode, 1 + IMM2_SIZE); - if (++count >= max && max != 0) /* Max 0 => no limit */ - { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } - else - { ADD_NEW(state_offset, count); } - } - break; - - default: - if (isinclass) { ADD_NEW(next_state_offset, 0); } - break; - } - } - break; - -/* ========================================================================== */ - /* These are the opcodes for fancy brackets of various kinds. We have - to use recursion in order to handle them. The "always failing" assertion - (?!) is optimised to OP_FAIL when compiling, so we have to support that, - though the other "backtracking verbs" are not supported. 
*/ - - case OP_FAIL: - forced_fail++; /* Count FAILs for multiple states */ - break; - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - { - int rc; - int local_offsets[2]; - int local_workspace[1000]; - const pcre_uchar *endasscode = code + GET(code, 1); - - while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); - - rc = internal_dfa_exec( - md, /* static match data */ - code, /* this subexpression's code */ - ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ - - if (rc == PCRE_ERROR_DFA_UITEM) return rc; - if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) - { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_COND: - case OP_SCOND: - { - int local_offsets[1000]; - int local_workspace[1000]; - int codelink = GET(code, 1); - int condcode; - - /* Because of the way auto-callout works during compile, a callout item - is inserted between OP_COND and an assertion condition. This does not - happen for the other conditions. 
*/ - - if (code[LINK_SIZE+1] == OP_CALLOUT) - { - rrc = 0; - if (PUBL(callout) != NULL) - { - PUBL(callout_block) cb; - cb.version = 1; /* Version 1 of the callout block */ - cb.callout_number = code[LINK_SIZE+2]; - cb.offset_vector = offsets; -#if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)start_subject; -#elif defined COMPILE_PCRE16 - cb.subject = (PCRE_SPTR16)start_subject; -#elif defined COMPILE_PCRE32 - cb.subject = (PCRE_SPTR32)start_subject; -#endif - cb.subject_length = (int)(end_subject - start_subject); - cb.start_match = (int)(current_subject - start_subject); - cb.current_position = (int)(ptr - start_subject); - cb.pattern_position = GET(code, LINK_SIZE + 3); - cb.next_item_length = GET(code, 3 + 2*LINK_SIZE); - cb.capture_top = 1; - cb.capture_last = -1; - cb.callout_data = md->callout_data; - cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ - } - if (rrc > 0) break; /* Fail this thread */ - code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ - } - - condcode = code[LINK_SIZE+1]; - - /* Back reference conditions are not supported */ - - if (condcode == OP_CREF || condcode == OP_NCREF) - return PCRE_ERROR_DFA_UCOND; - - /* The DEFINE condition is always false */ - - if (condcode == OP_DEF) - { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - - /* The only supported version of OP_RREF is for the value RREF_ANY, - which means "test if in any recursion". We can't test for specifically - recursed groups. 
*/ - - else if (condcode == OP_RREF || condcode == OP_NRREF) - { - int value = GET2(code, LINK_SIZE + 2); - if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; - if (md->recursive != NULL) - { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } - else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - } - - /* Otherwise, the condition is an assertion */ - - else - { - int rc; - const pcre_uchar *asscode = code + LINK_SIZE + 1; - const pcre_uchar *endasscode = asscode + GET(asscode, 1); - - while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); - - rc = internal_dfa_exec( - md, /* fixed match data */ - asscode, /* this subexpression's code */ - ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ - - if (rc == PCRE_ERROR_DFA_UITEM) return rc; - if ((rc >= 0) == - (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) - { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } - else - { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_RECURSE: - { - dfa_recursion_info *ri; - int local_offsets[1000]; - int local_workspace[1000]; - const pcre_uchar *callpat = start_code + GET(code, 1); - int recno = (callpat == md->start_code)? 0 : - GET2(callpat, 1 + LINK_SIZE); - int rc; - - DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP)); - - /* Check for repeating a recursion without advancing the subject - pointer. This should catch convoluted mutual recursions. (Some simple - cases are caught at compile time.) 
*/ - - for (ri = md->recursive; ri != NULL; ri = ri->prevrec) - if (recno == ri->group_num && ptr == ri->subject_position) - return PCRE_ERROR_RECURSELOOP; - - /* Remember this recursion and where we started it so as to - catch infinite loops. */ - - new_recursive.group_num = recno; - new_recursive.subject_position = ptr; - new_recursive.prevrec = md->recursive; - md->recursive = &new_recursive; - - rc = internal_dfa_exec( - md, /* fixed match data */ - callpat, /* this subexpression's code */ - ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ - - md->recursive = new_recursive.prevrec; /* Done this recursion */ - - DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP, - rc)); - - /* Ran out of internal offsets */ - - if (rc == 0) return PCRE_ERROR_DFA_RECURSE; - - /* For each successful matched substring, set up the next state with a - count of characters to skip before trying it. Note that the count is in - characters, not bytes. 
*/ - - if (rc > 0) - { - for (rc = rc*2 - 2; rc >= 0; rc -= 2) - { - int charcount = local_offsets[rc+1] - local_offsets[rc]; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) - { - const pcre_uchar *p = start_subject + local_offsets[rc]; - const pcre_uchar *pp = start_subject + local_offsets[rc+1]; - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; - } -#endif - if (charcount > 0) - { - ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); - } - else - { - ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0); - } - } - } - else if (rc != PCRE_ERROR_NOMATCH) return rc; - } - break; - - /*-----------------------------------------------------------------*/ - case OP_BRAPOS: - case OP_SBRAPOS: - case OP_CBRAPOS: - case OP_SCBRAPOS: - case OP_BRAPOSZERO: - { - int charcount, matched_count; - const pcre_uchar *local_ptr = ptr; - BOOL allow_zero; - - if (codevalue == OP_BRAPOSZERO) - { - allow_zero = TRUE; - codevalue = *(++code); /* Codevalue will be one of above BRAs */ - } - else allow_zero = FALSE; - - /* Loop to match the subpattern as many times as possible as if it were - a complete pattern. */ - - for (matched_count = 0;; matched_count++) - { - int local_offsets[2]; - int local_workspace[1000]; - - int rc = internal_dfa_exec( - md, /* fixed match data */ - code, /* this subexpression's code */ - local_ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ - - /* Failed to match */ - - if (rc < 0) - { - if (rc != PCRE_ERROR_NOMATCH) return rc; - break; - } - - /* Matched: break the loop if zero characters matched. 
*/ - - charcount = local_offsets[1] - local_offsets[0]; - if (charcount == 0) break; - local_ptr += charcount; /* Advance temporary position ptr */ - } - - /* At this point we have matched the subpattern matched_count - times, and local_ptr is pointing to the character after the end of the - last match. */ - - if (matched_count > 0 || allow_zero) - { - const pcre_uchar *end_subpattern = code; - int next_state_offset; - - do { end_subpattern += GET(end_subpattern, 1); } - while (*end_subpattern == OP_ALT); - next_state_offset = - (int)(end_subpattern - start_code + LINK_SIZE + 1); - - /* Optimization: if there are no more active states, and there - are no new states yet set up, then skip over the subject string - right here, to save looping. Otherwise, set up the new state to swing - into action when the end of the matched substring is reached. */ - - if (i + 1 >= active_count && new_count == 0) - { - ptr = local_ptr; - clen = 0; - ADD_NEW(next_state_offset, 0); - } - else - { - const pcre_uchar *p = ptr; - const pcre_uchar *pp = local_ptr; - charcount = (int)(pp - p); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; -#endif - ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); - } - } - } - break; - - /*-----------------------------------------------------------------*/ - case OP_ONCE: - case OP_ONCE_NC: - { - int local_offsets[2]; - int local_workspace[1000]; - - int rc = internal_dfa_exec( - md, /* fixed match data */ - code, /* this subexpression's code */ - ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ - local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(int), /* size of same */ - local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ - - if (rc >= 0) - { - const pcre_uchar *end_subpattern = code; - int charcount = local_offsets[1] - local_offsets[0]; - int 
next_state_offset, repeat_state_offset; - - do { end_subpattern += GET(end_subpattern, 1); } - while (*end_subpattern == OP_ALT); - next_state_offset = - (int)(end_subpattern - start_code + LINK_SIZE + 1); - - /* If the end of this subpattern is KETRMAX or KETRMIN, we must - arrange for the repeat state also to be added to the relevant list. - Calculate the offset, or set -1 for no repeat. */ - - repeat_state_offset = (*end_subpattern == OP_KETRMAX || - *end_subpattern == OP_KETRMIN)? - (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1; - - /* If we have matched an empty string, add the next state at the - current character pointer. This is important so that the duplicate - checking kicks in, which is what breaks infinite loops that match an - empty string. */ - - if (charcount == 0) - { - ADD_ACTIVE(next_state_offset, 0); - } - - /* Optimization: if there are no more active states, and there - are no new states yet set up, then skip over the subject string - right here, to save looping. Otherwise, set up the new state to swing - into action when the end of the matched substring is reached. */ - - else if (i + 1 >= active_count && new_count == 0) - { - ptr += charcount; - clen = 0; - ADD_NEW(next_state_offset, 0); - - /* If we are adding a repeat state at the new character position, - we must fudge things so that it is the only current state. - Otherwise, it might be a duplicate of one we processed before, and - that would cause it to be skipped. 
*/ - - if (repeat_state_offset >= 0) - { - next_active_state = active_states; - active_count = 0; - i = -1; - ADD_ACTIVE(repeat_state_offset, 0); - } - } - else - { -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 - if (utf) - { - const pcre_uchar *p = start_subject + local_offsets[0]; - const pcre_uchar *pp = start_subject + local_offsets[1]; - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; - } -#endif - ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); - if (repeat_state_offset >= 0) - { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } - } - } - else if (rc != PCRE_ERROR_NOMATCH) return rc; - } - break; - - -/* ========================================================================== */ - /* Handle callouts */ - - case OP_CALLOUT: - rrc = 0; - if (PUBL(callout) != NULL) - { - PUBL(callout_block) cb; - cb.version = 1; /* Version 1 of the callout block */ - cb.callout_number = code[1]; - cb.offset_vector = offsets; -#if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)start_subject; -#elif defined COMPILE_PCRE16 - cb.subject = (PCRE_SPTR16)start_subject; -#elif defined COMPILE_PCRE32 - cb.subject = (PCRE_SPTR32)start_subject; -#endif - cb.subject_length = (int)(end_subject - start_subject); - cb.start_match = (int)(current_subject - start_subject); - cb.current_position = (int)(ptr - start_subject); - cb.pattern_position = GET(code, 2); - cb.next_item_length = GET(code, 2 + LINK_SIZE); - cb.capture_top = 1; - cb.capture_last = -1; - cb.callout_data = md->callout_data; - cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ - } - if (rrc == 0) - { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } - break; - - -/* ========================================================================== */ - default: /* Unsupported opcode */ - return PCRE_ERROR_DFA_UITEM; - } - - NEXT_ACTIVE_STATE: continue; - - } /* End of loop scanning active states */ - - /* We have finished the processing at 
the current subject character. If no - new states have been set for the next character, we have found all the - matches that we are going to find. If we are at the top level and partial - matching has been requested, check for appropriate conditions. - - The "forced_ fail" variable counts the number of (*F) encountered for the - character. If it is equal to the original active_count (saved in - workspace[1]) it means that (*F) was found on every active state. In this - case we don't want to give a partial match. - - The "could_continue" variable is true if a state could have continued but - for the fact that the end of the subject was reached. */ - - if (new_count <= 0) - { - if (rlevel == 1 && /* Top level, and */ - could_continue && /* Some could go on, and */ - forced_fail != workspace[1] && /* Not all forced fail & */ - ( /* either... */ - (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ - || /* or... */ - ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */ - match_count < 0) /* no matches */ - ) && /* And... */ - ( - partial_newline || /* Either partial NL */ - ( /* or ... */ - ptr >= end_subject && /* End of subject and */ - ptr > md->start_used_ptr) /* Inspected non-empty string */ - ) - ) - { - if (offsetcount >= 2) - { - offsets[0] = (int)(md->start_used_ptr - start_subject); - offsets[1] = (int)(end_subject - start_subject); - } - match_count = PCRE_ERROR_PARTIAL; - } - - DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" - "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count, - rlevel*2-2, SP)); - break; /* In effect, "return", but see the comment below */ - } - - /* One or more states are active for the next character. */ - - ptr += clen; /* Advance to next subject character */ - } /* Loop to move along the subject string */ - -/* Control gets here from "break" a few lines above. We do it this way because -if we use "return" above, we have compiler trouble. 
Some compilers warn if -there's nothing here because they think the function doesn't return a value. On -the other hand, if we put a dummy statement here, some more clever compilers -complain that it can't be reached. Sigh. */ - -return match_count; -} - - - - -/************************************************* -* Execute a Regular Expression - DFA engine * -*************************************************/ - -/* This external function applies a compiled re to a subject string using a DFA -engine. This function calls the internal function multiple times if the pattern -is not anchored. - -Arguments: - argument_re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets vector of match offsets - offsetcount size of same - workspace workspace vector - wscount size of same - -Returns: > 0 => number of match offset pairs placed in offsets - = 0 => offsets overflowed; longest matches are present - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, - const char *subject, int length, int start_offset, int options, int *offsets, - int offsetcount, int *workspace, int wscount) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, - PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, - int offsetcount, int *workspace, int wscount) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, - PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets, - int offsetcount, int *workspace, int wscount) 
-#endif -{ -REAL_PCRE *re = (REAL_PCRE *)argument_re; -dfa_match_data match_block; -dfa_match_data *md = &match_block; -BOOL utf, anchored, startline, firstline; -const pcre_uchar *current_subject, *end_subject; -const pcre_study_data *study = NULL; - -const pcre_uchar *req_char_ptr; -const pcre_uint8 *start_bits = NULL; -BOOL has_first_char = FALSE; -BOOL has_req_char = FALSE; -pcre_uchar first_char = 0; -pcre_uchar first_char2 = 0; -pcre_uchar req_char = 0; -pcre_uchar req_char2 = 0; -int newline; - -/* Plausibility checks */ - -if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; -if (re == NULL || subject == NULL || workspace == NULL || - (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; -if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; -if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; -if (length < 0) return PCRE_ERROR_BADLENGTH; -if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; - -/* Check that the first field in the block is the magic number. If it is not, -return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to -REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which -means that the pattern is likely compiled with different endianness. */ - -if (re->magic_number != MAGIC_NUMBER) - return re->magic_number == REVERSED_MAGIC_NUMBER? - PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; -if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - -/* If restarting after a partial match, do some sanity checks on the contents -of the workspace. 
*/ - -if ((options & PCRE_DFA_RESTART) != 0) - { - if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || - workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK) - return PCRE_ERROR_DFA_BADRESTART; - } - -/* Set up study, callout, and table data */ - -md->tables = re->tables; -md->callout_data = NULL; - -if (extra_data != NULL) - { - unsigned int flags = extra_data->flags; - if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT; - if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) - return PCRE_ERROR_DFA_UMLIMIT; - if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) - md->callout_data = extra_data->callout_data; - if ((flags & PCRE_EXTRA_TABLES) != 0) - md->tables = extra_data->tables; - } - -/* Set some local values */ - -current_subject = (const pcre_uchar *)subject + start_offset; -end_subject = (const pcre_uchar *)subject + length; -req_char_ptr = current_subject - 1; - -#ifdef SUPPORT_UTF -/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ -utf = (re->options & PCRE_UTF8) != 0; -#else -utf = FALSE; -#endif - -anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || - (re->options & PCRE_ANCHORED) != 0; - -/* The remaining fixed data for passing around. */ - -md->start_code = (const pcre_uchar *)argument_re + - re->name_table_offset + re->name_count * re->name_entry_size; -md->start_subject = (const pcre_uchar *)subject; -md->end_subject = end_subject; -md->start_offset = start_offset; -md->moptions = options; -md->poptions = re->options; - -/* If the BSR option is not set at match time, copy what was set -at compile time. */ - -if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0) - { - if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) - md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE); -#ifdef BSR_ANYCRLF - else md->moptions |= PCRE_BSR_ANYCRLF; -#endif - } - -/* Handle different types of newline. 
The three bits give eight cases. If -nothing is set at run time, whatever was used at compile time applies. */ - -switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & - PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Compile-time default */ - case PCRE_NEWLINE_CR: newline = CHAR_CR; break; - case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ - PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: return PCRE_ERROR_BADNEWLINE; - } - -if (newline == -2) - { - md->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - md->nltype = NLTYPE_ANY; - } -else - { - md->nltype = NLTYPE_FIXED; - if (newline > 255) - { - md->nllen = 2; - md->nl[0] = (newline >> 8) & 255; - md->nl[1] = newline & 255; - } - else - { - md->nllen = 1; - md->nl[0] = newline; - } - } - -/* Check a UTF-8 string if required. Unfortunately there's no way of passing -back the character offset. */ - -#ifdef SUPPORT_UTF -if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) - { - int erroroffset; - int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset); - if (errorcode != 0) - { - if (offsetcount >= 2) - { - offsets[0] = erroroffset; - offsets[1] = errorcode; - } -#if defined COMPILE_PCRE8 - return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ? - PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; -#elif defined COMPILE_PCRE16 - return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ? 
- PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16; -#elif defined COMPILE_PCRE32 - return PCRE_ERROR_BADUTF32; -#endif - } -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - if (start_offset > 0 && start_offset < length && - NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) - return PCRE_ERROR_BADUTF8_OFFSET; -#endif - } -#endif - -/* If the exec call supplied NULL for tables, use the inbuilt ones. This -is a feature that makes it possible to save compiled regex and re-use them -in other programs later. */ - -if (md->tables == NULL) md->tables = PRIV(default_tables); - -/* The "must be at the start of a line" flags are used in a loop when finding -where to start. */ - -startline = (re->flags & PCRE_STARTLINE) != 0; -firstline = (re->options & PCRE_FIRSTLINE) != 0; - -/* Set up the first character to match, if available. The first_byte value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. */ - -if (!anchored) - { - if ((re->flags & PCRE_FIRSTSET) != 0) - { - has_first_char = TRUE; - first_char = first_char2 = (pcre_uchar)(re->first_char); - if ((re->flags & PCRE_FCH_CASELESS) != 0) - { - first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (utf && first_char > 127) - first_char2 = UCD_OTHERCASE(first_char); -#endif - } - } - else - { - if (!startline && study != NULL && - (study->flags & PCRE_STUDY_MAPPED) != 0) - start_bits = study->start_bits; - } - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. 
*/ - -if ((re->flags & PCRE_REQCHSET) != 0) - { - has_req_char = TRUE; - req_char = req_char2 = (pcre_uchar)(re->req_char); - if ((re->flags & PCRE_RCH_CASELESS) != 0) - { - req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (utf && req_char > 127) - req_char2 = UCD_OTHERCASE(req_char); -#endif - } - } - -/* Call the main matching function, looping for a non-anchored regex after a -failed match. If not restarting, perform certain optimizations at the start of -a match. */ - -for (;;) - { - int rc; - - if ((options & PCRE_DFA_RESTART) == 0) - { - const pcre_uchar *save_end_subject = end_subject; - - /* If firstline is TRUE, the start of the match is constrained to the first - line of a multiline string. Implement this by temporarily adjusting - end_subject so that we stop scanning at a newline. If the match fails at - the newline, later code breaks this loop. */ - - if (firstline) - { - PCRE_PUCHAR t = current_subject; -#ifdef SUPPORT_UTF - if (utf) - { - while (t < md->end_subject && !IS_NEWLINE(t)) - { - t++; - ACROSSCHAR(t < end_subject, *t, t++); - } - } - else -#endif - while (t < md->end_subject && !IS_NEWLINE(t)) t++; - end_subject = t; - } - - /* There are some optimizations that avoid running the match if a known - starting point is not found. However, there is an option that disables - these, for testing and for ensuring that all callouts do actually occur. - The option can be set in the regex by (*NO_START_OPT) or passed in - match-time options. */ - - if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) - { - /* Advance to a known first char. 
*/ - - if (has_first_char) - { - if (first_char != first_char2) - { - pcre_uchar csc; - while (current_subject < end_subject && - (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2) - current_subject++; - } - else - while (current_subject < end_subject && - RAWUCHARTEST(current_subject) != first_char) - current_subject++; - } - - /* Or to just after a linebreak for a multiline match if possible */ - - else if (startline) - { - if (current_subject > md->start_subject + start_offset) - { -#ifdef SUPPORT_UTF - if (utf) - { - while (current_subject < end_subject && - !WAS_NEWLINE(current_subject)) - { - current_subject++; - ACROSSCHAR(current_subject < end_subject, *current_subject, - current_subject++); - } - } - else -#endif - while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) - current_subject++; - - /* If we have just passed a CR and the newline option is ANY or - ANYCRLF, and we are now at a LF, advance the match position by one - more character. */ - - if (RAWUCHARTEST(current_subject - 1) == CHAR_CR && - (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && - current_subject < end_subject && - RAWUCHARTEST(current_subject) == CHAR_NL) - current_subject++; - } - } - - /* Or to a non-unique first char after study */ - - else if (start_bits != NULL) - { - while (current_subject < end_subject) - { - register pcre_uint32 c = RAWUCHARTEST(current_subject); -#ifndef COMPILE_PCRE8 - if (c > 255) c = 255; -#endif - if ((start_bits[c/8] & (1 << (c&7))) == 0) - { - current_subject++; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 - /* In non 8-bit mode, the iteration will stop for - characters > 255 at the beginning or not stop at all. 
*/ - if (utf) - ACROSSCHAR(current_subject < end_subject, *current_subject, - current_subject++); -#endif - } - else break; - } - } - } - - /* Restore fudged end_subject */ - - end_subject = save_end_subject; - - /* The following two optimizations are disabled for partial matching or if - disabling is explicitly requested (and of course, by the test above, this - code is not obeyed when restarting after a partial match). */ - - if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && - (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0) - { - /* If the pattern was studied, a minimum subject length may be set. This - is a lower bound; no actual string of that length may actually match the - pattern. Although the value is, strictly, in characters, we treat it as - bytes to avoid spending too much time in this optimization. */ - - if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && - (pcre_uint32)(end_subject - current_subject) < study->minlength) - return PCRE_ERROR_NOMATCH; - - /* If req_char is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_char - must be later in the subject; otherwise the test starts at the match - point. This optimization can save a huge amount of work in patterns with - nested unlimited repeats that aren't going to match. Writing separate - code for cased/caseless versions makes it go faster, as does using an - autoincrement and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end - can take a long time, and give bad performance on quite ordinary - patterns. This showed up when somebody was matching /^C/ on a 32-megabyte - string... so we don't do this when the string is sufficiently long. */ - - if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX) - { - register PCRE_PUCHAR p = current_subject + (has_first_char? 
1:0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. */ - - if (p > req_char_ptr) - { - if (req_char != req_char2) - { - while (p < end_subject) - { - register pcre_uint32 pp = RAWUCHARINCTEST(p); - if (pp == req_char || pp == req_char2) { p--; break; } - } - } - else - { - while (p < end_subject) - { - if (RAWUCHARINCTEST(p) == req_char) { p--; break; } - } - } - - /* If we can't find the required character, break the matching loop, - which will cause a return or PCRE_ERROR_NOMATCH. */ - - if (p >= end_subject) break; - - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ - - req_char_ptr = p; - } - } - } - } /* End of optimizations that are done when not restarting */ - - /* OK, now we can do the business */ - - md->start_used_ptr = current_subject; - md->recursive = NULL; - - rc = internal_dfa_exec( - md, /* fixed match data */ - md->start_code, /* this subexpression's code */ - current_subject, /* where we currently are */ - start_offset, /* start offset in subject */ - offsets, /* offset vector */ - offsetcount, /* size of same */ - workspace, /* workspace vector */ - wscount, /* size of same */ - 0); /* function recurse level */ - - /* Anything other than "no match" means we are done, always; otherwise, carry - on only if not anchored. */ - - if (rc != PCRE_ERROR_NOMATCH || anchored) return rc; - - /* Advance to the next subject character unless we are at the end of a line - and firstline is set. 
*/ - - if (firstline && IS_NEWLINE(current_subject)) break; - current_subject++; -#ifdef SUPPORT_UTF - if (utf) - { - ACROSSCHAR(current_subject < end_subject, *current_subject, - current_subject++); - } -#endif - if (current_subject > end_subject) break; - - /* If we have just passed a CR and we are now at a LF, and the pattern does - not contain any explicit matches for \r or \n, and the newline option is CRLF - or ANY or ANYCRLF, advance the match position by one more character. */ - - if (RAWUCHARTEST(current_subject - 1) == CHAR_CR && - current_subject < end_subject && - RAWUCHARTEST(current_subject) == CHAR_NL && - (re->flags & PCRE_HASCRORLF) == 0 && - (md->nltype == NLTYPE_ANY || - md->nltype == NLTYPE_ANYCRLF || - md->nllen == 2)) - current_subject++; - - } /* "Bumpalong" loop */ - -return PCRE_ERROR_NOMATCH; -} - -/* End of pcre_dfa_exec.c */ diff --git a/deps/libmagic/pcre/pcre_exec.c b/deps/libmagic/pcre/pcre_exec.c deleted file mode 100644 index 05d0e52..0000000 --- a/deps/libmagic/pcre/pcre_exec.c +++ /dev/null @@ -1,7049 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains pcre_exec(), the externally visible function that does -pattern matching using an NFA algorithm, trying to mimic Perl as closely as -possible. There are also some static supporting functions. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define NLBLOCK md /* Block containing newline information */ -#define PSSTART start_subject /* Field containing processed string start */ -#define PSEND end_subject /* Field containing processed string end */ - -#include "pcre_internal.h" - -/* Undefine some potentially clashing cpp symbols */ - -#undef min -#undef max - -/* Values for setting in md->match_function_type to indicate two special types -of call to match(). 
We do it this way to save on using another stack variable, -as stack usage is to be discouraged. */ - -#define MATCH_CONDASSERT 1 /* Called to check a condition assertion */ -#define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */ - -/* Non-error returns from the match() function. Error returns are externally -defined PCRE_ERROR_xxx codes, which are all negative. */ - -#define MATCH_MATCH 1 -#define MATCH_NOMATCH 0 - -/* Special internal returns from the match() function. Make them sufficiently -negative to avoid the external error codes. */ - -#define MATCH_ACCEPT (-999) -#define MATCH_COMMIT (-998) -#define MATCH_KETRPOS (-997) -#define MATCH_ONCE (-996) -#define MATCH_PRUNE (-995) -#define MATCH_SKIP (-994) -#define MATCH_SKIP_ARG (-993) -#define MATCH_THEN (-992) - -/* Maximum number of ints of offset to save on the stack for recursive calls. -If the offset vector is bigger, malloc is used. This should be a multiple of 3, -because the offset vector is always a multiple of 3 long. */ - -#define REC_STACK_SAVE_MAX 30 - -/* Min and max values for the common repeats; for the maxima, 0 => infinity */ - -static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; -static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; - -#ifdef PCRE_DEBUG -/************************************************* -* Debugging function to print chars * -*************************************************/ - -/* Print a sequence of chars in printable format, stopping at the end of the -subject if the requested. 
- -Arguments: - p points to characters - length number to print - is_subject TRUE if printing from within md->start_subject - md pointer to matching data block, if is_subject is TRUE - -Returns: nothing -*/ - -static void -pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) -{ -pcre_uint32 c; -BOOL utf = md->utf; -if (is_subject && length > md->end_subject - p) length = md->end_subject - p; -while (length-- > 0) - if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c); -} -#endif - - - -/************************************************* -* Match a back-reference * -*************************************************/ - -/* Normally, if a back reference hasn't been set, the length that is passed is -negative, so the match always fails. However, in JavaScript compatibility mode, -the length passed is zero. Note that in caseless UTF-8 mode, the number of -subject bytes matched may be different to the number of reference bytes. - -Arguments: - offset index into the offset vector - eptr pointer into the subject - length length of reference to be matched (number of bytes) - md points to match data block - caseless TRUE if caseless - -Returns: >= 0 the number of subject bytes matched - -1 no match - -2 partial match; always given if at end subject -*/ - -static int -match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, - BOOL caseless) -{ -PCRE_PUCHAR eptr_start = eptr; -register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; -#ifdef SUPPORT_UTF -BOOL utf = md->utf; -#endif - -#ifdef PCRE_DEBUG -if (eptr >= md->end_subject) - printf("matching subject "); -else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - } -printf(" against backref "); -pchars(p, length, FALSE, md); -printf("\n"); -#endif - -/* Always fail if reference not set (and not JavaScript compatible - in that -case the length is passed as zero). 
*/ - -if (length < 0) return -1; - -/* Separate the caseless case for speed. In UTF-8 mode we can only do this -properly if Unicode properties are supported. Otherwise, we can check only -ASCII characters. */ - -if (caseless) - { -#ifdef SUPPORT_UTF -#ifdef SUPPORT_UCP - if (utf) - { - /* Match characters up to the end of the reference. NOTE: the number of - data units matched may differ, because in UTF-8 there are some characters - whose upper and lower case versions code have different numbers of bytes. - For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 - (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a - sequence of two of the latter. It is important, therefore, to check the - length along the reference, not along the subject (earlier code did this - wrong). */ - - PCRE_PUCHAR endptr = p + length; - while (p < endptr) - { - pcre_uint32 c, d; - const ucd_record *ur; - if (eptr >= md->end_subject) return -2; /* Partial match */ - GETCHARINC(c, eptr); - GETCHARINC(d, p); - ur = GET_UCD(d); - if (c != d && c != d + ur->other_case) - { - const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset; - for (;;) - { - if (c < *pp) return -1; - if (c == *pp++) break; - } - } - } - } - else -#endif -#endif - - /* The same code works when not in UTF-8 mode and in UTF-8 mode when there - is no UCP support. */ - { - while (length-- > 0) - { - pcre_uchar cc, cp; - if (eptr >= md->end_subject) return -2; /* Partial match */ - cc = RAWUCHARTEST(eptr); - cp = RAWUCHARTEST(p); - if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1; - p++; - eptr++; - } - } - } - -/* In the caseful case, we can just compare the bytes, whether or not we -are in UTF-8 mode. 
*/ - -else - { - while (length-- > 0) - { - if (eptr >= md->end_subject) return -2; /* Partial match */ - if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1; - } - } - -return (int)(eptr - eptr_start); -} - - - -/*************************************************************************** -**************************************************************************** - RECURSION IN THE match() FUNCTION - -The match() function is highly recursive, though not every recursive call -increases the recursive depth. Nevertheless, some regular expressions can cause -it to recurse to a great depth. I was writing for Unix, so I just let it call -itself recursively. This uses the stack for saving everything that has to be -saved for a recursive call. On Unix, the stack can be large, and this works -fine. - -It turns out that on some non-Unix-like systems there are problems with -programs that use a lot of stack. (This despite the fact that every last chip -has oodles of memory these days, and techniques for extending the stack have -been known for decades.) So.... - -There is a fudge, triggered by defining NO_RECURSE, which avoids recursive -calls by keeping local variables that need to be preserved in blocks of memory -obtained from malloc() instead instead of on the stack. Macros are used to -achieve this so that the actual code doesn't look very different to what it -always used to. - -The original heap-recursive code used longjmp(). However, it seems that this -can be very slow on some operating systems. Following a suggestion from Stan -Switzer, the use of longjmp() has been abolished, at the cost of having to -provide a unique number for each call to RMATCH. There is no way of generating -a sequence of numbers at compile time in C. I have given them names, to make -them stand out more clearly. - -Crude tests on x86 Linux show a small speedup of around 5-8%. However, on -FreeBSD, avoiding longjmp() more than halves the time taken to run the standard -tests. 
Furthermore, not using longjmp() means that local dynamic variables -don't have indeterminate values; this has meant that the frame size can be -reduced because the result can be "passed back" by straight setting of the -variable instead of being passed in the frame. -**************************************************************************** -***************************************************************************/ - -/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN -below must be updated in sync. */ - -enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, - RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, - RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, - RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, - RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, - RM61, RM62, RM63, RM64, RM65, RM66, RM67 }; - -/* These versions of the macros use the stack, as normal. There are debugging -versions and production versions. Note that the "rw" argument of RMATCH isn't -actually used in this definition. */ - -#ifndef NO_RECURSE -#define REGISTER register - -#ifdef PCRE_DEBUG -#define RMATCH(ra,rb,rc,rd,re,rw) \ - { \ - printf("match() called in line %d\n", __LINE__); \ - rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \ - printf("to line %d\n", __LINE__); \ - } -#define RRETURN(ra) \ - { \ - printf("match() returned %d from line %d\n", ra, __LINE__); \ - return ra; \ - } -#else -#define RMATCH(ra,rb,rc,rd,re,rw) \ - rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) -#define RRETURN(ra) return ra -#endif - -#else - - -/* These versions of the macros manage a private stack on the heap. Note that -the "rd" argument of RMATCH isn't actually used in this definition. It's the md -argument of match(), which never changes. 
*/ - -#define REGISTER - -#define RMATCH(ra,rb,rc,rd,re,rw)\ - {\ - heapframe *newframe = frame->Xnextframe;\ - if (newframe == NULL)\ - {\ - newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ - if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ - newframe->Xnextframe = NULL;\ - frame->Xnextframe = newframe;\ - }\ - frame->Xwhere = rw;\ - newframe->Xeptr = ra;\ - newframe->Xecode = rb;\ - newframe->Xmstart = mstart;\ - newframe->Xoffset_top = rc;\ - newframe->Xeptrb = re;\ - newframe->Xrdepth = frame->Xrdepth + 1;\ - newframe->Xprevframe = frame;\ - frame = newframe;\ - DPRINTF(("restarting from line %d\n", __LINE__));\ - goto HEAP_RECURSE;\ - L_##rw:\ - DPRINTF(("jumped back to line %d\n", __LINE__));\ - } - -#define RRETURN(ra)\ - {\ - heapframe *oldframe = frame;\ - frame = oldframe->Xprevframe;\ - if (frame != NULL)\ - {\ - rrc = ra;\ - goto HEAP_RETURN;\ - }\ - return ra;\ - } - - -/* Structure for remembering the local variables in a private frame */ - -typedef struct heapframe { - struct heapframe *Xprevframe; - struct heapframe *Xnextframe; - - /* Function arguments that may change */ - - PCRE_PUCHAR Xeptr; - const pcre_uchar *Xecode; - PCRE_PUCHAR Xmstart; - int Xoffset_top; - eptrblock *Xeptrb; - unsigned int Xrdepth; - - /* Function local variables */ - - PCRE_PUCHAR Xcallpat; -#ifdef SUPPORT_UTF - PCRE_PUCHAR Xcharptr; -#endif - PCRE_PUCHAR Xdata; - PCRE_PUCHAR Xnext; - PCRE_PUCHAR Xpp; - PCRE_PUCHAR Xprev; - PCRE_PUCHAR Xsaved_eptr; - - recursion_info Xnew_recursive; - - BOOL Xcur_is_word; - BOOL Xcondition; - BOOL Xprev_is_word; - -#ifdef SUPPORT_UCP - int Xprop_type; - unsigned int Xprop_value; - int Xprop_fail_result; - int Xoclength; - pcre_uchar Xocchars[6]; -#endif - - int Xcodelink; - int Xctype; - unsigned int Xfc; - int Xfi; - int Xlength; - int Xmax; - int Xmin; - int Xnumber; - int Xoffset; - int Xop; - int Xsave_capture_last; - int Xsave_offset1, Xsave_offset2, Xsave_offset3; - int Xstacksave[REC_STACK_SAVE_MAX]; - - 
eptrblock Xnewptrb; - - /* Where to jump back to */ - - int Xwhere; - -} heapframe; - -#endif - - -/*************************************************************************** -***************************************************************************/ - - - -/************************************************* -* Match from current position * -*************************************************/ - -/* This function is called recursively in many circumstances. Whenever it -returns a negative (error) response, the outer incarnation must also return the -same response. */ - -/* These macros pack up tests that are used for partial matching, and which -appear several times in the code. We set the "hit end" flag if the pointer is -at the end of the subject and also past the start of the subject (i.e. -something has been matched). For hard partial matching, we then return -immediately. The second one is used when we already know we are past the end of -the subject. */ - -#define CHECK_PARTIAL()\ - if (md->partial != 0 && eptr >= md->end_subject && \ - eptr > md->start_used_ptr) \ - { \ - md->hitend = TRUE; \ - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ - } - -#define SCHECK_PARTIAL()\ - if (md->partial != 0 && eptr > md->start_used_ptr) \ - { \ - md->hitend = TRUE; \ - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \ - } - - -/* Performance note: It might be tempting to extract commonly used fields from -the md structure (e.g. utf, end_subject) into individual variables to improve -performance. Tests using gcc on a SPARC disproved this; in the first case, it -made performance worse. 
- -Arguments: - eptr pointer to current character in subject - ecode pointer to current position in compiled code - mstart pointer to the current match start position (can be modified - by encountering \K) - offset_top current top pointer - md pointer to "static" info for the match - eptrb pointer to chain of blocks containing eptr at start of - brackets - for testing for empty matches - rdepth the recursion depth - -Returns: MATCH_MATCH if matched ) these values are >= 0 - MATCH_NOMATCH if failed to match ) - a negative MATCH_xxx value for PRUNE, SKIP, etc - a negative PCRE_ERROR_xxx value if aborted by an error condition - (e.g. stopped by repeated call or recursion limit) -*/ - -static int -match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, - PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, - unsigned int rdepth) -{ -/* These variables do not need to be preserved over recursion in this function, -so they can be ordinary variables in all cases. Mark some of them with -"register" because they are used a lot in loops. */ - -register int rrc; /* Returns from recursive calls */ -register int i; /* Used for loops not involving calls to RMATCH() */ -register pcre_uint32 c; /* Character values not kept over RMATCH() calls */ -register BOOL utf; /* Local copy of UTF flag for speed */ - -BOOL minimize, possessive; /* Quantifier options */ -BOOL caseless; -int condcode; - -/* When recursion is not being used, all "local" variables that have to be -preserved over calls to RMATCH() are part of a "frame". We set up the top-level -frame on the stack here; subsequent instantiations are obtained from the heap -whenever RMATCH() does a "recursion". See the macro definitions above. Putting -the top-level on the stack rather than malloc-ing them all gives a performance -boost in many cases where there is not much "recursion". 
*/ - -#ifdef NO_RECURSE -heapframe *frame = (heapframe *)md->match_frames_base; - -/* Copy in the original argument variables */ - -frame->Xeptr = eptr; -frame->Xecode = ecode; -frame->Xmstart = mstart; -frame->Xoffset_top = offset_top; -frame->Xeptrb = eptrb; -frame->Xrdepth = rdepth; - -/* This is where control jumps back to to effect "recursion" */ - -HEAP_RECURSE: - -/* Macros make the argument variables come from the current frame */ - -#define eptr frame->Xeptr -#define ecode frame->Xecode -#define mstart frame->Xmstart -#define offset_top frame->Xoffset_top -#define eptrb frame->Xeptrb -#define rdepth frame->Xrdepth - -/* Ditto for the local variables */ - -#ifdef SUPPORT_UTF -#define charptr frame->Xcharptr -#endif -#define callpat frame->Xcallpat -#define codelink frame->Xcodelink -#define data frame->Xdata -#define next frame->Xnext -#define pp frame->Xpp -#define prev frame->Xprev -#define saved_eptr frame->Xsaved_eptr - -#define new_recursive frame->Xnew_recursive - -#define cur_is_word frame->Xcur_is_word -#define condition frame->Xcondition -#define prev_is_word frame->Xprev_is_word - -#ifdef SUPPORT_UCP -#define prop_type frame->Xprop_type -#define prop_value frame->Xprop_value -#define prop_fail_result frame->Xprop_fail_result -#define oclength frame->Xoclength -#define occhars frame->Xocchars -#endif - -#define ctype frame->Xctype -#define fc frame->Xfc -#define fi frame->Xfi -#define length frame->Xlength -#define max frame->Xmax -#define min frame->Xmin -#define number frame->Xnumber -#define offset frame->Xoffset -#define op frame->Xop -#define save_capture_last frame->Xsave_capture_last -#define save_offset1 frame->Xsave_offset1 -#define save_offset2 frame->Xsave_offset2 -#define save_offset3 frame->Xsave_offset3 -#define stacksave frame->Xstacksave - -#define newptrb frame->Xnewptrb - -/* When recursion is being used, local variables are allocated on the stack and -get preserved during recursion in the normal way. 
In this environment, fi and -i, and fc and c, can be the same variables. */ - -#else /* NO_RECURSE not defined */ -#define fi i -#define fc c - -/* Many of the following variables are used only in small blocks of the code. -My normal style of coding would have declared them within each of those blocks. -However, in order to accommodate the version of this code that uses an external -"stack" implemented on the heap, it is easier to declare them all here, so the -declarations can be cut out in a block. The only declarations within blocks -below are for variables that do not have to be preserved over a recursive call -to RMATCH(). */ - -#ifdef SUPPORT_UTF -const pcre_uchar *charptr; -#endif -const pcre_uchar *callpat; -const pcre_uchar *data; -const pcre_uchar *next; -PCRE_PUCHAR pp; -const pcre_uchar *prev; -PCRE_PUCHAR saved_eptr; - -recursion_info new_recursive; - -BOOL cur_is_word; -BOOL condition; -BOOL prev_is_word; - -#ifdef SUPPORT_UCP -int prop_type; -unsigned int prop_value; -int prop_fail_result; -int oclength; -pcre_uchar occhars[6]; -#endif - -int codelink; -int ctype; -int length; -int max; -int min; -unsigned int number; -int offset; -pcre_uchar op; -int save_capture_last; -int save_offset1, save_offset2, save_offset3; -int stacksave[REC_STACK_SAVE_MAX]; - -eptrblock newptrb; - -/* There is a special fudge for calling match() in a way that causes it to -measure the size of its basic stack frame when the stack is being used for -recursion. The second argument (ecode) being NULL triggers this behaviour. It -cannot normally ever be NULL. The return is the negated value of the frame -size. */ - -if (ecode == NULL) - { - if (rdepth == 0) - return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1); - else - { - int len = (char *)&rdepth - (char *)eptr; - return (len > 0)? 
-len : len; - } - } -#endif /* NO_RECURSE */ - -/* To save space on the stack and in the heap frame, I have doubled up on some -of the local variables that are used only in localised parts of the code, but -still need to be preserved over recursive calls of match(). These macros define -the alternative names that are used. */ - -#define allow_zero cur_is_word -#define cbegroup condition -#define code_offset codelink -#define condassert condition -#define matched_once prev_is_word -#define foc number -#define save_mark data - -/* These statements are here to stop the compiler complaining about unitialized -variables. */ - -#ifdef SUPPORT_UCP -prop_value = 0; -prop_fail_result = 0; -#endif - - -/* This label is used for tail recursion, which is used in a few cases even -when NO_RECURSE is not defined, in order to reduce the amount of stack that is -used. Thanks to Ian Taylor for noticing this possibility and sending the -original patch. */ - -TAIL_RECURSE: - -/* OK, now we can get on with the real code of the function. Recursive calls -are specified by the macro RMATCH and RRETURN is used to return. When -NO_RECURSE is *not* defined, these just turn into a recursive call to match() -and a "return", respectively (possibly with some debugging if PCRE_DEBUG is -defined). However, RMATCH isn't like a function call because it's quite a -complicated macro. It has to be used in one particular way. This shouldn't, -however, impact performance when true recursion is being used. */ - -#ifdef SUPPORT_UTF -utf = md->utf; /* Local copy of the flag */ -#else -utf = FALSE; -#endif - -/* First check that we haven't called match() too many times, or that we -haven't exceeded the recursive call limit. 
*/ - -if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT); -if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT); - -/* At the start of a group with an unlimited repeat that may match an empty -string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is -done this way to save having to use another function argument, which would take -up space on the stack. See also MATCH_CONDASSERT below. - -When MATCH_CBEGROUP is set, add the current subject pointer to the chain of -such remembered pointers, to be checked when we hit the closing ket, in order -to break infinite loops that match no characters. When match() is called in -other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must -NOT be used with tail recursion, because the memory block that is used is on -the stack, so a new one may be required for each match(). */ - -if (md->match_function_type == MATCH_CBEGROUP) - { - newptrb.epb_saved_eptr = eptr; - newptrb.epb_prev = eptrb; - eptrb = &newptrb; - md->match_function_type = 0; - } - -/* Now start processing the opcodes. */ - -for (;;) - { - minimize = possessive = FALSE; - op = *ecode; - - switch(op) - { - case OP_MARK: - md->nomatch_mark = ecode + 2; - md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, - eptrb, RM55); - if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && - md->mark == NULL) md->mark = ecode + 2; - - /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an - argument, and we must check whether that argument matches this MARK's - argument. It is passed back in md->start_match_ptr (an overloading of that - variable). If it does match, we reset that variable to the current subject - position and return MATCH_SKIP. Otherwise, pass back the return code - unaltered. 
*/ - - else if (rrc == MATCH_SKIP_ARG && - STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0) - { - md->start_match_ptr = eptr; - RRETURN(MATCH_SKIP); - } - RRETURN(rrc); - - case OP_FAIL: - RRETURN(MATCH_NOMATCH); - - /* COMMIT overrides PRUNE, SKIP, and THEN */ - - case OP_COMMIT: - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM52); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && - rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && - rrc != MATCH_THEN) - RRETURN(rrc); - RRETURN(MATCH_COMMIT); - - /* PRUNE overrides THEN */ - - case OP_PRUNE: - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM51); - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); - RRETURN(MATCH_PRUNE); - - case OP_PRUNE_ARG: - md->nomatch_mark = ecode + 2; - md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, - eptrb, RM56); - if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && - md->mark == NULL) md->mark = ecode + 2; - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); - RRETURN(MATCH_PRUNE); - - /* SKIP overrides PRUNE and THEN */ - - case OP_SKIP: - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM53); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) - RRETURN(rrc); - md->start_match_ptr = eptr; /* Pass back current position */ - RRETURN(MATCH_SKIP); - - /* Note that, for Perl compatibility, SKIP with an argument does NOT set - nomatch_mark. There is a flag that disables this opcode when re-matching a - pattern that ended with a SKIP for which there was not a matching MARK. 
*/ - - case OP_SKIP_ARG: - if (md->ignore_skip_arg) - { - ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; - break; - } - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, - eptrb, RM57); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) - RRETURN(rrc); - - /* Pass back the current skip name by overloading md->start_match_ptr and - returning the special MATCH_SKIP_ARG return code. This will either be - caught by a matching MARK, or get to the top, where it causes a rematch - with the md->ignore_skip_arg flag set. */ - - md->start_match_ptr = ecode + 2; - RRETURN(MATCH_SKIP_ARG); - - /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that - the branch in which it occurs can be determined. Overload the start of - match pointer to do this. */ - - case OP_THEN: - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM54); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->start_match_ptr = ecode; - RRETURN(MATCH_THEN); - - case OP_THEN_ARG: - md->nomatch_mark = ecode + 2; - md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, - md, eptrb, RM58); - if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && - md->mark == NULL) md->mark = ecode + 2; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->start_match_ptr = ecode; - RRETURN(MATCH_THEN); - - /* Handle an atomic group that does not contain any capturing parentheses. - This can be handled like an assertion. Prior to 8.13, all atomic groups - were handled this way. In 8.13, the code was changed as below for ONCE, so - that backups pass through the group and thereby reset captured values. - However, this uses a lot more stack, so in 8.20, atomic groups that do not - contain any captures generate OP_ONCE_NC, which can be handled in the old, - less stack intensive way. - - Check the alternative branches in turn - the matching won't pass the KET - for this kind of subpattern. 
If any one branch matches, we carry on as at - the end of a normal bracket, leaving the subject pointer, but resetting - the start-of-match value in case it was changed by \K. */ - - case OP_ONCE_NC: - prev = ecode; - saved_eptr = eptr; - save_mark = md->mark; - do - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); - if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ - { - mstart = md->start_match_ptr; - break; - } - if (rrc == MATCH_THEN) - { - next = ecode + GET(ecode,1); - if (md->start_match_ptr < next && - (*ecode == OP_ALT || *next == OP_ALT)) - rrc = MATCH_NOMATCH; - } - - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode += GET(ecode,1); - md->mark = save_mark; - } - while (*ecode == OP_ALT); - - /* If hit the end of the group (which could be repeated), fail */ - - if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); - - /* Continue as from after the group, updating the offsets high water - mark, since extracts may have been taken. */ - - do ecode += GET(ecode, 1); while (*ecode == OP_ALT); - - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - - /* For a non-repeating ket, just continue at this level. This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - ecode += 1+LINK_SIZE; - break; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. The second "call" of match() - uses tail recursion, to avoid using another stack frame. 
*/ - - if (*ecode == OP_KETRMIN) - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode = prev; - goto TAIL_RECURSE; - } - else /* OP_KETRMAX */ - { - RMATCH(eptr, prev, offset_top, md, eptrb, RM66); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode += 1 + LINK_SIZE; - goto TAIL_RECURSE; - } - /* Control never gets here */ - - /* Handle a capturing bracket, other than those that are possessive with an - unlimited repeat. If there is space in the offset vector, save the current - subject position in the working slot at the top of the vector. We mustn't - change the current values of the data slot, because they may be set from a - previous iteration of this group, and be referred to by a reference inside - the group. A failure to match might occur after the group has succeeded, - if something later on doesn't match. For this reason, we need to restore - the working value and also the values of the final offsets, in case they - were set by a previous iteration of the same bracket. - - If there isn't enough space in the offset vector, treat this as if it were - a non-capturing bracket. Don't worry about setting the flag for the error - case here; that is handled in the code for KET. 
*/ - - case OP_CBRA: - case OP_SCBRA: - number = GET2(ecode, 1+LINK_SIZE); - offset = number << 1; - -#ifdef PCRE_DEBUG - printf("start bracket %d\n", number); - printf("subject="); - pchars(eptr, 16, TRUE, md); - printf("\n"); -#endif - - if (offset < md->offset_max) - { - save_offset1 = md->offset_vector[offset]; - save_offset2 = md->offset_vector[offset+1]; - save_offset3 = md->offset_vector[md->offset_end - number]; - save_capture_last = md->capture_last; - save_mark = md->mark; - - DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); - md->offset_vector[md->offset_end - number] = - (int)(eptr - md->start_subject); - - for (;;) - { - if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM1); - if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ - - /* If we backed up to a THEN, check whether it is within the current - branch by comparing the address of the THEN that is passed back with - the end of the branch. If it is within the current branch, and the - branch is one of two or more alternatives (it either starts or ends - with OP_ALT), we have reached the limit of THEN's action, so convert - the return code to NOMATCH, which will cause normal backtracking to - happen from now on. Otherwise, THEN is passed back to an outer - alternative. This implements Perl's treatment of parenthesized groups, - where a group not containing | does not affect the current alternative, - that is, (X) is NOT the same as (X|(*F)). */ - - if (rrc == MATCH_THEN) - { - next = ecode + GET(ecode,1); - if (md->start_match_ptr < next && - (*ecode == OP_ALT || *next == OP_ALT)) - rrc = MATCH_NOMATCH; - } - - /* Anything other than NOMATCH is passed back. 
*/ - - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->capture_last = save_capture_last; - ecode += GET(ecode, 1); - md->mark = save_mark; - if (*ecode != OP_ALT) break; - } - - DPRINTF(("bracket %d failed\n", number)); - md->offset_vector[offset] = save_offset1; - md->offset_vector[offset+1] = save_offset2; - md->offset_vector[md->offset_end - number] = save_offset3; - - /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ - - RRETURN(rrc); - } - - /* FALL THROUGH ... Insufficient room for saving captured contents. Treat - as a non-capturing bracket. */ - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - - DPRINTF(("insufficient capture room: treat as non-capturing\n")); - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - - /* Non-capturing or atomic group, except for possessive with unlimited - repeat and ONCE group with no captures. Loop for all the alternatives. - - When we get to the final alternative within the brackets, we used to return - the result of a recursive call to match() whatever happened so it was - possible to reduce stack usage by turning this into a tail recursion, - except in the case of a possibly empty group. However, now that there is - the possiblity of (*THEN) occurring in the final alternative, this - optimization is no longer always possible. - - We can optimize if we know there are no (*THEN)s in the pattern; at present - this is the best that can be done. - - MATCH_ONCE is returned when the end of an atomic group is successfully - reached, but subsequent matching fails. It passes back up the tree (causing - captured values to be reset) until the original atomic group level is - reached. This is tested by comparing md->once_target with the start of the - group. At this point, the return is converted into MATCH_NOMATCH so that - previous backup points can be taken. 
*/ - - case OP_ONCE: - case OP_BRA: - case OP_SBRA: - DPRINTF(("start non-capturing bracket\n")); - - for (;;) - { - if (op >= OP_SBRA || op == OP_ONCE) - md->match_function_type = MATCH_CBEGROUP; - - /* If this is not a possibly empty group, and there are no (*THEN)s in - the pattern, and this is the final alternative, optimize as described - above. */ - - else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) - { - ecode += PRIV(OP_lengths)[*ecode]; - goto TAIL_RECURSE; - } - - /* In all other cases, we have to make another call to match(). */ - - save_mark = md->mark; - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, - RM2); - - /* See comment in the code for capturing groups above about handling - THEN. */ - - if (rrc == MATCH_THEN) - { - next = ecode + GET(ecode,1); - if (md->start_match_ptr < next && - (*ecode == OP_ALT || *next == OP_ALT)) - rrc = MATCH_NOMATCH; - } - - if (rrc != MATCH_NOMATCH) - { - if (rrc == MATCH_ONCE) - { - const pcre_uchar *scode = ecode; - if (*scode != OP_ONCE) /* If not at start, find it */ - { - while (*scode == OP_ALT) scode += GET(scode, 1); - scode -= GET(scode, 1); - } - if (md->once_target == scode) rrc = MATCH_NOMATCH; - } - RRETURN(rrc); - } - ecode += GET(ecode, 1); - md->mark = save_mark; - if (*ecode != OP_ALT) break; - } - - RRETURN(MATCH_NOMATCH); - - /* Handle possessive capturing brackets with an unlimited repeat. We come - here from BRAZERO with allow_zero set TRUE. The offset_vector values are - handled similarly to the normal case above. However, the matching is - different. The end of these brackets will always be OP_KETRPOS, which - returns MATCH_KETRPOS without going further in the pattern. By this means - we can handle the group by iteration rather than recursion, thereby - reducing the amount of stack needed. 
*/ - - case OP_CBRAPOS: - case OP_SCBRAPOS: - allow_zero = FALSE; - - POSSESSIVE_CAPTURE: - number = GET2(ecode, 1+LINK_SIZE); - offset = number << 1; - -#ifdef PCRE_DEBUG - printf("start possessive bracket %d\n", number); - printf("subject="); - pchars(eptr, 16, TRUE, md); - printf("\n"); -#endif - - if (offset < md->offset_max) - { - matched_once = FALSE; - code_offset = (int)(ecode - md->start_code); - - save_offset1 = md->offset_vector[offset]; - save_offset2 = md->offset_vector[offset+1]; - save_offset3 = md->offset_vector[md->offset_end - number]; - save_capture_last = md->capture_last; - - DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); - - /* Each time round the loop, save the current subject position for use - when the group matches. For MATCH_MATCH, the group has matched, so we - restart it with a new subject starting position, remembering that we had - at least one match. For MATCH_NOMATCH, carry on with the alternatives, as - usual. If we haven't matched any alternatives in any iteration, check to - see if a previous iteration matched. If so, the group has matched; - continue from afterwards. Otherwise it has failed; restore the previous - capture values before returning NOMATCH. */ - - for (;;) - { - md->offset_vector[md->offset_end - number] = - (int)(eptr - md->start_subject); - if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM63); - if (rrc == MATCH_KETRPOS) - { - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - ecode = md->start_code + code_offset; - save_capture_last = md->capture_last; - matched_once = TRUE; - continue; - } - - /* See comment in the code for capturing groups above about handling - THEN. 
*/ - - if (rrc == MATCH_THEN) - { - next = ecode + GET(ecode,1); - if (md->start_match_ptr < next && - (*ecode == OP_ALT || *next == OP_ALT)) - rrc = MATCH_NOMATCH; - } - - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->capture_last = save_capture_last; - ecode += GET(ecode, 1); - if (*ecode != OP_ALT) break; - } - - if (!matched_once) - { - md->offset_vector[offset] = save_offset1; - md->offset_vector[offset+1] = save_offset2; - md->offset_vector[md->offset_end - number] = save_offset3; - } - - if (allow_zero || matched_once) - { - ecode += 1 + LINK_SIZE; - break; - } - - RRETURN(MATCH_NOMATCH); - } - - /* FALL THROUGH ... Insufficient room for saving captured contents. Treat - as a non-capturing bracket. */ - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - - DPRINTF(("insufficient capture room: treat as non-capturing\n")); - - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - /* VVVVVVVVVVVVVVVVVVVVVVVVV */ - - /* Non-capturing possessive bracket with unlimited repeat. We come here - from BRAZERO with allow_zero = TRUE. The code is similar to the above, - without the capturing complication. It is written out separately for speed - and cleanliness. */ - - case OP_BRAPOS: - case OP_SBRAPOS: - allow_zero = FALSE; - - POSSESSIVE_NON_CAPTURE: - matched_once = FALSE; - code_offset = (int)(ecode - md->start_code); - - for (;;) - { - if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, - eptrb, RM48); - if (rrc == MATCH_KETRPOS) - { - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - ecode = md->start_code + code_offset; - matched_once = TRUE; - continue; - } - - /* See comment in the code for capturing groups above about handling - THEN. 
*/ - - if (rrc == MATCH_THEN) - { - next = ecode + GET(ecode,1); - if (md->start_match_ptr < next && - (*ecode == OP_ALT || *next == OP_ALT)) - rrc = MATCH_NOMATCH; - } - - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode += GET(ecode, 1); - if (*ecode != OP_ALT) break; - } - - if (matched_once || allow_zero) - { - ecode += 1 + LINK_SIZE; - break; - } - RRETURN(MATCH_NOMATCH); - - /* Control never reaches here. */ - - /* Conditional group: compilation checked that there are no more than - two branches. If the condition is false, skipping the first branch takes us - past the end if there is only one branch, but that's OK because that is - exactly what going to the ket would do. */ - - case OP_COND: - case OP_SCOND: - codelink = GET(ecode, 1); - - /* Because of the way auto-callout works during compile, a callout item is - inserted between OP_COND and an assertion condition. */ - - if (ecode[LINK_SIZE+1] == OP_CALLOUT) - { - if (PUBL(callout) != NULL) - { - PUBL(callout_block) cb; - cb.version = 2; /* Version 1 of the callout block */ - cb.callout_number = ecode[LINK_SIZE+2]; - cb.offset_vector = md->offset_vector; -#if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)md->start_subject; -#elif defined COMPILE_PCRE16 - cb.subject = (PCRE_SPTR16)md->start_subject; -#elif defined COMPILE_PCRE32 - cb.subject = (PCRE_SPTR32)md->start_subject; -#endif - cb.subject_length = (int)(md->end_subject - md->start_subject); - cb.start_match = (int)(mstart - md->start_subject); - cb.current_position = (int)(eptr - md->start_subject); - cb.pattern_position = GET(ecode, LINK_SIZE + 3); - cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE); - cb.capture_top = offset_top/2; - cb.capture_last = md->capture_last; - cb.callout_data = md->callout_data; - cb.mark = md->nomatch_mark; - if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); - if (rrc < 0) RRETURN(rrc); - } - ecode += PRIV(OP_lengths)[OP_CALLOUT]; - } - - condcode = ecode[LINK_SIZE+1]; - - /* Now see what the actual 
condition is */ - - if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */ - { - if (md->recursive == NULL) /* Not recursing => FALSE */ - { - condition = FALSE; - ecode += GET(ecode, 1); - } - else - { - unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ - condition = (recno == RREF_ANY || recno == md->recursive->group_num); - - /* If the test is for recursion into a specific subpattern, and it is - false, but the test was set up by name, scan the table to see if the - name refers to any other numbers, and test them. The condition is true - if any one is set. */ - - if (!condition && condcode == OP_NRREF) - { - pcre_uchar *slotA = md->name_table; - for (i = 0; i < md->name_count; i++) - { - if (GET2(slotA, 0) == recno) break; - slotA += md->name_entry_size; - } - - /* Found a name for the number - there can be only one; duplicate - names for different numbers are allowed, but not vice versa. First - scan down for duplicates. */ - - if (i < md->name_count) - { - pcre_uchar *slotB = slotA; - while (slotB > md->name_table) - { - slotB -= md->name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == md->recursive->group_num; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - - if (!condition) - { - slotB = slotA; - for (i++; i < md->name_count; i++) - { - slotB += md->name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == md->recursive->group_num; - if (condition) break; - } - else break; - } - } - } - } - - /* Chose branch according to the condition */ - - ecode += condition? 
1 + IMM2_SIZE : GET(ecode, 1); - } - } - - else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */ - { - offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ - condition = offset < offset_top && md->offset_vector[offset] >= 0; - - /* If the numbered capture is unset, but the reference was by name, - scan the table to see if the name refers to any other numbers, and test - them. The condition is true if any one is set. This is tediously similar - to the code above, but not close enough to try to amalgamate. */ - - if (!condition && condcode == OP_NCREF) - { - unsigned int refno = offset >> 1; - pcre_uchar *slotA = md->name_table; - - for (i = 0; i < md->name_count; i++) - { - if (GET2(slotA, 0) == refno) break; - slotA += md->name_entry_size; - } - - /* Found a name for the number - there can be only one; duplicate names - for different numbers are allowed, but not vice versa. First scan down - for duplicates. */ - - if (i < md->name_count) - { - pcre_uchar *slotB = slotA; - while (slotB > md->name_table) - { - slotB -= md->name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - offset = GET2(slotB, 0) << 1; - condition = offset < offset_top && - md->offset_vector[offset] >= 0; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - - if (!condition) - { - slotB = slotA; - for (i++; i < md->name_count; i++) - { - slotB += md->name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - offset = GET2(slotB, 0) << 1; - condition = offset < offset_top && - md->offset_vector[offset] >= 0; - if (condition) break; - } - else break; - } - } - } - } - - /* Chose branch according to the condition */ - - ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); - } - - else if (condcode == OP_DEF) /* DEFINE - always false */ - { - condition = FALSE; - ecode += GET(ecode, 1); - } - - /* The condition is an assertion. 
Call match() to evaluate it - setting - md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of - an assertion. */ - - else - { - md->match_function_type = MATCH_CONDASSERT; - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3); - if (rrc == MATCH_MATCH) - { - if (md->end_offset_top > offset_top) - offset_top = md->end_offset_top; /* Captures may have happened */ - condition = TRUE; - ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); - while (*ecode == OP_ALT) ecode += GET(ecode, 1); - } - - /* PCRE doesn't allow the effect of (*THEN) to escape beyond an - assertion; it is therefore treated as NOMATCH. */ - - else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) - { - RRETURN(rrc); /* Need braces because of following else */ - } - else - { - condition = FALSE; - ecode += codelink; - } - } - - /* We are now at the branch that is to be obeyed. As there is only one, can - use tail recursion to avoid using another stack frame, except when there is - unlimited repeat of a possibly empty group. In the latter case, a recursive - call to match() is always required, unless the second alternative doesn't - exist, in which case we can just plough on. Note that, for compatibility - with Perl, the | in a conditional group is NOT treated as creating two - alternatives. If a THEN is encountered in the branch, it propagates out to - the enclosing alternative (unless nested in a deeper set of alternatives, - of course). */ - - if (condition || *ecode == OP_ALT) - { - if (op != OP_SCOND) - { - ecode += 1 + LINK_SIZE; - goto TAIL_RECURSE; - } - - md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49); - RRETURN(rrc); - } - - /* Condition false & no alternative; continue after the group. */ - - else - { - ecode += 1 + LINK_SIZE; - } - break; - - - /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, - to close any currently open capturing brackets. 
*/ - - case OP_CLOSE: - number = GET2(ecode, 1); - offset = number << 1; - -#ifdef PCRE_DEBUG - printf("end bracket %d at *ACCEPT", number); - printf("\n"); -#endif - - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else - { - md->offset_vector[offset] = - md->offset_vector[md->offset_end - number]; - md->offset_vector[offset+1] = (int)(eptr - md->start_subject); - if (offset_top <= offset) offset_top = offset + 2; - } - ecode += 1 + IMM2_SIZE; - break; - - - /* End of the pattern, either real or forced. */ - - case OP_END: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - - /* If we have matched an empty string, fail if not in an assertion and not - in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART - is set and we have matched at the start of the subject. In both cases, - backtracking will then try other alternatives, if any. */ - - if (eptr == mstart && op != OP_ASSERT_ACCEPT && - md->recursive == NULL && - (md->notempty || - (md->notempty_atstart && - mstart == md->start_subject + md->start_offset))) - RRETURN(MATCH_NOMATCH); - - /* Otherwise, we have a match. */ - - md->end_match_ptr = eptr; /* Record where we ended */ - md->end_offset_top = offset_top; /* and how many extracts were taken */ - md->start_match_ptr = mstart; /* and the start (\K can modify) */ - - /* For some reason, the macros don't work properly if an expression is - given as the argument to RRETURN when the heap is in use. */ - - rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT; - RRETURN(rrc); - - /* Assertion brackets. Check the alternative branches in turn - the - matching won't pass the KET for an assertion. If any one branch matches, - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the - start of each branch to move the current point backwards, so the code at - this level is identical to the lookahead case. When the assertion is part - of a condition, we want to return immediately afterwards. 
The caller of - this incarnation of the match() function will have set MATCH_CONDASSERT in - md->match_function type, and one of these opcodes will be the first opcode - that is processed. We use a local variable that is preserved over calls to - match() to remember this case. */ - - case OP_ASSERT: - case OP_ASSERTBACK: - save_mark = md->mark; - if (md->match_function_type == MATCH_CONDASSERT) - { - condassert = TRUE; - md->match_function_type = 0; - } - else condassert = FALSE; - - do - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4); - if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) - { - mstart = md->start_match_ptr; /* In case \K reset it */ - break; - } - md->mark = save_mark; - - /* A COMMIT failure must fail the entire assertion, without trying any - subsequent branches. */ - - if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); - - /* PCRE does not allow THEN to escape beyond an assertion; it - is treated as NOMATCH. */ - - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); - ecode += GET(ecode, 1); - } - while (*ecode == OP_ALT); - - if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); - - /* If checking an assertion for a condition, return MATCH_MATCH. */ - - if (condassert) RRETURN(MATCH_MATCH); - - /* Continue from after the assertion, updating the offsets high water - mark, since extracts may have been taken during the assertion. */ - - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - ecode += 1 + LINK_SIZE; - offset_top = md->end_offset_top; - continue; - - /* Negative assertion: all branches must fail to match. Encountering SKIP, - PRUNE, or COMMIT means we must assume failure without checking subsequent - branches. 
*/ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK_NOT: - save_mark = md->mark; - if (md->match_function_type == MATCH_CONDASSERT) - { - condassert = TRUE; - md->match_function_type = 0; - } - else condassert = FALSE; - - do - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); - md->mark = save_mark; - if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); - if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) - { - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - break; - } - - /* PCRE does not allow THEN to escape beyond an assertion; it is treated - as NOMATCH. */ - - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); - ecode += GET(ecode,1); - } - while (*ecode == OP_ALT); - - if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ - - ecode += 1 + LINK_SIZE; - continue; - - /* Move the subject pointer back. This occurs only at the start of - each branch of a lookbehind assertion. If we are too close to the start to - move back, this match function fails. When working with UTF-8 we move - back a number of characters, not bytes. */ - - case OP_REVERSE: -#ifdef SUPPORT_UTF - if (utf) - { - i = GET(ecode, 1); - while (i-- > 0) - { - eptr--; - if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); - BACKCHAR(eptr); - } - } - else -#endif - - /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ - - { - eptr -= GET(ecode, 1); - if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); - } - - /* Save the earliest consulted character, then skip to next op code */ - - if (eptr < md->start_used_ptr) md->start_used_ptr = eptr; - ecode += 1 + LINK_SIZE; - break; - - /* The callout item calls an external function, if one is provided, passing - details of the match so far. This is mainly for debugging, though the - function is able to force a failure. 
*/ - - case OP_CALLOUT: - if (PUBL(callout) != NULL) - { - PUBL(callout_block) cb; - cb.version = 2; /* Version 1 of the callout block */ - cb.callout_number = ecode[1]; - cb.offset_vector = md->offset_vector; -#if defined COMPILE_PCRE8 - cb.subject = (PCRE_SPTR)md->start_subject; -#elif defined COMPILE_PCRE16 - cb.subject = (PCRE_SPTR16)md->start_subject; -#elif defined COMPILE_PCRE32 - cb.subject = (PCRE_SPTR32)md->start_subject; -#endif - cb.subject_length = (int)(md->end_subject - md->start_subject); - cb.start_match = (int)(mstart - md->start_subject); - cb.current_position = (int)(eptr - md->start_subject); - cb.pattern_position = GET(ecode, 2); - cb.next_item_length = GET(ecode, 2 + LINK_SIZE); - cb.capture_top = offset_top/2; - cb.capture_last = md->capture_last; - cb.callout_data = md->callout_data; - cb.mark = md->nomatch_mark; - if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); - if (rrc < 0) RRETURN(rrc); - } - ecode += 2 + 2*LINK_SIZE; - break; - - /* Recursion either matches the current regex, or some subexpression. The - offset data is the offset to the starting bracket from the start of the - whole pattern. (This is so that it works from duplicated subpatterns.) - - The state of the capturing groups is preserved over recursion, and - re-instated afterwards. We don't know how many are started and not yet - finished (offset_top records the completed total) so we just have to save - all the potential data. There may be up to 65535 such values, which is too - large to put on the stack, but using malloc for small numbers seems - expensive. As a compromise, the stack is used when there are no more than - REC_STACK_SAVE_MAX values to store; otherwise malloc is used. - - There are also other values that have to be saved. We use a chained - sequence of blocks that actually live on the stack. Thanks to Robin Houston - for the original version of this logic. 
It has, however, been hacked around - a lot, so he is not to blame for the current way it works. */ - - case OP_RECURSE: - { - recursion_info *ri; - unsigned int recno; - - callpat = md->start_code + GET(ecode, 1); - recno = (callpat == md->start_code)? 0 : - GET2(callpat, 1 + LINK_SIZE); - - /* Check for repeating a recursion without advancing the subject pointer. - This should catch convoluted mutual recursions. (Some simple cases are - caught at compile time.) */ - - for (ri = md->recursive; ri != NULL; ri = ri->prevrec) - if (recno == ri->group_num && eptr == ri->subject_position) - RRETURN(PCRE_ERROR_RECURSELOOP); - - /* Add to "recursing stack" */ - - new_recursive.group_num = recno; - new_recursive.subject_position = eptr; - new_recursive.prevrec = md->recursive; - md->recursive = &new_recursive; - - /* Where to continue from afterwards */ - - ecode += 1 + LINK_SIZE; - - /* Now save the offset data */ - - new_recursive.saved_max = md->offset_end; - if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) - new_recursive.offset_save = stacksave; - else - { - new_recursive.offset_save = - (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); - if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); - } - memcpy(new_recursive.offset_save, md->offset_vector, - new_recursive.saved_max * sizeof(int)); - - /* OK, now we can do the recursion. After processing each alternative, - restore the offset data. If there were nested recursions, md->recursive - might be changed, so reset it before looping. 
*/ - - DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); - cbegroup = (*callpat >= OP_SBRA); - do - { - if (cbegroup) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, - md, eptrb, RM6); - memcpy(md->offset_vector, new_recursive.offset_save, - new_recursive.saved_max * sizeof(int)); - md->recursive = new_recursive.prevrec; - if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) - { - DPRINTF(("Recursion matched\n")); - if (new_recursive.offset_save != stacksave) - (PUBL(free))(new_recursive.offset_save); - - /* Set where we got to in the subject, and reset the start in case - it was changed by \K. This *is* propagated back out of a recursion, - for Perl compatibility. */ - - eptr = md->end_match_ptr; - mstart = md->start_match_ptr; - goto RECURSION_MATCHED; /* Exit loop; end processing */ - } - - /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it - is treated as NOMATCH. */ - - else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && - rrc != MATCH_COMMIT) - { - DPRINTF(("Recursion gave error %d\n", rrc)); - if (new_recursive.offset_save != stacksave) - (PUBL(free))(new_recursive.offset_save); - RRETURN(rrc); - } - - md->recursive = &new_recursive; - callpat += GET(callpat, 1); - } - while (*callpat == OP_ALT); - - DPRINTF(("Recursion didn't match\n")); - md->recursive = new_recursive.prevrec; - if (new_recursive.offset_save != stacksave) - (PUBL(free))(new_recursive.offset_save); - RRETURN(MATCH_NOMATCH); - } - - RECURSION_MATCHED: - break; - - /* An alternation is the end of a branch; scan along to find the end of the - bracketed group and go to there. */ - - case OP_ALT: - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - break; - - /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group, - indicating that it may occur zero times. It may repeat infinitely, or not - at all - i.e. it could be ()* or ()? or even (){0} in the pattern. 
Brackets - with fixed upper repeat limits are compiled as a number of copies, with the - optional ones preceded by BRAZERO or BRAMINZERO. */ - - case OP_BRAZERO: - next = ecode + 1; - RMATCH(eptr, next, offset_top, md, eptrb, RM10); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - do next += GET(next, 1); while (*next == OP_ALT); - ecode = next + 1 + LINK_SIZE; - break; - - case OP_BRAMINZERO: - next = ecode + 1; - do next += GET(next, 1); while (*next == OP_ALT); - RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - ecode++; - break; - - case OP_SKIPZERO: - next = ecode+1; - do next += GET(next,1); while (*next == OP_ALT); - ecode = next + 1 + LINK_SIZE; - break; - - /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything - here; just jump to the group, with allow_zero set TRUE. */ - - case OP_BRAPOSZERO: - op = *(++ecode); - allow_zero = TRUE; - if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE; - goto POSSESSIVE_NON_CAPTURE; - - /* End of a group, repeated or non-repeating. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: - case OP_KETRPOS: - prev = ecode - GET(ecode, 1); - - /* If this was a group that remembered the subject start, in order to break - infinite repeats of empty string matches, retrieve the subject start from - the chain. Otherwise, set it NULL. */ - - if (*prev >= OP_SBRA || *prev == OP_ONCE) - { - saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ - eptrb = eptrb->epb_prev; /* Backup to previous group */ - } - else saved_eptr = NULL; - - /* If we are at the end of an assertion group or a non-capturing atomic - group, stop matching and return MATCH_MATCH, but record the current high - water mark for use by positive assertions. We also need to record the match - start in case it was changed by \K. 
*/ - - if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) || - *prev == OP_ONCE_NC) - { - md->end_match_ptr = eptr; /* For ONCE_NC */ - md->end_offset_top = offset_top; - md->start_match_ptr = mstart; - RRETURN(MATCH_MATCH); /* Sets md->mark */ - } - - /* For capturing groups we have to check the group number back at the start - and if necessary complete handling an extraction by setting the offsets and - bumping the high water mark. Whole-pattern recursion is coded as a recurse - into group 0, so it won't be picked up here. Instead, we catch it when the - OP_END is reached. Other recursion is handled here. We just have to record - the current subject position and start match pointer and give a MATCH - return. */ - - if (*prev == OP_CBRA || *prev == OP_SCBRA || - *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS) - { - number = GET2(prev, 1+LINK_SIZE); - offset = number << 1; - -#ifdef PCRE_DEBUG - printf("end bracket %d", number); - printf("\n"); -#endif - - /* Handle a recursively called group. */ - - if (md->recursive != NULL && md->recursive->group_num == number) - { - md->end_match_ptr = eptr; - md->start_match_ptr = mstart; - RRETURN(MATCH_MATCH); - } - - /* Deal with capturing */ - - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else - { - /* If offset is greater than offset_top, it means that we are - "skipping" a capturing group, and that group's offsets must be marked - unset. In earlier versions of PCRE, all the offsets were unset at the - start of matching, but this doesn't work because atomic groups and - assertions can cause a value to be set that should later be unset. - Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as - part of the atomic group, but this is not on the final matching path, - so must be unset when 2 is set. (If there is no group 2, there is no - problem, because offset_top will then be 2, indicating no capture.) 
*/ - - if (offset > offset_top) - { - register int *iptr = md->offset_vector + offset_top; - register int *iend = md->offset_vector + offset; - while (iptr < iend) *iptr++ = -1; - } - - /* Now make the extraction */ - - md->offset_vector[offset] = - md->offset_vector[md->offset_end - number]; - md->offset_vector[offset+1] = (int)(eptr - md->start_subject); - if (offset_top <= offset) offset_top = offset + 2; - } - } - - /* For an ordinary non-repeating ket, just continue at this level. This - also happens for a repeating ket if no characters were matched in the - group. This is the forcible breaking of infinite loops as implemented in - Perl 5.005. For a non-repeating atomic group that includes captures, - establish a backup point by processing the rest of the pattern at a lower - level. If this results in a NOMATCH return, pass MATCH_ONCE back to the - original OP_ONCE level, thereby bypassing intermediate backup points, but - resetting any captures that happened along the way. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - if (*prev == OP_ONCE) - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */ - RRETURN(MATCH_ONCE); - } - ecode += 1 + LINK_SIZE; /* Carry on at this level */ - break; - } - - /* OP_KETRPOS is a possessive repeating ket. Remember the current position, - and return the MATCH_KETRPOS. This makes it possible to do the repeats one - at a time from the outer level, thus saving stack. */ - - if (*ecode == OP_KETRPOS) - { - md->end_match_ptr = eptr; - md->end_offset_top = offset_top; - RRETURN(MATCH_KETRPOS); - } - - /* The normal repeating kets try the rest of the pattern or restart from - the preceding bracket, in the appropriate order. 
In the second case, we can - use tail recursion to avoid using another stack frame, unless we have an - an atomic group or an unlimited repeat of a group that can match an empty - string. */ - - if (*ecode == OP_KETRMIN) - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (*prev == OP_ONCE) - { - RMATCH(eptr, prev, offset_top, md, eptrb, RM8); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */ - RRETURN(MATCH_ONCE); - } - if (*prev >= OP_SBRA) /* Could match an empty string */ - { - RMATCH(eptr, prev, offset_top, md, eptrb, RM50); - RRETURN(rrc); - } - ecode = prev; - goto TAIL_RECURSE; - } - else /* OP_KETRMAX */ - { - RMATCH(eptr, prev, offset_top, md, eptrb, RM13); - if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (*prev == OP_ONCE) - { - RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - md->once_target = prev; - RRETURN(MATCH_ONCE); - } - ecode += 1 + LINK_SIZE; - goto TAIL_RECURSE; - } - /* Control never gets here */ - - /* Not multiline mode: start of subject assertion, unless notbol. */ - - case OP_CIRC: - if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); - - /* Start of subject assertion */ - - case OP_SOD: - if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); - ecode++; - break; - - /* Multiline mode: start of subject unless notbol, or after any newline. 
*/ - - case OP_CIRCM: - if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); - if (eptr != md->start_subject && - (eptr == md->end_subject || !WAS_NEWLINE(eptr))) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - /* Start of match assertion */ - - case OP_SOM: - if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); - ecode++; - break; - - /* Reset the start of match point */ - - case OP_SET_SOM: - mstart = eptr; - ecode++; - break; - - /* Multiline mode: assert before any newline, or before end of subject - unless noteol is set. */ - - case OP_DOLLM: - if (eptr < md->end_subject) - { - if (!IS_NEWLINE(eptr)) - { - if (md->partial != 0 && - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - RRETURN(MATCH_NOMATCH); - } - } - else - { - if (md->noteol) RRETURN(MATCH_NOMATCH); - SCHECK_PARTIAL(); - } - ecode++; - break; - - /* Not multiline mode: assert before a terminating newline or before end of - subject unless noteol is set. */ - - case OP_DOLL: - if (md->noteol) RRETURN(MATCH_NOMATCH); - if (!md->endonly) goto ASSERT_NL_OR_EOS; - - /* ... else fall through for endonly */ - - /* End of subject assertion (\z) */ - - case OP_EOD: - if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); - SCHECK_PARTIAL(); - ecode++; - break; - - /* End of subject or ending \n assertion (\Z) */ - - case OP_EODN: - ASSERT_NL_OR_EOS: - if (eptr < md->end_subject && - (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) - { - if (md->partial != 0 && - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - RRETURN(MATCH_NOMATCH); - } - - /* Either at end of string or \n before end. 
*/ - - SCHECK_PARTIAL(); - ecode++; - break; - - /* Word boundary assertions */ - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - { - - /* Find out if the previous and current characters are "word" characters. - It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to - be "non-word" characters. Remember the earliest consulted character for - partial matching. */ - -#ifdef SUPPORT_UTF - if (utf) - { - /* Get status of previous character */ - - if (eptr == md->start_subject) prev_is_word = FALSE; else - { - PCRE_PUCHAR lastptr = eptr - 1; - BACKCHAR(lastptr); - if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; - GETCHAR(c, lastptr); -#ifdef SUPPORT_UCP - if (md->use_ucp) - { - if (c == '_') prev_is_word = TRUE; else - { - int cat = UCD_CATEGORY(c); - prev_is_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } - - /* Get status of next character */ - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - cur_is_word = FALSE; - } - else - { - GETCHAR(c, eptr); -#ifdef SUPPORT_UCP - if (md->use_ucp) - { - if (c == '_') cur_is_word = TRUE; else - { - int cat = UCD_CATEGORY(c); - cur_is_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } - } - else -#endif - - /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for - consistency with the behaviour of \w we do use it in this case. 
*/ - - { - /* Get status of previous character */ - - if (eptr == md->start_subject) prev_is_word = FALSE; else - { - if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; -#ifdef SUPPORT_UCP - if (md->use_ucp) - { - c = eptr[-1]; - if (c == '_') prev_is_word = TRUE; else - { - int cat = UCD_CATEGORY(c); - prev_is_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - prev_is_word = MAX_255(eptr[-1]) - && ((md->ctypes[eptr[-1]] & ctype_word) != 0); - } - - /* Get status of next character */ - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - cur_is_word = FALSE; - } - else -#ifdef SUPPORT_UCP - if (md->use_ucp) - { - c = *eptr; - if (c == '_') cur_is_word = TRUE; else - { - int cat = UCD_CATEGORY(c); - cur_is_word = (cat == ucp_L || cat == ucp_N); - } - } - else -#endif - cur_is_word = MAX_255(*eptr) - && ((md->ctypes[*eptr] & ctype_word) != 0); - } - - /* Now see if the situation is what we want */ - - if ((*ecode++ == OP_WORD_BOUNDARY)? - cur_is_word == prev_is_word : cur_is_word != prev_is_word) - RRETURN(MATCH_NOMATCH); - } - break; - - /* Match any single character type except newline; have to take care with - CRLF newlines and partial matching. */ - - case OP_ANY: - if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - if (md->partial != 0 && - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHARTEST(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - - /* Fall through */ - - /* Match any single character whatsoever. */ - - case OP_ALLANY: - if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ - { /* not be updated before SCHECK_PARTIAL. */ - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr++; -#ifdef SUPPORT_UTF - if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); -#endif - ecode++; - break; - - /* Match a single byte, even in UTF-8 mode. 
This opcode really does match - any byte, even newline, independent of the setting of PCRE_DOTALL. */ - - case OP_ANYBYTE: - if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ - { /* not be updated before SCHECK_PARTIAL. */ - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr++; - ecode++; - break; - - case OP_NOT_DIGIT: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_digit) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_DIGIT: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c > 255 || -#endif - (md->ctypes[c] & ctype_digit) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_NOT_WHITESPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_space) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_WHITESPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c > 255 || -#endif - (md->ctypes[c] & ctype_space) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_NOT_WORDCHAR: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - c < 256 && -#endif - (md->ctypes[c] & ctype_word) != 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_WORDCHAR: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ( -#if defined 
SUPPORT_UTF || !(defined COMPILE_PCRE8) - c > 255 || -#endif - (md->ctypes[c] & ctype_word) == 0 - ) - RRETURN(MATCH_NOMATCH); - ecode++; - break; - - case OP_ANYNL: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - - case CHAR_CR: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - } - else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++; - break; - - case CHAR_LF: - break; - - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - ecode++; - break; - - case OP_NOT_HSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - switch(c) - { - HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ - default: break; - } - ecode++; - break; - - case OP_HSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - switch(c) - { - HSPACE_CASES: break; /* Byte and multibyte cases */ - default: RRETURN(MATCH_NOMATCH); - } - ecode++; - break; - - case OP_NOT_VSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - switch(c) - { - VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - ecode++; - break; - - case OP_VSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - switch(c) - { - VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - ecode++; - break; - -#ifdef SUPPORT_UCP - /* Check the next character by Unicode property. We will get here only - if the support is in the binary; otherwise a compile-time error occurs. 
*/ - - case OP_PROP: - case OP_NOTPROP: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - { - const pcre_uint32 *cp; - const ucd_record *prop = GET_UCD(c); - - switch(ecode[1]) - { - case PT_ANY: - if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH); - break; - - case PT_LAMP: - if ((prop->chartype == ucp_Lu || - prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_GC: - if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_PC: - if ((ecode[2] != prop->chartype) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_SC: - if ((ecode[2] != prop->script) == (op == OP_PROP)) - RRETURN(MATCH_NOMATCH); - break; - - /* These are specials */ - - case PT_ALNUM: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_SPACE: /* Perl space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) - == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_PXSPACE: /* POSIX space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR) - == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_WORD: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) - RRETURN(MATCH_NOMATCH); - break; - - case PT_CLIST: - cp = PRIV(ucd_caseless_sets) + ecode[2]; - for (;;) - { - if (c < *cp) - { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } - if (c == *cp++) - { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } - } - break; - - /* This should never occur */ - - default: - 
RRETURN(PCRE_ERROR_INTERNAL); - } - - ecode += 3; - } - break; - - /* Match an extended Unicode sequence. We will get here only if the support - is in the binary; otherwise a compile-time error occurs. */ - - case OP_EXTUNI: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - else - { - int lgb, rgb; - GETCHARINCTEST(c, eptr); - lgb = UCD_GRAPHBREAK(c); - while (eptr < md->end_subject) - { - int len = 1; - if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } - rgb = UCD_GRAPHBREAK(c); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - lgb = rgb; - eptr += len; - } - } - CHECK_PARTIAL(); - ecode++; - break; -#endif /* SUPPORT_UCP */ - - - /* Match a back reference, possibly repeatedly. Look past the end of the - item to see if there is repeat information following. The code is similar - to that for character classes, but repeated for efficiency. Then obey - similar code to character type repeats - written out again for speed. - However, if the referenced string is the empty string, always treat - it as matched, any number of times (otherwise there could be infinite - loops). */ - - case OP_REF: - case OP_REFI: - caseless = op == OP_REFI; - offset = GET2(ecode, 1) << 1; /* Doubled ref number */ - ecode += 1 + IMM2_SIZE; - - /* If the reference is unset, there are two possibilities: - - (a) In the default, Perl-compatible state, set the length negative; - this ensures that every attempt at a match fails. We can't just fail - here, because of the possibility of quantifiers with zero minima. - - (b) If the JavaScript compatibility flag is set, set the length to zero - so that the back reference matches an empty string. - - Otherwise, set the length to the length of what was matched by the - referenced subpattern. */ - - if (offset >= offset_top || md->offset_vector[offset] < 0) - length = (md->jscript_compat)? 
0 : -1; - else - length = md->offset_vector[offset+1] - md->offset_vector[offset]; - - /* Set up for repetition, or handle the non-repeated case */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 1 + IMM2_SIZE); - if (max == 0) max = INT_MAX; - ecode += 1 + 2 * IMM2_SIZE; - break; - - default: /* No repeat follows */ - if ((length = match_ref(offset, eptr, length, md, caseless)) < 0) - { - if (length == -2) eptr = md->end_subject; /* Partial match */ - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr += length; - continue; /* With the main loop */ - } - - /* Handle repeated back references. If the length of the reference is - zero, just continue with the main loop. If the length is negative, it - means the reference is unset in non-Java-compatible mode. If the minimum is - zero, we can continue at the same level without recursion. For any other - minimum, carrying on will result in NOMATCH. */ - - if (length == 0) continue; - if (length < 0 && min == 0) continue; - - /* First, ensure the minimum number of matches are present. We get back - the length of the reference string explicitly rather than passing the - address of eptr, so that eptr can be a register variable. */ - - for (i = 1; i <= min; i++) - { - int slength; - if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { - if (slength == -2) eptr = md->end_subject; /* Partial match */ - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr += slength; - } - - /* If min = max, continue at the same level without recursion. - They are not both allowed to be zero. 
*/ - - if (min == max) continue; - - /* If minimizing, keep trying and advancing the pointer */ - - if (minimize) - { - for (fi = min;; fi++) - { - int slength; - RMATCH(eptr, ecode, offset_top, md, eptrb, RM14); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { - if (slength == -2) eptr = md->end_subject; /* Partial match */ - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr += slength; - } - /* Control never gets here */ - } - - /* If maximizing, find the longest string and work backwards */ - - else - { - pp = eptr; - for (i = min; i < max; i++) - { - int slength; - if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) - { - /* Can't use CHECK_PARTIAL because we don't want to update eptr in - the soft partial matching case. */ - - if (slength == -2 && md->partial != 0 && - md->end_subject > md->start_used_ptr) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - break; - } - eptr += slength; - } - - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM15); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr -= length; - } - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - /* Match a bit-mapped character class, possibly repeatedly. This op code is - used when all the characters in the class have values in the range 0-255, - and either the matching is caseful, or the characters are in the range - 0-127 when UTF-8 processing is enabled. The only difference between - OP_CLASS and OP_NCLASS occurs when a data character outside the range is - encountered. - - First, look past the end of the item to see if there is repeat information - following. Then obey similar code to character type repeats - written out - again for speed. */ - - case OP_NCLASS: - case OP_CLASS: - { - /* The data variable is saved across frames, so the byte map needs to - be stored there. 
*/ -#define BYTE_MAP ((pcre_uint8 *)data) - data = ecode + 1; /* Save for matching */ - ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 1 + IMM2_SIZE); - if (max == 0) max = INT_MAX; - ecode += 1 + 2 * IMM2_SIZE; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - -#ifdef SUPPORT_UTF - if (utf) - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - c = *eptr++; -#ifndef COMPILE_PCRE8 - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else -#endif - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. 
*/ - - if (minimize) - { -#ifdef SUPPORT_UTF - if (utf) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM16); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM17); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - c = *eptr++; -#ifndef COMPILE_PCRE8 - if (c > 255) - { - if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); - } - else -#endif - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. 
*/ - - else - { - pp = eptr; - -#ifdef SUPPORT_UTF - if (utf) - { - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c > 255) - { - if (op == OP_CLASS) break; - } - else - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; - eptr += len; - } - for (;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM18); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - c = *eptr; -#ifndef COMPILE_PCRE8 - if (c > 255) - { - if (op == OP_CLASS) break; - } - else -#endif - if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; - eptr++; - } - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM19); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } - - RRETURN(MATCH_NOMATCH); - } -#undef BYTE_MAP - } - /* Control never gets here */ - - - /* Match an extended character class. This opcode is encountered only - when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8 - mode, because Unicode properties are supported in non-UTF-8 mode. 
*/ - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - { - data = ecode + 1 + LINK_SIZE; /* Save for matching */ - ecode += GET(ecode, 1); /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 1 + IMM2_SIZE); - if (max == 0) max = INT_MAX; - ecode += 1 + 2 * IMM2_SIZE; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM20); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. 
*/ - - else - { - pp = eptr; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } -#ifdef SUPPORT_UTF - GETCHARLENTEST(c, eptr, len); -#else - c = *eptr; -#endif - if (!PRIV(xclass)(c, data, utf)) break; - eptr += len; - } - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ -#ifdef SUPPORT_UTF - if (utf) BACKCHAR(eptr); -#endif - } - RRETURN(MATCH_NOMATCH); - } - - /* Control never gets here */ - } -#endif /* End of XCLASS */ - - /* Match a single character, casefully */ - - case OP_CHAR: -#ifdef SUPPORT_UTF - if (utf) - { - length = 1; - ecode++; - GETCHARLEN(fc, ecode, length); - if (length > md->end_subject - eptr) - { - CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } - while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH); - } - else -#endif - /* Not UTF mode */ - { - if (md->end_subject - eptr < 1) - { - SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } - if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); - ecode += 2; - } - break; - - /* Match a single character, caselessly. If we are at the end of the - subject, give up immediately. */ - - case OP_CHARI: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - -#ifdef SUPPORT_UTF - if (utf) - { - length = 1; - ecode++; - GETCHARLEN(fc, ecode, length); - - /* If the pattern character's value is < 128, we have only one byte, and - we know that its other case must also be one byte long, so we can use the - fast lookup table. We know that there is at least one byte left in the - subject. */ - - if (fc < 128) - { - pcre_uchar cc = RAWUCHAR(eptr); - if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH); - ecode++; - eptr++; - } - - /* Otherwise we must pick up the subject character. 
Note that we cannot - use the value of "length" to check for sufficient bytes left, because the - other case of the character may have more or fewer bytes. */ - - else - { - pcre_uint32 dc; - GETCHARINC(dc, eptr); - ecode += length; - - /* If we have Unicode property support, we can use it to test the other - case of the character, if there is one. */ - - if (fc != dc) - { -#ifdef SUPPORT_UCP - if (dc != UCD_OTHERCASE(fc)) -#endif - RRETURN(MATCH_NOMATCH); - } - } - } - else -#endif /* SUPPORT_UTF */ - - /* Not UTF mode */ - { - if (TABLE_GET(ecode[1], md->lcc, ecode[1]) - != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); - eptr++; - ecode += 2; - } - break; - - /* Match a single character repeatedly. */ - - case OP_EXACT: - case OP_EXACTI: - min = max = GET2(ecode, 1); - ecode += 1 + IMM2_SIZE; - goto REPEATCHAR; - - case OP_POSUPTO: - case OP_POSUPTOI: - possessive = TRUE; - /* Fall through */ - - case OP_UPTO: - case OP_UPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; - ecode += 1 + IMM2_SIZE; - goto REPEATCHAR; - - case OP_POSSTAR: - case OP_POSSTARI: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATCHAR; - - case OP_POSPLUS: - case OP_POSPLUSI: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATCHAR; - - case OP_POSQUERY: - case OP_POSQUERYI: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATCHAR; - - case OP_STAR: - case OP_STARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_PLUS: - case OP_PLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_QUERY: - case OP_QUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI); - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-character matches. 
*/ - - REPEATCHAR: -#ifdef SUPPORT_UTF - if (utf) - { - length = 1; - charptr = ecode; - GETCHARLEN(fc, ecode, length); - ecode += length; - - /* Handle multibyte character matching specially here. There is - support for caseless matching if UCP support is present. */ - - if (length > 1) - { -#ifdef SUPPORT_UCP - pcre_uint32 othercase; - if (op >= OP_STARI && /* Caseless */ - (othercase = UCD_OTHERCASE(fc)) != fc) - oclength = PRIV(ord2utf)(othercase, occhars); - else oclength = 0; -#endif /* SUPPORT_UCP */ - - for (i = 1; i <= min; i++) - { - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ - else - { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - } - - if (min == max) continue; - - if (minimize) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM22); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ - else - { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; -#ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ - else - { - CHECK_PARTIAL(); - break; - } - } - - if (possessive) continue; - - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, 
eptrb, RM23); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr == pp) { RRETURN(MATCH_NOMATCH); } -#ifdef SUPPORT_UCP - eptr--; - BACKCHAR(eptr); -#else /* without SUPPORT_UCP */ - eptr -= length; -#endif /* SUPPORT_UCP */ - } - } - /* Control never gets here */ - } - - /* If the length of a UTF-8 character is 1, we fall through here, and - obey the code as for non-UTF-8 characters below, though in this case the - value of fc will always be < 128. */ - } - else -#endif /* SUPPORT_UTF */ - /* When not in UTF-8 mode, load a single-byte character. */ - fc = *ecode++; - - /* The value of fc at this point is always one character, though we may - or may not be in UTF mode. The code is duplicated for the caseless and - caseful cases, for speed, since matching characters is likely to be quite - common. First, ensure the minimum number of matches are present. If min = - max, continue at the same level without recursing. Otherwise, if - minimizing, keep trying the rest of the expression and advancing one - matching character if failing, up to the maximum. Alternatively, if - maximizing, find the maximum number of characters and work backwards. */ - - DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, - max, (char *)eptr)); - - if (op >= OP_STARI) /* Caseless */ - { -#ifdef COMPILE_PCRE8 - /* fc must be < 128 if UTF is enabled. 
*/ - foc = md->fcc[fc]; -#else -#ifdef SUPPORT_UTF -#ifdef SUPPORT_UCP - if (utf && fc > 127) - foc = UCD_OTHERCASE(fc); -#else - if (utf && fc > 127) - foc = fc; -#endif /* SUPPORT_UCP */ - else -#endif /* SUPPORT_UTF */ - foc = TABLE_GET(fc, md->fcc, fc); -#endif /* COMPILE_PCRE8 */ - - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHARTEST(eptr); - if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); - eptr++; - } - if (min == max) continue; - if (minimize) - { - for (fi = min;; fi++) - { - pcre_uchar cc; - - RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHARTEST(eptr); - if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); - eptr++; - } - /* Control never gets here */ - } - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - cc = RAWUCHARTEST(eptr); - if (fc != cc && foc != cc) break; - eptr++; - } - - if (possessive) continue; - - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM25); - eptr--; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - } - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - - /* Caseful comparisons (includes all multi-byte characters) */ - - else - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH); - } - - if (min == max) continue; - - if (minimize) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM26); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if 
(fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - else /* Maximize */ - { - pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc != RAWUCHARTEST(eptr)) break; - eptr++; - } - if (possessive) continue; - - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM27); - eptr--; - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - } - RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - - /* Match a negated single one-byte character. The character we are - checking can be multibyte. */ - - case OP_NOT: - case OP_NOTI: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 ch, och; - - ecode++; - GETCHARINC(ch, ecode); - GETCHARINC(c, eptr); - - if (op == OP_NOT) - { - if (ch == c) RRETURN(MATCH_NOMATCH); - } - else - { -#ifdef SUPPORT_UCP - if (ch > 127) - och = UCD_OTHERCASE(ch); -#else - if (ch > 127) - och = ch; -#endif /* SUPPORT_UCP */ - else - och = TABLE_GET(ch, md->fcc, ch); - if (ch == c || och == c) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - { - register pcre_uint32 ch = ecode[1]; - c = *eptr++; - if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c)) - RRETURN(MATCH_NOMATCH); - ecode += 2; - } - break; - - /* Match a negated single one-byte character repeatedly. This is almost a - repeat of the code for a repeated single character, but I haven't found a - nice way of commoning these up that doesn't require a test of the - positive/negative option for each character match. Maybe that wouldn't add - very much to the time taken, but character matching *is* what this is all - about... 
*/ - - case OP_NOTEXACT: - case OP_NOTEXACTI: - min = max = GET2(ecode, 1); - ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; - ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATNOTCHAR; - - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - possessive = TRUE; - min = 0; - max = GET2(ecode, 1); - ecode += 1 + IMM2_SIZE; - goto REPEATNOTCHAR; - - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR); - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-byte matches. */ - - REPEATNOTCHAR: - GETCHARINCTEST(fc, ecode); - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. 
*/ - - DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, - max, (char *)eptr)); - - if (op >= OP_NOTSTARI) /* Caseless */ - { -#ifdef SUPPORT_UTF -#ifdef SUPPORT_UCP - if (utf && fc > 127) - foc = UCD_OTHERCASE(fc); -#else - if (utf && fc > 127) - foc = fc; -#endif /* SUPPORT_UCP */ - else -#endif /* SUPPORT_UTF */ - foc = TABLE_GET(fc, md->fcc, fc); - -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(d, eptr); - if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); - eptr++; - } - } - - if (min == max) continue; - - if (minimize) - { -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM28); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(d, eptr); - if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM29); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); - eptr++; - } - } - /* Control never gets here */ - } - - /* Maximize case */ - - else - { - pp = eptr; - -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(d, 
eptr, len); - if (fc == d || (unsigned int)foc == d) break; - eptr += len; - } - if (possessive) continue; - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc == *eptr || foc == *eptr) break; - eptr++; - } - if (possessive) continue; - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM31); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } - - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - } - - /* Caseful comparisons */ - - else - { -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(d, eptr); - if (fc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc == *eptr++) RRETURN(MATCH_NOMATCH); - } - } - - if (min == max) continue; - - if (minimize) - { -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM32); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(d, eptr); - if (fc == d) RRETURN(MATCH_NOMATCH); - } - } - else -#endif - /* Not UTF mode */ - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM33); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc == *eptr++) 
RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - } - - /* Maximize case */ - - else - { - pp = eptr; - -#ifdef SUPPORT_UTF - if (utf) - { - register pcre_uint32 d; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(d, eptr, len); - if (fc == d) break; - eptr += len; - } - if (possessive) continue; - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM34); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc == *eptr) break; - eptr++; - } - if (possessive) continue; - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM35); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - } - } - - RRETURN(MATCH_NOMATCH); - } - } - /* Control never gets here */ - - /* Match a single character type repeatedly; several different opcodes - share code. This is very similar to the code for single characters, but we - repeat it in the interests of efficiency. 
*/ - - case OP_TYPEEXACT: - min = max = GET2(ecode, 1); - minimize = TRUE; - ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_TYPEMINUPTO; - ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPEPOSSTAR: - possessive = TRUE; - min = 0; - max = INT_MAX; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSPLUS: - possessive = TRUE; - min = 1; - max = INT_MAX; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSQUERY: - possessive = TRUE; - min = 0; - max = 1; - ecode++; - goto REPEATTYPE; - - case OP_TYPEPOSUPTO: - possessive = TRUE; - min = 0; - max = GET2(ecode, 1); - ecode += 1 + IMM2_SIZE; - goto REPEATTYPE; - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - c = *ecode++ - OP_TYPESTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single character type matches. Note that - in UTF-8 mode, '.' matches a character of any length, but for the other - character types, the valid characters are all one-byte long. */ - - REPEATTYPE: - ctype = *ecode++; /* Code for the character type */ - -#ifdef SUPPORT_UCP - if (ctype == OP_PROP || ctype == OP_NOTPROP) - { - prop_fail_result = ctype == OP_NOTPROP; - prop_type = *ecode++; - prop_value = *ecode++; - } - else prop_type = -1; -#endif - - /* First, ensure the minimum number of matches are present. Use inline - code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Separate the UTF-8 code completely as that - is tidier. Also separate the UCP code, which can be the same for both UTF-8 - and single-bytes. 
*/ - - if (min > 0) - { -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - if (prop_fail_result) RRETURN(MATCH_NOMATCH); - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - } - break; - - case PT_LAMP: - for (i = 1; i <= min; i++) - { - int chartype; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - chartype = UCD_CHARTYPE(c); - if ((chartype == ucp_Lu || - chartype == ucp_Ll || - chartype == ucp_Lt) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_GC: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_PC: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_SC: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_ALNUM: - for (i = 1; i <= min; i++) - { - int category; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - category = UCD_CATEGORY(c); - if ((category == ucp_L || category == ucp_N) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_SPACE: /* Perl space */ - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || 
- c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_PXSPACE: /* POSIX space */ - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || - c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_WORD: - for (i = 1; i <= min; i++) - { - int category; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - category = UCD_CATEGORY(c); - if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - break; - - case PT_CLIST: - for (i = 1; i <= min; i++) - { - const pcre_uint32 *cp; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - cp = PRIV(ucd_caseless_sets) + prop_value; - for (;;) - { - if (c < *cp) - { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } - if (c == *cp++) - { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; } - } - } - break; - - /* This should not occur */ - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* Match extended Unicode sequences. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. 
*/ - - else if (ctype == OP_EXTUNI) - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - else - { - int lgb, rgb; - GETCHARINCTEST(c, eptr); - lgb = UCD_GRAPHBREAK(c); - while (eptr < md->end_subject) - { - int len = 1; - if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } - rgb = UCD_GRAPHBREAK(c); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - lgb = rgb; - eptr += len; - } - } - CHECK_PARTIAL(); - } - } - - else -#endif /* SUPPORT_UCP */ - -/* Handle all other cases when the coding is UTF-8 */ - -#ifdef SUPPORT_UTF - if (utf) switch(ctype) - { - case OP_ANY: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - if (md->partial != 0 && - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHAR(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - eptr++; - ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - - case OP_ALLANY: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr++; - ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - - case OP_ANYBYTE: - if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); - eptr += min; - break; - - case OP_ANYNL: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - - case CHAR_CR: - if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++; - break; - - case CHAR_LF: - break; - - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - } - break; - - case 
OP_NOT_HSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - switch(c) - { - HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ - default: break; - } - } - break; - - case OP_HSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - switch(c) - { - HSPACE_CASES: break; /* Byte and multibyte cases */ - default: RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_NOT_VSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - switch(c) - { - VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - } - break; - - case OP_VSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - switch(c) - { - VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINC(c, eptr); - if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHAR(eptr); - if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHAR(eptr); - if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0) - RRETURN(MATCH_NOMATCH); - eptr++; - ACROSSCHAR(eptr < md->end_subject, 
*eptr, eptr++); - } - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHAR(eptr); - if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHAR(eptr); - if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0) - RRETURN(MATCH_NOMATCH); - eptr++; - ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - { - pcre_uchar cc; - - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - cc = RAWUCHAR(eptr); - if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } /* End switch(ctype) */ - - else -#endif /* SUPPORT_UTF */ - - /* Code for the non-UTF-8 case for minimum matching of operators other - than OP_PROP and OP_NOTPROP. 
*/ - - switch(ctype) - { - case OP_ANY: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - if (md->partial != 0 && - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - eptr++; - } - break; - - case OP_ALLANY: - if (eptr > md->end_subject - min) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr += min; - break; - - case OP_ANYBYTE: - if (eptr > md->end_subject - min) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - eptr += min; - break; - - case OP_ANYNL: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); - - case CHAR_CR: - if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; - break; - - case CHAR_LF: - break; - - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case 0x2028: - case 0x2029: -#endif - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - } - break; - - case OP_NOT_HSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - switch(*eptr++) - { - default: break; - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - RRETURN(MATCH_NOMATCH); - } - } - break; - - case OP_HSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - break; - } - } - break; - - case OP_NOT_VSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= 
md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - switch(*eptr++) - { - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - RRETURN(MATCH_NOMATCH); - default: break; - } - } - break; - - case OP_VSPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - switch(*eptr++) - { - default: RRETURN(MATCH_NOMATCH); - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - break; - } - } - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - 
RRETURN(MATCH_NOMATCH); - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) - RRETURN(MATCH_NOMATCH); - eptr++; - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* If min = max, continue at the same level without recursing */ - - if (min == max) continue; - - /* If minimizing, we have to test the rest of the pattern before each - subsequent match. Again, separate the UTF-8 case for speed, and also - separate the UCP cases. */ - - if (minimize) - { -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM36); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if (prop_fail_result) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_LAMP: - for (fi = min;; fi++) - { - int chartype; - RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - chartype = UCD_CHARTYPE(c); - if ((chartype == ucp_Lu || - chartype == ucp_Ll || - chartype == ucp_Lt) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_GC: - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM38); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_PC: - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM39); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) 
RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_SC: - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM40); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_ALNUM: - for (fi = min;; fi++) - { - int category; - RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - category = UCD_CATEGORY(c); - if ((category == ucp_L || category == ucp_N) == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_SPACE: /* Perl space */ - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || - c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_PXSPACE: /* POSIX space */ - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM61); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c 
== CHAR_NL || - c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_WORD: - for (fi = min;; fi++) - { - int category; - RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - category = UCD_CATEGORY(c); - if ((category == ucp_L || - category == ucp_N || - c == CHAR_UNDERSCORE) - == prop_fail_result) - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - case PT_CLIST: - for (fi = min;; fi++) - { - const pcre_uint32 *cp; - RMATCH(eptr, ecode, offset_top, md, eptrb, RM67); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(c, eptr); - cp = PRIV(ucd_caseless_sets) + prop_value; - for (;;) - { - if (c < *cp) - { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } } - if (c == *cp++) - { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; } - } - } - /* Control never gets here */ - - /* This should never occur */ - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - - /* Match extended Unicode sequences. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. 
*/ - - else if (ctype == OP_EXTUNI) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM41); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - else - { - int lgb, rgb; - GETCHARINCTEST(c, eptr); - lgb = UCD_GRAPHBREAK(c); - while (eptr < md->end_subject) - { - int len = 1; - if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } - rgb = UCD_GRAPHBREAK(c); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - lgb = rgb; - eptr += len; - } - } - CHECK_PARTIAL(); - } - } - else -#endif /* SUPPORT_UCP */ - -#ifdef SUPPORT_UTF - if (utf) - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM42); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (ctype == OP_ANY && IS_NEWLINE(eptr)) - RRETURN(MATCH_NOMATCH); - GETCHARINC(c, eptr); - switch(ctype) - { - case OP_ANY: /* This is the non-NL case */ - if (md->partial != 0 && /* Take care with CRLF partial */ - eptr >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - break; - - case OP_ALLANY: - case OP_ANYBYTE: - break; - - case OP_ANYNL: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - case CHAR_CR: - if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++; - break; - - case CHAR_LF: - break; - - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#ifndef EBCDIC - case 0x2028: - case 0x2029: -#endif /* Not EBCDIC */ - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - break; - - case OP_NOT_HSPACE: - switch(c) - { - HSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - break; - - case OP_HSPACE: - switch(c) - { - HSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - 
} - break; - - case OP_NOT_VSPACE: - switch(c) - { - VSPACE_CASES: RRETURN(MATCH_NOMATCH); - default: break; - } - break; - - case OP_VSPACE: - switch(c) - { - VSPACE_CASES: break; - default: RRETURN(MATCH_NOMATCH); - } - break; - - case OP_NOT_DIGIT: - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_DIGIT: - if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WHITESPACE: - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_WHITESPACE: - if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WORDCHAR: - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) - RRETURN(MATCH_NOMATCH); - break; - - case OP_WORDCHAR: - if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) - RRETURN(MATCH_NOMATCH); - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - } - else -#endif - /* Not UTF mode */ - { - for (fi = min;; fi++) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM43); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (ctype == OP_ANY && IS_NEWLINE(eptr)) - RRETURN(MATCH_NOMATCH); - c = *eptr++; - switch(ctype) - { - case OP_ANY: /* This is the non-NL case */ - if (md->partial != 0 && /* Take care with CRLF partial */ - eptr >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - c == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - break; - - case OP_ALLANY: - case OP_ANYBYTE: - break; - - case OP_ANYNL: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - case CHAR_CR: - if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; - break; - - case CHAR_LF: - break; - - case CHAR_VT: - case CHAR_FF: - case CHAR_NEL: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case 
0x2028: - case 0x2029: -#endif - if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); - break; - } - break; - - case OP_NOT_HSPACE: - switch(c) - { - default: break; - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_HSPACE: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - break; - } - break; - - case OP_NOT_VSPACE: - switch(c) - { - default: break; - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - RRETURN(MATCH_NOMATCH); - } - break; - - case OP_VSPACE: - switch(c) - { - default: RRETURN(MATCH_NOMATCH); - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - break; - } - break; - - case OP_NOT_DIGIT: - if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_DIGIT: - if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WHITESPACE: - if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_WHITESPACE: - if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_NOT_WORDCHAR: - if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); - break; - - case OP_WORDCHAR: - if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - } - } - /* Control never gets here */ - } - - /* If maximizing, it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). Again, keep the - UTF-8 and UCP stuff separate. 
*/ - - else - { - pp = eptr; /* Remember where we started */ - -#ifdef SUPPORT_UCP - if (prop_type >= 0) - { - switch(prop_type) - { - case PT_ANY: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if (prop_fail_result) break; - eptr+= len; - } - break; - - case PT_LAMP: - for (i = min; i < max; i++) - { - int chartype; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - chartype = UCD_CHARTYPE(c); - if ((chartype == ucp_Lu || - chartype == ucp_Ll || - chartype == ucp_Lt) == prop_fail_result) - break; - eptr+= len; - } - break; - - case PT_GC: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_PC: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_SC: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break; - eptr+= len; - } - break; - - case PT_ALNUM: - for (i = min; i < max; i++) - { - int category; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - category = UCD_CATEGORY(c); - if ((category == ucp_L || category == ucp_N) == prop_fail_result) - break; - eptr+= len; - } - break; - - case PT_SPACE: /* Perl space */ - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if ((UCD_CATEGORY(c) 
== ucp_Z || c == CHAR_HT || c == CHAR_NL || - c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - break; - eptr+= len; - } - break; - - case PT_PXSPACE: /* POSIX space */ - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || - c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) - == prop_fail_result) - break; - eptr+= len; - } - break; - - case PT_WORD: - for (i = min; i < max; i++) - { - int category; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - category = UCD_CATEGORY(c); - if ((category == ucp_L || category == ucp_N || - c == CHAR_UNDERSCORE) == prop_fail_result) - break; - eptr+= len; - } - break; - - case PT_CLIST: - for (i = min; i < max; i++) - { - const pcre_uint32 *cp; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(c, eptr, len); - cp = PRIV(ucd_caseless_sets) + prop_value; - for (;;) - { - if (c < *cp) - { if (prop_fail_result) break; else goto GOT_MAX; } - if (c == *cp++) - { if (prop_fail_result) goto GOT_MAX; else break; } - } - eptr += len; - } - GOT_MAX: - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - - /* eptr is now past the end of the maximum run */ - - if (possessive) continue; - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - if (utf) BACKCHAR(eptr); - } - } - - /* Match extended Unicode sequences. We will get here only if the - support is in the binary; otherwise a compile-time error occurs. 
*/ - - else if (ctype == OP_EXTUNI) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - else - { - int lgb, rgb; - GETCHARINCTEST(c, eptr); - lgb = UCD_GRAPHBREAK(c); - while (eptr < md->end_subject) - { - int len = 1; - if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } - rgb = UCD_GRAPHBREAK(c); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; - lgb = rgb; - eptr += len; - } - } - CHECK_PARTIAL(); - } - - /* eptr is now past the end of the maximum run */ - - if (possessive) continue; - - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - for (;;) /* Move back over one extended */ - { - if (!utf) c = *eptr; else - { - BACKCHAR(eptr); - GETCHAR(c, eptr); - } - if (UCD_CATEGORY(c) != ucp_M) break; - eptr--; - } - } - } - - else -#endif /* SUPPORT_UCP */ - -#ifdef SUPPORT_UTF - if (utf) - { - switch(ctype) - { - case OP_ANY: - if (max < INT_MAX) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; - if (md->partial != 0 && /* Take care with CRLF partial */ - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHAR(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - eptr++; - ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - } - - /* Handle unlimited UTF-8 repeat */ - - else - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; - if (md->partial != 0 && /* Take care with CRLF partial */ - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - RAWUCHAR(eptr) == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - eptr++; - 
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - } - break; - - case OP_ALLANY: - if (max < INT_MAX) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - eptr++; - ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); - } - } - else - { - eptr = md->end_subject; /* Unlimited UTF-8 repeat */ - SCHECK_PARTIAL(); - } - break; - - /* The byte case is the same as non-UTF8 */ - - case OP_ANYBYTE: - c = max - min; - if (c > (unsigned int)(md->end_subject - eptr)) - { - eptr = md->end_subject; - SCHECK_PARTIAL(); - } - else eptr += c; - break; - - case OP_ANYNL: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c == CHAR_CR) - { - if (++eptr >= md->end_subject) break; - if (RAWUCHAR(eptr) == CHAR_LF) eptr++; - } - else - { - if (c != CHAR_LF && - (md->bsr_anycrlf || - (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL -#ifndef EBCDIC - && c != 0x2028 && c != 0x2029 -#endif /* Not EBCDIC */ - ))) - break; - eptr += len; - } - } - break; - - case OP_NOT_HSPACE: - case OP_HSPACE: - for (i = min; i < max; i++) - { - BOOL gotspace; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - switch(c) - { - HSPACE_CASES: gotspace = TRUE; break; - default: gotspace = FALSE; break; - } - if (gotspace == (ctype == OP_NOT_HSPACE)) break; - eptr += len; - } - break; - - case OP_NOT_VSPACE: - case OP_VSPACE: - for (i = min; i < max; i++) - { - BOOL gotspace; - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - switch(c) - { - VSPACE_CASES: gotspace = TRUE; break; - default: gotspace = FALSE; break; - } - if (gotspace == (ctype == OP_NOT_VSPACE)) break; - eptr += len; - } - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - 
GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; - eptr+= len; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; - eptr+= len; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; - eptr+= len; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLEN(c, eptr, len); - if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; - eptr+= len; - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - - /* eptr is now past the end of the maximum run. If possessive, we are - done (no backing up). Otherwise, match at this position; anything other - than no match is immediately returned. For nomatch, back up one - character, unless we are matching \R and the last thing matched was - \r\n, in which case, back up two bytes. 
*/ - - if (possessive) continue; - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM46); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL && - RAWUCHAR(eptr - 1) == CHAR_CR) eptr--; - } - } - else -#endif /* SUPPORT_UTF */ - /* Not UTF mode */ - { - switch(ctype) - { - case OP_ANY: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; - if (md->partial != 0 && /* Take care with CRLF partial */ - eptr + 1 >= md->end_subject && - NLBLOCK->nltype == NLTYPE_FIXED && - NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) - { - md->hitend = TRUE; - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); - } - eptr++; - } - break; - - case OP_ALLANY: - case OP_ANYBYTE: - c = max - min; - if (c > (unsigned int)(md->end_subject - eptr)) - { - eptr = md->end_subject; - SCHECK_PARTIAL(); - } - else eptr += c; - break; - - case OP_ANYNL: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - c = *eptr; - if (c == CHAR_CR) - { - if (++eptr >= md->end_subject) break; - if (*eptr == CHAR_LF) eptr++; - } - else - { - if (c != CHAR_LF && (md->bsr_anycrlf || - (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - && c != 0x2028 && c != 0x2029 -#endif - ))) break; - eptr++; - } - } - break; - - case OP_NOT_HSPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - switch(*eptr) - { - default: eptr++; break; - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - goto ENDLOOP00; - } - } - ENDLOOP00: - break; - - case OP_HSPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - switch(*eptr) - { - default: goto 
ENDLOOP01; - HSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - HSPACE_MULTIBYTE_CASES: -#endif - eptr++; break; - } - } - ENDLOOP01: - break; - - case OP_NOT_VSPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - switch(*eptr) - { - default: eptr++; break; - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - goto ENDLOOP02; - } - } - ENDLOOP02: - break; - - case OP_VSPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - switch(*eptr) - { - default: goto ENDLOOP03; - VSPACE_BYTE_CASES: -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - VSPACE_MULTIBYTE_CASES: -#endif - eptr++; break; - } - } - ENDLOOP03: - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break; - eptr++; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break; - eptr++; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break; - eptr++; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break; - eptr++; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break; - eptr++; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= 
md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break; - eptr++; - } - break; - - default: - RRETURN(PCRE_ERROR_INTERNAL); - } - - /* eptr is now past the end of the maximum run. If possessive, we are - done (no backing up). Otherwise, match at this position; anything other - than no match is immediately returned. For nomatch, back up one - character (byte), unless we are matching \R and the last thing matched - was \r\n, in which case, back up two bytes. */ - - if (possessive) continue; - while (eptr >= pp) - { - RMATCH(eptr, ecode, offset_top, md, eptrb, RM47); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - eptr--; - if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF && - eptr[-1] == CHAR_CR) eptr--; - } - } - - /* Get here if we can't make it match with any permitted repetitions */ - - RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - - /* There's been some horrible disaster. Arrival here can only mean there is - something seriously wrong in the code above or the OP_xxx definitions. */ - - default: - DPRINTF(("Unknown opcode %d\n", *ecode)); - RRETURN(PCRE_ERROR_UNKNOWN_OPCODE); - } - - /* Do not stick any code in here without much thought; it is assumed - that "continue" in the code above comes out to here to repeat the main - loop. */ - - } /* End of main loop */ -/* Control never reaches here */ - - -/* When compiling to use the heap rather than the stack for recursive calls to -match(), the RRETURN() macro jumps here. The number that is saved in -frame->Xwhere indicates which label we actually want to return to. 
*/ - -#ifdef NO_RECURSE -#define LBL(val) case val: goto L_RM##val; -HEAP_RETURN: -switch (frame->Xwhere) - { - LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) - LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) - LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) - LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) - LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) - LBL(65) LBL(66) -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - LBL(21) -#endif -#ifdef SUPPORT_UTF - LBL(16) LBL(18) LBL(20) - LBL(22) LBL(23) LBL(28) LBL(30) - LBL(32) LBL(34) LBL(42) LBL(46) -#ifdef SUPPORT_UCP - LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) - LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) -#endif /* SUPPORT_UCP */ -#endif /* SUPPORT_UTF */ - default: - DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); - return PCRE_ERROR_INTERNAL; - } -#undef LBL -#endif /* NO_RECURSE */ -} - - -/*************************************************************************** -**************************************************************************** - RECURSION IN THE match() FUNCTION - -Undefine all the macros that were defined above to handle this. 
*/ - -#ifdef NO_RECURSE -#undef eptr -#undef ecode -#undef mstart -#undef offset_top -#undef eptrb -#undef flags - -#undef callpat -#undef charptr -#undef data -#undef next -#undef pp -#undef prev -#undef saved_eptr - -#undef new_recursive - -#undef cur_is_word -#undef condition -#undef prev_is_word - -#undef ctype -#undef length -#undef max -#undef min -#undef number -#undef offset -#undef op -#undef save_capture_last -#undef save_offset1 -#undef save_offset2 -#undef save_offset3 -#undef stacksave - -#undef newptrb - -#endif - -/* These two are defined as macros in both cases */ - -#undef fc -#undef fi - -/*************************************************************************** -***************************************************************************/ - - -#ifdef NO_RECURSE -/************************************************* -* Release allocated heap frames * -*************************************************/ - -/* This function releases all the allocated frames. The base frame is on the -machine stack, and so must not be freed. - -Argument: the address of the base frame -Returns: nothing -*/ - -static void -release_match_heapframes (heapframe *frame_base) -{ -heapframe *nextframe = frame_base->Xnextframe; -while (nextframe != NULL) - { - heapframe *oldframe = nextframe; - nextframe = nextframe->Xnextframe; - (PUBL(stack_free))(oldframe); - } -} -#endif - - -/************************************************* -* Execute a Regular Expression * -*************************************************/ - -/* This function applies a compiled re to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. 
- -Arguments: - argument_re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets points to a vector of ints to be filled in with offsets - offsetcount the number of elements in the vector - -Returns: > 0 => success; value is the number of elements filled in - = 0 => success, but offsets is not big enough - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, - PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, - int offsetcount) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, - PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, - int offsetcount) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, - PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets, - int offsetcount) -#endif -{ -int rc, ocount, arg_offset_max; -int newline; -BOOL using_temporary_offsets = FALSE; -BOOL anchored; -BOOL startline; -BOOL firstline; -BOOL utf; -BOOL has_first_char = FALSE; -BOOL has_req_char = FALSE; -pcre_uchar first_char = 0; -pcre_uchar first_char2 = 0; -pcre_uchar req_char = 0; -pcre_uchar req_char2 = 0; -match_data match_block; -match_data *md = &match_block; -const pcre_uint8 *tables; -const pcre_uint8 *start_bits = NULL; -PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; -PCRE_PUCHAR end_subject; -PCRE_PUCHAR start_partial = NULL; -PCRE_PUCHAR req_char_ptr = start_match - 1; - -const pcre_study_data *study; -const REAL_PCRE *re = (const REAL_PCRE 
*)argument_re; - -#ifdef NO_RECURSE -heapframe frame_zero; -frame_zero.Xprevframe = NULL; /* Marks the top level */ -frame_zero.Xnextframe = NULL; /* None are allocated yet */ -md->match_frames_base = &frame_zero; -#endif - -/* Check for the special magic call that measures the size of the stack used -per recursive call of match(). Without the funny casting for sizeof, a Windows -compiler gave this error: "unary minus operator applied to unsigned type, -result still unsigned". Hopefully the cast fixes that. */ - -if (re == NULL && extra_data == NULL && subject == NULL && length == -999 && - start_offset == -999) -#ifdef NO_RECURSE - return -((int)sizeof(heapframe)); -#else - return match(NULL, NULL, NULL, 0, NULL, NULL, 0); -#endif - -/* Plausibility checks */ - -if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; -if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) - return PCRE_ERROR_NULL; -if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; -if (length < 0) return PCRE_ERROR_BADLENGTH; -if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; - -/* Check that the first field in the block is the magic number. If it is not, -return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to -REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which -means that the pattern is likely compiled with different endianness. */ - -if (re->magic_number != MAGIC_NUMBER) - return re->magic_number == REVERSED_MAGIC_NUMBER? - PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; -if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - -/* These two settings are used in the code for checking a UTF-8 string that -follows immediately afterwards. Other values in the md block are used only -during "normal" pcre_exec() processing, not when the JIT support is in use, -so they are set up later. */ - -/* PCRE_UTF16 has the same value as PCRE_UTF8. 
*/ -utf = md->utf = (re->options & PCRE_UTF8) != 0; -md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : - ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; - -/* Check a UTF-8 string if required. Pass back the character offset and error -code for an invalid string if a results vector is available. */ - -#ifdef SUPPORT_UTF -if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) - { - int erroroffset; - int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset); - if (errorcode != 0) - { - if (offsetcount >= 2) - { - offsets[0] = erroroffset; - offsets[1] = errorcode; - } -#if defined COMPILE_PCRE8 - return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? - PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; -#elif defined COMPILE_PCRE16 - return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)? - PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16; -#elif defined COMPILE_PCRE32 - return PCRE_ERROR_BADUTF32; -#endif - } -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - /* Check that a start_offset points to the start of a UTF character. */ - if (start_offset > 0 && start_offset < length && - NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) - return PCRE_ERROR_BADUTF8_OFFSET; -#endif - } -#endif - -/* If the pattern was successfully studied with JIT support, run the JIT -executable instead of the rest of this function. Most options must be set at -compile time for the JIT code to be usable. Fallback to the normal code path if -an unsupported flag is set. */ - -#ifdef SUPPORT_JIT -if (extra_data != NULL - && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT | - PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT - && extra_data->executable_jit != NULL - && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0) - { - rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length, - start_offset, options, offsets, offsetcount); - - /* PCRE_ERROR_NULL means that the selected normal or partial matching - mode is not compiled. In this case we simply fallback to interpreter. 
*/ - - if (rc != PCRE_ERROR_JIT_BADOPTION) return rc; - } -#endif - -/* Carry on with non-JIT matching. This information is for finding all the -numbers associated with a given name, for condition testing. */ - -md->name_table = (pcre_uchar *)re + re->name_table_offset; -md->name_count = re->name_count; -md->name_entry_size = re->name_entry_size; - -/* Fish out the optional data from the extra_data structure, first setting -the default values. */ - -study = NULL; -md->match_limit = MATCH_LIMIT; -md->match_limit_recursion = MATCH_LIMIT_RECURSION; -md->callout_data = NULL; - -/* The table pointer is always in native byte order. */ - -tables = re->tables; - -if (extra_data != NULL) - { - register unsigned int flags = extra_data->flags; - if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) - md->match_limit = extra_data->match_limit; - if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0) - md->match_limit_recursion = extra_data->match_limit_recursion; - if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) - md->callout_data = extra_data->callout_data; - if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; - } - -/* If the exec call supplied NULL for tables, use the inbuilt ones. This -is a feature that makes it possible to save compiled regex and re-use them -in other programs later. */ - -if (tables == NULL) tables = PRIV(default_tables); - -/* Set up other data */ - -anchored = ((re->options | options) & PCRE_ANCHORED) != 0; -startline = (re->flags & PCRE_STARTLINE) != 0; -firstline = (re->options & PCRE_FIRSTLINE) != 0; - -/* The code starts after the real_pcre block and the capture name table. 
*/ - -md->start_code = (const pcre_uchar *)re + re->name_table_offset + - re->name_count * re->name_entry_size; - -md->start_subject = (PCRE_PUCHAR)subject; -md->start_offset = start_offset; -md->end_subject = md->start_subject + length; -end_subject = md->end_subject; - -md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; -md->use_ucp = (re->options & PCRE_UCP) != 0; -md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -md->ignore_skip_arg = FALSE; - -/* Some options are unpacked into BOOL variables in the hope that testing -them will be faster than individual option bits. */ - -md->notbol = (options & PCRE_NOTBOL) != 0; -md->noteol = (options & PCRE_NOTEOL) != 0; -md->notempty = (options & PCRE_NOTEMPTY) != 0; -md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; - -md->hitend = FALSE; -md->mark = md->nomatch_mark = NULL; /* In case never set */ - -md->recursive = NULL; /* No recursion at top level */ -md->hasthen = (re->flags & PCRE_HASTHEN) != 0; - -md->lcc = tables + lcc_offset; -md->fcc = tables + fcc_offset; -md->ctypes = tables + ctypes_offset; - -/* Handle different \R options. */ - -switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) - { - case 0: - if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) - md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0; - else -#ifdef BSR_ANYCRLF - md->bsr_anycrlf = TRUE; -#else - md->bsr_anycrlf = FALSE; -#endif - break; - - case PCRE_BSR_ANYCRLF: - md->bsr_anycrlf = TRUE; - break; - - case PCRE_BSR_UNICODE: - md->bsr_anycrlf = FALSE; - break; - - default: return PCRE_ERROR_BADNEWLINE; - } - -/* Handle different types of newline. The three bits give eight cases. If -nothing is set at run time, whatever was used at compile time applies. */ - -switch ((((options & PCRE_NEWLINE_BITS) == 0)? 
re->options : - (pcre_uint32)options) & PCRE_NEWLINE_BITS) - { - case 0: newline = NEWLINE; break; /* Compile-time default */ - case PCRE_NEWLINE_CR: newline = CHAR_CR; break; - case PCRE_NEWLINE_LF: newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ - PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: newline = -1; break; - case PCRE_NEWLINE_ANYCRLF: newline = -2; break; - default: return PCRE_ERROR_BADNEWLINE; - } - -if (newline == -2) - { - md->nltype = NLTYPE_ANYCRLF; - } -else if (newline < 0) - { - md->nltype = NLTYPE_ANY; - } -else - { - md->nltype = NLTYPE_FIXED; - if (newline > 255) - { - md->nllen = 2; - md->nl[0] = (newline >> 8) & 255; - md->nl[1] = newline & 255; - } - else - { - md->nllen = 1; - md->nl[0] = newline; - } - } - -/* Partial matching was originally supported only for a restricted set of -regexes; from release 8.00 there are no restrictions, but the bits are still -defined (though never set). So there's no harm in leaving this code. */ - -if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) - return PCRE_ERROR_BADPARTIAL; - -/* If the expression has got more back references than the offsets supplied can -hold, we get a temporary chunk of working store to use during the matching. -Otherwise, we can use the vector supplied, rounding down its size to a multiple -of 3. */ - -ocount = offsetcount - (offsetcount % 3); -arg_offset_max = (2*ocount)/3; - -if (re->top_backref > 0 && re->top_backref >= ocount/3) - { - ocount = re->top_backref * 3 + 3; - md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int)); - if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY; - using_temporary_offsets = TRUE; - DPRINTF(("Got memory to hold back references\n")); - } -else md->offset_vector = offsets; - -md->offset_end = ocount; -md->offset_max = (2*ocount)/3; -md->offset_overflow = FALSE; -md->capture_last = -1; - -/* Reset the working variable associated with each extraction. 
These should -never be used unless previously set, but they get saved and restored, and so we -initialize them to avoid reading uninitialized locations. Also, unset the -offsets for the matched string. This is really just for tidiness with callouts, -in case they inspect these fields. */ - -if (md->offset_vector != NULL) - { - register int *iptr = md->offset_vector + ocount; - register int *iend = iptr - re->top_bracket; - if (iend < md->offset_vector + 2) iend = md->offset_vector + 2; - while (--iptr >= iend) *iptr = -1; - md->offset_vector[0] = md->offset_vector[1] = -1; - } - -/* Set up the first character to match, if available. The first_char value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. */ - -if (!anchored) - { - if ((re->flags & PCRE_FIRSTSET) != 0) - { - has_first_char = TRUE; - first_char = first_char2 = (pcre_uchar)(re->first_char); - if ((re->flags & PCRE_FCH_CASELESS) != 0) - { - first_char2 = TABLE_GET(first_char, md->fcc, first_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (utf && first_char > 127) - first_char2 = UCD_OTHERCASE(first_char); -#endif - } - } - else - if (!startline && study != NULL && - (study->flags & PCRE_STUDY_MAPPED) != 0) - start_bits = study->start_bits; - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. 
*/ - -if ((re->flags & PCRE_REQCHSET) != 0) - { - has_req_char = TRUE; - req_char = req_char2 = (pcre_uchar)(re->req_char); - if ((re->flags & PCRE_RCH_CASELESS) != 0) - { - req_char2 = TABLE_GET(req_char, md->fcc, req_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (utf && req_char > 127) - req_char2 = UCD_OTHERCASE(req_char); -#endif - } - } - - -/* ==========================================================================*/ - -/* Loop for handling unanchored repeated matching attempts; for anchored regexs -the loop runs just once. */ - -for(;;) - { - PCRE_PUCHAR save_end_subject = end_subject; - PCRE_PUCHAR new_start_match; - - /* If firstline is TRUE, the start of the match is constrained to the first - line of a multiline string. That is, the match must be before or at the first - newline. Implement this by temporarily adjusting end_subject so that we stop - scanning at a newline. If the match fails at the newline, later code breaks - this loop. */ - - if (firstline) - { - PCRE_PUCHAR t = start_match; -#ifdef SUPPORT_UTF - if (utf) - { - while (t < md->end_subject && !IS_NEWLINE(t)) - { - t++; - ACROSSCHAR(t < end_subject, *t, t++); - } - } - else -#endif - while (t < md->end_subject && !IS_NEWLINE(t)) t++; - end_subject = t; - } - - /* There are some optimizations that avoid running the match if a known - starting point is not found, or if a known later character is not present. - However, there is an option that disables these, for testing and for ensuring - that all callouts do actually occur. The option can be set in the regex by - (*NO_START_OPT) or passed in match-time options. */ - - if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) - { - /* Advance to a unique first char if there is one. 
*/ - - if (has_first_char) - { - pcre_uchar smc; - - if (first_char != first_char2) - while (start_match < end_subject && - (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2) - start_match++; - else - while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char) - start_match++; - } - - /* Or to just after a linebreak for a multiline match */ - - else if (startline) - { - if (start_match > md->start_subject + start_offset) - { -#ifdef SUPPORT_UTF - if (utf) - { - while (start_match < end_subject && !WAS_NEWLINE(start_match)) - { - start_match++; - ACROSSCHAR(start_match < end_subject, *start_match, - start_match++); - } - } - else -#endif - while (start_match < end_subject && !WAS_NEWLINE(start_match)) - start_match++; - - /* If we have just passed a CR and the newline option is ANY or ANYCRLF, - and we are now at a LF, advance the match position by one more character. - */ - - if (start_match[-1] == CHAR_CR && - (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && - start_match < end_subject && - RAWUCHARTEST(start_match) == CHAR_NL) - start_match++; - } - } - - /* Or to a non-unique first byte after study */ - - else if (start_bits != NULL) - { - while (start_match < end_subject) - { - register pcre_uint32 c = RAWUCHARTEST(start_match); -#ifndef COMPILE_PCRE8 - if (c > 255) c = 255; -#endif - if ((start_bits[c/8] & (1 << (c&7))) == 0) - { - start_match++; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 - /* In non 8-bit mode, the iteration will stop for - characters > 255 at the beginning or not stop at all. */ - if (utf) - ACROSSCHAR(start_match < end_subject, *start_match, - start_match++); -#endif - } - else break; - } - } - } /* Starting optimizations */ - - /* Restore fudged end_subject */ - - end_subject = save_end_subject; - - /* The following two optimizations are disabled for partial matching or if - disabling is explicitly requested. 
*/ - - if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) - { - /* If the pattern was studied, a minimum subject length may be set. This is - a lower bound; no actual string of that length may actually match the - pattern. Although the value is, strictly, in characters, we treat it as - bytes to avoid spending too much time in this optimization. */ - - if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && - (pcre_uint32)(end_subject - start_match) < study->minlength) - { - rc = MATCH_NOMATCH; - break; - } - - /* If req_char is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_char - must be later in the subject; otherwise the test starts at the match point. - This optimization can save a huge amount of backtracking in patterns with - nested unlimited repeats that aren't going to match. Writing separate code - for cased/caseless versions makes it go faster, as does using an - autoincrement and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end - can take a long time, and give bad performance on quite ordinary patterns. - This showed up when somebody was matching something like /^\d+C/ on a - 32-megabyte string... so we don't do this when the string is sufficiently - long. */ - - if (has_req_char && end_subject - start_match < REQ_BYTE_MAX) - { - register PCRE_PUCHAR p = start_match + (has_first_char? 1:0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. 
*/ - - if (p > req_char_ptr) - { - if (req_char != req_char2) - { - while (p < end_subject) - { - register pcre_uint32 pp = RAWUCHARINCTEST(p); - if (pp == req_char || pp == req_char2) { p--; break; } - } - } - else - { - while (p < end_subject) - { - if (RAWUCHARINCTEST(p) == req_char) { p--; break; } - } - } - - /* If we can't find the required character, break the matching loop, - forcing a match failure. */ - - if (p >= end_subject) - { - rc = MATCH_NOMATCH; - break; - } - - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ - - req_char_ptr = p; - } - } - } - -#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */ - printf(">>>> Match against: "); - pchars(start_match, end_subject - start_match, TRUE, md); - printf("\n"); -#endif - - /* OK, we can now run the match. If "hitend" is set afterwards, remember the - first starting point for which a partial match was found. */ - - md->start_match_ptr = start_match; - md->start_used_ptr = start_match; - md->match_call_count = 0; - md->match_function_type = 0; - md->end_offset_top = 0; - rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); - if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; - - switch(rc) - { - /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched - the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP - entirely. The only way we can do that is to re-do the match at the same - point, with a flag to force SKIP with an argument to be ignored. Just - treating this case as NOMATCH does not work because it does not check other - alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. 
*/ - - case MATCH_SKIP_ARG: - new_start_match = start_match; - md->ignore_skip_arg = TRUE; - break; - - /* SKIP passes back the next starting point explicitly, but if it is the - same as the match we have just done, treat it as NOMATCH. */ - - case MATCH_SKIP: - if (md->start_match_ptr != start_match) - { - new_start_match = md->start_match_ptr; - break; - } - /* Fall through */ - - /* NOMATCH and PRUNE advance by one character. THEN at this level acts - exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */ - - case MATCH_NOMATCH: - case MATCH_PRUNE: - case MATCH_THEN: - md->ignore_skip_arg = FALSE; - new_start_match = start_match + 1; -#ifdef SUPPORT_UTF - if (utf) - ACROSSCHAR(new_start_match < end_subject, *new_start_match, - new_start_match++); -#endif - break; - - /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */ - - case MATCH_COMMIT: - rc = MATCH_NOMATCH; - goto ENDLOOP; - - /* Any other return is either a match, or some kind of error. */ - - default: - goto ENDLOOP; - } - - /* Control reaches here for the various types of "no match at this point" - result. Reset the code to MATCH_NOMATCH for subsequent checking. */ - - rc = MATCH_NOMATCH; - - /* If PCRE_FIRSTLINE is set, the match must happen before or at the first - newline in the subject (though it may continue over the newline). Therefore, - if we have just failed to match, starting at a newline, do not continue. */ - - if (firstline && IS_NEWLINE(start_match)) break; - - /* Advance to new matching position */ - - start_match = new_start_match; - - /* Break the loop if the pattern is anchored or if we have passed the end of - the subject. */ - - if (anchored || start_match > end_subject) break; - - /* If we have just passed a CR and we are now at a LF, and the pattern does - not contain any explicit matches for \r or \n, and the newline option is CRLF - or ANY or ANYCRLF, advance the match position by one more character. 
In - normal matching start_match will aways be greater than the first position at - this stage, but a failed *SKIP can cause a return at the same point, which is - why the first test exists. */ - - if (start_match > (PCRE_PUCHAR)subject + start_offset && - start_match[-1] == CHAR_CR && - start_match < end_subject && - *start_match == CHAR_NL && - (re->flags & PCRE_HASCRORLF) == 0 && - (md->nltype == NLTYPE_ANY || - md->nltype == NLTYPE_ANYCRLF || - md->nllen == 2)) - start_match++; - - md->mark = NULL; /* Reset for start of next match attempt */ - } /* End of for(;;) "bumpalong" loop */ - -/* ==========================================================================*/ - -/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping -conditions is true: - -(1) The pattern is anchored or the match was failed by (*COMMIT); - -(2) We are past the end of the subject; - -(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because - this option requests that a match occur at or before the first newline in - the subject. - -When we have a match and the offset vector is big enough to deal with any -backreferences, captured substring offsets will already be set up. In the case -where we had to get some local store to hold offsets for backreference -processing, copy those that we can. In this case there need not be overflow if -certain parts of the pattern were not used, even though there are more -capturing parentheses than vector slots. 
*/ - -ENDLOOP: - -if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) - { - if (using_temporary_offsets) - { - if (arg_offset_max >= 4) - { - memcpy(offsets + 2, md->offset_vector + 2, - (arg_offset_max - 2) * sizeof(int)); - DPRINTF(("Copied offsets from temporary memory\n")); - } - if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; - DPRINTF(("Freeing temporary memory\n")); - (PUBL(free))(md->offset_vector); - } - - /* Set the return code to the number of captured strings, or 0 if there were - too many to fit into the vector. */ - - rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)? - 0 : md->end_offset_top/2; - - /* If there is space in the offset vector, set any unused pairs at the end of - the pattern to -1 for backwards compatibility. It is documented that this - happens. In earlier versions, the whole set of potential capturing offsets - was set to -1 each time round the loop, but this is handled differently now. - "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only - those at the end that need unsetting here. We can't just unset them all at - the start of the whole thing because they may get set in one branch that is - not the final matching branch. */ - - if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL) - { - register int *iptr, *iend; - int resetcount = 2 + re->top_bracket * 2; - if (resetcount > offsetcount) resetcount = offsetcount; - iptr = offsets + md->end_offset_top; - iend = offsets + resetcount; - while (iptr < iend) *iptr++ = -1; - } - - /* If there is space, set up the whole thing as substring 0. The value of - md->start_match_ptr might be modified if \K was encountered on the success - matching path. 
*/ - - if (offsetcount < 2) rc = 0; else - { - offsets[0] = (int)(md->start_match_ptr - md->start_subject); - offsets[1] = (int)(md->end_match_ptr - md->start_subject); - } - - /* Return MARK data if requested */ - - if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = (pcre_uchar *)md->mark; - DPRINTF((">>>> returning %d\n", rc)); -#ifdef NO_RECURSE - release_match_heapframes(&frame_zero); -#endif - return rc; - } - -/* Control gets here if there has been an error, or if the overall match -attempt has failed at all permitted starting positions. */ - -if (using_temporary_offsets) - { - DPRINTF(("Freeing temporary memory\n")); - (PUBL(free))(md->offset_vector); - } - -/* For anything other than nomatch or partial match, just return the code. */ - -if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) - { - DPRINTF((">>>> error: returning %d\n", rc)); -#ifdef NO_RECURSE - release_match_heapframes(&frame_zero); -#endif - return rc; - } - -/* Handle partial matches - disable any mark data */ - -if (start_partial != NULL) - { - DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); - md->mark = NULL; - if (offsetcount > 1) - { - offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject); - offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); - } - rc = PCRE_ERROR_PARTIAL; - } - -/* This is the classic nomatch case */ - -else - { - DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); - rc = PCRE_ERROR_NOMATCH; - } - -/* Return the MARK data if it has been requested. 
*/ - -if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark; -#ifdef NO_RECURSE - release_match_heapframes(&frame_zero); -#endif -return rc; -} - -/* End of pcre_exec.c */ diff --git a/deps/libmagic/pcre/pcre_fullinfo.c b/deps/libmagic/pcre/pcre_fullinfo.c deleted file mode 100644 index 02c9df4..0000000 --- a/deps/libmagic/pcre/pcre_fullinfo.c +++ /dev/null @@ -1,231 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_fullinfo(), which returns -information about a compiled pattern. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Return info about compiled pattern * -*************************************************/ - -/* This is a newer "info" function which has an extensible interface so -that additional items can be added compatibly. 
- -Arguments: - argument_re points to compiled code - extra_data points extra data, or NULL - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, - int what, void *where) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data, - int what, void *where) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data, - int what, void *where) -#endif -{ -const REAL_PCRE *re = (const REAL_PCRE *)argument_re; -const pcre_study_data *study = NULL; - -if (re == NULL || where == NULL) return PCRE_ERROR_NULL; - -if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = (const pcre_study_data *)extra_data->study_data; - -/* Check that the first field in the block is the magic number. If it is not, -return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to -REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which -means that the pattern is likely compiled with different endianness. */ - -if (re->magic_number != MAGIC_NUMBER) - return re->magic_number == REVERSED_MAGIC_NUMBER? - PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; - -/* Check that this pattern was compiled in the correct bit mode */ - -if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; - -switch (what) - { - case PCRE_INFO_OPTIONS: - *((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS; - break; - - case PCRE_INFO_SIZE: - *((size_t *)where) = re->size; - break; - - case PCRE_INFO_STUDYSIZE: - *((size_t *)where) = (study == NULL)? 
0 : study->size; - break; - - case PCRE_INFO_JITSIZE: -#ifdef SUPPORT_JIT - *((size_t *)where) = - (extra_data != NULL && - (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && - extra_data->executable_jit != NULL)? - PRIV(jit_get_size)(extra_data->executable_jit) : 0; -#else - *((size_t *)where) = 0; -#endif - break; - - case PCRE_INFO_CAPTURECOUNT: - *((int *)where) = re->top_bracket; - break; - - case PCRE_INFO_BACKREFMAX: - *((int *)where) = re->top_backref; - break; - - case PCRE_INFO_FIRSTBYTE: - *((int *)where) = - ((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char : - ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2; - break; - - case PCRE_INFO_FIRSTCHARACTER: - *((pcre_uint32 *)where) = - (re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0; - break; - - case PCRE_INFO_FIRSTCHARACTERFLAGS: - *((int *)where) = - ((re->flags & PCRE_FIRSTSET) != 0) ? 1 : - ((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0; - break; - - /* Make sure we pass back the pointer to the bit vector in the external - block, not the internal copy (with flipped integer fields). */ - - case PCRE_INFO_FIRSTTABLE: - *((const pcre_uint8 **)where) = - (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)? - ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL; - break; - - case PCRE_INFO_MINLENGTH: - *((int *)where) = - (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)? - (int)(study->minlength) : -1; - break; - - case PCRE_INFO_JIT: - *((int *)where) = extra_data != NULL && - (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && - extra_data->executable_jit != NULL; - break; - - case PCRE_INFO_LASTLITERAL: - *((int *)where) = - ((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1; - break; - - case PCRE_INFO_REQUIREDCHAR: - *((pcre_uint32 *)where) = - ((re->flags & PCRE_REQCHSET) != 0) ? 
re->req_char : 0; - break; - - case PCRE_INFO_REQUIREDCHARFLAGS: - *((int *)where) = - ((re->flags & PCRE_REQCHSET) != 0); - break; - - case PCRE_INFO_NAMEENTRYSIZE: - *((int *)where) = re->name_entry_size; - break; - - case PCRE_INFO_NAMECOUNT: - *((int *)where) = re->name_count; - break; - - case PCRE_INFO_NAMETABLE: - *((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset; - break; - - case PCRE_INFO_DEFAULT_TABLES: - *((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables)); - break; - - /* From release 8.00 this will always return TRUE because NOPARTIAL is - no longer ever set (the restrictions have been removed). */ - - case PCRE_INFO_OKPARTIAL: - *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0; - break; - - case PCRE_INFO_JCHANGED: - *((int *)where) = (re->flags & PCRE_JCHANGED) != 0; - break; - - case PCRE_INFO_HASCRORLF: - *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0; - break; - - case PCRE_INFO_MAXLOOKBEHIND: - *((int *)where) = re->max_lookbehind; - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - -/* End of pcre_fullinfo.c */ diff --git a/deps/libmagic/pcre/pcre_get.c b/deps/libmagic/pcre/pcre_get.c deleted file mode 100644 index 8094b34..0000000 --- a/deps/libmagic/pcre/pcre_get.c +++ /dev/null @@ -1,662 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains some convenience functions for extracting substrings -from the subject string after a regex match has succeeded. The original idea -for these functions came from Scott Wimer. 
*/ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Find number for named string * -*************************************************/ - -/* This function is used by the get_first_set() function below, as well -as being generally available. It assumes that names are unique. - -Arguments: - code the compiled regex - stringname the name whose number is required - -Returns: the number of the named parentheses, or a negative number - (PCRE_ERROR_NOSUBSTRING) if not found -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_stringnumber(const pcre *code, const char *stringname) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname) -#endif -{ -int rc; -int entrysize; -int top, bot; -pcre_uchar *nametable; - -#ifdef COMPILE_PCRE8 -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; -#endif -#ifdef COMPILE_PCRE16 -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; -#endif -#ifdef COMPILE_PCRE32 -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 
- return rc; -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; -#endif - -bot = 0; -while (top > bot) - { - int mid = (top + bot) / 2; - pcre_uchar *entry = nametable + entrysize*mid; - int c = STRCMP_UC_UC((pcre_uchar *)stringname, - (pcre_uchar *)(entry + IMM2_SIZE)); - if (c == 0) return GET2(entry, 0); - if (c > 0) bot = mid + 1; else top = mid; - } - -return PCRE_ERROR_NOSUBSTRING; -} - - - -/************************************************* -* Find (multiple) entries for named string * -*************************************************/ - -/* This is used by the get_first_set() function below, as well as being -generally available. It is used when duplicated names are permitted. - -Arguments: - code the compiled regex - stringname the name whose entries required - firstptr where to put the pointer to the first entry - lastptr where to put the pointer to the last entry - -Returns: the length of each entry, or a negative number - (PCRE_ERROR_NOSUBSTRING) if not found -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_stringtable_entries(const pcre *code, const char *stringname, - char **firstptr, char **lastptr) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname, - PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname, - PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr) -#endif -{ -int rc; -int entrysize; -int top, bot; -pcre_uchar *nametable, *lastentry; - -#ifdef COMPILE_PCRE8 -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, 
&nametable)) != 0) - return rc; -#endif -#ifdef COMPILE_PCRE16 -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; -#endif -#ifdef COMPILE_PCRE32 -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - return rc; -if (top <= 0) return PCRE_ERROR_NOSUBSTRING; - -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) - return rc; -if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) - return rc; -#endif - -lastentry = nametable + entrysize * (top - 1); -bot = 0; -while (top > bot) - { - int mid = (top + bot) / 2; - pcre_uchar *entry = nametable + entrysize*mid; - int c = STRCMP_UC_UC((pcre_uchar *)stringname, - (pcre_uchar *)(entry + IMM2_SIZE)); - if (c == 0) - { - pcre_uchar *first = entry; - pcre_uchar *last = entry; - while (first > nametable) - { - if (STRCMP_UC_UC((pcre_uchar *)stringname, - (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break; - first -= entrysize; - } - while (last < lastentry) - { - if (STRCMP_UC_UC((pcre_uchar *)stringname, - (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break; - last += entrysize; - } -#if defined COMPILE_PCRE8 - *firstptr = (char *)first; - *lastptr = (char *)last; -#elif defined COMPILE_PCRE16 - *firstptr = (PCRE_UCHAR16 *)first; - *lastptr = (PCRE_UCHAR16 *)last; -#elif defined COMPILE_PCRE32 - *firstptr = (PCRE_UCHAR32 *)first; - *lastptr = (PCRE_UCHAR32 *)last; -#endif - return entrysize; - } - if (c > 0) bot = mid + 1; else top = mid; - } - -return PCRE_ERROR_NOSUBSTRING; -} - - - -/************************************************* -* Find first set of multiple named strings * -*************************************************/ - -/* This function allows for duplicate 
names in the table of named substrings. -It returns the number of the first one that was set in a pattern match. - -Arguments: - code the compiled regex - stringname the name of the capturing substring - ovector the vector of matched substrings - -Returns: the number of the first that is set, - or the number of the last one if none are set, - or a negative number on error -*/ - -#if defined COMPILE_PCRE8 -static int -get_first_set(const pcre *code, const char *stringname, int *ovector) -#elif defined COMPILE_PCRE16 -static int -get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) -#elif defined COMPILE_PCRE32 -static int -get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector) -#endif -{ -const REAL_PCRE *re = (const REAL_PCRE *)code; -int entrysize; -pcre_uchar *entry; -#if defined COMPILE_PCRE8 -char *first, *last; -#elif defined COMPILE_PCRE16 -PCRE_UCHAR16 *first, *last; -#elif defined COMPILE_PCRE32 -PCRE_UCHAR32 *first, *last; -#endif - -#if defined COMPILE_PCRE8 -if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) - return pcre_get_stringnumber(code, stringname); -entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); -#elif defined COMPILE_PCRE16 -if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) - return pcre16_get_stringnumber(code, stringname); -entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last); -#elif defined COMPILE_PCRE32 -if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) - return pcre32_get_stringnumber(code, stringname); -entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last); -#endif -if (entrysize <= 0) return entrysize; -for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) - { - int n = GET2(entry, 0); - if (ovector[n*2] >= 0) return n; - } -return GET2(entry, 0); -} - - - - -/************************************************* -* Copy captured 
string to given buffer * -*************************************************/ - -/* This function copies a single captured substring into a given buffer. -Note that we use memcpy() rather than strncpy() in case there are binary zeros -in the string. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_copy_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, char *buffer, int size) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, - int stringnumber, PCRE_UCHAR16 *buffer, int size) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, - int stringnumber, PCRE_UCHAR32 *buffer, int size) -#endif -{ -int yield; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -if (size < yield + 1) return PCRE_ERROR_NOMEMORY; -memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield)); -buffer[yield] = 0; -return yield; -} - - - -/************************************************* -* Copy named captured string to given buffer * -*************************************************/ - -/* This function 
copies a single captured substring into a given buffer, -identifying it by name. If the regex permits duplicate names, the first -substring that is set is chosen. - -Arguments: - code the compiled regex - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringname the name of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_copy_named_substring(const pcre *code, const char *subject, - int *ovector, int stringcount, const char *stringname, - char *buffer, int size) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject, - int *ovector, int stringcount, PCRE_SPTR16 stringname, - PCRE_UCHAR16 *buffer, int size) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, - int *ovector, int stringcount, PCRE_SPTR32 stringname, - PCRE_UCHAR32 *buffer, int size) -#endif -{ -int n = get_first_set(code, stringname, ovector); -if (n <= 0) return n; -#if defined COMPILE_PCRE8 -return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); -#elif defined COMPILE_PCRE16 -return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size); -#elif defined COMPILE_PCRE32 -return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size); -#endif -} - - - -/************************************************* -* Copy 
all captured strings to new store * -*************************************************/ - -/* This function gets one chunk of store and builds a list of pointers and all -of the captured substrings in it. A NULL pointer is put on the end of the list. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - listptr set to point to the list of pointers - -Returns: if successful: 0 - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_substring_list(const char *subject, int *ovector, int stringcount, - const char ***listptr) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount, - PCRE_SPTR16 **listptr) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount, - PCRE_SPTR32 **listptr) -#endif -{ -int i; -int size = sizeof(pcre_uchar *); -int double_count = stringcount * 2; -pcre_uchar **stringlist; -pcre_uchar *p; - -for (i = 0; i < double_count; i += 2) - size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); - -stringlist = (pcre_uchar **)(PUBL(malloc))(size); -if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; - -#if defined COMPILE_PCRE8 -*listptr = (const char **)stringlist; -#elif defined COMPILE_PCRE16 -*listptr = (PCRE_SPTR16 *)stringlist; -#elif defined COMPILE_PCRE32 -*listptr = (PCRE_SPTR32 *)stringlist; -#endif -p = (pcre_uchar *)(stringlist + stringcount + 1); - -for (i = 0; i < double_count; i += 2) - { - int len = ovector[i+1] - ovector[i]; - memcpy(p, subject + ovector[i], IN_UCHARS(len)); - *stringlist++ = p; - p += len; - *p++ = 0; - 
} - -*stringlist = NULL; -return 0; -} - - - -/************************************************* -* Free store obtained by get_substring_list * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (PUBL(free))() -directly. - -Argument: the result of a previous pcre_get_substring_list() -Returns: nothing -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre_free_substring_list(const char **pointer) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre16_free_substring_list(PCRE_SPTR16 *pointer) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre32_free_substring_list(PCRE_SPTR32 *pointer) -#endif -{ -(PUBL(free))((void *)pointer); -} - - - -/************************************************* -* Copy captured string to new store * -*************************************************/ - -/* This function copies a single captured substring into a piece of new -store - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - stringptr where to put a pointer to the substring - -Returns: if successful: - the length of the string, not including the zero that - is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store - PCRE_ERROR_NOSUBSTRING (-7) substring not present -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, const char **stringptr) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, - int stringnumber, PCRE_SPTR16 *stringptr) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, - int stringnumber, PCRE_SPTR32 *stringptr) -#endif -{ -int yield; -pcre_uchar *substring; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); -if (substring == NULL) return PCRE_ERROR_NOMEMORY; -memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); -substring[yield] = 0; -#if defined COMPILE_PCRE8 -*stringptr = (const char *)substring; -#elif defined COMPILE_PCRE16 -*stringptr = (PCRE_SPTR16)substring; -#elif defined COMPILE_PCRE32 -*stringptr = (PCRE_SPTR32)substring; -#endif -return yield; -} - - - -/************************************************* -* Copy named captured string to new store * -*************************************************/ - -/* This function copies a single captured substring, identified by name, into -new store. 
If the regex permits duplicate names, the first substring that is -set is chosen. - -Arguments: - code the compiled regex - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringname the name of the required substring - stringptr where to put the pointer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) couldn't get memory - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_named_substring(const pcre *code, const char *subject, - int *ovector, int stringcount, const char *stringname, - const char **stringptr) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject, - int *ovector, int stringcount, PCRE_SPTR16 stringname, - PCRE_SPTR16 *stringptr) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, - int *ovector, int stringcount, PCRE_SPTR32 stringname, - PCRE_SPTR32 *stringptr) -#endif -{ -int n = get_first_set(code, stringname, ovector); -if (n <= 0) return n; -#if defined COMPILE_PCRE8 -return pcre_get_substring(subject, ovector, stringcount, n, stringptr); -#elif defined COMPILE_PCRE16 -return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); -#elif defined COMPILE_PCRE32 -return pcre32_get_substring(subject, ovector, stringcount, n, stringptr); -#endif -} - - - - -/************************************************* -* Free store obtained by get_substring * -*************************************************/ - -/* This function exists for the benefit of people 
calling PCRE from non-C -programs that can call its functions, but not free() or (PUBL(free))() -directly. - -Argument: the result of a previous pcre_get_substring() -Returns: nothing -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre_free_substring(const char *pointer) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre16_free_substring(PCRE_SPTR16 pointer) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN void PCRE_CALL_CONVENTION -pcre32_free_substring(PCRE_SPTR32 pointer) -#endif -{ -(PUBL(free))((void *)pointer); -} - -/* End of pcre_get.c */ diff --git a/deps/libmagic/pcre/pcre_globals.c b/deps/libmagic/pcre/pcre_globals.c deleted file mode 100644 index 36e6ddb..0000000 --- a/deps/libmagic/pcre/pcre_globals.c +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains global variables that are exported by the PCRE library. -PCRE is thread-clean and doesn't use any global variables in the normal sense. -However, it calls memory allocation and freeing functions via the four -indirections below, and it can optionally do callouts, using the fifth -indirection. These values can be changed by the caller, but are shared between -all threads. - -For MS Visual Studio and Symbian OS, there are problems in initializing these -variables to non-local functions. In these cases, therefore, an indirection via -a local function is used. - -Also, when compiling for Virtual Pascal, things are done differently, and -global variables are not used. 
*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -#if defined _MSC_VER || defined __SYMBIAN32__ -static void* LocalPcreMalloc(size_t aSize) - { - return malloc(aSize); - } -static void LocalPcreFree(void* aPtr) - { - free(aPtr); - } -PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL; - -#elif !defined VPCOMPAT -PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free; -PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free; -PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL; -#endif - -/* End of pcre_globals.c */ diff --git a/deps/libmagic/pcre/pcre_internal.h b/deps/libmagic/pcre/pcre_internal.h deleted file mode 100644 index f3cb001..0000000 --- a/deps/libmagic/pcre/pcre_internal.h +++ /dev/null @@ -1,2744 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This header contains definitions that are shared between the different -modules, but which are not relevant to the exported API. This includes some -functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_" -depending on the PRIV macro. */ - -#ifndef PCRE_INTERNAL_H -#define PCRE_INTERNAL_H - -/* Define PCRE_DEBUG to get debugging output on stdout. */ - -#if 0 -#define PCRE_DEBUG -#endif - -/* PCRE is compiled as an 8 bit library if it is not requested otherwise. */ - -#if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32 -#define COMPILE_PCRE8 -#endif - -/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The -"configure" script ensures this, but not everybody uses "configure". 
*/ - -#if defined SUPPORT_UCP && !(defined SUPPORT_UTF) -#define SUPPORT_UTF 1 -#endif - -/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility -reasons with existing code. */ - -#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF) -#define SUPPORT_UTF 1 -#endif - -/* Fixme: SUPPORT_UTF8 should be eventually disappear from the code. -Until then we define it if SUPPORT_UTF is defined. */ - -#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8) -#define SUPPORT_UTF8 1 -#endif - -/* We do not support both EBCDIC and UTF-8/16/32 at the same time. The "configure" -script prevents both being selected, but not everybody uses "configure". */ - -#if defined EBCDIC && defined SUPPORT_UTF -#error The use of both EBCDIC and SUPPORT_UTF is not supported. -#endif - -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements, or at least there were when I first wrote this. After -all, it had only been about 10 years then... - -It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so -be absolutely sure we get our version. */ - -#undef DPRINTF -#ifdef PCRE_DEBUG -#define DPRINTF(p) printf p -#else -#define DPRINTF(p) /* Nothing */ -#endif - - -/* Standard C headers plus the external interface definition. The only time -setjmp and stdarg are used is when NO_RECURSE is set. */ - -#include -#include -#include -#include -#include -#include - -/* Valgrind (memcheck) support */ - -#ifdef SUPPORT_VALGRIND -#include -#endif - -/* When compiling a DLL for Windows, the exported symbols have to be declared -using some MS magic. I found some useful information on this web page: -http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the -information there, using __declspec(dllexport) without "extern" we have a -definition; with "extern" we have a declaration. 
The settings here override the -setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, -which is all that is needed for applications (they just import the symbols). We -use: - - PCRE_EXP_DECL for declarations - PCRE_EXP_DEFN for definitions of exported functions - PCRE_EXP_DATA_DEFN for definitions of exported variables - -The reason for the two DEFN macros is that in non-Windows environments, one -does not want to have "extern" before variable definitions because it leads to -compiler warnings. So we distinguish between functions and variables. In -Windows, the two should always be the same. - -The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, -which is an application, but needs to import this file in order to "peek" at -internals, can #include pcre.h first to get an application's-eye view. - -In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, -special-purpose environments) might want to stick other stuff in front of -exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and -PCRE_EXP_DATA_DEFN only if they are not already set. */ - -#ifndef PCRE_EXP_DECL -# ifdef _WIN32 -# ifndef PCRE_STATIC -# define PCRE_EXP_DECL extern __declspec(dllexport) -# define PCRE_EXP_DEFN __declspec(dllexport) -# define PCRE_EXP_DATA_DEFN __declspec(dllexport) -# else -# define PCRE_EXP_DECL extern -# define PCRE_EXP_DEFN -# define PCRE_EXP_DATA_DEFN -# endif -# else -# ifdef __cplusplus -# define PCRE_EXP_DECL extern "C" -# else -# define PCRE_EXP_DECL extern -# endif -# ifndef PCRE_EXP_DEFN -# define PCRE_EXP_DEFN PCRE_EXP_DECL -# endif -# ifndef PCRE_EXP_DATA_DEFN -# define PCRE_EXP_DATA_DEFN -# endif -# endif -#endif - -/* When compiling with the MSVC compiler, it is sometimes necessary to include -a "calling convention" before exported function names. (This is secondhand -information; I know nothing about MSVC myself). For example, something like - - void __cdecl function(....) 
- -might be needed. In order so make this easy, all the exported functions have -PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not -set, we ensure here that it has no effect. */ - -#ifndef PCRE_CALL_CONVENTION -#define PCRE_CALL_CONVENTION -#endif - -/* We need to have types that specify unsigned 8, 16 and 32-bit integers. We -cannot determine these outside the compilation (e.g. by running a program as -part of "configure") because PCRE is often cross-compiled for use on other -systems. Instead we make use of the maximum sizes that are available at -preprocessor time in standard C environments. */ - -typedef unsigned char pcre_uint8; - -#if USHRT_MAX == 65535 - typedef unsigned short pcre_uint16; - typedef short pcre_int16; -#elif UINT_MAX == 65535 - typedef unsigned int pcre_uint16; - typedef int pcre_int16; -#else -# error Cannot determine a type for 16-bit unsigned integers -#endif - -#if UINT_MAX == 4294967295 - typedef unsigned int pcre_uint32; - typedef int pcre_int32; -#elif ULONG_MAX == 4294967295 - typedef unsigned long int pcre_uint32; - typedef long int pcre_int32; -#else -# error Cannot determine a type for 32-bit unsigned integers -#endif - -/* When checking for integer overflow in pcre_compile(), we need to handle -large integers. If a 64-bit integer type is available, we can use that. -Otherwise we have to cast to double, which of course requires floating point -arithmetic. Handle this by defining a macro for the appropriate type. If -stdint.h is available, include it; it may define INT64_MAX. Systems that do not -have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set -by "configure". */ - -#if defined HAVE_STDINT_H -#include -#elif defined HAVE_INTTYPES_H -#include -#endif - -#if defined INT64_MAX || defined int64_t -#define INT64_OR_DOUBLE int64_t -#else -#define INT64_OR_DOUBLE double -#endif - -/* All character handling must be done as unsigned characters. 
Otherwise there -are problems with top-bit-set characters and functions such as isspace(). -However, we leave the interface to the outside world as char * or short *, -because that should make things easier for callers. This character type is -called pcre_uchar. - -The IN_UCHARS macro multiply its argument with the byte size of the current -pcre_uchar type. Useful for memcpy and such operations, whose require the -byte size of their input/output buffers. - -The MAX_255 macro checks whether its pcre_uchar input is less than 256. - -The TABLE_GET macro is designed for accessing elements of tables whose contain -exactly 256 items. When the character is able to contain more than 256 -items, some check is needed before accessing these tables. -*/ - -#if defined COMPILE_PCRE8 - -typedef unsigned char pcre_uchar; -#define IN_UCHARS(x) (x) -#define MAX_255(c) 1 -#define TABLE_GET(c, table, default) ((table)[c]) - -#elif defined COMPILE_PCRE16 - -#if USHRT_MAX != 65535 -/* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in -pcre.h(.in) and disable (comment out) this message. */ -#error Warning: PCRE_UCHAR16 is not a 16 bit data type. -#endif - -typedef pcre_uint16 pcre_uchar; -#define UCHAR_SHIFT (1) -#define IN_UCHARS(x) ((x) << UCHAR_SHIFT) -#define MAX_255(c) ((c) <= 255u) -#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) - -#elif defined COMPILE_PCRE32 - -typedef pcre_uint32 pcre_uchar; -#define UCHAR_SHIFT (2) -#define IN_UCHARS(x) ((x) << UCHAR_SHIFT) -#define MAX_255(c) ((c) <= 255u) -#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) - -#else -#error Unsupported compiling mode -#endif /* COMPILE_PCRE[8|16|32] */ - -/* This is an unsigned int value that no character can ever have. UTF-8 -characters only go up to 0x7fffffff (though Unicode doesn't go beyond -0x0010ffff). 
*/ - -#define NOTACHAR 0xffffffff - -/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, -"any" and "anycrlf" at present). The following macros are used to package up -testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various -modules to indicate in which datablock the parameters exist, and what the -start/end of string field names are. */ - -#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ -#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ -#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ - -/* This macro checks for a newline at the given position */ - -#define IS_NEWLINE(p) \ - ((NLBLOCK->nltype != NLTYPE_FIXED)? \ - ((p) < NLBLOCK->PSEND && \ - PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ - &(NLBLOCK->nllen), utf)) \ - : \ - ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ - RAWUCHARTEST(p) == NLBLOCK->nl[0] && \ - (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1]) \ - ) \ - ) - -/* This macro checks for a newline immediately preceding the given position */ - -#define WAS_NEWLINE(p) \ - ((NLBLOCK->nltype != NLTYPE_FIXED)? \ - ((p) > NLBLOCK->PSSTART && \ - PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ - &(NLBLOCK->nllen), utf)) \ - : \ - ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ - RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ - (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ - ) \ - ) - -/* When PCRE is compiled as a C++ library, the subject pointer can be replaced -with a custom type. This makes it possible, for example, to allow pcre_exec() -to process subject strings that are discontinuous by using a smart pointer -class. It must always be possible to inspect all of the subject string in -pcre_exec() because of the way it backtracks. Two macros are required in the -normal case, for sign-unspecified and unsigned char pointers. 
The former is -used for the external interface and appears in pcre.h, which is why its name -must begin with PCRE_. */ - -#ifdef CUSTOM_SUBJECT_PTR -#define PCRE_PUCHAR CUSTOM_SUBJECT_PTR -#else -#define PCRE_PUCHAR const pcre_uchar * -#endif - -/* Include the public PCRE header and the definitions of UCP character property -values. */ - -#include "pcre.h" -#include "ucp.h" - -#ifdef COMPILE_PCRE32 -/* Assert that the public PCRE_UCHAR32 is a 32-bit type */ -typedef int __assert_pcre_uchar32_size[sizeof(PCRE_UCHAR32) == 4 ? 1 : -1]; -#endif - -/* When compiling for use with the Virtual Pascal compiler, these functions -need to have their names changed. PCRE must be compiled with the -DVPCOMPAT -option on the command line. */ - -#ifdef VPCOMPAT -#define strlen(s) _strlen(s) -#define strncmp(s1,s2,m) _strncmp(s1,s2,m) -#define memcmp(s,c,n) _memcmp(s,c,n) -#define memcpy(d,s,n) _memcpy(d,s,n) -#define memmove(d,s,n) _memmove(d,s,n) -#define memset(s,c,n) _memset(s,c,n) -#else /* VPCOMPAT */ - -/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), -define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY -is set. Otherwise, include an emulating function for those systems that have -neither (there some non-Unix environments where this is the case). 
*/ - -#ifndef HAVE_MEMMOVE -#undef memmove /* some systems may have a macro */ -#ifdef HAVE_BCOPY -#define memmove(a, b, c) bcopy(b, a, c) -#else /* HAVE_BCOPY */ -static void * -pcre_memmove(void *d, const void *s, size_t n) -{ -size_t i; -unsigned char *dest = (unsigned char *)d; -const unsigned char *src = (const unsigned char *)s; -if (dest > src) - { - dest += n; - src += n; - for (i = 0; i < n; ++i) *(--dest) = *(--src); - return (void *)dest; - } -else - { - for (i = 0; i < n; ++i) *dest++ = *src++; - return (void *)(dest - n); - } -} -#define memmove(a, b, c) pcre_memmove(a, b, c) -#endif /* not HAVE_BCOPY */ -#endif /* not HAVE_MEMMOVE */ -#endif /* not VPCOMPAT */ - - -/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored -in big-endian order) by default. These are used, for example, to link from the -start of a subpattern to its alternatives and its end. The use of 2 bytes per -offset limits the size of the compiled regex to around 64K, which is big enough -for almost everybody. However, I received a request for an even bigger limit. -For this reason, and also to make the code easier to maintain, the storing and -loading of offsets from the byte string is now handled by the macros that are -defined here. - -The macros are controlled by the value of LINK_SIZE. This defaults to 2 in -the config.h file, but can be overridden by using -D on the command line. This -is automated on Unix systems via the "configure" command. 
*/ - -#if defined COMPILE_PCRE8 - -#if LINK_SIZE == 2 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 8), \ - (a[(n)+1] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 8) | (a)[(n)+1]) - -#define MAX_PATTERN_SIZE (1 << 16) - - -#elif LINK_SIZE == 3 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 16), \ - (a[(n)+1] = (d) >> 8), \ - (a[(n)+2] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) - -#define MAX_PATTERN_SIZE (1 << 24) - - -#elif LINK_SIZE == 4 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 24), \ - (a[(n)+1] = (d) >> 16), \ - (a[(n)+2] = (d) >> 8), \ - (a[(n)+3] = (d) & 255) - -#define GET(a,n) \ - (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) - -/* Keep it positive */ -#define MAX_PATTERN_SIZE (1 << 30) - -#else -#error LINK_SIZE must be either 2, 3, or 4 -#endif - -#elif defined COMPILE_PCRE16 - -#if LINK_SIZE == 2 - -/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */ -#undef LINK_SIZE -#define LINK_SIZE 1 - -#define PUT(a,n,d) \ - (a[n] = (d)) - -#define GET(a,n) \ - (a[n]) - -#define MAX_PATTERN_SIZE (1 << 16) - -#elif LINK_SIZE == 3 || LINK_SIZE == 4 - -/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */ -#undef LINK_SIZE -#define LINK_SIZE 2 - -#define PUT(a,n,d) \ - (a[n] = (d) >> 16), \ - (a[(n)+1] = (d) & 65535) - -#define GET(a,n) \ - (((a)[n] << 16) | (a)[(n)+1]) - -/* Keep it positive */ -#define MAX_PATTERN_SIZE (1 << 30) - -#else -#error LINK_SIZE must be either 2, 3, or 4 -#endif - -#elif defined COMPILE_PCRE32 - -/* Only supported LINK_SIZE is 4 */ -/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */ -#undef LINK_SIZE -#define LINK_SIZE 1 - -#define PUT(a,n,d) \ - (a[n] = (d)) - -#define GET(a,n) \ - (a[n]) - -/* Keep it positive */ -#define MAX_PATTERN_SIZE (1 << 30) - -#else -#error Unsupported compiling mode -#endif /* COMPILE_PCRE[8|16|32] */ - -/* Convenience macro defined in terms of the others */ - -#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE - - -/* PCRE 
uses some other 2-byte quantities that do not change when the size of -offsets changes. There are used for repeat counts and for other things such as -capturing parenthesis numbers in back references. */ - -#if defined COMPILE_PCRE8 - -#define IMM2_SIZE 2 - -#define PUT2(a,n,d) \ - a[n] = (d) >> 8; \ - a[(n)+1] = (d) & 255 - -/* For reasons that I do not understand, the expression in this GET2 macro is -treated by gcc as a signed expression, even when a is declared as unsigned. It -seems that any kind of arithmetic results in a signed value. */ - -#define GET2(a,n) \ - (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) - -#elif defined COMPILE_PCRE16 - -#define IMM2_SIZE 1 - -#define PUT2(a,n,d) \ - a[n] = d - -#define GET2(a,n) \ - a[n] - -#elif defined COMPILE_PCRE32 - -#define IMM2_SIZE 1 - -#define PUT2(a,n,d) \ - a[n] = d - -#define GET2(a,n) \ - a[n] - -#else -#error Unsupported compiling mode -#endif /* COMPILE_PCRE[8|16|32] */ - -#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE - -/* The maximum length of a MARK name is currently one data unit; it may be -changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */ - -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -#define MAX_MARK ((1u << 16) - 1) -#else -#define MAX_MARK ((1u << 8) - 1) -#endif - -/* When UTF encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -character-mode, and more complicated ones for UTF characters. GETCHARLENTEST -and other macros are not used when UTF is not supported, so they are not -defined. To make sure they can never even appear when UTF support is omitted, -we don't even define them. 
*/ - -#ifndef SUPPORT_UTF - -/* #define MAX_VALUE_FOR_SINGLE_CHAR */ -/* #define HAS_EXTRALEN(c) */ -/* #define GET_EXTRALEN(c) */ -/* #define NOT_FIRSTCHAR(c) */ -#define GETCHAR(c, eptr) c = *eptr; -#define GETCHARTEST(c, eptr) c = *eptr; -#define GETCHARINC(c, eptr) c = *eptr++; -#define GETCHARINCTEST(c, eptr) c = *eptr++; -#define GETCHARLEN(c, eptr, len) c = *eptr; -#define RAWUCHAR(eptr) (*(eptr)) -#define RAWUCHARINC(eptr) (*(eptr)++) -#define RAWUCHARTEST(eptr) (*(eptr)) -#define RAWUCHARINCTEST(eptr) (*(eptr)++) -/* #define GETCHARLENTEST(c, eptr, len) */ -/* #define BACKCHAR(eptr) */ -/* #define FORWARDCHAR(eptr) */ -/* #define ACROSSCHAR(condition, eptr, action) */ - -#else /* SUPPORT_UTF */ - -/* Tests whether the code point needs extra characters to decode. */ - -#define HASUTF8EXTRALEN(c) ((c) >= 0xc0) - -/* Base macro to pick up the remaining bytes of a UTF-8 character, not -advancing the pointer. */ - -#define GETUTF8(c, eptr) \ - { \ - if ((c & 0x20) == 0) \ - c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ - else if ((c & 0x10) == 0) \ - c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ - else if ((c & 0x08) == 0) \ - c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \ - ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \ - else if ((c & 0x04) == 0) \ - c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \ - ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \ - (eptr[4] & 0x3f); \ - else \ - c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \ - ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \ - ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ - } - -/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing -the pointer. 
*/ - -#define GETUTF8INC(c, eptr) \ - { \ - if ((c & 0x20) == 0) \ - c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \ - else if ((c & 0x10) == 0) \ - { \ - c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \ - eptr += 2; \ - } \ - else if ((c & 0x08) == 0) \ - { \ - c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \ - ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ - eptr += 3; \ - } \ - else if ((c & 0x04) == 0) \ - { \ - c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \ - ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \ - (eptr[3] & 0x3f); \ - eptr += 4; \ - } \ - else \ - { \ - c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \ - ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \ - ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \ - eptr += 5; \ - } \ - } - -#if defined COMPILE_PCRE8 - -/* These macros were originally written in the form of loops that used data -from the tables whose names start with PRIV(utf8_table). They were rewritten by -a user so as not to use loops, because in some environments this gives a -significant performance advantage, and it seems never to do any harm. */ - -/* Tells the biggest code point which can be encoded as a single character. */ - -#define MAX_VALUE_FOR_SINGLE_CHAR 127 - -/* Tests whether the code point needs extra characters to decode. */ - -#define HAS_EXTRALEN(c) ((c) >= 0xc0) - -/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. -Otherwise it has an undefined behaviour. */ - -#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f]) - -/* Returns TRUE, if the given character is not the first character -of a UTF sequence. */ - -#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80) - -/* Get the next UTF-8 character, not advancing the pointer. This is called when -we know we are in UTF-8 mode. */ - -#define GETCHAR(c, eptr) \ - c = *eptr; \ - if (c >= 0xc0) GETUTF8(c, eptr); - -/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the -pointer. 
*/ - -#define GETCHARTEST(c, eptr) \ - c = *eptr; \ - if (utf && c >= 0xc0) GETUTF8(c, eptr); - -/* Get the next UTF-8 character, advancing the pointer. This is called when we -know we are in UTF-8 mode. */ - -#define GETCHARINC(c, eptr) \ - c = *eptr++; \ - if (c >= 0xc0) GETUTF8INC(c, eptr); - -/* Get the next character, testing for UTF-8 mode, and advancing the pointer. -This is called when we don't know if we are in UTF-8 mode. */ - -#define GETCHARINCTEST(c, eptr) \ - c = *eptr++; \ - if (utf && c >= 0xc0) GETUTF8INC(c, eptr); - -/* Base macro to pick up the remaining bytes of a UTF-8 character, not -advancing the pointer, incrementing the length. */ - -#define GETUTF8LEN(c, eptr, len) \ - { \ - if ((c & 0x20) == 0) \ - { \ - c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ - len++; \ - } \ - else if ((c & 0x10) == 0) \ - { \ - c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ - len += 2; \ - } \ - else if ((c & 0x08) == 0) \ - {\ - c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \ - ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \ - len += 3; \ - } \ - else if ((c & 0x04) == 0) \ - { \ - c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \ - ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \ - (eptr[4] & 0x3f); \ - len += 4; \ - } \ - else \ - {\ - c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \ - ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \ - ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ - len += 5; \ - } \ - } - -/* Get the next UTF-8 character, not advancing the pointer, incrementing length -if there are extra bytes. This is called when we know we are in UTF-8 mode. */ - -#define GETCHARLEN(c, eptr, len) \ - c = *eptr; \ - if (c >= 0xc0) GETUTF8LEN(c, eptr, len); - -/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the -pointer, incrementing length if there are extra bytes. This is called when we -do not know if we are in UTF-8 mode. 
*/ - -#define GETCHARLENTEST(c, eptr, len) \ - c = *eptr; \ - if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len); - -/* Returns the next uchar, not advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHAR(eptr) \ - (*(eptr)) - -/* Returns the next uchar, advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHARINC(eptr) \ - (*((eptr)++)) - -/* Returns the next uchar, testing for UTF mode, and not advancing the -pointer. */ - -#define RAWUCHARTEST(eptr) \ - (*(eptr)) - -/* Returns the next uchar, testing for UTF mode, advancing the -pointer. */ - -#define RAWUCHARINCTEST(eptr) \ - (*((eptr)++)) - -/* If the pointer is not at the start of a character, move it back until -it is. This is called only in UTF-8 mode - we don't put a test within the macro -because almost all calls are already within a block of UTF-8 only code. */ - -#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- - -/* Same as above, just in the other direction. */ -#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++ - -/* Same as above, but it allows a fully customizable form. */ -#define ACROSSCHAR(condition, eptr, action) \ - while((condition) && ((eptr) & 0xc0) == 0x80) action - -#elif defined COMPILE_PCRE16 - -/* Tells the biggest code point which can be encoded as a single character. */ - -#define MAX_VALUE_FOR_SINGLE_CHAR 65535 - -/* Tests whether the code point needs extra characters to decode. */ - -#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800) - -/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. -Otherwise it has an undefined behaviour. */ - -#define GET_EXTRALEN(c) 1 - -/* Returns TRUE, if the given character is not the first character -of a UTF sequence. */ - -#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00) - -/* Base macro to pick up the low surrogate of a UTF-16 character, not -advancing the pointer. 
*/ - -#define GETUTF16(c, eptr) \ - { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; } - -/* Get the next UTF-16 character, not advancing the pointer. This is called when -we know we are in UTF-16 mode. */ - -#define GETCHAR(c, eptr) \ - c = *eptr; \ - if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr); - -/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the -pointer. */ - -#define GETCHARTEST(c, eptr) \ - c = *eptr; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr); - -/* Base macro to pick up the low surrogate of a UTF-16 character, advancing -the pointer. */ - -#define GETUTF16INC(c, eptr) \ - { c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; } - -/* Get the next UTF-16 character, advancing the pointer. This is called when we -know we are in UTF-16 mode. */ - -#define GETCHARINC(c, eptr) \ - c = *eptr++; \ - if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); - -/* Get the next character, testing for UTF-16 mode, and advancing the pointer. -This is called when we don't know if we are in UTF-16 mode. */ - -#define GETCHARINCTEST(c, eptr) \ - c = *eptr++; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); - -/* Base macro to pick up the low surrogate of a UTF-16 character, not -advancing the pointer, incrementing the length. */ - -#define GETUTF16LEN(c, eptr, len) \ - { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; } - -/* Get the next UTF-16 character, not advancing the pointer, incrementing -length if there is a low surrogate. This is called when we know we are in -UTF-16 mode. */ - -#define GETCHARLEN(c, eptr, len) \ - c = *eptr; \ - if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); - -/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the -pointer, incrementing length if there is a low surrogate. This is called when -we do not know if we are in UTF-16 mode. 
*/ - -#define GETCHARLENTEST(c, eptr, len) \ - c = *eptr; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); - -/* Returns the next uchar, not advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHAR(eptr) \ - (*(eptr)) - -/* Returns the next uchar, advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHARINC(eptr) \ - (*((eptr)++)) - -/* Returns the next uchar, testing for UTF mode, and not advancing the -pointer. */ - -#define RAWUCHARTEST(eptr) \ - (*(eptr)) - -/* Returns the next uchar, testing for UTF mode, advancing the -pointer. */ - -#define RAWUCHARINCTEST(eptr) \ - (*((eptr)++)) - -/* If the pointer is not at the start of a character, move it back until -it is. This is called only in UTF-16 mode - we don't put a test within the -macro because almost all calls are already within a block of UTF-16 only -code. */ - -#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr-- - -/* Same as above, just in the other direction. */ -#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++ - -/* Same as above, but it allows a fully customizable form. */ -#define ACROSSCHAR(condition, eptr, action) \ - if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action - -#elif defined COMPILE_PCRE32 - -/* These are trivial for the 32-bit library, since all UTF-32 characters fit -into one pcre_uchar unit. */ -#define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu) -#define HAS_EXTRALEN(c) (0) -#define GET_EXTRALEN(c) (0) -#define NOT_FIRSTCHAR(c) (0) - -/* Get the next UTF-32 character, not advancing the pointer. This is called when -we know we are in UTF-32 mode. */ - -#define GETCHAR(c, eptr) \ - c = *(eptr); - -/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the -pointer. */ - -#define GETCHARTEST(c, eptr) \ - c = *(eptr); - -/* Get the next UTF-32 character, advancing the pointer. This is called when we -know we are in UTF-32 mode. 
*/ - -#define GETCHARINC(c, eptr) \ - c = *((eptr)++); - -/* Get the next character, testing for UTF-32 mode, and advancing the pointer. -This is called when we don't know if we are in UTF-32 mode. */ - -#define GETCHARINCTEST(c, eptr) \ - c = *((eptr)++); - -/* Get the next UTF-32 character, not advancing the pointer, not incrementing -length (since all UTF-32 is of length 1). This is called when we know we are in -UTF-32 mode. */ - -#define GETCHARLEN(c, eptr, len) \ - GETCHAR(c, eptr) - -/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the -pointer, not incrementing the length (since all UTF-32 is of length 1). -This is called when we do not know if we are in UTF-32 mode. */ - -#define GETCHARLENTEST(c, eptr, len) \ - GETCHARTEST(c, eptr) - -/* Returns the next uchar, not advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHAR(eptr) \ - (*(eptr)) - -/* Returns the next uchar, advancing the pointer. This is called when -we know we are in UTF mode. */ - -#define RAWUCHARINC(eptr) \ - (*((eptr)++)) - -/* Returns the next uchar, testing for UTF mode, and not advancing the -pointer. */ - -#define RAWUCHARTEST(eptr) \ - (*(eptr)) - -/* Returns the next uchar, testing for UTF mode, advancing the -pointer. */ - -#define RAWUCHARINCTEST(eptr) \ - (*((eptr)++)) - -/* If the pointer is not at the start of a character, move it back until -it is. This is called only in UTF-32 mode - we don't put a test within the -macro because almost all calls are already within a block of UTF-32 only -code. -These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */ - -#define BACKCHAR(eptr) do { } while (0) - -/* Same as above, just in the other direction. */ -#define FORWARDCHAR(eptr) do { } while (0) - -/* Same as above, but it allows a fully customizable form. 
*/ -#define ACROSSCHAR(condition, eptr, action) do { } while (0) - -#else -#error Unsupported compiling mode -#endif /* COMPILE_PCRE[8|16|32] */ - -#endif /* SUPPORT_UTF */ - -/* Tests for Unicode horizontal and vertical whitespace characters must check a -number of different values. Using a switch statement for this generates the -fastest code (no loop, no memory access), and there are several places in the -interpreter code where this happens. In order to ensure that all the case lists -remain in step, we use macros so that there is only one place where the lists -are defined. - -These values are also required as lists in pcre_compile.c when processing \h, -\H, \v and \V in a character class. The lists are defined in pcre_tables.c, but -macros that define the values are here so that all the definitions are -together. The lists must be in ascending character order, terminated by -NOTACHAR (which is 0xffffffff). - -Any changes should ensure that the various macros are kept in step with each -other. NOTE: The values also appear in pcre_jit_compile.c. 
*/ - -/* ------ ASCII/Unicode environments ------ */ - -#ifndef EBCDIC - -#define HSPACE_LIST \ - CHAR_HT, CHAR_SPACE, 0xa0, \ - 0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \ - 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \ - NOTACHAR - -#define HSPACE_MULTIBYTE_CASES \ - case 0x1680: /* OGHAM SPACE MARK */ \ - case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \ - case 0x2000: /* EN QUAD */ \ - case 0x2001: /* EM QUAD */ \ - case 0x2002: /* EN SPACE */ \ - case 0x2003: /* EM SPACE */ \ - case 0x2004: /* THREE-PER-EM SPACE */ \ - case 0x2005: /* FOUR-PER-EM SPACE */ \ - case 0x2006: /* SIX-PER-EM SPACE */ \ - case 0x2007: /* FIGURE SPACE */ \ - case 0x2008: /* PUNCTUATION SPACE */ \ - case 0x2009: /* THIN SPACE */ \ - case 0x200A: /* HAIR SPACE */ \ - case 0x202f: /* NARROW NO-BREAK SPACE */ \ - case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \ - case 0x3000 /* IDEOGRAPHIC SPACE */ - -#define HSPACE_BYTE_CASES \ - case CHAR_HT: \ - case CHAR_SPACE: \ - case 0xa0 /* NBSP */ - -#define HSPACE_CASES \ - HSPACE_BYTE_CASES: \ - HSPACE_MULTIBYTE_CASES - -#define VSPACE_LIST \ - CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR - -#define VSPACE_MULTIBYTE_CASES \ - case 0x2028: /* LINE SEPARATOR */ \ - case 0x2029 /* PARAGRAPH SEPARATOR */ - -#define VSPACE_BYTE_CASES \ - case CHAR_LF: \ - case CHAR_VT: \ - case CHAR_FF: \ - case CHAR_CR: \ - case CHAR_NEL - -#define VSPACE_CASES \ - VSPACE_BYTE_CASES: \ - VSPACE_MULTIBYTE_CASES - -/* ------ EBCDIC environments ------ */ - -#else -#define HSPACE_LIST CHAR_HT, CHAR_SPACE - -#define HSPACE_BYTE_CASES \ - case CHAR_HT: \ - case CHAR_SPACE - -#define HSPACE_CASES HSPACE_BYTE_CASES - -#ifdef EBCDIC_NL25 -#define VSPACE_LIST \ - CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR -#else -#define VSPACE_LIST \ - CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR -#endif - -#define VSPACE_BYTE_CASES \ - case CHAR_LF: \ - case CHAR_VT: \ - case CHAR_FF: \ - case CHAR_CR: \ 
- case CHAR_NEL - -#define VSPACE_CASES VSPACE_BYTE_CASES -#endif /* EBCDIC */ - -/* ------ End of whitespace macros ------ */ - - - -/* Private flags containing information about the compiled regex. They used to -live at the top end of the options word, but that got almost full, so now they -are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as -the restrictions on partial matching have been lifted. It remains for backwards -compatibility. */ - -#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */ -#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */ -#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */ -#define PCRE_FIRSTSET 0x0010 /* first_char is set */ -#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ -#define PCRE_REQCHSET 0x0040 /* req_byte is set */ -#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ -#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ -#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ -#define PCRE_JCHANGED 0x0400 /* j option used in regex */ -#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ -#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ - -#if defined COMPILE_PCRE8 -#define PCRE_MODE PCRE_MODE8 -#elif defined COMPILE_PCRE16 -#define PCRE_MODE PCRE_MODE16 -#elif defined COMPILE_PCRE32 -#define PCRE_MODE PCRE_MODE32 -#endif -#define PCRE_MODE_MASK (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32) - -/* Flags for the "extra" block produced by pcre_study(). */ - -#define PCRE_STUDY_MAPPED 0x0001 /* a map of starting chars exists */ -#define PCRE_STUDY_MINLEN 0x0002 /* a minimum length field exists */ - -/* Masks for identifying the public options that are permitted at compile -time, run time, or study time, respectively. 
*/ - -#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ - PCRE_NEWLINE_ANYCRLF) - -#define PUBLIC_COMPILE_OPTIONS \ - (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ - PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ - PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ - PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE) - -#define PUBLIC_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ - PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \ - PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE) - -#define PUBLIC_DFA_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ - PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \ - PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ - PCRE_NO_START_OPTIMIZE) - -#define PUBLIC_STUDY_OPTIONS \ - (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \ - PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED) - -#define PUBLIC_JIT_EXEC_OPTIONS \ - (PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\ - PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD) - -/* Magic number to provide a small check against being handed junk. */ - -#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ - -/* This variable is used to detect a loaded regular expression -in different endianness. */ - -#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ - -/* The maximum remaining length of subject we are prepared to search for a -req_byte match. */ - -#define REQ_BYTE_MAX 1000 - -/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in -environments where these macros are defined elsewhere. Unfortunately, there -is no way to do the same for the typedef. 
*/ - -typedef int BOOL; - -#ifndef FALSE -#define FALSE 0 -#define TRUE 1 -#endif - -/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal -character constants like '*' because the compiler would emit their EBCDIC code, -which is different from their ASCII/UTF-8 code. Instead we define macros for -the characters so that they always use the ASCII/UTF-8 code when UTF-8 support -is enabled. When UTF-8 support is not enabled, the definitions use character -literals. Both character and string versions of each character are needed, and -there are some longer strings as well. - -This means that, on EBCDIC platforms, the PCRE library can handle either -EBCDIC, or UTF-8, but not both. To support both in the same compiled library -would need different lookups depending on whether PCRE_UTF8 was set or not. -This would make it impossible to use characters in switch/case statements, -which would reduce performance. For a theoretical use (which nobody has asked -for) in a minority area (EBCDIC platforms), this is not sensible. Any -application that did need both could compile two versions of the library, using -macros to give the functions distinct names. */ - -#ifndef SUPPORT_UTF - -/* UTF-8 support is not enabled; use the platform-dependent character literals -so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF -mode. Newline characters are problematic in EBCDIC. Though it has CR and LF -characters, a common practice has been to use its NL (0x15) character as the -line terminator in C-like processing environments. However, sometimes the LF -(0x25) character is used instead, according to this Unicode document: - -http://unicode.org/standard/reports/tr13/tr13-5.html - -PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 -instead. Whichever is *not* chosen is defined as NEL. - -In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the -same code point. 
*/ - -#ifdef EBCDIC - -#ifndef EBCDIC_NL25 -#define CHAR_NL '\x15' -#define CHAR_NEL '\x25' -#define STR_NL "\x15" -#define STR_NEL "\x25" -#else -#define CHAR_NL '\x25' -#define CHAR_NEL '\x15' -#define STR_NL "\x25" -#define STR_NEL "\x15" -#endif - -#define CHAR_LF CHAR_NL -#define STR_LF STR_NL - -#define CHAR_ESC '\047' -#define CHAR_DEL '\007' -#define STR_ESC "\047" -#define STR_DEL "\007" - -#else /* Not EBCDIC */ - -/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for -compatibility. NEL is the Unicode newline character; make sure it is -a positive value. */ - -#define CHAR_LF '\n' -#define CHAR_NL CHAR_LF -#define CHAR_NEL ((unsigned char)'\x85') -#define CHAR_ESC '\033' -#define CHAR_DEL '\177' - -#define STR_LF "\n" -#define STR_NL STR_LF -#define STR_NEL "\x85" -#define STR_ESC "\033" -#define STR_DEL "\177" - -#endif /* EBCDIC */ - -/* The remaining definitions work in both environments. */ - -#define CHAR_NULL '\0' -#define CHAR_HT '\t' -#define CHAR_VT '\v' -#define CHAR_FF '\f' -#define CHAR_CR '\r' -#define CHAR_BS '\b' -#define CHAR_BEL '\a' - -#define CHAR_SPACE ' ' -#define CHAR_EXCLAMATION_MARK '!' -#define CHAR_QUOTATION_MARK '"' -#define CHAR_NUMBER_SIGN '#' -#define CHAR_DOLLAR_SIGN '$' -#define CHAR_PERCENT_SIGN '%' -#define CHAR_AMPERSAND '&' -#define CHAR_APOSTROPHE '\'' -#define CHAR_LEFT_PARENTHESIS '(' -#define CHAR_RIGHT_PARENTHESIS ')' -#define CHAR_ASTERISK '*' -#define CHAR_PLUS '+' -#define CHAR_COMMA ',' -#define CHAR_MINUS '-' -#define CHAR_DOT '.' -#define CHAR_SLASH '/' -#define CHAR_0 '0' -#define CHAR_1 '1' -#define CHAR_2 '2' -#define CHAR_3 '3' -#define CHAR_4 '4' -#define CHAR_5 '5' -#define CHAR_6 '6' -#define CHAR_7 '7' -#define CHAR_8 '8' -#define CHAR_9 '9' -#define CHAR_COLON ':' -#define CHAR_SEMICOLON ';' -#define CHAR_LESS_THAN_SIGN '<' -#define CHAR_EQUALS_SIGN '=' -#define CHAR_GREATER_THAN_SIGN '>' -#define CHAR_QUESTION_MARK '?' 
-#define CHAR_COMMERCIAL_AT '@' -#define CHAR_A 'A' -#define CHAR_B 'B' -#define CHAR_C 'C' -#define CHAR_D 'D' -#define CHAR_E 'E' -#define CHAR_F 'F' -#define CHAR_G 'G' -#define CHAR_H 'H' -#define CHAR_I 'I' -#define CHAR_J 'J' -#define CHAR_K 'K' -#define CHAR_L 'L' -#define CHAR_M 'M' -#define CHAR_N 'N' -#define CHAR_O 'O' -#define CHAR_P 'P' -#define CHAR_Q 'Q' -#define CHAR_R 'R' -#define CHAR_S 'S' -#define CHAR_T 'T' -#define CHAR_U 'U' -#define CHAR_V 'V' -#define CHAR_W 'W' -#define CHAR_X 'X' -#define CHAR_Y 'Y' -#define CHAR_Z 'Z' -#define CHAR_LEFT_SQUARE_BRACKET '[' -#define CHAR_BACKSLASH '\\' -#define CHAR_RIGHT_SQUARE_BRACKET ']' -#define CHAR_CIRCUMFLEX_ACCENT '^' -#define CHAR_UNDERSCORE '_' -#define CHAR_GRAVE_ACCENT '`' -#define CHAR_a 'a' -#define CHAR_b 'b' -#define CHAR_c 'c' -#define CHAR_d 'd' -#define CHAR_e 'e' -#define CHAR_f 'f' -#define CHAR_g 'g' -#define CHAR_h 'h' -#define CHAR_i 'i' -#define CHAR_j 'j' -#define CHAR_k 'k' -#define CHAR_l 'l' -#define CHAR_m 'm' -#define CHAR_n 'n' -#define CHAR_o 'o' -#define CHAR_p 'p' -#define CHAR_q 'q' -#define CHAR_r 'r' -#define CHAR_s 's' -#define CHAR_t 't' -#define CHAR_u 'u' -#define CHAR_v 'v' -#define CHAR_w 'w' -#define CHAR_x 'x' -#define CHAR_y 'y' -#define CHAR_z 'z' -#define CHAR_LEFT_CURLY_BRACKET '{' -#define CHAR_VERTICAL_LINE '|' -#define CHAR_RIGHT_CURLY_BRACKET '}' -#define CHAR_TILDE '~' - -#define STR_HT "\t" -#define STR_VT "\v" -#define STR_FF "\f" -#define STR_CR "\r" -#define STR_BS "\b" -#define STR_BEL "\a" - -#define STR_SPACE " " -#define STR_EXCLAMATION_MARK "!" -#define STR_QUOTATION_MARK "\"" -#define STR_NUMBER_SIGN "#" -#define STR_DOLLAR_SIGN "$" -#define STR_PERCENT_SIGN "%" -#define STR_AMPERSAND "&" -#define STR_APOSTROPHE "'" -#define STR_LEFT_PARENTHESIS "(" -#define STR_RIGHT_PARENTHESIS ")" -#define STR_ASTERISK "*" -#define STR_PLUS "+" -#define STR_COMMA "," -#define STR_MINUS "-" -#define STR_DOT "." 
-#define STR_SLASH "/" -#define STR_0 "0" -#define STR_1 "1" -#define STR_2 "2" -#define STR_3 "3" -#define STR_4 "4" -#define STR_5 "5" -#define STR_6 "6" -#define STR_7 "7" -#define STR_8 "8" -#define STR_9 "9" -#define STR_COLON ":" -#define STR_SEMICOLON ";" -#define STR_LESS_THAN_SIGN "<" -#define STR_EQUALS_SIGN "=" -#define STR_GREATER_THAN_SIGN ">" -#define STR_QUESTION_MARK "?" -#define STR_COMMERCIAL_AT "@" -#define STR_A "A" -#define STR_B "B" -#define STR_C "C" -#define STR_D "D" -#define STR_E "E" -#define STR_F "F" -#define STR_G "G" -#define STR_H "H" -#define STR_I "I" -#define STR_J "J" -#define STR_K "K" -#define STR_L "L" -#define STR_M "M" -#define STR_N "N" -#define STR_O "O" -#define STR_P "P" -#define STR_Q "Q" -#define STR_R "R" -#define STR_S "S" -#define STR_T "T" -#define STR_U "U" -#define STR_V "V" -#define STR_W "W" -#define STR_X "X" -#define STR_Y "Y" -#define STR_Z "Z" -#define STR_LEFT_SQUARE_BRACKET "[" -#define STR_BACKSLASH "\\" -#define STR_RIGHT_SQUARE_BRACKET "]" -#define STR_CIRCUMFLEX_ACCENT "^" -#define STR_UNDERSCORE "_" -#define STR_GRAVE_ACCENT "`" -#define STR_a "a" -#define STR_b "b" -#define STR_c "c" -#define STR_d "d" -#define STR_e "e" -#define STR_f "f" -#define STR_g "g" -#define STR_h "h" -#define STR_i "i" -#define STR_j "j" -#define STR_k "k" -#define STR_l "l" -#define STR_m "m" -#define STR_n "n" -#define STR_o "o" -#define STR_p "p" -#define STR_q "q" -#define STR_r "r" -#define STR_s "s" -#define STR_t "t" -#define STR_u "u" -#define STR_v "v" -#define STR_w "w" -#define STR_x "x" -#define STR_y "y" -#define STR_z "z" -#define STR_LEFT_CURLY_BRACKET "{" -#define STR_VERTICAL_LINE "|" -#define STR_RIGHT_CURLY_BRACKET "}" -#define STR_TILDE "~" - -#define STRING_ACCEPT0 "ACCEPT\0" -#define STRING_COMMIT0 "COMMIT\0" -#define STRING_F0 "F\0" -#define STRING_FAIL0 "FAIL\0" -#define STRING_MARK0 "MARK\0" -#define STRING_PRUNE0 "PRUNE\0" -#define STRING_SKIP0 "SKIP\0" -#define STRING_THEN "THEN" - -#define 
STRING_alpha0 "alpha\0" -#define STRING_lower0 "lower\0" -#define STRING_upper0 "upper\0" -#define STRING_alnum0 "alnum\0" -#define STRING_ascii0 "ascii\0" -#define STRING_blank0 "blank\0" -#define STRING_cntrl0 "cntrl\0" -#define STRING_digit0 "digit\0" -#define STRING_graph0 "graph\0" -#define STRING_print0 "print\0" -#define STRING_punct0 "punct\0" -#define STRING_space0 "space\0" -#define STRING_word0 "word\0" -#define STRING_xdigit "xdigit" - -#define STRING_DEFINE "DEFINE" - -#define STRING_CR_RIGHTPAR "CR)" -#define STRING_LF_RIGHTPAR "LF)" -#define STRING_CRLF_RIGHTPAR "CRLF)" -#define STRING_ANY_RIGHTPAR "ANY)" -#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" -#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" -#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" -#define STRING_UTF8_RIGHTPAR "UTF8)" -#define STRING_UTF16_RIGHTPAR "UTF16)" -#define STRING_UTF32_RIGHTPAR "UTF32)" -#define STRING_UTF_RIGHTPAR "UTF)" -#define STRING_UCP_RIGHTPAR "UCP)" -#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" - -#else /* SUPPORT_UTF */ - -/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This -works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode -only. 
*/ - -#define CHAR_HT '\011' -#define CHAR_VT '\013' -#define CHAR_FF '\014' -#define CHAR_CR '\015' -#define CHAR_LF '\012' -#define CHAR_NL CHAR_LF -#define CHAR_NEL ((unsigned char)'\x85') -#define CHAR_BS '\010' -#define CHAR_BEL '\007' -#define CHAR_ESC '\033' -#define CHAR_DEL '\177' - -#define CHAR_NULL '\0' -#define CHAR_SPACE '\040' -#define CHAR_EXCLAMATION_MARK '\041' -#define CHAR_QUOTATION_MARK '\042' -#define CHAR_NUMBER_SIGN '\043' -#define CHAR_DOLLAR_SIGN '\044' -#define CHAR_PERCENT_SIGN '\045' -#define CHAR_AMPERSAND '\046' -#define CHAR_APOSTROPHE '\047' -#define CHAR_LEFT_PARENTHESIS '\050' -#define CHAR_RIGHT_PARENTHESIS '\051' -#define CHAR_ASTERISK '\052' -#define CHAR_PLUS '\053' -#define CHAR_COMMA '\054' -#define CHAR_MINUS '\055' -#define CHAR_DOT '\056' -#define CHAR_SLASH '\057' -#define CHAR_0 '\060' -#define CHAR_1 '\061' -#define CHAR_2 '\062' -#define CHAR_3 '\063' -#define CHAR_4 '\064' -#define CHAR_5 '\065' -#define CHAR_6 '\066' -#define CHAR_7 '\067' -#define CHAR_8 '\070' -#define CHAR_9 '\071' -#define CHAR_COLON '\072' -#define CHAR_SEMICOLON '\073' -#define CHAR_LESS_THAN_SIGN '\074' -#define CHAR_EQUALS_SIGN '\075' -#define CHAR_GREATER_THAN_SIGN '\076' -#define CHAR_QUESTION_MARK '\077' -#define CHAR_COMMERCIAL_AT '\100' -#define CHAR_A '\101' -#define CHAR_B '\102' -#define CHAR_C '\103' -#define CHAR_D '\104' -#define CHAR_E '\105' -#define CHAR_F '\106' -#define CHAR_G '\107' -#define CHAR_H '\110' -#define CHAR_I '\111' -#define CHAR_J '\112' -#define CHAR_K '\113' -#define CHAR_L '\114' -#define CHAR_M '\115' -#define CHAR_N '\116' -#define CHAR_O '\117' -#define CHAR_P '\120' -#define CHAR_Q '\121' -#define CHAR_R '\122' -#define CHAR_S '\123' -#define CHAR_T '\124' -#define CHAR_U '\125' -#define CHAR_V '\126' -#define CHAR_W '\127' -#define CHAR_X '\130' -#define CHAR_Y '\131' -#define CHAR_Z '\132' -#define CHAR_LEFT_SQUARE_BRACKET '\133' -#define CHAR_BACKSLASH '\134' -#define CHAR_RIGHT_SQUARE_BRACKET '\135' 
-#define CHAR_CIRCUMFLEX_ACCENT '\136' -#define CHAR_UNDERSCORE '\137' -#define CHAR_GRAVE_ACCENT '\140' -#define CHAR_a '\141' -#define CHAR_b '\142' -#define CHAR_c '\143' -#define CHAR_d '\144' -#define CHAR_e '\145' -#define CHAR_f '\146' -#define CHAR_g '\147' -#define CHAR_h '\150' -#define CHAR_i '\151' -#define CHAR_j '\152' -#define CHAR_k '\153' -#define CHAR_l '\154' -#define CHAR_m '\155' -#define CHAR_n '\156' -#define CHAR_o '\157' -#define CHAR_p '\160' -#define CHAR_q '\161' -#define CHAR_r '\162' -#define CHAR_s '\163' -#define CHAR_t '\164' -#define CHAR_u '\165' -#define CHAR_v '\166' -#define CHAR_w '\167' -#define CHAR_x '\170' -#define CHAR_y '\171' -#define CHAR_z '\172' -#define CHAR_LEFT_CURLY_BRACKET '\173' -#define CHAR_VERTICAL_LINE '\174' -#define CHAR_RIGHT_CURLY_BRACKET '\175' -#define CHAR_TILDE '\176' - -#define STR_HT "\011" -#define STR_VT "\013" -#define STR_FF "\014" -#define STR_CR "\015" -#define STR_NL "\012" -#define STR_BS "\010" -#define STR_BEL "\007" -#define STR_ESC "\033" -#define STR_DEL "\177" - -#define STR_SPACE "\040" -#define STR_EXCLAMATION_MARK "\041" -#define STR_QUOTATION_MARK "\042" -#define STR_NUMBER_SIGN "\043" -#define STR_DOLLAR_SIGN "\044" -#define STR_PERCENT_SIGN "\045" -#define STR_AMPERSAND "\046" -#define STR_APOSTROPHE "\047" -#define STR_LEFT_PARENTHESIS "\050" -#define STR_RIGHT_PARENTHESIS "\051" -#define STR_ASTERISK "\052" -#define STR_PLUS "\053" -#define STR_COMMA "\054" -#define STR_MINUS "\055" -#define STR_DOT "\056" -#define STR_SLASH "\057" -#define STR_0 "\060" -#define STR_1 "\061" -#define STR_2 "\062" -#define STR_3 "\063" -#define STR_4 "\064" -#define STR_5 "\065" -#define STR_6 "\066" -#define STR_7 "\067" -#define STR_8 "\070" -#define STR_9 "\071" -#define STR_COLON "\072" -#define STR_SEMICOLON "\073" -#define STR_LESS_THAN_SIGN "\074" -#define STR_EQUALS_SIGN "\075" -#define STR_GREATER_THAN_SIGN "\076" -#define STR_QUESTION_MARK "\077" -#define STR_COMMERCIAL_AT "\100" 
-#define STR_A "\101" -#define STR_B "\102" -#define STR_C "\103" -#define STR_D "\104" -#define STR_E "\105" -#define STR_F "\106" -#define STR_G "\107" -#define STR_H "\110" -#define STR_I "\111" -#define STR_J "\112" -#define STR_K "\113" -#define STR_L "\114" -#define STR_M "\115" -#define STR_N "\116" -#define STR_O "\117" -#define STR_P "\120" -#define STR_Q "\121" -#define STR_R "\122" -#define STR_S "\123" -#define STR_T "\124" -#define STR_U "\125" -#define STR_V "\126" -#define STR_W "\127" -#define STR_X "\130" -#define STR_Y "\131" -#define STR_Z "\132" -#define STR_LEFT_SQUARE_BRACKET "\133" -#define STR_BACKSLASH "\134" -#define STR_RIGHT_SQUARE_BRACKET "\135" -#define STR_CIRCUMFLEX_ACCENT "\136" -#define STR_UNDERSCORE "\137" -#define STR_GRAVE_ACCENT "\140" -#define STR_a "\141" -#define STR_b "\142" -#define STR_c "\143" -#define STR_d "\144" -#define STR_e "\145" -#define STR_f "\146" -#define STR_g "\147" -#define STR_h "\150" -#define STR_i "\151" -#define STR_j "\152" -#define STR_k "\153" -#define STR_l "\154" -#define STR_m "\155" -#define STR_n "\156" -#define STR_o "\157" -#define STR_p "\160" -#define STR_q "\161" -#define STR_r "\162" -#define STR_s "\163" -#define STR_t "\164" -#define STR_u "\165" -#define STR_v "\166" -#define STR_w "\167" -#define STR_x "\170" -#define STR_y "\171" -#define STR_z "\172" -#define STR_LEFT_CURLY_BRACKET "\173" -#define STR_VERTICAL_LINE "\174" -#define STR_RIGHT_CURLY_BRACKET "\175" -#define STR_TILDE "\176" - -#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0" -#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0" -#define STRING_F0 STR_F "\0" -#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0" -#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0" -#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0" -#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0" -#define STRING_THEN STR_T STR_H STR_E STR_N - -#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" -#define STRING_lower0 
STR_l STR_o STR_w STR_e STR_r "\0" -#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0" -#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0" -#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" -#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0" -#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0" -#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0" -#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0" -#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0" -#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0" -#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0" -#define STRING_word0 STR_w STR_o STR_r STR_d "\0" -#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t - -#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E - -#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS -#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS -#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS -#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS -#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS -#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS -#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS -#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS -#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS -#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS -#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS -#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS -#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS - -#endif /* SUPPORT_UTF */ - -/* Escape items 
that are just an encoding of a particular data value. */ - -#ifndef ESC_e -#define ESC_e CHAR_ESC -#endif - -#ifndef ESC_f -#define ESC_f CHAR_FF -#endif - -#ifndef ESC_n -#define ESC_n CHAR_LF -#endif - -#ifndef ESC_r -#define ESC_r CHAR_CR -#endif - -/* We can't officially use ESC_t because it is a POSIX reserved identifier -(presumably because of all the others like size_t). */ - -#ifndef ESC_tee -#define ESC_tee CHAR_HT -#endif - -/* Codes for different types of Unicode property */ - -#define PT_ANY 0 /* Any property - matches all chars */ -#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ -#define PT_GC 2 /* Specified general characteristic (e.g. L) */ -#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script (e.g. Han) */ -#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ -#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ -#define PT_WORD 8 /* Word - L plus N plus underscore */ -#define PT_CLIST 9 /* Pseudo-property: match character list */ - -/* Flag bits and data types for the extended class (OP_XCLASS) for classes that -contain characters with values greater than 255. */ - -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ - -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ -#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ - -/* These are escaped items that aren't just an encoding of a particular data -value such as \n. They must have non-zero values, as check_escape() returns -0 for a data character. Also, they must appear in the same order as in the opcode -definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it -corresponds to "." 
in DOTALL mode rather than an escape sequence. It is also -used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In -non-DOTALL mode, "." behaves like \N. - -The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. -when PCRE_UCP is set and replacement of \d etc by \p sequences is required. -They must be contiguous, and remain in order so that the replacements can be -looked up from a table. - -Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in -check_escape(). There are two tests in the code for an escape -greater than ESC_b and less than ESC_Z to detect the types that may be -repeated. These are the types that consume characters. If any new escapes are -put in between that don't consume a character, that code will have to change. -*/ - -enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, - ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, - ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, - ESC_E, ESC_Q, ESC_g, ESC_k, - ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; - -/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to -OP_EOD must correspond in order to the list of escapes immediately above. - -*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions -that follow must also be updated to match. There are also tables called -"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. 
*/ - -enum { - OP_END, /* 0 End of pattern */ - - /* Values corresponding to backslashed metacharacters */ - - OP_SOD, /* 1 Start of data: \A */ - OP_SOM, /* 2 Start of match (subject + offset): \G */ - OP_SET_SOM, /* 3 Set start of match (\K) */ - OP_NOT_WORD_BOUNDARY, /* 4 \B */ - OP_WORD_BOUNDARY, /* 5 \b */ - OP_NOT_DIGIT, /* 6 \D */ - OP_DIGIT, /* 7 \d */ - OP_NOT_WHITESPACE, /* 8 \S */ - OP_WHITESPACE, /* 9 \s */ - OP_NOT_WORDCHAR, /* 10 \W */ - OP_WORDCHAR, /* 11 \w */ - - OP_ANY, /* 12 Match any character except newline (\N) */ - OP_ALLANY, /* 13 Match any character */ - OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */ - OP_NOTPROP, /* 15 \P (not Unicode property) */ - OP_PROP, /* 16 \p (Unicode property) */ - OP_ANYNL, /* 17 \R (any newline sequence) */ - OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */ - OP_HSPACE, /* 19 \h (horizontal whitespace) */ - OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */ - OP_VSPACE, /* 21 \v (vertical whitespace) */ - OP_EXTUNI, /* 22 \X (extended Unicode sequence */ - OP_EODN, /* 23 End of data or \n at end of data (\Z) */ - OP_EOD, /* 24 End of data (\z) */ - - OP_CIRC, /* 25 Start of line - not multiline */ - OP_CIRCM, /* 26 Start of line - multiline */ - OP_DOLL, /* 27 End of line - not multiline */ - OP_DOLLM, /* 28 End of line - multiline */ - OP_CHAR, /* 29 Match one character, casefully */ - OP_CHARI, /* 30 Match one character, caselessly */ - OP_NOT, /* 31 Match one character, not the given one, casefully */ - OP_NOTI, /* 32 Match one character, not the given one, caselessly */ - - /* The following sets of 13 opcodes must always be kept in step because - the offset from the first one is used to generate the others. */ - - /**** Single characters, caseful, must precede the caseless ones ****/ - - OP_STAR, /* 33 The maximizing and minimizing versions of */ - OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ - OP_PLUS, /* 35 the minimizing one second. 
*/ - OP_MINPLUS, /* 36 */ - OP_QUERY, /* 37 */ - OP_MINQUERY, /* 38 */ - - OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/ - OP_MINUPTO, /* 40 */ - OP_EXACT, /* 41 Exactly n matches */ - - OP_POSSTAR, /* 42 Possessified star, caseful */ - OP_POSPLUS, /* 43 Possessified plus, caseful */ - OP_POSQUERY, /* 44 Posesssified query, caseful */ - OP_POSUPTO, /* 45 Possessified upto, caseful */ - - /**** Single characters, caseless, must follow the caseful ones */ - - OP_STARI, /* 46 */ - OP_MINSTARI, /* 47 */ - OP_PLUSI, /* 48 */ - OP_MINPLUSI, /* 49 */ - OP_QUERYI, /* 50 */ - OP_MINQUERYI, /* 51 */ - - OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */ - OP_MINUPTOI, /* 53 */ - OP_EXACTI, /* 54 */ - - OP_POSSTARI, /* 55 Possessified star, caseless */ - OP_POSPLUSI, /* 56 Possessified plus, caseless */ - OP_POSQUERYI, /* 57 Posesssified query, caseless */ - OP_POSUPTOI, /* 58 Possessified upto, caseless */ - - /**** The negated ones must follow the non-negated ones, and match them ****/ - /**** Negated single character, caseful; must precede the caseless ones ****/ - - OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ - OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ - OP_NOTPLUS, /* 61 the minimizing one second. They must be in */ - OP_NOTMINPLUS, /* 62 exactly the same order as those above. 
*/ - OP_NOTQUERY, /* 63 */ - OP_NOTMINQUERY, /* 64 */ - - OP_NOTUPTO, /* 65 From 0 to n matches, caseful */ - OP_NOTMINUPTO, /* 66 */ - OP_NOTEXACT, /* 67 Exactly n matches */ - - OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */ - OP_NOTPOSPLUS, /* 69 */ - OP_NOTPOSQUERY, /* 70 */ - OP_NOTPOSUPTO, /* 71 */ - - /**** Negated single character, caseless; must follow the caseful ones ****/ - - OP_NOTSTARI, /* 72 */ - OP_NOTMINSTARI, /* 73 */ - OP_NOTPLUSI, /* 74 */ - OP_NOTMINPLUSI, /* 75 */ - OP_NOTQUERYI, /* 76 */ - OP_NOTMINQUERYI, /* 77 */ - - OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */ - OP_NOTMINUPTOI, /* 79 */ - OP_NOTEXACTI, /* 80 Exactly n matches */ - - OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */ - OP_NOTPOSPLUSI, /* 82 */ - OP_NOTPOSQUERYI, /* 83 */ - OP_NOTPOSUPTOI, /* 84 */ - - /**** Character types ****/ - - OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ - OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ - OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */ - OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */ - OP_TYPEQUERY, /* 89 */ - OP_TYPEMINQUERY, /* 90 */ - - OP_TYPEUPTO, /* 91 From 0 to n matches */ - OP_TYPEMINUPTO, /* 92 */ - OP_TYPEEXACT, /* 93 Exactly n matches */ - - OP_TYPEPOSSTAR, /* 94 Possessified versions */ - OP_TYPEPOSPLUS, /* 95 */ - OP_TYPEPOSQUERY, /* 96 */ - OP_TYPEPOSUPTO, /* 97 */ - - /* These are used for character classes and back references; only the - first six are the same as the sets above. */ - - OP_CRSTAR, /* 98 The maximizing and minimizing versions of */ - OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */ - OP_CRPLUS, /* 100 the minimizing one second. These codes must */ - OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */ - OP_CRQUERY, /* 102 */ - OP_CRMINQUERY, /* 103 */ - - OP_CRRANGE, /* 104 These are different to the three sets above. 
*/ - OP_CRMINRANGE, /* 105 */ - - /* End of quantifier opcodes */ - - OP_CLASS, /* 106 Match a character class, chars < 256 only */ - OP_NCLASS, /* 107 Same, but the bitmap was created from a negative - class - the difference is relevant only when a - character > 255 is encountered. */ - OP_XCLASS, /* 108 Extended class for handling > 255 chars within the - class. This does both positive and negative. */ - OP_REF, /* 109 Match a back reference, casefully */ - OP_REFI, /* 110 Match a back reference, caselessly */ - OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 112 Call out to external function if provided */ - - OP_ALT, /* 113 Start of alternation */ - OP_KET, /* 114 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 115 These two must remain together and in this */ - OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */ - OP_KETRPOS, /* 117 Possessive unlimited repeat. */ - - /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four - asserts must remain in order. */ - - OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */ - OP_ASSERT, /* 119 Positive lookahead */ - OP_ASSERT_NOT, /* 120 Negative lookahead */ - OP_ASSERTBACK, /* 121 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */ - - /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately - after the assertions, with ONCE first, as there's a test for >= ONCE for a - subpattern that isn't an assertion. The POS versions must immediately follow - the non-POS versions in each case. 
*/ - - OP_ONCE, /* 123 Atomic group, contains captures */ - OP_ONCE_NC, /* 124 Atomic group containing no captures */ - OP_BRA, /* 125 Start of non-capturing bracket */ - OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 127 Start of capturing bracket */ - OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 129 Conditional group */ - - /* These five must follow the previous five, in the same order. There's a - check for >= SBRA to distinguish the two sets. */ - - OP_SBRA, /* 130 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 132 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 134 Conditional group, check empty */ - - /* The next two pairs must (respectively) be kept together. */ - - OP_CREF, /* 135 Used to hold a capture number as condition */ - OP_NCREF, /* 136 Same, but generated by a name reference*/ - OP_RREF, /* 137 Used to hold a recursion number as condition */ - OP_NRREF, /* 138 Same, but generated by a name reference*/ - OP_DEF, /* 139 The DEFINE condition */ - - OP_BRAZERO, /* 140 These two must remain together and in this */ - OP_BRAMINZERO, /* 141 order. 
*/ - OP_BRAPOSZERO, /* 142 */ - - /* These are backtracking control verbs */ - - OP_MARK, /* 143 always has an argument */ - OP_PRUNE, /* 144 */ - OP_PRUNE_ARG, /* 145 same, but with argument */ - OP_SKIP, /* 146 */ - OP_SKIP_ARG, /* 147 same, but with argument */ - OP_THEN, /* 148 */ - OP_THEN_ARG, /* 149 same, but with argument */ - OP_COMMIT, /* 150 */ - - /* These are forced failure and success verbs */ - - OP_FAIL, /* 151 */ - OP_ACCEPT, /* 152 */ - OP_ASSERT_ACCEPT, /* 153 Used inside assertions */ - OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */ - - /* This is used to skip a subpattern with a {0} quantifier */ - - OP_SKIPZERO, /* 155 */ - - /* This is not an opcode, but is used to check that tables indexed by opcode - are the correct length, in order to catch updating errors - there have been - some in the past. */ - - OP_TABLE_LENGTH -}; - -/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro -definitions that follow must also be updated to match. There are also tables -called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ - - -/* This macro defines textual names for all the opcodes. These are used only -for debugging, and some of them are only partial names. The macro is referenced -only in pcre_printint.c, which fills out the full names in many cases (and in -some cases doesn't actually use these names at all). 
*/ - -#define OP_NAME_LIST \ - "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ - "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ - "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ - "extuni", "\\Z", "\\z", \ - "^", "^", "$", "$", "char", "chari", "not", "noti", \ - "*", "*?", "+", "+?", "?", "??", \ - "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", \ - "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", \ - "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", \ - "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ - "*+","++", "?+", "{", \ - "*", "*?", "+", "+?", "?", "??", "{", "{", \ - "class", "nclass", "xclass", "Ref", "Refi", \ - "Recurse", "Callout", \ - "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ - "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ - "Once", "Once_NC", \ - "Bra", "BraPos", "CBra", "CBraPos", \ - "Cond", \ - "SBra", "SBraPos", "SCBra", "SCBraPos", \ - "SCond", \ - "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \ - "Brazero", "Braminzero", "Braposzero", \ - "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ - "*THEN", "*THEN", "*COMMIT", "*FAIL", \ - "*ACCEPT", "*ASSERT_ACCEPT", \ - "Close", "Skip zero" - - -/* This macro defines the length of fixed length operations in the compiled -regex. The lengths are used when searching for specific things, and also in the -debugging printing of a compiled regex. We use a macro so that it can be -defined close to the definitions of the opcodes themselves. - -As things have been extended, some of these are no longer fixed lenths, but are -minima instead. For example, the length of a single-character repeat may vary -in UTF-8 mode. The code that uses this table must know about such things. 
*/ - -#define OP_LENGTHS \ - 1, /* End */ \ - 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ - 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ - 1, 1, 1, /* Any, AllAny, Anybyte */ \ - 3, 3, /* \P, \p */ \ - 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ - 1, /* \X */ \ - 1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \ - 2, /* Char - the minimum length */ \ - 2, /* Chari - the minimum length */ \ - 2, /* not */ \ - 2, /* noti */ \ - /* Positive single-char repeats ** These are */ \ - 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ - 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ - 2+IMM2_SIZE, /* exact */ \ - 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ - 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ - 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ - 2+IMM2_SIZE, /* exact I */ \ - 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ - /* Negative single-char repeats - only for chars < 256 */ \ - 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ - 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ - 2+IMM2_SIZE, /* NOT exact */ \ - 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ - 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ - 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ - 2+IMM2_SIZE, /* NOT exact I */ \ - 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ - /* Positive type repeats */ \ - 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ - 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ - 2+IMM2_SIZE, /* Type exact */ \ - 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ - /* Character class & ref repeats */ \ - 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ \ - 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ - 1+(32/sizeof(pcre_uchar)), /* CLASS */ \ - 1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ - 0, /* XCLASS - variable length */ \ - 1+IMM2_SIZE, /* REF */ \ - 1+IMM2_SIZE, /* REFI */ \ - 1+LINK_SIZE, /* RECURSE */ \ - 2+2*LINK_SIZE, /* CALLOUT */ \ - 1+LINK_SIZE, /* Alt */ \ - 1+LINK_SIZE, /* Ket */ \ - 1+LINK_SIZE, /* KetRmax */ \ - 1+LINK_SIZE, /* KetRmin */ \ - 1+LINK_SIZE, /* KetRpos */ \ - 1+LINK_SIZE, /* Reverse */ \ - 1+LINK_SIZE, /* Assert */ \ - 1+LINK_SIZE, /* Assert not */ \ - 1+LINK_SIZE, /* Assert behind */ \ - 1+LINK_SIZE, /* Assert behind not */ \ - 1+LINK_SIZE, /* ONCE */ \ - 1+LINK_SIZE, /* ONCE_NC */ \ - 1+LINK_SIZE, /* BRA */ \ - 1+LINK_SIZE, /* BRAPOS */ \ - 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ - 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ - 1+LINK_SIZE, /* COND */ \ - 1+LINK_SIZE, /* SBRA */ \ - 1+LINK_SIZE, /* SBRAPOS */ \ - 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ - 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ - 1+LINK_SIZE, /* SCOND */ \ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \ - 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \ - 1, /* DEF */ \ - 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ - 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ - 1, 3, /* SKIP, SKIP_ARG */ \ - 1, 3, /* THEN, THEN_ARG */ \ - 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ - 1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ - -/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" -condition. */ - -#define RREF_ANY 0xffff - -/* Compile time error code numbers. They are given names so that they can more -easily be tracked. When a new number is added, the table called eint in -pcreposix.c must be updated. 
*/ - -enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, - ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, - ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, - ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, - ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, - ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, - ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT }; - -/* JIT compiling modes. The function list is indexed by them. */ -enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, - JIT_NUMBER_OF_COMPILE_MODES }; - -/* The real format of the start of the pcre block; the index of names and the -code vector run on as long as necessary after the end. We store an explicit -offset to the name table so that if a regex is compiled on one host, saved, and -then run on another where the size of pointers is different, all might still -be well. For the case of compiled-on-4 and run-on-8, we include an extra -pointer that is always NULL. For future-proofing, a few dummy fields were -originally included - even though you can never get this planning right - but -there is only one left now. - -NOTE NOTE NOTE: -Because people can now save and re-use compiled patterns, any additions to this -structure should be made at the end, and something earlier (e.g. a new -flag in the options or one of the dummy fields) should indicate that the new -fields are present. Currently PCRE always sets the dummy fields to zero. -NOTE NOTE NOTE -*/ - -#if defined COMPILE_PCRE8 -#define REAL_PCRE real_pcre -#elif defined COMPILE_PCRE16 -#define REAL_PCRE real_pcre16 -#elif defined COMPILE_PCRE32 -#define REAL_PCRE real_pcre32 -#endif - -/* It is necessary to fork the struct for 32 bit, since it needs to use - * pcre_uchar for first_char and req_char. 
Can't put an ifdef inside the - * typedef since pcretest needs access to the struct of the 8-, 16- - * and 32-bit variants. */ - -typedef struct real_pcre8_or_16 { - pcre_uint32 magic_number; - pcre_uint32 size; /* Total that was malloced */ - pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ - pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ - pcre_uint16 top_bracket; /* Highest numbered group */ - pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint16 first_char; /* Starting character */ - pcre_uint16 req_char; /* This character must be seen */ - pcre_uint16 name_table_offset; /* Offset to name table that follows */ - pcre_uint16 name_entry_size; /* Size of any name items */ - pcre_uint16 name_count; /* Number of name items */ - pcre_uint16 ref_count; /* Reference count */ - const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - const pcre_uint8 *nullpad; /* NULL padding */ -} real_pcre8_or_16; - -typedef struct real_pcre8_or_16 real_pcre; -typedef struct real_pcre8_or_16 real_pcre16; - -typedef struct real_pcre32 { - pcre_uint32 magic_number; - pcre_uint32 size; /* Total that was malloced */ - pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ - pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ - pcre_uint16 top_bracket; /* Highest numbered group */ - pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint32 first_char; /* Starting character */ - pcre_uint32 req_char; /* This character must be seen */ - pcre_uint16 name_table_offset; /* Offset to name table that follows */ - pcre_uint16 name_entry_size; /* Size of any name items */ - pcre_uint16 name_count; /* Number of name items */ - pcre_uint16 ref_count; /* Reference count */ - pcre_uint16 dummy1; /* for later expansion */ - pcre_uint16 dummy2; /* for later expansion */ - const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - void *nullpad; /* 
for later expansion */ -} real_pcre32; - -/* Assert that the size of REAL_PCRE is divisible by 8 */ -typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1]; - -/* Needed in pcretest to access some fields in the real_pcre* structures - * directly. They're unified for 8/16/32 bits since the structs only differ - * after these fields; if that ever changes, need to fork those defines into - * 8/16 and 32 bit versions. */ -#define REAL_PCRE_MAGIC(re) (((REAL_PCRE*)re)->magic_number) -#define REAL_PCRE_SIZE(re) (((REAL_PCRE*)re)->size) -#define REAL_PCRE_OPTIONS(re) (((REAL_PCRE*)re)->options) -#define REAL_PCRE_FLAGS(re) (((REAL_PCRE*)re)->flags) - -/* The format of the block used to store data from pcre_study(). The same -remark (see NOTE above) about extending this structure applies. */ - -typedef struct pcre_study_data { - pcre_uint32 size; /* Total that was malloced */ - pcre_uint32 flags; /* Private flags */ - pcre_uint8 start_bits[32]; /* Starting char bits */ - pcre_uint32 minlength; /* Minimum subject length */ -} pcre_study_data; - -/* Structure for building a chain of open capturing subpatterns during -compiling, so that instructions to close them can be compiled when (*ACCEPT) is -encountered. This is also used to identify subpatterns that contain recursive -back references to themselves, so that they can be made atomic. */ - -typedef struct open_capitem { - struct open_capitem *next; /* Chain link */ - pcre_uint16 number; /* Capture number */ - pcre_uint16 flag; /* Set TRUE if recursive back ref */ -} open_capitem; - -/* Structure for passing "static" information around between the functions -doing the compiling, so that they are thread-safe. 
*/ - -typedef struct compile_data { - const pcre_uint8 *lcc; /* Points to lower casing table */ - const pcre_uint8 *fcc; /* Points to case-flipping table */ - const pcre_uint8 *cbits; /* Points to character type table */ - const pcre_uint8 *ctypes; /* Points to table of type maps */ - const pcre_uchar *start_workspace;/* The start of working space */ - const pcre_uchar *start_code; /* The start of the compiled code */ - const pcre_uchar *start_pattern; /* The start of the pattern */ - const pcre_uchar *end_pattern; /* The end of the pattern */ - open_capitem *open_caps; /* Chain of open capture items */ - pcre_uchar *hwm; /* High watermark of workspace */ - pcre_uchar *name_table; /* The name/number table */ - int names_found; /* Number of entries so far */ - int name_entry_size; /* Size of each entry */ - int workspace_size; /* Size of workspace */ - unsigned int bracount; /* Count of capturing parens as we compile */ - int final_bracount; /* Saved value after first pass */ - int max_lookbehind; /* Maximum lookbehind (characters) */ - int top_backref; /* Maximum back reference */ - unsigned int backref_map; /* Bitmap of low back refs */ - int assert_depth; /* Depth of nested assertions */ - int external_options; /* External (initial) options */ - int external_flags; /* External flag bits to be set */ - int req_varyopt; /* "After variable item" flag for reqbyte */ - BOOL had_accept; /* (*ACCEPT) encountered */ - BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ - BOOL check_lookbehind; /* Lookbehinds need later checking */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - pcre_uchar nl[4]; /* Newline string when fixed length */ -} compile_data; - -/* Structure for maintaining a chain of pointers to the currently incomplete -branches, for testing for left recursion while compiling. 
*/ - -typedef struct branch_chain { - struct branch_chain *outer; - pcre_uchar *current_branch; -} branch_chain; - -/* Structure for items in a linked list that represents an explicit recursive -call within the pattern; used by pcre_exec(). */ - -typedef struct recursion_info { - struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ - unsigned int group_num; /* Number of group that was called */ - int *offset_save; /* Pointer to start of saved offsets */ - int saved_max; /* Number of saved offsets */ - PCRE_PUCHAR subject_position; /* Position at start of recursion */ -} recursion_info; - -/* A similar structure for pcre_dfa_exec(). */ - -typedef struct dfa_recursion_info { - struct dfa_recursion_info *prevrec; - int group_num; - PCRE_PUCHAR subject_position; -} dfa_recursion_info; - -/* Structure for building a chain of data for holding the values of the subject -pointer at the start of each subpattern, so as to detect when an empty string -has been matched by a subpattern - to break infinite loops; used by -pcre_exec(). */ - -typedef struct eptrblock { - struct eptrblock *epb_prev; - PCRE_PUCHAR epb_saved_eptr; -} eptrblock; - - -/* Structure for passing "static" information around between the functions -doing traditional NFA matching, so that they are thread-safe. 
*/ - -typedef struct match_data { - unsigned long int match_call_count; /* As it says */ - unsigned long int match_limit; /* As it says */ - unsigned long int match_limit_recursion; /* As it says */ - int *offset_vector; /* Offset vector */ - int offset_end; /* One past the end */ - int offset_max; /* The maximum usable for return data */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - int name_count; /* Number of names in name table */ - int name_entry_size; /* Size of entry in names table */ - pcre_uchar *name_table; /* Table of names */ - pcre_uchar nl[4]; /* Newline string when fixed */ - const pcre_uint8 *lcc; /* Points to lower casing table */ - const pcre_uint8 *fcc; /* Points to case-flipping table */ - const pcre_uint8 *ctypes; /* Points to table of type maps */ - BOOL offset_overflow; /* Set if too many extractions */ - BOOL notbol; /* NOTBOL flag */ - BOOL noteol; /* NOTEOL flag */ - BOOL utf; /* UTF-8 / UTF-16 flag */ - BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ - BOOL use_ucp; /* PCRE_UCP flag */ - BOOL endonly; /* Dollar not before final \n */ - BOOL notempty; /* Empty string match not wanted */ - BOOL notempty_atstart; /* Empty string match at start not wanted */ - BOOL hitend; /* Hit the end of the subject at some point */ - BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ - BOOL hasthen; /* Pattern contains (*THEN) */ - BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ - const pcre_uchar *start_code; /* For use when recursing */ - PCRE_PUCHAR start_subject; /* Start of the subject string */ - PCRE_PUCHAR end_subject; /* End of the subject string */ - PCRE_PUCHAR start_match_ptr; /* Start of matched string */ - PCRE_PUCHAR end_match_ptr; /* Subject position at end match */ - PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */ - int partial; /* PARTIAL options */ - int end_offset_top; /* Highwater mark at end of match */ - int capture_last; /* Most recent capture number */ - int 
start_offset; /* The start offset value */ - int match_function_type; /* Set for certain special calls of MATCH() */ - eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ - int eptrn; /* Next free eptrblock */ - recursion_info *recursive; /* Linked list of recursion data */ - void *callout_data; /* To pass back to callouts */ - const pcre_uchar *mark; /* Mark pointer to pass back on success */ - const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */ - const pcre_uchar *once_target; /* Where to back up to for atomic groups */ -#ifdef NO_RECURSE - void *match_frames_base; /* For remembering malloc'd frames */ -#endif -} match_data; - -/* A similar structure is used for the same purpose by the DFA matching -functions. */ - -typedef struct dfa_match_data { - const pcre_uchar *start_code; /* Start of the compiled pattern */ - const pcre_uchar *start_subject ; /* Start of the subject string */ - const pcre_uchar *end_subject; /* End of subject string */ - const pcre_uchar *start_used_ptr; /* Earliest consulted character */ - const pcre_uint8 *tables; /* Character tables */ - int start_offset; /* The start offset value */ - int moptions; /* Match options */ - int poptions; /* Pattern options */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - pcre_uchar nl[4]; /* Newline string when fixed */ - void *callout_data; /* To pass back to callouts */ - dfa_recursion_info *recursive; /* Linked list of recursion data */ -} dfa_match_data; - -/* Bit definitions for entries in the pcre_ctypes table. */ - -#define ctype_space 0x01 -#define ctype_letter 0x02 -#define ctype_digit 0x04 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphanumeric or '_' */ -#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. 
*/ - -#define cbit_space 0 /* [:space:] or \s */ -#define cbit_xdigit 32 /* [:xdigit:] */ -#define cbit_digit 64 /* [:digit:] or \d */ -#define cbit_upper 96 /* [:upper:] */ -#define cbit_lower 128 /* [:lower:] */ -#define cbit_word 160 /* [:word:] or \w */ -#define cbit_graph 192 /* [:graph:] */ -#define cbit_print 224 /* [:print:] */ -#define cbit_punct 256 /* [:punct:] */ -#define cbit_cntrl 288 /* [:cntrl:] */ -#define cbit_length 320 /* Length of the cbits table */ - -/* Offsets of the various tables from the base tables pointer, and -total length. */ - -#define lcc_offset 0 -#define fcc_offset 256 -#define cbits_offset 512 -#define ctypes_offset (cbits_offset + cbit_length) -#define tables_length (ctypes_offset + 256) - -/* Internal function and data prefixes. */ - -#if defined COMPILE_PCRE8 -#ifndef PUBL -#define PUBL(name) pcre_##name -#endif -#ifndef PRIV -#define PRIV(name) _pcre_##name -#endif -#elif defined COMPILE_PCRE16 -#ifndef PUBL -#define PUBL(name) pcre16_##name -#endif -#ifndef PRIV -#define PRIV(name) _pcre16_##name -#endif -#elif defined COMPILE_PCRE32 -#ifndef PUBL -#define PUBL(name) pcre32_##name -#endif -#ifndef PRIV -#define PRIV(name) _pcre32_##name -#endif -#else -#error Unsupported compiling mode -#endif /* COMPILE_PCRE[8|16|32] */ - -/* Layout of the UCP type table that translates property names into types and -codes. Each entry used to point directly to a name, but to reduce the number of -relocations in shared libraries, it now has an offset into a single string -instead. */ - -typedef struct { - pcre_uint16 name_offset; - pcre_uint16 type; - pcre_uint16 value; -} ucp_type_table; - - -/* Internal shared data tables. These are tables that are used by more than one -of the exported public functions. They have to be "external" in the C sense, -but are not part of the PCRE public API. The data for these tables is in the -pcre_tables.c module. 
*/ - -#ifdef COMPILE_PCRE8 -extern const int PRIV(utf8_table1)[]; -extern const int PRIV(utf8_table1_size); -extern const int PRIV(utf8_table2)[]; -extern const int PRIV(utf8_table3)[]; -extern const pcre_uint8 PRIV(utf8_table4)[]; -#endif /* COMPILE_PCRE8 */ - -extern const char PRIV(utt_names)[]; -extern const ucp_type_table PRIV(utt)[]; -extern const int PRIV(utt_size); - -extern const pcre_uint8 PRIV(OP_lengths)[]; -extern const pcre_uint8 PRIV(default_tables)[]; - -extern const pcre_uint32 PRIV(hspace_list)[]; -extern const pcre_uint32 PRIV(vspace_list)[]; - - -/* Internal shared functions. These are functions that are used by more than -one of the exported public functions. They have to be "external" in the C -sense, but are not part of the PCRE public API. */ - -/* String comparison functions. */ -#if defined COMPILE_PCRE8 - -#define STRCMP_UC_UC(str1, str2) \ - strcmp((char *)(str1), (char *)(str2)) -#define STRCMP_UC_C8(str1, str2) \ - strcmp((char *)(str1), (str2)) -#define STRNCMP_UC_UC(str1, str2, num) \ - strncmp((char *)(str1), (char *)(str2), (num)) -#define STRNCMP_UC_C8(str1, str2, num) \ - strncmp((char *)(str1), (str2), (num)) -#define STRLEN_UC(str) strlen((const char *)str) - -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - -extern int PRIV(strcmp_uc_uc)(const pcre_uchar *, - const pcre_uchar *); -extern int PRIV(strcmp_uc_c8)(const pcre_uchar *, - const char *); -extern int PRIV(strncmp_uc_uc)(const pcre_uchar *, - const pcre_uchar *, unsigned int num); -extern int PRIV(strncmp_uc_c8)(const pcre_uchar *, - const char *, unsigned int num); -extern unsigned int PRIV(strlen_uc)(const pcre_uchar *str); - -#define STRCMP_UC_UC(str1, str2) \ - PRIV(strcmp_uc_uc)((str1), (str2)) -#define STRCMP_UC_C8(str1, str2) \ - PRIV(strcmp_uc_c8)((str1), (str2)) -#define STRNCMP_UC_UC(str1, str2, num) \ - PRIV(strncmp_uc_uc)((str1), (str2), (num)) -#define STRNCMP_UC_C8(str1, str2, num) \ - PRIV(strncmp_uc_c8)((str1), (str2), (num)) -#define 
STRLEN_UC(str) PRIV(strlen_uc)(str) - -#endif /* COMPILE_PCRE[8|16|32] */ - -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - -#define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2) -#define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2) - -#elif defined COMPILE_PCRE32 - -extern int PRIV(strcmp_uc_uc_utf)(const pcre_uchar *, - const pcre_uchar *); -extern int PRIV(strcmp_uc_c8_utf)(const pcre_uchar *, - const char *); - -#define STRCMP_UC_UC_TEST(str1, str2) \ - (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2))) -#define STRCMP_UC_C8_TEST(str1, str2) \ - (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2))) - -#endif /* COMPILE_PCRE[8|16|32] */ - -extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int); -extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, - int *, BOOL); -extern unsigned int PRIV(ord2utf)(pcre_uint32, pcre_uchar *); -extern int PRIV(valid_utf)(PCRE_PUCHAR, int, int *); -extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, - int *, BOOL); -extern BOOL PRIV(xclass)(pcre_uint32, const pcre_uchar *, BOOL); - -#ifdef SUPPORT_JIT -extern void PRIV(jit_compile)(const REAL_PCRE *, - PUBL(extra) *, int); -extern int PRIV(jit_exec)(const PUBL(extra) *, - const pcre_uchar *, int, int, int, int *, int); -extern void PRIV(jit_free)(void *); -extern int PRIV(jit_get_size)(void *); -extern const char* PRIV(jit_get_target)(void); -#endif - -/* Unicode character database (UCD) */ - -typedef struct { - pcre_uint8 script; /* ucp_Arabic, etc. */ - pcre_uint8 chartype; /* ucp_Cc, etc. (general categories) */ - pcre_uint8 gbprop; /* ucp_gbControl, etc. 
(grapheme break property) */ - pcre_uint8 caseset; /* offset to multichar other cases or zero */ - pcre_int32 other_case; /* offset to other case, or zero if none */ -} ucd_record; - -extern const pcre_uint32 PRIV(ucd_caseless_sets)[]; -extern const ucd_record PRIV(ucd_records)[]; -extern const pcre_uint8 PRIV(ucd_stage1)[]; -extern const pcre_uint16 PRIV(ucd_stage2)[]; -extern const pcre_uint32 PRIV(ucp_gentype)[]; -extern const pcre_uint32 PRIV(ucp_gbtable)[]; -#ifdef SUPPORT_JIT -extern const int PRIV(ucp_typerange)[]; -#endif - -#ifdef SUPPORT_UCP -/* UCD access macros */ - -#define UCD_BLOCK_SIZE 128 -#define GET_UCD(ch) (PRIV(ucd_records) + \ - PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \ - UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE]) - -#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype -#define UCD_SCRIPT(ch) GET_UCD(ch)->script -#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] -#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop -#define UCD_CASESET(ch) GET_UCD(ch)->caseset -#define UCD_OTHERCASE(ch) ((pcre_uint32)((int)ch + (int)(GET_UCD(ch)->other_case))) - -#endif /* SUPPORT_UCP */ - -#endif - -/* End of pcre_internal.h */ diff --git a/deps/libmagic/pcre/pcre_jit_compile.c b/deps/libmagic/pcre/pcre_jit_compile.c deleted file mode 100644 index cc9f097..0000000 --- a/deps/libmagic/pcre/pcre_jit_compile.c +++ /dev/null @@ -1,8560 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - - The machine code generator part (this module) was written by Zoltan Herczeg - Copyright (c) 2010-2012 - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
------------------------------------------------------------------------------ -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -#if defined SUPPORT_JIT - -/* All-in-one: Since we use the JIT compiler only from here, -we just include it. This way we don't need to touch the build -system files. */ - -#define SLJIT_MALLOC(size) (PUBL(malloc))(size) -#define SLJIT_FREE(ptr) (PUBL(free))(ptr) -#define SLJIT_CONFIG_AUTO 1 -#define SLJIT_CONFIG_STATIC 1 -#define SLJIT_VERBOSE 0 -#define SLJIT_DEBUG 0 - -#include "sljit/sljitLir.c" - -#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED -#error Unsupported architecture -#endif - -/* Allocate memory for the regex stack on the real machine stack. -Fast, but limited size. */ -#define MACHINE_STACK_SIZE 32768 - -/* Growth rate for stack allocated by the OS. Should be the multiply -of page size. */ -#define STACK_GROWTH_RATE 8192 - -/* Enable to check that the allocation could destroy temporaries. */ -#if defined SLJIT_DEBUG && SLJIT_DEBUG -#define DESTROY_REGISTERS 1 -#endif - -/* -Short summary about the backtracking mechanism empolyed by the jit code generator: - -The code generator follows the recursive nature of the PERL compatible regular -expressions. The basic blocks of regular expressions are condition checkers -whose execute different commands depending on the result of the condition check. -The relationship between the operators can be horizontal (concatenation) and -vertical (sub-expression) (See struct backtrack_common for more details). - - 'ab' - 'a' and 'b' regexps are concatenated - 'a+' - 'a' is the sub-expression of the '+' operator - -The condition checkers are boolean (true/false) checkers. Machine code is generated -for the checker itself and for the actions depending on the result of the checker. -The 'true' case is called as the matching path (expected path), and the other is called as -the 'backtrack' path. 
Branch instructions are expesive for all CPUs, so we avoid taken -branches on the matching path. - - Greedy star operator (*) : - Matching path: match happens. - Backtrack path: match failed. - Non-greedy star operator (*?) : - Matching path: no need to perform a match. - Backtrack path: match is required. - -The following example shows how the code generated for a capturing bracket -with two alternatives. Let A, B, C, D are arbirary regular expressions, and -we have the following regular expression: - - A(B|C)D - -The generated code will be the following: - - A matching path - '(' matching path (pushing arguments to the stack) - B matching path - ')' matching path (pushing arguments to the stack) - D matching path - return with successful match - - D backtrack path - ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") - B backtrack path - C expected path - jump to D matching path - C backtrack path - A backtrack path - - Notice, that the order of backtrack code paths are the opposite of the fast - code paths. In this way the topmost value on the stack is always belong - to the current backtrack code path. The backtrack path must check - whether there is a next alternative. If so, it needs to jump back to - the matching path eventually. Otherwise it needs to clear out its own stack - frame and continue the execution on the backtrack code paths. -*/ - -/* -Saved stack frames: - -Atomic blocks and asserts require reloading the values of private data -when the backtrack mechanism performed. Because of OP_RECURSE, the data -are not necessarly known in compile time, thus we need a dynamic restore -mechanism. - -The stack frames are stored in a chain list, and have the following format: -([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] - -Thus we can restore the private data to a particular point in the stack. -*/ - -typedef struct jit_arguments { - /* Pointers first. 
*/ - struct sljit_stack *stack; - const pcre_uchar *str; - const pcre_uchar *begin; - const pcre_uchar *end; - int *offsets; - pcre_uchar *uchar_ptr; - pcre_uchar *mark_ptr; - /* Everything else after. */ - int offsetcount; - int calllimit; - pcre_uint8 notbol; - pcre_uint8 noteol; - pcre_uint8 notempty; - pcre_uint8 notempty_atstart; -} jit_arguments; - -typedef struct executable_functions { - void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; - PUBL(jit_callback) callback; - void *userdata; - pcre_uint32 top_bracket; - sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; -} executable_functions; - -typedef struct jump_list { - struct sljit_jump *jump; - struct jump_list *next; -} jump_list; - -enum stub_types { stack_alloc }; - -typedef struct stub_list { - enum stub_types type; - int data; - struct sljit_jump *start; - struct sljit_label *quit; - struct stub_list *next; -} stub_list; - -typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); - -/* The following structure is the key data type for the recursive -code generator. It is allocated by compile_matchingpath, and contains -the aguments for compile_backtrackingpath. Must be the first member -of its descendants. */ -typedef struct backtrack_common { - /* Concatenation stack. */ - struct backtrack_common *prev; - jump_list *nextbacktracks; - /* Internal stack (for component operators). */ - struct backtrack_common *top; - jump_list *topbacktracks; - /* Opcode pointer. */ - pcre_uchar *cc; -} backtrack_common; - -typedef struct assert_backtrack { - backtrack_common common; - jump_list *condfailed; - /* Less than 0 (-1) if a frame is not needed. */ - int framesize; - /* Points to our private memory word on the stack. */ - int private_data_ptr; - /* For iterators. */ - struct sljit_label *matchingpath; -} assert_backtrack; - -typedef struct bracket_backtrack { - backtrack_common common; - /* Where to coninue if an alternative is successfully matched. 
*/ - struct sljit_label *alternative_matchingpath; - /* For rmin and rmax iterators. */ - struct sljit_label *recursive_matchingpath; - /* For greedy ? operator. */ - struct sljit_label *zero_matchingpath; - /* Contains the branches of a failed condition. */ - union { - /* Both for OP_COND, OP_SCOND. */ - jump_list *condfailed; - assert_backtrack *assert; - /* For OP_ONCE. -1 if not needed. */ - int framesize; - } u; - /* Points to our private memory word on the stack. */ - int private_data_ptr; -} bracket_backtrack; - -typedef struct bracketpos_backtrack { - backtrack_common common; - /* Points to our private memory word on the stack. */ - int private_data_ptr; - /* Reverting stack is needed. */ - int framesize; - /* Allocated stack size. */ - int stacksize; -} bracketpos_backtrack; - -typedef struct braminzero_backtrack { - backtrack_common common; - struct sljit_label *matchingpath; -} braminzero_backtrack; - -typedef struct iterator_backtrack { - backtrack_common common; - /* Next iteration. */ - struct sljit_label *matchingpath; -} iterator_backtrack; - -typedef struct recurse_entry { - struct recurse_entry *next; - /* Contains the function entry. */ - struct sljit_label *entry; - /* Collects the calls until the function is not created. */ - jump_list *calls; - /* Points to the starting opcode. */ - int start; -} recurse_entry; - -typedef struct recurse_backtrack { - backtrack_common common; -} recurse_backtrack; - -#define MAX_RANGE_SIZE 6 - -typedef struct compiler_common { - struct sljit_compiler *compiler; - pcre_uchar *start; - - /* Maps private data offset to each opcode. */ - int *private_data_ptrs; - /* Tells whether the capturing bracket is optimized. */ - pcre_uint8 *optimized_cbracket; - /* Starting offset of private data for capturing brackets. */ - int cbraptr; - /* OVector starting point. Must be divisible by 2. */ - int ovector_start; - /* Last known position of the requested byte. */ - int req_char_ptr; - /* Head of the last recursion. 
*/ - int recursive_head; - /* First inspected character for partial matching. */ - int start_used_ptr; - /* Starting pointer for partial soft matches. */ - int hit_start; - /* End pointer of the first line. */ - int first_line_end; - /* Points to the marked string. */ - int mark_ptr; - - /* Flipped and lower case tables. */ - const pcre_uint8 *fcc; - sljit_sw lcc; - /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ - int mode; - /* Newline control. */ - int nltype; - int newline; - int bsr_nltype; - /* Dollar endonly. */ - int endonly; - BOOL has_set_som; - /* Tables. */ - sljit_sw ctypes; - int digits[2 + MAX_RANGE_SIZE]; - /* Named capturing brackets. */ - sljit_uw name_table; - sljit_sw name_count; - sljit_sw name_entry_size; - - /* Labels and jump lists. */ - struct sljit_label *partialmatchlabel; - struct sljit_label *quitlabel; - struct sljit_label *acceptlabel; - stub_list *stubs; - recurse_entry *entries; - recurse_entry *currententry; - jump_list *partialmatch; - jump_list *quit; - jump_list *accept; - jump_list *calllimit; - jump_list *stackalloc; - jump_list *revertframes; - jump_list *wordboundary; - jump_list *anynewline; - jump_list *hspace; - jump_list *vspace; - jump_list *casefulcmp; - jump_list *caselesscmp; - BOOL jscript_compat; -#ifdef SUPPORT_UTF - BOOL utf; -#ifdef SUPPORT_UCP - BOOL use_ucp; -#endif -#ifndef COMPILE_PCRE32 - jump_list *utfreadchar; -#endif -#ifdef COMPILE_PCRE8 - jump_list *utfreadtype8; -#endif -#endif /* SUPPORT_UTF */ -#ifdef SUPPORT_UCP - jump_list *getucd; -#endif -} compiler_common; - -/* For byte_sequence_compare. 
*/ - -typedef struct compare_context { - int length; - int sourcereg; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - int ucharptr; - union { - sljit_si asint; - sljit_uh asushort; -#if defined COMPILE_PCRE8 - sljit_ub asbyte; - sljit_ub asuchars[4]; -#elif defined COMPILE_PCRE16 - sljit_uh asuchars[2]; -#elif defined COMPILE_PCRE32 - sljit_ui asuchars[1]; -#endif - } c; - union { - sljit_si asint; - sljit_uh asushort; -#if defined COMPILE_PCRE8 - sljit_ub asbyte; - sljit_ub asuchars[4]; -#elif defined COMPILE_PCRE16 - sljit_uh asuchars[2]; -#elif defined COMPILE_PCRE32 - sljit_ui asuchars[1]; -#endif - } oc; -#endif -} compare_context; - -enum { - frame_end = 0, - frame_setstrbegin = -1, - frame_setmark = -2 -}; - -/* Undefine sljit macros. */ -#undef CMP - -/* Used for accessing the elements of the stack. */ -#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw)) - -#define TMP1 SLJIT_SCRATCH_REG1 -#define TMP2 SLJIT_SCRATCH_REG3 -#define TMP3 SLJIT_TEMPORARY_EREG2 -#define STR_PTR SLJIT_SAVED_REG1 -#define STR_END SLJIT_SAVED_REG2 -#define STACK_TOP SLJIT_SCRATCH_REG2 -#define STACK_LIMIT SLJIT_SAVED_REG3 -#define ARGUMENTS SLJIT_SAVED_EREG1 -#define CALL_COUNT SLJIT_SAVED_EREG2 -#define RETURN_ADDR SLJIT_TEMPORARY_EREG1 - -/* Local space layout. */ -/* These two locals can be used by the current opcode. */ -#define LOCALS0 (0 * sizeof(sljit_sw)) -#define LOCALS1 (1 * sizeof(sljit_sw)) -/* Two local variables for possessive quantifiers (char1 cannot use them). */ -#define POSSESSIVE0 (2 * sizeof(sljit_sw)) -#define POSSESSIVE1 (3 * sizeof(sljit_sw)) -/* Max limit of recursions. */ -#define CALL_LIMIT (4 * sizeof(sljit_sw)) -/* The output vector is stored on the stack, and contains pointers -to characters. The vector data is divided into two groups: the first -group contains the start / end character pointers, and the second is -the start pointers when the end of the capturing group has not yet reached. 
*/ -#define OVECTOR_START (common->ovector_start) -#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw)) -#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw)) -#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) - -#if defined COMPILE_PCRE8 -#define MOV_UCHAR SLJIT_MOV_UB -#define MOVU_UCHAR SLJIT_MOVU_UB -#elif defined COMPILE_PCRE16 -#define MOV_UCHAR SLJIT_MOV_UH -#define MOVU_UCHAR SLJIT_MOVU_UH -#elif defined COMPILE_PCRE32 -#define MOV_UCHAR SLJIT_MOV_UI -#define MOVU_UCHAR SLJIT_MOVU_UI -#else -#error Unsupported compiling mode -#endif - -/* Shortcuts. */ -#define DEFINE_COMPILER \ - struct sljit_compiler *compiler = common->compiler -#define OP1(op, dst, dstw, src, srcw) \ - sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) -#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ - sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) -#define LABEL() \ - sljit_emit_label(compiler) -#define JUMP(type) \ - sljit_emit_jump(compiler, (type)) -#define JUMPTO(type, label) \ - sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) -#define JUMPHERE(jump) \ - sljit_set_label((jump), sljit_emit_label(compiler)) -#define CMP(type, src1, src1w, src2, src2w) \ - sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) -#define CMPTO(type, src1, src1w, src2, src2w, label) \ - sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) -#define OP_FLAGS(op, dst, dstw, src, srcw, type) \ - sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type)) -#define GET_LOCAL_BASE(dst, dstw, offset) \ - sljit_get_local_base(compiler, (dst), (dstw), (offset)) - -static pcre_uchar* bracketend(pcre_uchar* cc) -{ -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); -do cc += GET(cc, 1); while (*cc == OP_ALT); -SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); -cc += 1 + LINK_SIZE; -return cc; -} - 
-/* Functions whose might need modification for all new supported opcodes: - next_opcode - get_private_data_length - set_private_data_ptrs - get_framesize - init_frame - get_private_data_length_for_copy - copy_private_data - compile_matchingpath - compile_backtrackingpath -*/ - -static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) -{ -SLJIT_UNUSED_ARG(common); -switch(*cc) - { - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYNL: - case OP_NOT_HSPACE: - case OP_HSPACE: - case OP_NOT_VSPACE: - case OP_VSPACE: - case OP_EXTUNI: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_DEF: - case OP_BRAZERO: - case OP_BRAMINZERO: - case OP_BRAPOSZERO: - case OP_COMMIT: - case OP_FAIL: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - case OP_SKIPZERO: - return cc + 1; - - case OP_ANYBYTE: -#ifdef SUPPORT_UTF - if (common->utf) return NULL; -#endif - return cc + 1; - - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSQUERY: - case OP_STARI: - case OP_MINSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_POSSTARI: - case OP_POSPLUSI: - case OP_POSQUERYI: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - 
case OP_NOTMINQUERY: - case OP_NOTPOSSTAR: - case OP_NOTPOSPLUS: - case OP_NOTPOSQUERY: - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERYI: - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - return cc; - - case OP_UPTO: - case OP_MINUPTO: - case OP_EXACT: - case OP_POSUPTO: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_EXACTI: - case OP_POSUPTOI: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTEXACT: - case OP_NOTPOSUPTO: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTEXACTI: - case OP_NOTPOSUPTOI: - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - return cc; - - case OP_NOTPROP: - case OP_PROP: - return cc + 1 + 2; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSUPTO: - case OP_REF: - case OP_REFI: - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_CLOSE: - cc += 1 + IMM2_SIZE; - return cc; - - case OP_CRRANGE: - case OP_CRMINRANGE: - return cc + 1 + 2 * IMM2_SIZE; - - case OP_CLASS: - case OP_NCLASS: - return cc + 1 + 32 / sizeof(pcre_uchar); - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - return cc + GET(cc, 1); -#endif - - case OP_RECURSE: - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_REVERSE: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRA: - case OP_BRAPOS: - case OP_COND: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - case OP_ALT: - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - return cc + 1 + LINK_SIZE; - - case OP_CBRA: - case OP_CBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - return cc + 1 + LINK_SIZE + IMM2_SIZE; - - case OP_MARK: - return cc + 1 + 2 + cc[1]; - - default: - return NULL; - } -} 
- -#define CASE_ITERATOR_PRIVATE_DATA_1 \ - case OP_MINSTAR: \ - case OP_MINPLUS: \ - case OP_QUERY: \ - case OP_MINQUERY: \ - case OP_MINSTARI: \ - case OP_MINPLUSI: \ - case OP_QUERYI: \ - case OP_MINQUERYI: \ - case OP_NOTMINSTAR: \ - case OP_NOTMINPLUS: \ - case OP_NOTQUERY: \ - case OP_NOTMINQUERY: \ - case OP_NOTMINSTARI: \ - case OP_NOTMINPLUSI: \ - case OP_NOTQUERYI: \ - case OP_NOTMINQUERYI: - -#define CASE_ITERATOR_PRIVATE_DATA_2A \ - case OP_STAR: \ - case OP_PLUS: \ - case OP_STARI: \ - case OP_PLUSI: \ - case OP_NOTSTAR: \ - case OP_NOTPLUS: \ - case OP_NOTSTARI: \ - case OP_NOTPLUSI: - -#define CASE_ITERATOR_PRIVATE_DATA_2B \ - case OP_UPTO: \ - case OP_MINUPTO: \ - case OP_UPTOI: \ - case OP_MINUPTOI: \ - case OP_NOTUPTO: \ - case OP_NOTMINUPTO: \ - case OP_NOTUPTOI: \ - case OP_NOTMINUPTOI: - -#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ - case OP_TYPEMINSTAR: \ - case OP_TYPEMINPLUS: \ - case OP_TYPEQUERY: \ - case OP_TYPEMINQUERY: - -#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ - case OP_TYPESTAR: \ - case OP_TYPEPLUS: - -#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ - case OP_TYPEUPTO: \ - case OP_TYPEMINUPTO: - -static int get_class_iterator_size(pcre_uchar *cc) -{ -switch(*cc) - { - case OP_CRSTAR: - case OP_CRPLUS: - return 2; - - case OP_CRMINSTAR: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - return 1; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) - return 0; - return 2; - - default: - return 0; - } -} - -static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) -{ -int private_data_length = 0; -pcre_uchar *alternative; -pcre_uchar *name; -pcre_uchar *end = NULL; -int space, size, i; -pcre_uint32 bracketlen; - -/* Calculate important variables (like stack size) and checks whether all opcodes are supported. 
*/ -while (cc < ccend) - { - space = 0; - size = 0; - bracketlen = 0; - switch(*cc) - { - case OP_SET_SOM: - common->has_set_som = TRUE; - cc += 1; - break; - - case OP_REF: - case OP_REFI: - common->optimized_cbracket[GET2(cc, 1)] = 0; - cc += 1 + IMM2_SIZE; - break; - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - private_data_length += sizeof(sljit_sw); - bracketlen = 1 + LINK_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: - private_data_length += sizeof(sljit_sw); - common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; - bracketlen = 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_COND: - case OP_SCOND: - bracketlen = cc[1 + LINK_SIZE]; - if (bracketlen == OP_CREF) - { - bracketlen = GET2(cc, 1 + LINK_SIZE + 1); - common->optimized_cbracket[bracketlen] = 0; - } - else if (bracketlen == OP_NCREF) - { - bracketlen = GET2(cc, 1 + LINK_SIZE + 1); - name = (pcre_uchar *)common->name_table; - alternative = name; - for (i = 0; i < common->name_count; i++) - { - if (GET2(name, 0) == bracketlen) break; - name += common->name_entry_size; - } - SLJIT_ASSERT(i != common->name_count); - - for (i = 0; i < common->name_count; i++) - { - if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0) - common->optimized_cbracket[GET2(alternative, 0)] = 0; - alternative += common->name_entry_size; - } - } - - if (*cc == OP_COND) - { - /* Might be a hidden SCOND. 
*/ - alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - private_data_length += sizeof(sljit_sw); - } - else - private_data_length += sizeof(sljit_sw); - bracketlen = 1 + LINK_SIZE; - break; - - case OP_BRA: - bracketlen = 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_SCBRA: - bracketlen = 1 + LINK_SIZE + IMM2_SIZE; - break; - - CASE_ITERATOR_PRIVATE_DATA_1 - space = 1; - size = -2; - break; - - CASE_ITERATOR_PRIVATE_DATA_2A - space = 2; - size = -2; - break; - - CASE_ITERATOR_PRIVATE_DATA_2B - space = 2; - size = -(2 + IMM2_SIZE); - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - space = 1; - size = 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) - space = 2; - size = 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) - space = 2; - size = 1 + IMM2_SIZE; - break; - - case OP_CLASS: - case OP_NCLASS: - size += 1 + 32 / sizeof(pcre_uchar); - space = get_class_iterator_size(cc + size); - break; - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - size = GET(cc, 1); - space = get_class_iterator_size(cc + size); - break; -#endif - - case OP_RECURSE: - /* Set its value only once. 
*/ - if (common->recursive_head == 0) - { - common->recursive_head = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } - cc += 1 + LINK_SIZE; - break; - - case OP_MARK: - if (common->mark_ptr == 0) - { - common->mark_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } - cc += 1 + 2 + cc[1]; - break; - - default: - cc = next_opcode(common, cc); - if (cc == NULL) - return -1; - break; - } - - if (space > 0 && cc >= end) - private_data_length += sizeof(sljit_sw) * space; - - if (size != 0) - { - if (size < 0) - { - cc += -size; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - } - else - cc += size; - } - - if (bracketlen != 0) - { - if (cc >= end) - { - end = bracketend(cc); - if (end[-1 - LINK_SIZE] == OP_KET) - end = NULL; - } - cc += bracketlen; - } - } -return private_data_length; -} - -static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend) -{ -pcre_uchar *cc = common->start; -pcre_uchar *alternative; -pcre_uchar *end = NULL; -int space, size, bracketlen; - -while (cc < ccend) - { - space = 0; - size = 0; - bracketlen = 0; - switch(*cc) - { - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - common->private_data_ptrs[cc - common->start] = private_data_ptr; - private_data_ptr += sizeof(sljit_sw); - bracketlen = 1 + LINK_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: - common->private_data_ptrs[cc - common->start] = private_data_ptr; - private_data_ptr += sizeof(sljit_sw); - bracketlen = 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_COND: - /* Might be a hidden SCOND. 
*/ - alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - { - common->private_data_ptrs[cc - common->start] = private_data_ptr; - private_data_ptr += sizeof(sljit_sw); - } - bracketlen = 1 + LINK_SIZE; - break; - - case OP_BRA: - bracketlen = 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_SCBRA: - bracketlen = 1 + LINK_SIZE + IMM2_SIZE; - break; - - CASE_ITERATOR_PRIVATE_DATA_1 - space = 1; - size = -2; - break; - - CASE_ITERATOR_PRIVATE_DATA_2A - space = 2; - size = -2; - break; - - CASE_ITERATOR_PRIVATE_DATA_2B - space = 2; - size = -(2 + IMM2_SIZE); - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - space = 1; - size = 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) - space = 2; - size = 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) - space = 2; - size = 1 + IMM2_SIZE; - break; - - case OP_CLASS: - case OP_NCLASS: - size += 1 + 32 / sizeof(pcre_uchar); - space = get_class_iterator_size(cc + size); - break; - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - size = GET(cc, 1); - space = get_class_iterator_size(cc + size); - break; -#endif - - default: - cc = next_opcode(common, cc); - SLJIT_ASSERT(cc != NULL); - break; - } - - if (space > 0 && cc >= end) - { - common->private_data_ptrs[cc - common->start] = private_data_ptr; - private_data_ptr += sizeof(sljit_sw) * space; - } - - if (size != 0) - { - if (size < 0) - { - cc += -size; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - } - else - cc += size; - } - - if (bracketlen > 0) - { - if (cc >= end) - { - end = bracketend(cc); - if (end[-1 - LINK_SIZE] == OP_KET) - end = NULL; - } - cc += bracketlen; - } - } -} - -/* Returns with -1 if no need for frame. 
*/ -static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive) -{ -pcre_uchar *ccend = bracketend(cc); -int length = 0; -BOOL possessive = FALSE; -BOOL setsom_found = recursive; -BOOL setmark_found = recursive; - -if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) - { - length = 3; - possessive = TRUE; - } - -cc = next_opcode(common, cc); -SLJIT_ASSERT(cc != NULL); -while (cc < ccend) - switch(*cc) - { - case OP_SET_SOM: - SLJIT_ASSERT(common->has_set_som); - if (!setsom_found) - { - length += 2; - setsom_found = TRUE; - } - cc += 1; - break; - - case OP_MARK: - SLJIT_ASSERT(common->mark_ptr != 0); - if (!setmark_found) - { - length += 2; - setmark_found = TRUE; - } - cc += 1 + 2 + cc[1]; - break; - - case OP_RECURSE: - if (common->has_set_som && !setsom_found) - { - length += 2; - setsom_found = TRUE; - } - if (common->mark_ptr != 0 && !setmark_found) - { - length += 2; - setmark_found = TRUE; - } - cc += 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_CBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - length += 3; - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - default: - cc = next_opcode(common, cc); - SLJIT_ASSERT(cc != NULL); - break; - } - -/* Possessive quantifiers can use a special case. 
*/ -if (SLJIT_UNLIKELY(possessive) && length == 3) - return -1; - -if (length > 0) - return length + 1; -return -1; -} - -static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive) -{ -DEFINE_COMPILER; -pcre_uchar *ccend = bracketend(cc); -BOOL setsom_found = recursive; -BOOL setmark_found = recursive; -int offset; - -/* >= 1 + shortest item size (2) */ -SLJIT_UNUSED_ARG(stacktop); -SLJIT_ASSERT(stackpos >= stacktop + 2); - -stackpos = STACK(stackpos); -if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) - cc = next_opcode(common, cc); -SLJIT_ASSERT(cc != NULL); -while (cc < ccend) - switch(*cc) - { - case OP_SET_SOM: - SLJIT_ASSERT(common->has_set_som); - if (!setsom_found) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_sw); - setsom_found = TRUE; - } - cc += 1; - break; - - case OP_MARK: - SLJIT_ASSERT(common->mark_ptr != 0); - if (!setmark_found) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_sw); - setmark_found = TRUE; - } - cc += 1 + 2 + cc[1]; - break; - - case OP_RECURSE: - if (common->has_set_som && !setsom_found) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_sw); - setsom_found = TRUE; - } - if (common->mark_ptr != 0 && !setmark_found) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), 
common->mark_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_sw); - setmark_found = TRUE; - } - cc += 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_CBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); - stackpos += (int)sizeof(sljit_sw); - - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - default: - cc = next_opcode(common, cc); - SLJIT_ASSERT(cc != NULL); - break; - } - -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end); -SLJIT_ASSERT(stackpos == STACK(stacktop)); -} - -static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) -{ -int private_data_length = 2; -int size; -pcre_uchar *alternative; -/* Calculate the sum of the private machine words. */ -while (cc < ccend) - { - size = 0; - switch(*cc) - { - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - private_data_length++; - cc += 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_SCBRA: - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - private_data_length++; - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: - private_data_length += 2; - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_COND: - /* Might be a hidden SCOND. 
*/ - alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - private_data_length++; - cc += 1 + LINK_SIZE; - break; - - CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - private_data_length++; - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) - private_data_length += 2; - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) - private_data_length += 2; - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - private_data_length++; - cc += 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) - private_data_length += 2; - cc += 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) - private_data_length += 2; - cc += 1 + IMM2_SIZE; - break; - - case OP_CLASS: - case OP_NCLASS: -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); -#else - size = 1 + 32 / (int)sizeof(pcre_uchar); -#endif - if (PRIVATE_DATA(cc)) - private_data_length += get_class_iterator_size(cc + size); - cc += size; - break; - - default: - cc = next_opcode(common, cc); - SLJIT_ASSERT(cc != NULL); - break; - } - } -SLJIT_ASSERT(cc == ccend); -return private_data_length; -} - -static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, - BOOL save, int stackptr, int stacktop) -{ -DEFINE_COMPILER; -int srcw[2]; -int count, size; -BOOL tmp1next = TRUE; -BOOL tmp1empty = TRUE; -BOOL tmp2empty = TRUE; -pcre_uchar *alternative; -enum { - start, - loop, - end -} status; - -status = save ? 
start : loop; -stackptr = STACK(stackptr - 2); -stacktop = STACK(stacktop - 1); - -if (!save) - { - stackptr += sizeof(sljit_sw); - if (stackptr < stacktop) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_sw); - tmp1empty = FALSE; - } - if (stackptr < stacktop) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_sw); - tmp2empty = FALSE; - } - /* The tmp1next must be TRUE in either way. */ - } - -while (status != end) - { - count = 0; - switch(status) - { - case start: - SLJIT_ASSERT(save && common->recursive_head != 0); - count = 1; - srcw[0] = common->recursive_head; - status = loop; - break; - - case loop: - if (cc >= ccend) - { - status = end; - break; - } - - switch(*cc) - { - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - count = 1; - srcw[0] = PRIVATE_DATA(cc); - SLJIT_ASSERT(srcw[0] != 0); - cc += 1 + LINK_SIZE; - break; - - case OP_CBRA: - case OP_SCBRA: - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - { - count = 1; - srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - } - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0); - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_COND: - /* Might be a hidden SCOND. 
*/ - alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - { - count = 1; - srcw[0] = PRIVATE_DATA(cc); - SLJIT_ASSERT(srcw[0] != 0); - } - cc += 1 + LINK_SIZE; - break; - - CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { - count = 1; - srcw[0] = PRIVATE_DATA(cc); - } - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) - { - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); - } - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) - { - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); - } - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { - count = 1; - srcw[0] = PRIVATE_DATA(cc); - } - cc += 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) - { - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = srcw[0] + sizeof(sljit_sw); - } - cc += 1; - break; - - CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) - { - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = srcw[0] + sizeof(sljit_sw); - } - cc += 1 + IMM2_SIZE; - break; - - case OP_CLASS: - case OP_NCLASS: -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - size = (*cc == OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); -#else - size = 1 + 32 / (int)sizeof(pcre_uchar); -#endif - if (PRIVATE_DATA(cc)) - switch(get_class_iterator_size(cc + size)) - { - case 1: - count = 1; - srcw[0] = PRIVATE_DATA(cc); - break; - - case 2: - count = 2; - srcw[0] = PRIVATE_DATA(cc); - srcw[1] = srcw[0] + sizeof(sljit_sw); - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - cc += size; - break; - - default: - cc = next_opcode(common, cc); - SLJIT_ASSERT(cc != NULL); - break; - } - break; - - case end: - SLJIT_ASSERT_STOP(); - break; - } - - while (count > 0) - { - count--; - if (save) - { - if (tmp1next) - { - if (!tmp1empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_sw); - } - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); - tmp1empty = FALSE; - tmp1next = FALSE; - } - else - { - if (!tmp2empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_sw); - } - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); - tmp2empty = FALSE; - tmp1next = TRUE; - } - } - else - { - if (tmp1next) - { - SLJIT_ASSERT(!tmp1empty); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0); - tmp1empty = stackptr >= stacktop; - if (!tmp1empty) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_sw); - } - tmp1next = FALSE; - } - else - { - SLJIT_ASSERT(!tmp2empty); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0); - tmp2empty = stackptr >= stacktop; - if (!tmp2empty) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_sw); - } - tmp1next = TRUE; - } - } - } - } - -if (save) - { - if (tmp1next) - { - if (!tmp1empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_sw); - } - if (!tmp2empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_sw); - } - } - 
else - { - if (!tmp2empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_sw); - } - if (!tmp1empty) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_sw); - } - } - } -SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); -} - -#undef CASE_ITERATOR_PRIVATE_DATA_1 -#undef CASE_ITERATOR_PRIVATE_DATA_2A -#undef CASE_ITERATOR_PRIVATE_DATA_2B -#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 -#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A -#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - -static SLJIT_INLINE BOOL is_powerof2(unsigned int value) -{ -return (value & (value - 1)) == 0; -} - -static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) -{ -while (list) - { - /* sljit_set_label is clever enough to do nothing - if either the jump or the label is NULL. */ - sljit_set_label(list->jump, label); - list = list->next; - } -} - -static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump) -{ -jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); -if (list_item) - { - list_item->next = *list; - list_item->jump = jump; - *list = list_item; - } -} - -static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start) -{ -DEFINE_COMPILER; -stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); - -if (list_item) - { - list_item->type = type; - list_item->data = data; - list_item->start = start; - list_item->quit = LABEL(); - list_item->next = common->stubs; - common->stubs = list_item; - } -} - -static void flush_stubs(compiler_common *common) -{ -DEFINE_COMPILER; -stub_list* list_item = common->stubs; - -while (list_item) - { - JUMPHERE(list_item->start); - switch(list_item->type) - { - case stack_alloc: - add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); - break; - } - JUMPTO(SLJIT_JUMP, list_item->quit); - list_item = 
list_item->next; - } -common->stubs = NULL; -} - -static SLJIT_INLINE void decrease_call_count(compiler_common *common) -{ -DEFINE_COMPILER; - -OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1); -add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO)); -} - -static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) -{ -/* May destroy all locals and registers except TMP2. */ -DEFINE_COMPILER; - -OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); -#ifdef DESTROY_REGISTERS -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); -OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); -OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); -#endif -add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); -} - -static SLJIT_INLINE void free_stack(compiler_common *common, int size) -{ -DEFINE_COMPILER; -OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); -} - -static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) -{ -DEFINE_COMPILER; -struct sljit_label *loop; -int i; -/* At this point we can freely use all temporary registers. */ -/* TMP1 returns with begin - 1. 
*/ -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); -if (length < 8) - { - for (i = 0; i < length; i++) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0); - } -else - { - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length); - loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, loop); - } -} - -static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) -{ -DEFINE_COMPILER; -struct sljit_label *loop; -struct sljit_jump *earlyexit; - -/* At this point we can freely use all registers. */ -OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); - -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0); -if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); -OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount)); -if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0); -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); -GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START); -/* Unlikely, but possible */ -earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0); -loop = LABEL(); -OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0); -OP2(SLJIT_ADD, 
SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)); -/* Copy the integer value to the output buffer */ -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif -OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); -JUMPTO(SLJIT_C_NOT_ZERO, loop); -JUMPHERE(earlyexit); - -/* Calculate the return value, which is the maximum ovector value. */ -if (topbracket > 1) - { - GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1); - - /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */ - loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw))); - OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); - CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop); - OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0); - } -else - OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); -} - -static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) -{ -DEFINE_COMPILER; - -SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2); -SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); - -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); -OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount)); -CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit); - -/* Store match begin and end. 
*/ -OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); -OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif -OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0); - -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif -OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0); - -JUMPTO(SLJIT_JUMP, quit); -} - -static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) -{ -/* May destroy TMP1. */ -DEFINE_COMPILER; -struct sljit_jump *jump; - -if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - { - /* The value of -1 must be kept for start_used_ptr! */ - OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1); - /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting - is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. 
*/ - jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - JUMPHERE(jump); - } -else if (common->mode == JIT_PARTIAL_HARD_COMPILE) - { - jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - JUMPHERE(jump); - } -} - -static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) -{ -/* Detects if the character has an othercase. */ -unsigned int c; - -#ifdef SUPPORT_UTF -if (common->utf) - { - GETCHAR(c, cc); - if (c > 127) - { -#ifdef SUPPORT_UCP - return c != UCD_OTHERCASE(c); -#else - return FALSE; -#endif - } -#ifndef COMPILE_PCRE8 - return common->fcc[c] != c; -#endif - } -else -#endif - c = *cc; -return MAX_255(c) ? common->fcc[c] != c : FALSE; -} - -static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) -{ -/* Returns with the othercase. */ -#ifdef SUPPORT_UTF -if (common->utf && c > 127) - { -#ifdef SUPPORT_UCP - return UCD_OTHERCASE(c); -#else - return c; -#endif - } -#endif -return TABLE_GET(c, common->fcc, c); -} - -static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc) -{ -/* Detects if the character and its othercase has only 1 bit difference. */ -unsigned int c, oc, bit; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 -int n; -#endif - -#ifdef SUPPORT_UTF -if (common->utf) - { - GETCHAR(c, cc); - if (c <= 127) - oc = common->fcc[c]; - else - { -#ifdef SUPPORT_UCP - oc = UCD_OTHERCASE(c); -#else - oc = c; -#endif - } - } -else - { - c = *cc; - oc = TABLE_GET(c, common->fcc, c); - } -#else -c = *cc; -oc = TABLE_GET(c, common->fcc, c); -#endif - -SLJIT_ASSERT(c != oc); - -bit = c ^ oc; -/* Optimized for English alphabet. */ -if (c <= 127 && bit == 0x20) - return (0 << 8) | 0x20; - -/* Since c != oc, they must have at least 1 bit difference. 
*/ -if (!is_powerof2(bit)) - return 0; - -#if defined COMPILE_PCRE8 - -#ifdef SUPPORT_UTF -if (common->utf && c > 127) - { - n = GET_EXTRALEN(*cc); - while ((bit & 0x3f) == 0) - { - n--; - bit >>= 6; - } - return (n << 8) | bit; - } -#endif /* SUPPORT_UTF */ -return (0 << 8) | bit; - -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - -#ifdef SUPPORT_UTF -if (common->utf && c > 65535) - { - if (bit >= (1 << 10)) - bit >>= 10; - else - return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); - } -#endif /* SUPPORT_UTF */ -return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); - -#endif /* COMPILE_PCRE[8|16|32] */ -} - -static void check_partial(compiler_common *common, BOOL force) -{ -/* Checks whether a partial matching is occured. Does not modify registers. */ -DEFINE_COMPILER; -struct sljit_jump *jump = NULL; - -SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); - -if (common->mode == JIT_COMPILE) - return; - -if (!force) - jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); -else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); - -if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); -else - { - if (common->partialmatchlabel != NULL) - JUMPTO(SLJIT_JUMP, common->partialmatchlabel); - else - add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); - } - -if (jump != NULL) - JUMPHERE(jump); -} - -static struct sljit_jump *check_str_end(compiler_common *common) -{ -/* Does not affect registers. Usually used in a tight spot. 
*/ -DEFINE_COMPILER; -struct sljit_jump *jump; -struct sljit_jump *nohit; -struct sljit_jump *return_value; - -if (common->mode == JIT_COMPILE) - return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - -jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); -if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - { - nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); - JUMPHERE(nohit); - return_value = JUMP(SLJIT_JUMP); - } -else - { - return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - if (common->partialmatchlabel != NULL) - JUMPTO(SLJIT_JUMP, common->partialmatchlabel); - else - add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); - } -JUMPHERE(jump); -return return_value; -} - -static void detect_partial_match(compiler_common *common, jump_list **backtracks) -{ -DEFINE_COMPILER; -struct sljit_jump *jump; - -if (common->mode == JIT_COMPILE) - { - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - return; - } - -/* Partial matching mode. */ -jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); -add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); -if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - } -else - { - if (common->partialmatchlabel != NULL) - JUMPTO(SLJIT_JUMP, common->partialmatchlabel); - else - add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); - } -JUMPHERE(jump); -} - -static void read_char(compiler_common *common) -{ -/* Reads the character into TMP1, updates STR_PTR. -Does not check STR_END. TMP2 Destroyed. 
*/ -DEFINE_COMPILER; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -struct sljit_jump *jump; -#endif - -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -if (common->utf) - { -#if defined COMPILE_PCRE8 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#elif defined COMPILE_PCRE16 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif /* COMPILE_PCRE[8|16] */ - add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); - JUMPHERE(jump); - } -#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -} - -static void peek_char(compiler_common *common) -{ -/* Reads the character into TMP1, keeps STR_PTR. -Does not check STR_END. TMP2 Destroyed. */ -DEFINE_COMPILER; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -struct sljit_jump *jump; -#endif - -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -if (common->utf) - { -#if defined COMPILE_PCRE8 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#elif defined COMPILE_PCRE16 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif /* COMPILE_PCRE[8|16] */ - add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - JUMPHERE(jump); - } -#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ -} - -static void read_char8_type(compiler_common *common) -{ -/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ -DEFINE_COMPILER; -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -struct sljit_jump *jump; -#endif - -#ifdef SUPPORT_UTF -if (common->utf) - { - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined COMPILE_PCRE8 - /* This can be an extra read in some situations, but hopefully - it is needed in most cases. 
*/ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); - jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); - add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); - JUMPHERE(jump); -#elif defined COMPILE_PCRE16 - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); - JUMPHERE(jump); - /* Skip low surrogate if necessary. */ - OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -#elif defined COMPILE_PCRE32 - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); - JUMPHERE(jump); -#endif /* COMPILE_PCRE[8|16|32] */ - return; - } -#endif /* SUPPORT_UTF */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -/* The ctypes array contains only 256 values. */ -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); -#endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -JUMPHERE(jump); -#endif -} - -static void skip_char_back(compiler_common *common) -{ -/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. 
*/ -DEFINE_COMPILER; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -#if defined COMPILE_PCRE8 -struct sljit_label *label; - -if (common->utf) - { - label = LABEL(); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); - CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); - return; - } -#elif defined COMPILE_PCRE16 -if (common->utf) - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - /* Skip low surrogate if necessary. */ - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - return; - } -#endif /* COMPILE_PCRE[8|16] */ -#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -} - -static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue) -{ -/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ -DEFINE_COMPILER; - -if (nltype == NLTYPE_ANY) - { - add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); - } -else if (nltype == NLTYPE_ANYCRLF) - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, backtracks, JUMP(jumpiftrue ? 
SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); - } -else - { - SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); - add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); - } -} - -#ifdef SUPPORT_UTF - -#if defined COMPILE_PCRE8 -static void do_utfreadchar(compiler_common *common) -{ -/* Fast decoding a UTF-8 character. TMP1 contains the first byte -of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */ -DEFINE_COMPILER; -struct sljit_jump *jump; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -/* Searching for the first zero. */ -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* Two byte sequence. */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); - -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* Three byte sequence. 
*/ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); - -/* Four byte sequence. */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -static void do_utfreadtype8(compiler_common *common) -{ -/* Fast decoding a UTF-8 character type. TMP2 contains the first byte -of the character (>= 0xc0). Return value in TMP1. */ -DEFINE_COMPILER; -struct sljit_jump *jump; -struct sljit_jump *compare; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); - -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* Two byte sequence. 
*/ -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); -compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); - -JUMPHERE(compare); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); - -/* We only have types for characters less than 256. */ -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#elif defined COMPILE_PCRE16 - -static void do_utfreadchar(compiler_common *common) -{ -/* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char -of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */ -DEFINE_COMPILER; -struct sljit_jump *jump; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); -/* Do nothing, only return. */ -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); - -JUMPHERE(jump); -/* Combine two 16 bit characters. 
*/ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#endif /* COMPILE_PCRE[8|16] */ - -#endif /* SUPPORT_UTF */ - -#ifdef SUPPORT_UCP - -/* UCD_BLOCK_SIZE must be 128 (see the assert below). */ -#define UCD_BLOCK_MASK 127 -#define UCD_BLOCK_SHIFT 7 - -static void do_getucd(compiler_common *common) -{ -/* Search the UCD record for the character comes in TMP1. -Returns chartype in TMP1 and UCD offset in TMP2. */ -DEFINE_COMPILER; - -SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); -OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} -#endif - -static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) -{ -DEFINE_COMPILER; -struct sljit_label *mainloop; -struct sljit_label *newlinelabel = NULL; -struct sljit_jump *start; -struct sljit_jump *end = NULL; -struct sljit_jump *nl = NULL; -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 
-struct sljit_jump *singlechar; -#endif -jump_list *newline = NULL; -BOOL newlinecheck = FALSE; -BOOL readuchar = FALSE; - -if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || - common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) - newlinecheck = TRUE; - -if (firstline) - { - /* Search for the end of the first line. */ - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - mainloop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); - JUMPHERE(end); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - } - else - { - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - mainloop = LABEL(); - /* Continual stores does not cause data dependency. 
*/ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); - read_char(common); - check_newlinechar(common, common->nltype, &newline, TRUE); - CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); - JUMPHERE(end); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); - set_jumps(newline, LABEL()); - } - - OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - } - -start = JUMP(SLJIT_JUMP); - -if (newlinecheck) - { - newlinelabel = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - nl = JUMP(SLJIT_JUMP); - } - -mainloop = LABEL(); - -/* Increasing the STR_PTR here requires one less jump in the most common case. 
*/ -#ifdef SUPPORT_UTF -if (common->utf) readuchar = TRUE; -#endif -if (newlinecheck) readuchar = TRUE; - -if (readuchar) - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - -if (newlinecheck) - CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 -#if defined COMPILE_PCRE8 -if (common->utf) - { - singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(singlechar); - } -#elif defined COMPILE_PCRE16 -if (common->utf) - { - singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(singlechar); - } -#endif /* COMPILE_PCRE[8|16] */ -#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ -JUMPHERE(start); - -if (newlinecheck) - { - JUMPHERE(end); - JUMPHERE(nl); - } - -return mainloop; -} - -#define MAX_N_CHARS 3 - -static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) -{ -DEFINE_COMPILER; -struct sljit_label *start; -struct sljit_jump *quit; -pcre_uint32 chars[MAX_N_CHARS * 2]; -pcre_uchar *cc = common->start + 1 + IMM2_SIZE; -int location = 0; -pcre_int32 len, c, bit, caseless; -int must_stop; - -/* We do not support alternatives now. 
*/ -if (*(common->start + GET(common->start, 1)) == OP_ALT) - return FALSE; - -while (TRUE) - { - caseless = 0; - must_stop = 1; - switch(*cc) - { - case OP_CHAR: - must_stop = 0; - cc++; - break; - - case OP_CHARI: - caseless = 1; - must_stop = 0; - cc++; - break; - - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - /* Zero width assertions. */ - cc++; - continue; - - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - cc++; - break; - - case OP_EXACT: - cc += 1 + IMM2_SIZE; - break; - - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - caseless = 1; - cc++; - break; - - case OP_EXACTI: - caseless = 1; - cc += 1 + IMM2_SIZE; - break; - - default: - must_stop = 2; - break; - } - - if (must_stop == 2) - break; - - len = 1; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]); -#endif - - if (caseless && char_has_othercase(common, cc)) - { - caseless = char_get_othercase_bit(common, cc); - if (caseless == 0) - return FALSE; -#ifdef COMPILE_PCRE8 - caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); -#else - if ((caseless & 0x100) != 0) - caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9)); - else - caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9)); -#endif - } - else - caseless = 0; - - while (len > 0 && location < MAX_N_CHARS * 2) - { - c = *cc; - bit = 0; - if (len == (caseless & 0xff)) - { - bit = caseless >> 8; - c |= bit; - } - - chars[location] = c; - chars[location + 1] = bit; - - len--; - location += 2; - cc++; - } - - if (location >= MAX_N_CHARS * 2 || must_stop != 0) - break; - } - -/* At least two characters are required. 
*/ -if (location < 2 * 2) - return FALSE; - -if (firstline) - { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1); - } -else - OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1); - -start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -if (chars[1] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); -CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); -if (location > 2 * 2) - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -if (chars[3] != 0) - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start); -if (location > 2 * 2) - { - if (chars[5] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start); - } -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - -JUMPHERE(quit); - -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -else - OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1); -return TRUE; -} - -#undef MAX_N_CHARS - -static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline) -{ -DEFINE_COMPILER; -struct sljit_label *start; -struct sljit_jump *quit; -struct sljit_jump *found; -pcre_uchar oc, bit; - -if (firstline) - { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); - } - -start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - 
-oc = first_char; -if (caseless) - { - oc = TABLE_GET(first_char, common->fcc, first_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (first_char > 127 && common->utf) - oc = UCD_OTHERCASE(first_char); -#endif - } -if (first_char == oc) - found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char); -else - { - bit = first_char ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); - } - else - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - found = JUMP(SLJIT_C_NOT_ZERO); - } - } - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, start); -JUMPHERE(found); -JUMPHERE(quit); - -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -} - -static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) -{ -DEFINE_COMPILER; -struct sljit_label *loop; -struct sljit_jump *lastchar; -struct sljit_jump *firstchar; -struct sljit_jump *quit; -struct sljit_jump *foundcr = NULL; -struct sljit_jump *notfoundnl; -jump_list *newline = NULL; - -if (firstline) - { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); - } - -if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); - - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 
IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - - loop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); - - JUMPHERE(quit); - JUMPHERE(firstchar); - JUMPHERE(lastchar); - - if (firstline) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - return; - } - -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); -skip_char_back(common); - -loop = LABEL(); -read_char(common); -lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) - foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); -check_newlinechar(common, common->nltype, &newline, FALSE); -set_jumps(newline, loop); - -if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) - { - quit = JUMP(SLJIT_JUMP); - JUMPHERE(foundcr); - notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); -#endif - OP2(SLJIT_ADD, STR_PTR, 
0, STR_PTR, 0, TMP1, 0); - JUMPHERE(notfoundnl); - JUMPHERE(quit); - } -JUMPHERE(lastchar); -JUMPHERE(firstchar); - -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -} - -static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) -{ -DEFINE_COMPILER; -struct sljit_label *start; -struct sljit_jump *quit; -struct sljit_jump *found; -#ifndef COMPILE_PCRE8 -struct sljit_jump *jump; -#endif - -if (firstline) - { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); - } - -start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF -if (common->utf) - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); -#endif -#ifndef COMPILE_PCRE8 -jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); -JUMPHERE(jump); -#endif -OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); -OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); -found = JUMP(SLJIT_C_NOT_ZERO); - -#ifdef SUPPORT_UTF -if (common->utf) - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -#endif -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#ifdef SUPPORT_UTF -#if defined COMPILE_PCRE8 -if (common->utf) - { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - } -#elif defined COMPILE_PCRE16 -if (common->utf) - { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, 
SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - } -#endif /* COMPILE_PCRE[8|16] */ -#endif /* SUPPORT_UTF */ -JUMPTO(SLJIT_JUMP, start); -JUMPHERE(found); -JUMPHERE(quit); - -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); -} - -static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) -{ -DEFINE_COMPILER; -struct sljit_label *loop; -struct sljit_jump *toolong; -struct sljit_jump *alreadyfound; -struct sljit_jump *found; -struct sljit_jump *foundoc = NULL; -struct sljit_jump *notfound; -pcre_uint32 oc, bit; - -SLJIT_ASSERT(common->req_char_ptr != 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); -OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); -toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); -alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); - -if (has_firstchar) - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -else - OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); - -loop = LABEL(); -notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0); - -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); -oc = req_char; -if (caseless) - { - oc = TABLE_GET(req_char, common->fcc, req_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (req_char > 127 && common->utf) - oc = UCD_OTHERCASE(req_char); -#endif - } -if (req_char == oc) - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); -else - { - bit = req_char ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); - } - else - { - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); - foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc); - } - } -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, loop); - -JUMPHERE(found); -if (foundoc) - 
JUMPHERE(foundoc); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); -JUMPHERE(alreadyfound); -JUMPHERE(toolong); -return notfound; -} - -static void do_revertframes(compiler_common *common) -{ -DEFINE_COMPILER; -struct sljit_jump *jump; -struct sljit_label *mainloop; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0); -GET_LOCAL_BASE(TMP3, 0, 0); - -/* Drop frames until we reach STACK_TOP. */ -mainloop = LABEL(); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); -jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); -OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); -JUMPTO(SLJIT_JUMP, mainloop); - -JUMPHERE(jump); -jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); -/* End of dropping frames. */ -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); - -JUMPHERE(jump); -jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin); -/* Set string begin. */ -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); -JUMPTO(SLJIT_JUMP, mainloop); - -JUMPHERE(jump); -if (common->mark_ptr != 0) - { - jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); - JUMPTO(SLJIT_JUMP, mainloop); - - JUMPHERE(jump); - } - -/* Unknown command. 
*/ -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); -JUMPTO(SLJIT_JUMP, mainloop); -} - -static void check_wordboundary(compiler_common *common) -{ -DEFINE_COMPILER; -struct sljit_jump *skipread; -#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF -struct sljit_jump *jump; -#endif - -SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); - -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -/* Get type of the previous char, and put it to LOCALS1. */ -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0); -skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); -skip_char_back(common); -check_start_used_ptr(common); -read_char(common); - -/* Testing char type. */ -#ifdef SUPPORT_UCP -if (common->use_ucp) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - JUMPHERE(jump); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); - } -else -#endif - { -#ifndef COMPILE_PCRE8 - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#elif defined SUPPORT_UTF - /* Here LOCALS1 has already been zeroed. 
*/ - jump = NULL; - if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#endif /* COMPILE_PCRE8 */ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); -#ifndef COMPILE_PCRE8 - JUMPHERE(jump); -#elif defined SUPPORT_UTF - if (jump != NULL) - JUMPHERE(jump); -#endif /* COMPILE_PCRE8 */ - } -JUMPHERE(skipread); - -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); -skipread = check_str_end(common); -peek_char(common); - -/* Testing char type. This is a code duplication. */ -#ifdef SUPPORT_UCP -if (common->use_ucp) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - JUMPHERE(jump); - } -else -#endif - { -#ifndef COMPILE_PCRE8 - /* TMP2 may be destroyed by peek_char. 
*/ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#elif defined SUPPORT_UTF - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = NULL; - if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#endif - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); - OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); - OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); -#ifndef COMPILE_PCRE8 - JUMPHERE(jump); -#elif defined SUPPORT_UTF - if (jump != NULL) - JUMPHERE(jump); -#endif /* COMPILE_PCRE8 */ - } -JUMPHERE(skipread); - -OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); -sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -} - -/* - range format: - - ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range). - ranges[1] = first bit (0 or 1) - ranges[2-length] = position of the bit change (when the current bit is not equal to the previous) -*/ - -static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) -{ -DEFINE_COMPILER; -struct sljit_jump *jump; - -if (ranges[0] < 0) - return FALSE; - -switch(ranges[0]) - { - case 1: - if (readch) - read_char(common); - add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); - return TRUE; - - case 2: - if (readch) - read_char(common); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); - add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? 
SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); - return TRUE; - - case 4: - if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5]) - { - if (readch) - read_char(common); - if (ranges[1] != 0) - { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); - } - else - { - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); - JUMPHERE(jump); - } - return TRUE; - } - if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])) - { - if (readch) - read_char(common); - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]); - add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); - return TRUE; - } - return FALSE; - - default: - return FALSE; - } -} - -static void get_ctype_ranges(compiler_common *common, int flag, int *ranges) -{ -int i, bit, length; -const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes; - -bit = ctypes[0] & flag; -ranges[0] = -1; -ranges[1] = bit != 0 ? 1 : 0; -length = 0; - -for (i = 1; i < 256; i++) - if ((ctypes[i] & flag) != bit) - { - if (length >= MAX_RANGE_SIZE) - return; - ranges[2 + length] = i; - length++; - bit ^= flag; - } - -if (bit != 0) - { - if (length >= MAX_RANGE_SIZE) - return; - ranges[2 + length] = 256; - length++; - } -ranges[0] = length; -} - -static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) -{ -int ranges[2 + MAX_RANGE_SIZE]; -pcre_uint8 bit, cbit, all; -int i, byte, length = 0; - -bit = bits[0] & 0x1; -ranges[1] = bit; -/* Can be 0 or 255. 
*/ -all = -bit; - -for (i = 0; i < 256; ) - { - byte = i >> 3; - if ((i & 0x7) == 0 && bits[byte] == all) - i += 8; - else - { - cbit = (bits[byte] >> (i & 0x7)) & 0x1; - if (cbit != bit) - { - if (length >= MAX_RANGE_SIZE) - return FALSE; - ranges[2 + length] = i; - length++; - bit = cbit; - all = -cbit; - } - i++; - } - } - -if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) - { - if (length >= MAX_RANGE_SIZE) - return FALSE; - ranges[2 + length] = 256; - length++; - } -ranges[0] = length; - -return check_ranges(common, ranges, backtracks, FALSE); -} - -static void check_anynewline(compiler_common *common) -{ -/* Check whether TMP1 contains a newline character. TMP2 destroyed. */ -DEFINE_COMPILER; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); - -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -#ifdef COMPILE_PCRE8 -if (common->utf) - { -#endif - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); -#ifdef COMPILE_PCRE8 - } -#endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -static void check_hspace(compiler_common *common) -{ -/* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/ -DEFINE_COMPILER; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); - -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); -OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -#ifdef COMPILE_PCRE8 -if (common->utf) - { -#endif - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); -#ifdef COMPILE_PCRE8 - } -#endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -static void check_vspace(compiler_common *common) -{ -/* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/ -DEFINE_COMPILER; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); - -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -#ifdef COMPILE_PCRE8 -if (common->utf) - { -#endif - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); -#ifdef COMPILE_PCRE8 - } -#endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#define CHAR1 STR_END -#define CHAR2 STACK_TOP - -static void do_casefulcmp(compiler_common *common) -{ -DEFINE_COMPILER; -struct sljit_jump *jump; -struct sljit_label *label; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - -label = LABEL(); -OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); -OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_C_NOT_ZERO, label); - -JUMPHERE(jump); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0); -OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#define LCC_TABLE STACK_LIMIT - 
-static void do_caselesscmp(compiler_common *common) -{ -DEFINE_COMPILER; -struct sljit_jump *jump; -struct sljit_label *label; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0); -OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - -label = LABEL(); -OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); -OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -#ifndef COMPILE_PCRE8 -jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); -#endif -OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); -#ifndef COMPILE_PCRE8 -JUMPHERE(jump); -jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255); -#endif -OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); -#ifndef COMPILE_PCRE8 -JUMPHERE(jump); -#endif -jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_C_NOT_ZERO, label); - -JUMPHERE(jump); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); -OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#undef LCC_TABLE -#undef CHAR1 -#undef CHAR2 - -#if defined SUPPORT_UTF && defined SUPPORT_UCP - -static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) -{ -/* This function would be ineffective to do in JIT level. 
*/ -pcre_uint32 c1, c2; -const pcre_uchar *src2 = args->uchar_ptr; -const pcre_uchar *end2 = args->end; -const ucd_record *ur; -const pcre_uint32 *pp; - -while (src1 < end1) - { - if (src2 >= end2) - return (pcre_uchar*)1; - GETCHARINC(c1, src1); - GETCHARINC(c2, src2); - ur = GET_UCD(c2); - if (c1 != c2 && c1 != c2 + ur->other_case) - { - pp = PRIV(ucd_caseless_sets) + ur->caseset; - for (;;) - { - if (c1 < *pp) return NULL; - if (c1 == *pp++) break; - } - } - } -return src2; -} - -#endif /* SUPPORT_UTF && SUPPORT_UCP */ - -static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, - compare_context* context, jump_list **backtracks) -{ -DEFINE_COMPILER; -unsigned int othercasebit = 0; -pcre_uchar *othercasechar = NULL; -#ifdef SUPPORT_UTF -int utflength; -#endif - -if (caseless && char_has_othercase(common, cc)) - { - othercasebit = char_get_othercase_bit(common, cc); - SLJIT_ASSERT(othercasebit); - /* Extracting bit difference info. */ -#if defined COMPILE_PCRE8 - othercasechar = cc + (othercasebit >> 8); - othercasebit &= 0xff; -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - /* Note that this code only handles characters in the BMP. If there - ever are characters outside the BMP whose othercase differs in only one - bit from itself (there currently are none), this code will need to be - revised for COMPILE_PCRE32. 
*/ - othercasechar = cc + (othercasebit >> 9); - if ((othercasebit & 0x100) != 0) - othercasebit = (othercasebit & 0xff) << 8; - else - othercasebit &= 0xff; -#endif /* COMPILE_PCRE[8|16|32] */ - } - -if (context->sourcereg == -1) - { -#if defined COMPILE_PCRE8 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 2) - OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif defined COMPILE_PCRE16 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif defined COMPILE_PCRE32 - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* COMPILE_PCRE[8|16|32] */ - context->sourcereg = TMP2; - } - -#ifdef SUPPORT_UTF -utflength = 1; -if (common->utf && HAS_EXTRALEN(*cc)) - utflength += GET_EXTRALEN(*cc); - -do - { -#endif - - context->length -= IN_UCHARS(1); -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - - /* Unaligned read is supported. 
*/ - if (othercasebit != 0 && othercasechar == cc) - { - context->c.asuchars[context->ucharptr] = *cc | othercasebit; - context->oc.asuchars[context->ucharptr] = othercasebit; - } - else - { - context->c.asuchars[context->ucharptr] = *cc; - context->oc.asuchars[context->ucharptr] = 0; - } - context->ucharptr++; - -#if defined COMPILE_PCRE8 - if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) -#elif defined COMPILE_PCRE16 - if (context->ucharptr >= 2 || context->length == 0) -#elif defined COMPILE_PCRE32 - if (1 /* context->ucharptr >= 1 || context->length == 0 */) -#endif - { -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - if (context->length >= 4) - OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#if defined COMPILE_PCRE8 - else if (context->length >= 2) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 1) - OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif defined COMPILE_PCRE16 - else if (context->length >= 2) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* COMPILE_PCRE[8|16] */ -#elif defined COMPILE_PCRE32 - OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* COMPILE_PCRE[8|16|32] */ - context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; - - switch(context->ucharptr) - { - case 4 / sizeof(pcre_uchar): - if (context->oc.asint != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); - break; - -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 - case 2 / sizeof(pcre_uchar): - if (context->oc.asushort != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); - break; - -#ifdef COMPILE_PCRE8 - case 1: - if (context->oc.asbyte != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); - break; -#endif - -#endif /* COMPILE_PCRE[8|16] */ - - default: - SLJIT_ASSERT_STOP(); - break; - } - context->ucharptr = 0; - } - -#else - - /* Unaligned read is unsupported. */ - if (context->length > 0) - OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - - context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; - - if (othercasebit != 0 && othercasechar == cc) - { - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); - } - else - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); - -#endif - - cc++; -#ifdef SUPPORT_UTF - utflength--; - } -while (utflength > 0); -#endif - -return cc; -} - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - -#define SET_TYPE_OFFSET(value) \ - if ((value) != typeoffset) \ - { \ - if ((value) > typeoffset) \ - OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ - else \ - OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ - } \ - typeoffset = (value); - -#define SET_CHAR_OFFSET(value) \ - if ((value) != charoffset) \ - { \ - if ((value) > charoffset) \ - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \ - else \ - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \ - } \ - charoffset = (value); - -static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -jump_list *found = NULL; -jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; -pcre_int32 c, charoffset; -const pcre_uint32 *other_cases; -struct sljit_jump *jump = NULL; -pcre_uchar *ccbegin; -int compares, invertcmp, numberofcmps; -#ifdef SUPPORT_UCP -BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; -BOOL charsaved = FALSE; -int typereg = TMP1, scriptreg = TMP1; -pcre_int32 typeoffset; -#endif - -/* Although SUPPORT_UTF must be defined, we are - not necessary in utf mode even in 8 bit mode. 
*/ -detect_partial_match(common, backtracks); -read_char(common); - -if ((*cc++ & XCL_MAP) != 0) - { - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); -#ifndef COMPILE_PCRE8 - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#elif defined SUPPORT_UTF - if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#endif - - if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) - { - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); - } - -#ifndef COMPILE_PCRE8 - JUMPHERE(jump); -#elif defined SUPPORT_UTF - if (common->utf) - JUMPHERE(jump); -#endif - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -#ifdef SUPPORT_UCP - charsaved = TRUE; -#endif - cc += 32 / sizeof(pcre_uchar); - } - -/* Scanning the necessary info. 
*/ -ccbegin = cc; -compares = 0; -while (*cc != XCL_END) - { - compares++; - if (*cc == XCL_SINGLE) - { - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif -#ifdef SUPPORT_UCP - needschar = TRUE; -#endif - } - else if (*cc == XCL_RANGE) - { - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - cc++; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif -#ifdef SUPPORT_UCP - needschar = TRUE; -#endif - } -#ifdef SUPPORT_UCP - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - switch(*cc) - { - case PT_ANY: - break; - - case PT_LAMP: - case PT_GC: - case PT_PC: - case PT_ALNUM: - needstype = TRUE; - break; - - case PT_SC: - needsscript = TRUE; - break; - - case PT_SPACE: - case PT_PXSPACE: - case PT_WORD: - needstype = TRUE; - needschar = TRUE; - break; - - case PT_CLIST: - needschar = TRUE; - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - cc += 2; - } -#endif - } - -#ifdef SUPPORT_UCP -/* Simple register allocation. TMP1 is preferred if possible. */ -if (needstype || needsscript) - { - if (needschar && !charsaved) - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - if (needschar) - { - if (needstype) - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); - typereg = RETURN_ADDR; - } - - if (needsscript) - scriptreg = TMP3; - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); - } - else if (needstype && needsscript) - scriptreg = TMP3; - /* In all other cases only one of them was specified, and that can goes to TMP1. 
*/ - - if (needsscript) - { - if (scriptreg == TMP1) - { - OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); - } - else - { - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); - } - } - } -#endif - -/* Generating code. */ -cc = ccbegin; -charoffset = 0; -numberofcmps = 0; -#ifdef SUPPORT_UCP -typeoffset = 0; -#endif - -while (*cc != XCL_END) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - jump = NULL; - - if (*cc == XCL_SINGLE) - { - cc ++; -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; - - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset); - numberofcmps = 0; - } - } - else if (*cc == XCL_RANGE) - { - cc ++; -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; - SET_CHAR_OFFSET(c); -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset); - numberofcmps = 0; - } - } -#ifdef SUPPORT_UCP - else - { - if (*cc == XCL_NOTPROP) - invertcmp ^= 0x1; - cc++; - switch(*cc) - { - case PT_ANY: - if (list != backtracks) - { - if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) - continue; - } - else if (cc[-1] == XCL_NOTPROP) - continue; - jump = JUMP(SLJIT_JUMP); - break; - - case PT_LAMP: - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - break; - - case PT_GC: - c = PRIV(ucp_typerange)[(int)cc[1] * 2]; - SET_TYPE_OFFSET(c); - jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); - break; - - case PT_PC: - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); - break; - - case PT_SC: - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]); - break; - - case PT_SPACE: - case PT_PXSPACE: - if (*cc == PT_SPACE) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset); - } - SET_CHAR_OFFSET(9); - 
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); - if (*cc == PT_SPACE) - JUMPHERE(jump); - - SET_TYPE_OFFSET(ucp_Zl); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - break; - - case PT_WORD: - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - /* ... fall through */ - - case PT_ALNUM: - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); - SET_TYPE_OFFSET(ucp_Nd); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - break; - - case PT_CLIST: - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - - /* At least three characters are required. - Otherwise this case would be handled by the normal code path. */ - SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); - SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); - - /* Optimizing character pairs, if their difference is power of 2. 
*/ - if (is_powerof2(other_cases[1] ^ other_cases[0])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - other_cases += 2; - } - else if (is_powerof2(other_cases[2] ^ other_cases[1])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset); - OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - - other_cases += 3; - } - else - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - } - - while (*other_cases != NOTACHAR) - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); - OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - } - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); - break; - } - cc += 2; - } -#endif - - if (jump != NULL) - add_jump(compiler, compares > 0 ? 
list : backtracks, jump); - } - -if (found != NULL) - set_jumps(found, LABEL()); -} - -#undef SET_TYPE_OFFSET -#undef SET_CHAR_OFFSET - -#endif - -static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -int length; -unsigned int c, oc, bit; -compare_context context; -struct sljit_jump *jump[4]; -#ifdef SUPPORT_UTF -struct sljit_label *label; -#ifdef SUPPORT_UCP -pcre_uchar propdata[5]; -#endif -#endif - -switch(type) - { - case OP_SOD: - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); - return cc; - - case OP_SOM: - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); - return cc; - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); - return cc; - - case OP_NOT_DIGIT: - case OP_DIGIT: - /* Digits are usually 0-9, so it is worth to optimize them. */ - if (common->digits[0] == -2) - get_ctype_ranges(common, ctype_digit, common->digits); - detect_partial_match(common, backtracks); - /* Flip the starting bit in the negative case. */ - if (type == OP_NOT_DIGIT) - common->digits[1] ^= 1; - if (!check_ranges(common, common->digits, backtracks, TRUE)) - { - read_char8_type(common); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? 
SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); - } - if (type == OP_NOT_DIGIT) - common->digits[1] ^= 1; - return cc; - - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - detect_partial_match(common, backtracks); - read_char8_type(common); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); - return cc; - - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - detect_partial_match(common, backtracks); - read_char8_type(common); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); - return cc; - - case OP_ANY: - detect_partial_match(common, backtracks); - read_char(common); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - if (common->mode != JIT_PARTIAL_HARD_COMPILE) - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - else - jump[1] = check_str_end(common); - - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); - if (jump[1] != NULL) - JUMPHERE(jump[1]); - JUMPHERE(jump[0]); - } - else - check_newlinechar(common, common->nltype, backtracks, TRUE); - return cc; - - case OP_ALLANY: - detect_partial_match(common, backtracks); -#ifdef SUPPORT_UTF - if (common->utf) - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 -#if defined COMPILE_PCRE8 - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#elif defined COMPILE_PCRE16 - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 
0xd800); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#endif - JUMPHERE(jump[0]); -#endif /* COMPILE_PCRE[8|16] */ - return cc; - } -#endif - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - return cc; - - case OP_ANYBYTE: - detect_partial_match(common, backtracks); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - return cc; - -#ifdef SUPPORT_UTF -#ifdef SUPPORT_UCP - case OP_NOTPROP: - case OP_PROP: - propdata[0] = 0; - propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; - propdata[2] = cc[0]; - propdata[3] = cc[1]; - propdata[4] = XCL_END; - compile_xclass_matchingpath(common, propdata, backtracks); - return cc + 2; -#endif -#endif - - case OP_ANYNL: - detect_partial_match(common, backtracks); - read_char(common); - jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - /* We don't need to handle soft partial matching case. */ - if (common->mode != JIT_PARTIAL_HARD_COMPILE) - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - else - jump[1] = check_str_end(common); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[3] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[0]); - check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); - JUMPHERE(jump[1]); - JUMPHERE(jump[2]); - JUMPHERE(jump[3]); - return cc; - - case OP_NOT_HSPACE: - case OP_HSPACE: - detect_partial_match(common, backtracks); - read_char(common); - add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); - return cc; - - case OP_NOT_VSPACE: - case OP_VSPACE: - detect_partial_match(common, backtracks); - read_char(common); - add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); - return cc; - -#ifdef SUPPORT_UCP - case OP_EXTUNI: - detect_partial_match(common, backtracks); - read_char(common); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); - /* Optimize register allocation: use a real register. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); - - label = LABEL(); - jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); - read_char(common); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); - - OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); - OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); - OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - JUMPTO(SLJIT_C_NOT_ZERO, label); - - OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - JUMPHERE(jump[0]); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); - - if (common->mode == JIT_PARTIAL_HARD_COMPILE) - { - jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); - /* Since we successfully read a char above, partial matching must occure. */ - check_partial(common, TRUE); - JUMPHERE(jump[0]); - } - return cc; -#endif - - case OP_EODN: - /* Requires rather complex checks. 
*/ - jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else if (common->nltype == NLTYPE_FIXED) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); - } - else - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - jump[2] = JUMP(SLJIT_C_GREATER); - add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS)); - /* Equal. 
*/ - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[1]); - if (common->nltype == NLTYPE_ANYCRLF) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); - } - else - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0); - read_char(common); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); - add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); - } - JUMPHERE(jump[2]); - JUMPHERE(jump[3]); - } - JUMPHERE(jump[0]); - check_partial(common, FALSE); - return cc; - - case OP_EOD: - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - return cc; - - case OP_CIRC: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - return cc; - - case OP_CIRCM: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, 
STR_END, 0)); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - skip_char_back(common); - read_char(common); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - - case OP_DOLL: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - - if (!common->endonly) - compile_char1_matchingpath(common, OP_EODN, cc, backtracks); - else - { - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - } - return cc; - - case OP_DOLLM: - jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - check_partial(common, FALSE); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0); - /* STR_PTR = STR_END - IN_UCHARS(1) */ - add_jump(compiler, backtracks, 
CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - peek_char(common); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - - case OP_CHAR: - case OP_CHARI: - length = 1; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); -#endif - if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) - { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - - context.length = IN_UCHARS(length); - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); - } - detect_partial_match(common, backtracks); - read_char(common); -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHAR(c, cc); - } - else -#endif - c = *cc; - if (type == OP_CHAR || !char_has_othercase(common, cc)) - { - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - return cc + length; - } - oc = char_othercase(common, c); - bit = c ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); - return cc + length; - } - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | 
SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); - return cc + length; - - case OP_NOT: - case OP_NOTI: - detect_partial_match(common, backtracks); - length = 1; -#ifdef SUPPORT_UTF - if (common->utf) - { -#ifdef COMPILE_PCRE8 - c = *cc; - if (c < 128) - { - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - else - { - /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); - } - /* Skip the variable-length character. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(jump[0]); - return cc + 1; - } - else -#endif /* COMPILE_PCRE8 */ - { - GETCHARLEN(c, cc, length); - read_char(common); - } - } - else -#endif /* SUPPORT_UTF */ - { - read_char(common); - c = *cc; - } - - if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - else - { - oc = char_othercase(common, c); - bit = c ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); - } - else - { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); - } - } - return cc + length; - - case OP_CLASS: - case OP_NCLASS: - detect_partial_match(common, backtracks); - read_char(common); - if 
(check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) - return cc + 32 / sizeof(pcre_uchar); - -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - jump[0] = NULL; -#ifdef COMPILE_PCRE8 - /* This check only affects 8 bit mode. In other modes, we - always need to compare the value with 255. */ - if (common->utf) -#endif /* COMPILE_PCRE8 */ - { - jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (type == OP_CLASS) - { - add_jump(compiler, backtracks, jump[0]); - jump[0] = NULL; - } - } -#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - if (jump[0] != NULL) - JUMPHERE(jump[0]); -#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ - return cc + 32 / sizeof(pcre_uchar); - -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case OP_XCLASS: - compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); - return cc + GET(cc, 0) - 1; -#endif - - case OP_REVERSE: - length = GET(cc, 0); - if (length == 0) - return cc + LINK_SIZE; - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -#ifdef SUPPORT_UTF - if (common->utf) - { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); - label = LABEL(); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); - skip_char_back(common); - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); - } - else -#endif - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - 
add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); - } - check_start_used_ptr(common); - return cc + LINK_SIZE; - } -SLJIT_ASSERT_STOP(); -return cc; -} - -static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks) -{ -/* This function consumes at least one input character. */ -/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ -DEFINE_COMPILER; -pcre_uchar *ccbegin = cc; -compare_context context; -int size; - -context.length = 0; -do - { - if (cc >= ccend) - break; - - if (*cc == OP_CHAR) - { - size = 1; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); -#endif - } - else if (*cc == OP_CHARI) - { - size = 1; -#ifdef SUPPORT_UTF - if (common->utf) - { - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; - else if (HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); - } - else -#endif - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; - } - else - size = 0; - - cc += 1 + size; - context.length += IN_UCHARS(size); - } -while (size > 0 && context.length <= 128); - -cc = ccbegin; -if (context.length > 0) - { - /* We have a fixed-length byte sequence. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); - return cc; - } - -/* A non-fixed length character will be checked if length == 0. 
*/ -return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); -} - -static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -if (!common->jscript_compat) - { - if (backtracks == NULL) - { - /* OVECTOR(1) contains the "string begin - 1" constant. */ - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - return JUMP(SLJIT_C_NOT_ZERO); - } - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - } -return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); -} - -/* Forward definitions. 
*/ -static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); -static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); - -#define PUSH_BACKTRACK(size, ccstart, error) \ - do \ - { \ - backtrack = sljit_alloc_memory(compiler, (size)); \ - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ - return error; \ - memset(backtrack, 0, size); \ - backtrack->prev = parent->top; \ - backtrack->cc = (ccstart); \ - parent->top = backtrack; \ - } \ - while (0) - -#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ - do \ - { \ - backtrack = sljit_alloc_memory(compiler, (size)); \ - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ - return; \ - memset(backtrack, 0, size); \ - backtrack->prev = parent->top; \ - backtrack->cc = (ccstart); \ - parent->top = backtrack; \ - } \ - while (0) - -#define BACKTRACK_AS(type) ((type *)backtrack) - -static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) -{ -DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; -struct sljit_jump *jump = NULL; -struct sljit_jump *partial; -struct sljit_jump *nopartial; - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -/* OVECTOR(1) contains the "string begin - 1" constant. */ -if (withchecks && !common->jscript_compat) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - -#if defined SUPPORT_UTF && defined SUPPORT_UCP -if (common->utf && *cc == OP_REFI) - { - SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - if (withchecks) - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); - - /* Needed to save important temporary registers. 
*/ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); - else - { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); - nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); - check_partial(common, FALSE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(nopartial); - } - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); - } -else -#endif /* SUPPORT_UTF && SUPPORT_UCP */ - { - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); - if (withchecks) - jump = JUMP(SLJIT_C_ZERO); - - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, partial); - - add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - - if (common->mode != JIT_COMPILE) - { - nopartial = JUMP(SLJIT_JUMP); - JUMPHERE(partial); - /* TMP2 -= STR_END - STR_PTR */ - OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); - partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0); - OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); - add_jump(compiler, *cc == OP_REF ? 
&common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - JUMPHERE(partial); - check_partial(common, FALSE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(nopartial); - } - } - -if (jump != NULL) - { - if (emptyfail) - add_jump(compiler, backtracks, jump); - else - JUMPHERE(jump); - } -return cc + 1 + IMM2_SIZE; -} - -static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; -pcre_uchar type; -struct sljit_label *label; -struct sljit_jump *zerolength; -struct sljit_jump *jump = NULL; -pcre_uchar *ccbegin = cc; -int min = 0, max = 0; -BOOL minimize; - -PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); - -type = cc[1 + IMM2_SIZE]; -minimize = (type & 0x1) != 0; -switch(type) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - min = 0; - max = 0; - cc += 1 + IMM2_SIZE + 1; - break; - case OP_CRPLUS: - case OP_CRMINPLUS: - min = 1; - max = 0; - cc += 1 + IMM2_SIZE + 1; - break; - case OP_CRQUERY: - case OP_CRMINQUERY: - min = 0; - max = 1; - cc += 1 + IMM2_SIZE + 1; - break; - case OP_CRRANGE: - case OP_CRMINRANGE: - min = GET2(cc, 1 + IMM2_SIZE + 1); - max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); - cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; - break; - default: - SLJIT_ASSERT_STOP(); - break; - } - -if (!minimize) - { - if (min == 0) - { - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); - /* Temporary release of STR_PTR. */ - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - zerolength = compile_ref_checks(common, ccbegin, NULL); - /* Restore if not zero length. 
*/ - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - } - else - { - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); - } - - if (min > 1 || max > 1) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); - - label = LABEL(); - compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); - - if (min > 1 || max > 1) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); - if (min > 1) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); - if (max > 1) - { - jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); - JUMPHERE(jump); - } - } - - if (max == 0) - { - /* Includes min > 1 case as well. 
*/ - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); - } - - JUMPHERE(zerolength); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - - decrease_call_count(common); - return cc; - } - -allocate_stack(common, 2); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); -if (type != OP_CRMINSTAR) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); - -if (min == 0) - { - zerolength = compile_ref_checks(common, ccbegin, NULL); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - jump = JUMP(SLJIT_JUMP); - } -else - zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); - -BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); -if (max > 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); - -compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - -if (min > 1) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath); - } -else if (max > 0) - OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); - -if (jump != NULL) - JUMPHERE(jump); -JUMPHERE(zerolength); - -decrease_call_count(common); -return cc; -} - -static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; -recurse_entry *entry = common->entries; -recurse_entry *prev = NULL; -int start = GET(cc, 1); - -PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); -while (entry != NULL) - { - if (entry->start == start) - break; - prev = entry; - entry = 
entry->next; - } - -if (entry == NULL) - { - entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return NULL; - entry->next = NULL; - entry->entry = NULL; - entry->calls = NULL; - entry->start = start; - - if (prev != NULL) - prev->next = entry; - else - common->entries = entry; - } - -if (common->has_set_som && common->mark_ptr != 0) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - allocate_stack(common, 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - } -else if (common->has_set_som || common->mark_ptr != 0) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - } - -if (entry->entry == NULL) - add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); -else - JUMPTO(SLJIT_FAST_CALL, entry->entry); -/* Leave if the match is failed. */ -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); -return cc + 1 + LINK_SIZE; -} - -static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) -{ -DEFINE_COMPILER; -int framesize; -int private_data_ptr; -backtrack_common altbacktrack; -pcre_uchar *ccbegin; -pcre_uchar opcode; -pcre_uchar bra = OP_BRA; -jump_list *tmp = NULL; -jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; -jump_list **found; -/* Saving previous accept variables. 
*/ -struct sljit_label *save_quitlabel = common->quitlabel; -struct sljit_label *save_acceptlabel = common->acceptlabel; -jump_list *save_quit = common->quit; -jump_list *save_accept = common->accept; -struct sljit_jump *jump; -struct sljit_jump *brajump = NULL; - -if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) - { - SLJIT_ASSERT(!conditional); - bra = *cc; - cc++; - } -private_data_ptr = PRIVATE_DATA(cc); -SLJIT_ASSERT(private_data_ptr != 0); -framesize = get_framesize(common, cc, FALSE); -backtrack->framesize = framesize; -backtrack->private_data_ptr = private_data_ptr; -opcode = *cc; -SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); -found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; -ccbegin = cc; -cc += GET(cc, 1); - -if (bra == OP_BRAMINZERO) - { - /* This is a braminzero backtrack path. */ - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - } - -if (framesize < 0) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - } -else - { - allocate_stack(common, framesize + 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - init_frame(common, ccbegin, framesize + 1, 2, FALSE); - } - -memset(&altbacktrack, 0, sizeof(backtrack_common)); -common->quitlabel = NULL; -common->quit = NULL; -while (1) - { - common->acceptlabel = NULL; - common->accept = NULL; - altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; - - if (*ccbegin == OP_ALT) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - - 
altbacktrack.cc = ccbegin; - compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - common->quitlabel = save_quitlabel; - common->acceptlabel = save_acceptlabel; - common->quit = save_quit; - common->accept = save_accept; - return NULL; - } - common->acceptlabel = LABEL(); - if (common->accept != NULL) - set_jumps(common->accept, common->acceptlabel); - - /* Reset stack. */ - if (framesize < 0) - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - else { - if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) - { - /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); - } - else - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - } - } - - if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) - { - /* We know that STR_PTR was stored on the top of the stack. */ - if (conditional) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); - else if (bra == OP_BRAZERO) - { - if (framesize < 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); - else - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); - } - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - else if (framesize >= 0) - { - /* For OP_BRA and OP_BRAMINZERO. 
*/ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); - } - } - add_jump(compiler, found, JUMP(SLJIT_JUMP)); - - compile_backtrackingpath(common, altbacktrack.top); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - common->quitlabel = save_quitlabel; - common->acceptlabel = save_acceptlabel; - common->quit = save_quit; - common->accept = save_accept; - return NULL; - } - set_jumps(altbacktrack.topbacktracks, LABEL()); - - if (*cc != OP_ALT) - break; - - ccbegin = cc; - cc += GET(cc, 1); - } -/* None of them matched. */ -if (common->quit != NULL) - set_jumps(common->quit, LABEL()); - -if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) - { - /* Assert is failed. */ - if (conditional || bra == OP_BRAZERO) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - - if (framesize < 0) - { - /* The topmost item should be 0. */ - if (bra == OP_BRAZERO) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - else - free_stack(common, 1); - } - else - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - /* The topmost item should be 0. */ - if (bra == OP_BRAZERO) - { - free_stack(common, framesize + 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - else - free_stack(common, framesize + 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); - } - jump = JUMP(SLJIT_JUMP); - if (bra != OP_BRAZERO) - add_jump(compiler, target, jump); - - /* Assert is successful. */ - set_jumps(tmp, LABEL()); - if (framesize < 0) - { - /* We know that STR_PTR was stored on the top of the stack. */ - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); - /* Keep the STR_PTR on the top of the stack. 
*/ - if (bra == OP_BRAZERO) - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - else if (bra == OP_BRAMINZERO) - { - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - } - else - { - if (bra == OP_BRA) - { - /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); - } - else - { - /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); - } - } - - if (bra == OP_BRAZERO) - { - backtrack->matchingpath = LABEL(); - sljit_set_label(jump, backtrack->matchingpath); - } - else if (bra == OP_BRAMINZERO) - { - JUMPTO(SLJIT_JUMP, backtrack->matchingpath); - JUMPHERE(brajump); - if (framesize >= 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); - } - set_jumps(backtrack->common.topbacktracks, LABEL()); - } - } -else - { - /* AssertNot is successful. */ - if (framesize < 0) - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (bra != OP_BRA) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - else - free_stack(common, 1); - } - else - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - /* The topmost item should be 0. 
*/ - if (bra != OP_BRA) - { - free_stack(common, framesize + 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - else - free_stack(common, framesize + 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); - } - - if (bra == OP_BRAZERO) - backtrack->matchingpath = LABEL(); - else if (bra == OP_BRAMINZERO) - { - JUMPTO(SLJIT_JUMP, backtrack->matchingpath); - JUMPHERE(brajump); - } - - if (bra != OP_BRA) - { - SLJIT_ASSERT(found == &backtrack->common.topbacktracks); - set_jumps(backtrack->common.topbacktracks, LABEL()); - backtrack->common.topbacktracks = NULL; - } - } - -common->quitlabel = save_quitlabel; -common->acceptlabel = save_acceptlabel; -common->quit = save_quit; -common->accept = save_accept; -return cc + 1 + LINK_SIZE; -} - -static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table) -{ -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; -sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; -sljit_sw no_capture; -int i; - -locals += refno & 0xff; -refno >>= 8; -no_capture = locals[1]; - -for (i = 0; i < name_count; i++) - { - if (GET2(slotA, 0) == refno) break; - slotA += name_entry_size; - } - -if (i < name_count) - { - /* Found a name for the number - there can be only one; duplicate names - for different numbers are allowed, but not vice versa. First scan down - for duplicates. 
*/ - - slotB = slotA; - while (slotB > name_table) - { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - if (!condition) - { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; - } - } - } -return condition; -} - -static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table) -{ -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; -sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; -sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)]; -sljit_uw i; - -for (i = 0; i < name_count; i++) - { - if (GET2(slotA, 0) == recno) break; - slotA += name_entry_size; - } - -if (i < name_count) - { - /* Found a name for the number - there can be only one; duplicate - names for different numbers are allowed, but not vice versa. First - scan down for duplicates. */ - - slotB = slotA; - while (slotB > name_table) - { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - if (!condition) - { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - } - } -return condition; -} - -/* - Handling bracketed expressions is probably the most complex part. 
- - Stack layout naming characters: - S - Push the current STR_PTR - 0 - Push a 0 (NULL) - A - Push the current STR_PTR. Needed for restoring the STR_PTR - before the next alternative. Not pushed if there are no alternatives. - M - Any values pushed by the current alternative. Can be empty, or anything. - C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. - L - Push the previous local (pointed by localptr) to the stack - () - opional values stored on the stack - ()* - optonal, can be stored multiple times - - The following list shows the regular expression templates, their PCRE byte codes - and stack layout supported by pcre-sljit. - - (?:) OP_BRA | OP_KET A M - () OP_CBRA | OP_KET C M - (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* - OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* - (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* - OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* - ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* - OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* - ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* - OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* - (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) - (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) - ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) - ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) - (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* - OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* - (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* - OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* - ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* - OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* - ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* - OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* - - - Stack layout naming characters: - A - Push the alternative index (starting from 0) on the stack. - Not pushed if there is no alternatives. - M - Any values pushed by the current alternative. Can be empty, or anything. 
- - The next list shows the possible content of a bracket: - (|) OP_*BRA | OP_ALT ... M A - (?()|) OP_*COND | OP_ALT M A - (?>|) OP_ONCE | OP_ALT ... [stack trace] M A - (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A - Or nothing, if trace is unnecessary -*/ - -static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; -pcre_uchar opcode; -int private_data_ptr = 0; -int offset = 0; -int stacksize; -pcre_uchar *ccbegin; -pcre_uchar *matchingpath; -pcre_uchar bra = OP_BRA; -pcre_uchar ket; -assert_backtrack *assert; -BOOL has_alternatives; -struct sljit_jump *jump; -struct sljit_jump *skip; -struct sljit_label *rmaxlabel = NULL; -struct sljit_jump *braminzerojump = NULL; - -PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); - -if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) - { - bra = *cc; - cc++; - opcode = *cc; - } - -opcode = *cc; -ccbegin = cc; -matchingpath = ccbegin + 1 + LINK_SIZE; - -if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF) - { - /* Drop this bracket_backtrack. */ - parent->top = backtrack->prev; - return bracketend(cc); - } - -ket = *(bracketend(cc) - 1 - LINK_SIZE); -SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); -SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); -cc += GET(cc, 1); - -has_alternatives = *cc == OP_ALT; -if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - { - has_alternatives = (*matchingpath == OP_RREF) ? 
FALSE : TRUE; - if (*matchingpath == OP_NRREF) - { - stacksize = GET2(matchingpath, 1); - if (common->currententry == NULL || stacksize == RREF_ANY) - has_alternatives = FALSE; - else if (common->currententry->start == 0) - has_alternatives = stacksize != 0; - else - has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - } - } - -if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) - opcode = OP_SCOND; -if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) - opcode = OP_ONCE; - -if (opcode == OP_CBRA || opcode == OP_SCBRA) - { - /* Capturing brackets has a pre-allocated space. */ - offset = GET2(ccbegin, 1 + LINK_SIZE); - if (common->optimized_cbracket[offset] == 0) - { - private_data_ptr = OVECTOR_PRIV(offset); - offset <<= 1; - } - else - { - offset <<= 1; - private_data_ptr = OVECTOR(offset); - } - BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; - matchingpath += IMM2_SIZE; - } -else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) - { - /* Other brackets simply allocate the next entry. */ - private_data_ptr = PRIVATE_DATA(ccbegin); - SLJIT_ASSERT(private_data_ptr != 0); - BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; - if (opcode == OP_ONCE) - BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE); - } - -/* Instructions before the first alternative. */ -stacksize = 0; -if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) - stacksize++; -if (bra == OP_BRAZERO) - stacksize++; - -if (stacksize > 0) - allocate_stack(common, stacksize); - -stacksize = 0; -if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); - stacksize++; - } - -if (bra == OP_BRAZERO) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); - -if (bra == OP_BRAMINZERO) - { - /* This is a backtrack path! 
(Since the try-path of OP_BRAMINZERO matches to the empty string) */ - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (ket != OP_KETRMIN) - { - free_stack(common, 1); - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - } - else - { - if (opcode == OP_ONCE || opcode >= OP_SBRA) - { - jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - /* Nothing stored during the first run. */ - skip = JUMP(SLJIT_JUMP); - JUMPHERE(jump); - /* Checking zero-length iteration. */ - if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) - { - /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - } - else - { - /* Except when the whole stack frame must be saved. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); - } - JUMPHERE(skip); - } - else - { - jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - JUMPHERE(jump); - } - } - } - -if (ket == OP_KETRMIN) - BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); - -if (ket == OP_KETRMAX) - { - rmaxlabel = LABEL(); - if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA) - BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel; - } - -/* Handling capturing brackets and alternatives. */ -if (opcode == OP_ONCE) - { - if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) - { - /* Neither capturing brackets nor recursions are not found in the block. 
*/ - if (ket == OP_KETRMIN) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - } - else if (ket == OP_KETRMAX || has_alternatives) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - } - else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - } - else - { - if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) - { - allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE); - } - else - { - allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE); - } - } - } -else if (opcode == OP_CBRA || opcode == OP_SCBRA) - { - /* Saving the previous values. 
*/ - if (common->optimized_cbracket[offset >> 1] == 0) - { - allocate_stack(common, 3); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); - } - else - { - SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); - allocate_stack(common, 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - } - } -else if (opcode == OP_SBRA || opcode == OP_SCOND) - { - /* Saving the previous value. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - } -else if (has_alternatives) - { - /* Pushing the starting string pointer. */ - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - } - -/* Generating code for the first alternative. 
*/ -if (opcode == OP_COND || opcode == OP_SCOND) - { - if (*matchingpath == OP_CREF) - { - SLJIT_ASSERT(has_alternatives); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), - CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - matchingpath += 1 + IMM2_SIZE; - } - else if (*matchingpath == OP_NCREF) - { - SLJIT_ASSERT(has_alternatives); - stacksize = GET2(matchingpath, 1); - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw))); - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); - - JUMPHERE(jump); - matchingpath += 1 + IMM2_SIZE; - } - else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF) - { - /* Never has other case. 
*/ - BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; - - stacksize = GET2(matchingpath, 1); - if (common->currententry == NULL) - stacksize = 0; - else if (stacksize == RREF_ANY) - stacksize = 1; - else if (common->currententry->start == 0) - stacksize = stacksize == 0; - else - stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - - if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL) - { - SLJIT_ASSERT(!has_alternatives); - if (stacksize != 0) - matchingpath += 1 + IMM2_SIZE; - else - { - if (*cc == OP_ALT) - { - matchingpath = cc + 1 + LINK_SIZE; - cc += GET(cc, 1); - } - else - matchingpath = cc; - } - } - else - { - SLJIT_ASSERT(has_alternatives); - - stacksize = GET2(matchingpath, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE)); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize); - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); - matchingpath += 1 + IMM2_SIZE; - } - } - else - { - SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); - /* Similar code as PUSH_BACKTRACK macro. 
*/ - assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return NULL; - memset(assert, 0, sizeof(assert_backtrack)); - assert->common.cc = matchingpath; - BACKTRACK_AS(bracket_backtrack)->u.assert = assert; - matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); - } - } - -compile_matchingpath(common, matchingpath, cc, backtrack); -if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return NULL; - -if (opcode == OP_ONCE) - { - if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - /* TMP2 which is set here used by OP_KETRMAX below. */ - if (ket == OP_KETRMAX) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); - else if (ket == OP_KETRMIN) - { - /* Move the STR_PTR to the private_data_ptr. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); - } - } - else - { - stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1; - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw)); - if (ket == OP_KETRMAX) - { - /* TMP2 which is set here used by OP_KETRMAX below. 
*/ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - } - } - -stacksize = 0; -if (ket != OP_KET || bra != OP_BRA) - stacksize++; -if (has_alternatives && opcode != OP_ONCE) - stacksize++; - -if (stacksize > 0) - allocate_stack(common, stacksize); - -stacksize = 0; -if (ket != OP_KET) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); - stacksize++; - } -else if (bra != OP_BRA) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); - stacksize++; - } - -if (has_alternatives) - { - if (opcode != OP_ONCE) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); - if (ket != OP_KETRMAX) - BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); - } - -/* Must be after the matchingpath label. */ -if (offset != 0) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0); - } - -if (ket == OP_KETRMAX) - { - if (opcode == OP_ONCE || opcode >= OP_SBRA) - { - if (has_alternatives) - BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); - /* Checking zero-length iteration. */ - if (opcode != OP_ONCE) - { - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel); - /* Drop STR_PTR for greedy plus quantifier. */ - if (bra != OP_BRAZERO) - free_stack(common, 1); - } - else - /* TMP2 must contain the starting STR_PTR. */ - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel); - } - else - JUMPTO(SLJIT_JUMP, rmaxlabel); - BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); - } - -if (bra == OP_BRAZERO) - BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); - -if (bra == OP_BRAMINZERO) - { - /* This is a backtrack path! 
(From the viewpoint of OP_BRAMINZERO) */ - JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); - if (braminzerojump != NULL) - { - JUMPHERE(braminzerojump); - /* We need to release the end pointer to perform the - backtrack for the zero-length iteration. When - framesize is < 0, OP_ONCE will do the release itself. */ - if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - } - else if (ket == OP_KETRMIN && opcode != OP_ONCE) - free_stack(common, 1); - } - /* Continue to the normal backtrack. */ - } - -if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) - decrease_call_count(common); - -/* Skip the other alternatives. */ -while (*cc == OP_ALT) - cc += GET(cc, 1); -cc += 1 + LINK_SIZE; -return cc; -} - -static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; -pcre_uchar opcode; -int private_data_ptr; -int cbraprivptr = 0; -int framesize; -int stacksize; -int offset = 0; -BOOL zero = FALSE; -pcre_uchar *ccbegin = NULL; -int stack; -struct sljit_label *loop = NULL; -struct jump_list *emptymatch = NULL; - -PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); -if (*cc == OP_BRAPOSZERO) - { - zero = TRUE; - cc++; - } - -opcode = *cc; -private_data_ptr = PRIVATE_DATA(cc); -SLJIT_ASSERT(private_data_ptr != 0); -BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; -switch(opcode) - { - case OP_BRAPOS: - case OP_SBRAPOS: - ccbegin = cc + 1 + LINK_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: - offset = GET2(cc, 1 + LINK_SIZE); - /* This case cannot be optimized in the same was as - normal capturing brackets. 
*/ - SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); - cbraprivptr = OVECTOR_PRIV(offset); - offset <<= 1; - ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - -framesize = get_framesize(common, cc, FALSE); -BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; -if (framesize < 0) - { - stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1; - if (!zero) - stacksize++; - BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; - allocate_stack(common, stacksize); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - } - else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - - if (!zero) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1); - } -else - { - stacksize = framesize + 1; - if (!zero) - stacksize++; - if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS) - stacksize++; - BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; - allocate_stack(common, stacksize); - - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); - stack = 0; - if (!zero) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); - stack++; - } - if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); - stack++; - } - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); - init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE); - } - -if (opcode == OP_CBRAPOS 
|| opcode == OP_SCBRAPOS) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); - -loop = LABEL(); -while (*cc != OP_KETRPOS) - { - backtrack->top = NULL; - backtrack->topbacktracks = NULL; - cc += GET(cc, 1); - - compile_matchingpath(common, ccbegin, cc, backtrack); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return NULL; - - if (framesize < 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - } - else - { - if (opcode == OP_SBRAPOS) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - } - - if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) - add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); - - if (!zero) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); - } - else - { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) - { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - } - else - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); - if (opcode == OP_SBRAPOS) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); - 
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); - } - - if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) - add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); - - if (!zero) - { - if (framesize < 0) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - } - JUMPTO(SLJIT_JUMP, loop); - flush_stubs(common); - - compile_backtrackingpath(common, backtrack->top); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return NULL; - set_jumps(backtrack->topbacktracks, LABEL()); - - if (framesize < 0) - { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - else - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - else - { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) - { - /* Last alternative. */ - if (*cc == OP_KETRPOS) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - } - else - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); - } - } - - if (*cc == OP_KETRPOS) - break; - ccbegin = cc + 1 + LINK_SIZE; - } - -backtrack->topbacktracks = NULL; -if (!zero) - { - if (framesize < 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); - else /* TMP2 is set to [private_data_ptr] above. */ - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); - } - -/* None of them matched. 
*/ -set_jumps(emptymatch, LABEL()); -decrease_call_count(common); -return cc + 1 + LINK_SIZE; -} - -static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end) -{ -int class_len; - -*opcode = *cc; -if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) - { - cc++; - *type = OP_CHAR; - } -else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) - { - cc++; - *type = OP_CHARI; - *opcode -= OP_STARI - OP_STAR; - } -else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) - { - cc++; - *type = OP_NOT; - *opcode -= OP_NOTSTAR - OP_STAR; - } -else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) - { - cc++; - *type = OP_NOTI; - *opcode -= OP_NOTSTARI - OP_STAR; - } -else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) - { - cc++; - *opcode -= OP_TYPESTAR - OP_STAR; - *type = 0; - } -else - { - SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS); - *type = *opcode; - cc++; - class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); - *opcode = cc[class_len - 1]; - if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) - { - *opcode -= OP_CRSTAR - OP_STAR; - if (end != NULL) - *end = cc + class_len; - } - else - { - SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); - *arg1 = GET2(cc, (class_len + IMM2_SIZE)); - *arg2 = GET2(cc, class_len); - - if (*arg2 == 0) - { - SLJIT_ASSERT(*arg1 != 0); - *opcode = (*opcode == OP_CRRANGE) ? 
OP_UPTO : OP_MINUPTO; - } - if (*arg1 == *arg2) - *opcode = OP_EXACT; - - if (end != NULL) - *end = cc + class_len + 2 * IMM2_SIZE; - } - return cc; - } - -if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) - { - *arg1 = GET2(cc, 0); - cc += IMM2_SIZE; - } - -if (*type == 0) - { - *type = *cc; - if (end != NULL) - *end = next_opcode(common, cc); - cc++; - return cc; - } - -if (end != NULL) - { - *end = cc + 1; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); -#endif - } -return cc; -} - -static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; -pcre_uchar opcode; -pcre_uchar type; -int arg1 = -1, arg2 = -1; -pcre_uchar* end; -jump_list *nomatch = NULL; -struct sljit_jump *jump = NULL; -struct sljit_label *label; -int private_data_ptr = PRIVATE_DATA(cc); -int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); -int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); -int tmp_base, tmp_offset; - -PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); - -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end); - -switch (type) - { - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: - case OP_ANYNL: - case OP_NOT_HSPACE: - case OP_HSPACE: - case OP_NOT_VSPACE: - case OP_VSPACE: - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_CLASS: - case OP_NCLASS: - tmp_base = TMP3; - tmp_offset = 0; - break; - - default: - SLJIT_ASSERT_STOP(); - /* Fall through. 
*/ - - case OP_EXTUNI: - case OP_XCLASS: - case OP_NOTPROP: - case OP_PROP: - tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG); - tmp_offset = POSSESSIVE0; - break; - } - -switch(opcode) - { - case OP_STAR: - case OP_PLUS: - case OP_UPTO: - case OP_CRRANGE: - if (type == OP_ANYNL || type == OP_EXTUNI) - { - SLJIT_ASSERT(private_data_ptr == 0); - if (opcode == OP_STAR || opcode == OP_UPTO) - { - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); - } - else - { - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } - - if (opcode == OP_UPTO || opcode == OP_CRRANGE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); - - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (opcode == OP_UPTO || opcode == OP_CRRANGE) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - if (opcode == OP_CRRANGE && arg2 > 0) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label); - if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0)) - jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); - } - - /* We cannot use TMP3 because of this allocate_stack. 
*/ - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); - if (jump != NULL) - JUMPHERE(jump); - } - else - { - if (opcode == OP_PLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode <= OP_PLUS) - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - else - OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode <= OP_PLUS) - JUMPTO(SLJIT_JUMP, label); - else if (opcode == OP_CRRANGE && arg1 == 0) - { - OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); - JUMPTO(SLJIT_JUMP, label); - } - else - { - OP1(SLJIT_MOV, TMP1, 0, base, offset1); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, base, offset1, TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); - } - set_jumps(nomatch, LABEL()); - if (opcode == OP_CRRANGE) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1)); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - } - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - break; - - case OP_MINSTAR: - case OP_MINPLUS: - if (opcode == OP_MINPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (private_data_ptr == 0) - allocate_stack(common, 1); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - break; - - case OP_MINUPTO: - case OP_CRMINRANGE: - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); - if (opcode == OP_CRMINRANGE) - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - break; - - case 
OP_QUERY: - case OP_MINQUERY: - if (private_data_ptr == 0) - allocate_stack(common, 1); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode == OP_QUERY) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - break; - - case OP_EXACT: - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); - break; - - case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSUPTO: - if (opcode == OP_POSPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (opcode == OP_POSUPTO) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - if (opcode != OP_POSUPTO) - JUMPTO(SLJIT_JUMP, label); - else - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); - } - set_jumps(nomatch, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); - break; - - case OP_POSQUERY: - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - compile_char1_matchingpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - set_jumps(nomatch, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - -decrease_call_count(common); -return end; -} - -static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; - -PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, 
NULL); - -if (*cc == OP_FAIL) - { - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); - return cc + 1; - } - -if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL) - { - /* No need to check notempty conditions. */ - if (common->acceptlabel == NULL) - add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); - else - JUMPTO(SLJIT_JUMP, common->acceptlabel); - return cc + 1; - } - -if (common->acceptlabel == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0))); -else - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); -if (common->acceptlabel == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0)); -else - CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -if (common->acceptlabel == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); -else - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); -return cc + 1; -} - -static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc) -{ -DEFINE_COMPILER; -int offset = GET2(cc, 1); -BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; - -/* Data will be discarded anyway... 
*/ -if (common->currententry != NULL) - return cc + 1 + IMM2_SIZE; - -if (!optimized_cbracket) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); -offset <<= 1; -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); -if (!optimized_cbracket) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); -return cc + 1 + IMM2_SIZE; -} - -static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) -{ -DEFINE_COMPILER; -backtrack_common *backtrack; - -while (cc < ccend) - { - switch(*cc) - { - case OP_SOD: - case OP_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: - case OP_NOTPROP: - case OP_PROP: - case OP_ANYNL: - case OP_NOT_HSPACE: - case OP_HSPACE: - case OP_NOT_VSPACE: - case OP_VSPACE: - case OP_EXTUNI: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - case OP_NOT: - case OP_NOTI: - case OP_REVERSE: - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); - break; - - case OP_SET_SOM: - PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - cc++; - break; - - case OP_CHAR: - case OP_CHARI: - if (common->mode == JIT_COMPILE) - cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); - else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); - break; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_UPTO: - case OP_MINUPTO: - case OP_EXACT: - case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSQUERY: - case OP_POSUPTO: - case OP_STARI: - case OP_MINSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_EXACTI: - case OP_POSSTARI: - case OP_POSPLUSI: - case OP_POSQUERYI: - case OP_POSUPTOI: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTEXACT: - case OP_NOTPOSSTAR: - case OP_NOTPOSPLUS: - case OP_NOTPOSQUERY: - case OP_NOTPOSUPTO: - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTEXACTI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERYI: - case OP_NOTPOSUPTOI: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - case OP_TYPEPOSUPTO: - cc = compile_iterator_matchingpath(common, cc, parent); - break; - - case OP_CLASS: - case OP_NCLASS: - if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); - else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); - break; - -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case OP_XCLASS: - if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); - else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); - break; -#endif - - case OP_REF: - case OP_REFI: - if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) - cc = compile_ref_iterator_matchingpath(common, cc, parent); - else - cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); - break; - - case OP_RECURSE: - cc = compile_recurse_matchingpath(common, cc, parent); - break; - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); - cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); - break; - - case OP_BRAMINZERO: - PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); - cc = bracketend(cc + 1); - if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) - { - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - } - else - { - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); - } - BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); - if (cc[1] > OP_ASSERTBACK_NOT) - decrease_call_count(common); - break; - - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRA: - case OP_CBRA: - case OP_COND: - case OP_SBRA: - case OP_SCBRA: - case OP_SCOND: - cc = compile_bracket_matchingpath(common, cc, parent); - break; - - case OP_BRAZERO: - if (cc[1] > OP_ASSERTBACK_NOT) - cc = compile_bracket_matchingpath(common, cc, parent); - else - { - 
PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); - cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); - } - break; - - case OP_BRAPOS: - case OP_CBRAPOS: - case OP_SBRAPOS: - case OP_SCBRAPOS: - case OP_BRAPOSZERO: - cc = compile_bracketpos_matchingpath(common, cc, parent); - break; - - case OP_MARK: - PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); - SLJIT_ASSERT(common->mark_ptr != 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); - allocate_stack(common, 1); - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); - cc += 1 + 2 + cc[1]; - break; - - case OP_COMMIT: - PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); - cc += 1; - break; - - case OP_FAIL: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - cc = compile_fail_accept_matchingpath(common, cc, parent); - break; - - case OP_CLOSE: - cc = compile_close_matchingpath(common, cc); - break; - - case OP_SKIPZERO: - cc = bracketend(cc + 1); - break; - - default: - SLJIT_ASSERT_STOP(); - return; - } - if (cc == NULL) - return; - } -SLJIT_ASSERT(cc == ccend); -} - -#undef PUSH_BACKTRACK -#undef PUSH_BACKTRACK_NOVALUE -#undef BACKTRACK_AS - -#define COMPILE_BACKTRACKINGPATH(current) \ - do \ - { \ - compile_backtrackingpath(common, (current)); \ - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ - return; \ - } \ - while (0) - -#define CURRENT_AS(type) ((type *)current) - -static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; -pcre_uchar *cc = current->cc; -pcre_uchar opcode; -pcre_uchar type; -int arg1 = -1, arg2 = -1; -struct sljit_label *label = NULL; -struct sljit_jump *jump = NULL; -jump_list 
*jumplist = NULL; -int private_data_ptr = PRIVATE_DATA(cc); -int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); -int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); - -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL); - -switch(opcode) - { - case OP_STAR: - case OP_PLUS: - case OP_UPTO: - case OP_CRRANGE: - if (type == OP_ANYNL || type == OP_EXTUNI) - { - SLJIT_ASSERT(private_data_ptr == 0); - set_jumps(current->topbacktracks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); - } - else - { - if (opcode == OP_UPTO) - arg2 = 0; - if (opcode <= OP_PLUS) - { - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1); - } - else - { - OP1(SLJIT_MOV, TMP1, 0, base, offset1); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1); - OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); - } - skip_char_back(common); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - if (opcode == OP_CRRANGE) - set_jumps(current->topbacktracks, LABEL()); - JUMPHERE(jump); - if (private_data_ptr == 0) - free_stack(common, 2); - if (opcode == OP_PLUS) - set_jumps(current->topbacktracks, LABEL()); - } - break; - - case OP_MINSTAR: - case OP_MINPLUS: - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); - if (private_data_ptr == 0) - free_stack(common, 1); - if (opcode == OP_MINPLUS) - set_jumps(current->topbacktracks, 
LABEL()); - break; - - case OP_MINUPTO: - case OP_CRMINRANGE: - if (opcode == OP_CRMINRANGE) - { - label = LABEL(); - set_jumps(current->topbacktracks, label); - } - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); - - OP1(SLJIT_MOV, TMP1, 0, base, offset1); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, base, offset1, TMP1, 0); - - if (opcode == OP_CRMINRANGE) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label); - - if (opcode == OP_CRMINRANGE && arg1 == 0) - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - else - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath); - - set_jumps(jumplist, LABEL()); - if (private_data_ptr == 0) - free_stack(common, 2); - break; - - case OP_QUERY: - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); - jump = JUMP(SLJIT_JUMP); - set_jumps(current->topbacktracks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - JUMPHERE(jump); - if (private_data_ptr == 0) - free_stack(common, 1); - break; - - case OP_MINQUERY: - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_matchingpath(common, type, cc, &jumplist); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); - JUMPHERE(jump); - if (private_data_ptr == 0) - free_stack(common, 1); - break; - - case OP_EXACT: - case OP_POSPLUS: - set_jumps(current->topbacktracks, LABEL()); - break; - - case OP_POSSTAR: - case OP_POSQUERY: - case OP_POSUPTO: - break; - - default: - SLJIT_ASSERT_STOP(); - break; 
- } -} - -static void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; -pcre_uchar *cc = current->cc; -pcre_uchar type; - -type = cc[1 + IMM2_SIZE]; -if ((type & 0x1) == 0) - { - set_jumps(current->topbacktracks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); - return; - } - -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); -set_jumps(current->topbacktracks, LABEL()); -free_stack(common, 2); -} - -static void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; - -set_jumps(current->topbacktracks, LABEL()); - -if (common->has_set_som && common->mark_ptr != 0) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); - } -else if (common->has_set_som || common->mark_ptr != 0) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? 
(int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); - } -} - -static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; -pcre_uchar *cc = current->cc; -pcre_uchar bra = OP_BRA; -struct sljit_jump *brajump = NULL; - -SLJIT_ASSERT(*cc != OP_BRAMINZERO); -if (*cc == OP_BRAZERO) - { - bra = *cc; - cc++; - } - -if (bra == OP_BRAZERO) - { - SLJIT_ASSERT(current->topbacktracks == NULL); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - -if (CURRENT_AS(assert_backtrack)->framesize < 0) - { - set_jumps(current->topbacktracks, LABEL()); - - if (bra == OP_BRAZERO) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); - free_stack(common, 1); - } - return; - } - -if (bra == OP_BRAZERO) - { - if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) - { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); - free_stack(common, 1); - return; - } - free_stack(common, 1); - brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - } - -if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); - - set_jumps(current->topbacktracks, LABEL()); - } -else - set_jumps(current->topbacktracks, LABEL()); - -if (bra == OP_BRAZERO) - { - /* We know there is enough place on the stack. 
*/ - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); - JUMPHERE(brajump); - } -} - -static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; -int opcode; -int offset = 0; -int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; -int stacksize; -int count; -pcre_uchar *cc = current->cc; -pcre_uchar *ccbegin; -pcre_uchar *ccprev; -jump_list *jumplist = NULL; -jump_list *jumplistitem = NULL; -pcre_uchar bra = OP_BRA; -pcre_uchar ket; -assert_backtrack *assert; -BOOL has_alternatives; -struct sljit_jump *brazero = NULL; -struct sljit_jump *once = NULL; -struct sljit_jump *cond = NULL; -struct sljit_label *rminlabel = NULL; - -if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) - { - bra = *cc; - cc++; - } - -opcode = *cc; -ccbegin = cc; -ket = *(bracketend(ccbegin) - 1 - LINK_SIZE); -cc += GET(cc, 1); -has_alternatives = *cc == OP_ALT; -if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; -if (opcode == OP_CBRA || opcode == OP_SCBRA) - offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; -if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) - opcode = OP_SCOND; -if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) - opcode = OP_ONCE; - -if (ket == OP_KETRMAX) - { - if (bra == OP_BRAZERO) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0); - } - } -else if (ket == OP_KETRMIN) - { - if (bra != OP_BRAMINZERO) - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (opcode >= OP_SBRA || opcode == OP_ONCE) - { - /* Checking zero-length 
iteration. */ - if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); - else - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); - } - if (opcode != OP_ONCE) - free_stack(common, 1); - } - else - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); - } - rminlabel = LABEL(); - } -else if (bra == OP_BRAZERO) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); - } - -if (SLJIT_UNLIKELY(opcode == OP_ONCE)) - { - if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - } - once = JUMP(SLJIT_JUMP); - } -else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - { - if (has_alternatives) - { - /* Always exactly one alternative. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - - jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list)); - if (SLJIT_UNLIKELY(!jumplistitem)) - return; - jumplist = jumplistitem; - jumplistitem->next = NULL; - jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1); - } - } -else if (*cc == OP_ALT) - { - /* Build a jump list. Get the last successfully matched branch index. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - count = 1; - do - { - /* Append as the last item. 
*/ - if (jumplist != NULL) - { - jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list)); - jumplistitem = jumplistitem->next; - } - else - { - jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list)); - jumplist = jumplistitem; - } - - if (SLJIT_UNLIKELY(!jumplistitem)) - return; - - jumplistitem->next = NULL; - jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++); - cc += GET(cc, 1); - } - while (*cc == OP_ALT); - - cc = ccbegin + GET(ccbegin, 1); - } - -COMPILE_BACKTRACKINGPATH(current->top); -if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); - -if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - { - /* Conditional block always has at most one alternative. */ - if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) - { - SLJIT_ASSERT(has_alternatives); - assert = CURRENT_AS(bracket_backtrack)->u.assert; - if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); - } - cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); - } - else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) - { - SLJIT_ASSERT(has_alternatives); - cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); - } - else - SLJIT_ASSERT(!has_alternatives); - } - -if (has_alternatives) - { - count = 1; - do - { - current->top = NULL; - current->topbacktracks = NULL; - current->nextbacktracks = NULL; - if (*cc == OP_ALT) - { - ccprev = cc + 1 + LINK_SIZE; - cc += GET(cc, 1); - if (opcode != OP_COND && opcode != OP_SCOND) - { - if (private_data_ptr != 0 
&& opcode != OP_ONCE) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - else - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - compile_matchingpath(common, ccprev, cc, current); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - return; - } - - /* Instructions after the current alternative is succesfully matched. */ - /* There is a similar code in compile_bracket_matchingpath. */ - if (opcode == OP_ONCE) - { - if (CURRENT_AS(bracket_backtrack)->u.framesize < 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - /* TMP2 which is set here used by OP_KETRMAX below. */ - if (ket == OP_KETRMAX) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); - else if (ket == OP_KETRMIN) - { - /* Move the STR_PTR to the private_data_ptr. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); - } - } - else - { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_sw)); - if (ket == OP_KETRMAX) - { - /* TMP2 which is set here used by OP_KETRMAX below. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - } - } - - stacksize = 0; - if (opcode != OP_ONCE) - stacksize++; - if (ket != OP_KET || bra != OP_BRA) - stacksize++; - - if (stacksize > 0) { - if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0) - allocate_stack(common, stacksize); - else - { - /* We know we have place at least for one item on the top of the stack. 
*/ - SLJIT_ASSERT(stacksize == 1); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - } - } - - stacksize = 0; - if (ket != OP_KET || bra != OP_BRA) - { - if (ket != OP_KET) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); - stacksize++; - } - - if (opcode != OP_ONCE) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++); - - if (offset != 0) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0); - } - - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); - - if (opcode != OP_ONCE) - { - SLJIT_ASSERT(jumplist); - JUMPHERE(jumplist->jump); - jumplist = jumplist->next; - } - - COMPILE_BACKTRACKINGPATH(current->top); - if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); - SLJIT_ASSERT(!current->nextbacktracks); - } - while (*cc == OP_ALT); - SLJIT_ASSERT(!jumplist); - - if (cond != NULL) - { - SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); - assert = CURRENT_AS(bracket_backtrack)->u.assert; - if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) - - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); - } - JUMPHERE(cond); - } - - /* Free the STR_PTR. */ - if (private_data_ptr == 0) - free_stack(common, 1); - } - -if (offset != 0) - { - /* Using both tmp register is better for instruction scheduling. 
*/ - if (common->optimized_cbracket[offset >> 1] == 0) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); - free_stack(common, 3); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); - } - else - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); - } - } -else if (opcode == OP_SBRA || opcode == OP_SCOND) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - } -else if (opcode == OP_ONCE) - { - cc = ccbegin + GET(ccbegin, 1); - if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) - { - /* Reset head and drop saved frame. */ - stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1; - free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize); - } - else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) - { - /* The STR_PTR must be released. */ - free_stack(common, 1); - } - - JUMPHERE(once); - /* Restore previous private_data_ptr */ - if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); - else if (ket == OP_KETRMIN) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - /* See the comment below. 
*/ - free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); - } - } - -if (ket == OP_KETRMAX) - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (bra != OP_BRAZERO) - free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); - if (bra == OP_BRAZERO) - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); - JUMPHERE(brazero); - free_stack(common, 1); - } - } -else if (ket == OP_KETRMIN) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - - /* OP_ONCE removes everything in case of a backtrack, so we don't - need to explicitly release the STR_PTR. The extra release would - affect badly the free_stack(2) above. */ - if (opcode != OP_ONCE) - free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel); - if (opcode == OP_ONCE) - free_stack(common, bra == OP_BRAMINZERO ? 
2 : 1); - else if (bra == OP_BRAMINZERO) - free_stack(common, 1); - } -else if (bra == OP_BRAZERO) - { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); - JUMPHERE(brazero); - } -} - -static void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; -int offset; -struct sljit_jump *jump; - -if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) - { - if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) - { - offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); - } - set_jumps(current->topbacktracks, LABEL()); - free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); - return; - } - -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); -add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - -if (current->topbacktracks) - { - jump = JUMP(SLJIT_JUMP); - set_jumps(current->topbacktracks, LABEL()); - /* Drop the stack frame. */ - free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); - JUMPHERE(jump); - } -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw)); -} - -static void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -assert_backtrack backtrack; - -current->top = NULL; -current->topbacktracks = NULL; -current->nextbacktracks = NULL; -if (current->cc[1] > OP_ASSERTBACK_NOT) - { - /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. 
*/ - compile_bracket_matchingpath(common, current->cc, current); - compile_bracket_backtrackingpath(common, current->top); - } -else - { - memset(&backtrack, 0, sizeof(backtrack)); - backtrack.common.cc = current->cc; - backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath; - /* Manual call of compile_assert_matchingpath. */ - compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); - } -SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); -} - -static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) -{ -DEFINE_COMPILER; - -while (current) - { - if (current->nextbacktracks != NULL) - set_jumps(current->nextbacktracks, LABEL()); - switch(*current->cc) - { - case OP_SET_SOM: - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0); - break; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_UPTO: - case OP_MINUPTO: - case OP_EXACT: - case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSQUERY: - case OP_POSUPTO: - case OP_STARI: - case OP_MINSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_EXACTI: - case OP_POSSTARI: - case OP_POSPLUSI: - case OP_POSQUERYI: - case OP_POSUPTOI: - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTEXACT: - case OP_NOTPOSSTAR: - case OP_NOTPOSPLUS: - case OP_NOTPOSQUERY: - case OP_NOTPOSUPTO: - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTEXACTI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERYI: - case OP_NOTPOSUPTOI: - case OP_TYPESTAR: 
- case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - case OP_TYPEPOSUPTO: - case OP_CLASS: - case OP_NCLASS: -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: -#endif - compile_iterator_backtrackingpath(common, current); - break; - - case OP_REF: - case OP_REFI: - compile_ref_iterator_backtrackingpath(common, current); - break; - - case OP_RECURSE: - compile_recurse_backtrackingpath(common, current); - break; - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - compile_assert_backtrackingpath(common, current); - break; - - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRA: - case OP_CBRA: - case OP_COND: - case OP_SBRA: - case OP_SCBRA: - case OP_SCOND: - compile_bracket_backtrackingpath(common, current); - break; - - case OP_BRAZERO: - if (current->cc[1] > OP_ASSERTBACK_NOT) - compile_bracket_backtrackingpath(common, current); - else - compile_assert_backtrackingpath(common, current); - break; - - case OP_BRAPOS: - case OP_CBRAPOS: - case OP_SBRAPOS: - case OP_SCBRAPOS: - case OP_BRAPOSZERO: - compile_bracketpos_backtrackingpath(common, current); - break; - - case OP_BRAMINZERO: - compile_braminzero_backtrackingpath(common, current); - break; - - case OP_MARK: - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); - break; - - case OP_COMMIT: - OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); - if (common->quitlabel == NULL) - add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); - else - JUMPTO(SLJIT_JUMP, common->quitlabel); - break; - - case OP_FAIL: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - set_jumps(current->topbacktracks, LABEL()); - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - 
current = current->prev; - } -} - -static SLJIT_INLINE void compile_recurse(compiler_common *common) -{ -DEFINE_COMPILER; -pcre_uchar *cc = common->start + common->currententry->start; -pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); -pcre_uchar *ccend = bracketend(cc); -int private_data_size = get_private_data_length_for_copy(common, ccbegin, ccend); -int framesize = get_framesize(common, cc, TRUE); -int alternativesize; -BOOL needsframe; -backtrack_common altbacktrack; -struct sljit_label *save_quitlabel = common->quitlabel; -jump_list *save_quit = common->quit; -struct sljit_jump *jump; - -SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); -needsframe = framesize >= 0; -if (!needsframe) - framesize = 0; -alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0; - -SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0); -common->currententry->entry = LABEL(); -set_jumps(common->currententry->calls, common->currententry->entry); - -sljit_emit_fast_enter(compiler, TMP2, 0); -allocate_stack(common, private_data_size + framesize + alternativesize); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0); -copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0); -if (needsframe) - init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE); - -if (alternativesize > 0) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - -memset(&altbacktrack, 0, sizeof(backtrack_common)); -common->quitlabel = NULL; -common->acceptlabel = NULL; -common->quit = NULL; -common->accept = NULL; -altbacktrack.cc = ccbegin; -cc += GET(cc, 1); -while (1) - { - altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; - - if (altbacktrack.cc != 
ccbegin) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - - compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - common->quitlabel = save_quitlabel; - common->quit = save_quit; - return; - } - - add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); - - compile_backtrackingpath(common, altbacktrack.top); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - common->quitlabel = save_quitlabel; - common->quit = save_quit; - return; - } - set_jumps(altbacktrack.topbacktracks, LABEL()); - - if (*cc != OP_ALT) - break; - - altbacktrack.cc = cc + 1 + LINK_SIZE; - cc += GET(cc, 1); - } -/* None of them matched. */ -if (common->quit != NULL) - set_jumps(common->quit, LABEL()); - -OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); -jump = JUMP(SLJIT_JUMP); - -set_jumps(common->accept, LABEL()); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head); -if (needsframe) - { - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); - } -OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); - -JUMPHERE(jump); -copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize); -free_stack(common, private_data_size + framesize + alternativesize); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); -OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0); -sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); - -common->quitlabel = save_quitlabel; -common->quit = save_quit; -} - -#undef COMPILE_BACKTRACKINGPATH -#undef CURRENT_AS - -void -PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode) -{ -struct 
sljit_compiler *compiler; -backtrack_common rootbacktrack; -compiler_common common_data; -compiler_common *common = &common_data; -const pcre_uint8 *tables = re->tables; -pcre_study_data *study; -int private_data_size; -pcre_uchar *ccend; -executable_functions *functions; -void *executable_func; -sljit_uw executable_size; -struct sljit_label *mainloop = NULL; -struct sljit_label *empty_match_found; -struct sljit_label *empty_match_backtrack; -struct sljit_jump *jump; -struct sljit_jump *reqbyte_notfound = NULL; -struct sljit_jump *empty_match; - -SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); -study = extra->study_data; - -if (!tables) - tables = PRIV(default_tables); - -memset(&rootbacktrack, 0, sizeof(backtrack_common)); -memset(common, 0, sizeof(compiler_common)); -rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; - -common->start = rootbacktrack.cc; -common->fcc = tables + fcc_offset; -common->lcc = (sljit_sw)(tables + lcc_offset); -common->mode = mode; -common->nltype = NLTYPE_FIXED; -switch(re->options & PCRE_NEWLINE_BITS) - { - case 0: - /* Compile-time default */ - switch(NEWLINE) - { - case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; - case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; - default: common->newline = NEWLINE; break; - } - break; - case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break; - case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break; - case PCRE_NEWLINE_CR+ - PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; - case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; - case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; - default: return; - } -if ((re->options & PCRE_BSR_ANYCRLF) != 0) - common->bsr_nltype = NLTYPE_ANYCRLF; -else if ((re->options & PCRE_BSR_UNICODE) != 0) 
- common->bsr_nltype = NLTYPE_ANY; -else - { -#ifdef BSR_ANYCRLF - common->bsr_nltype = NLTYPE_ANYCRLF; -#else - common->bsr_nltype = NLTYPE_ANY; -#endif - } -common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; -common->ctypes = (sljit_sw)(tables + ctypes_offset); -common->digits[0] = -2; -common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset); -common->name_count = re->name_count; -common->name_entry_size = re->name_entry_size; -common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -#ifdef SUPPORT_UTF -/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ -common->utf = (re->options & PCRE_UTF8) != 0; -#ifdef SUPPORT_UCP -common->use_ucp = (re->options & PCRE_UCP) != 0; -#endif -#endif /* SUPPORT_UTF */ -ccend = bracketend(rootbacktrack.cc); - -/* Calculate the local space size on the stack. */ -common->ovector_start = CALL_LIMIT + sizeof(sljit_sw); -common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1); -if (!common->optimized_cbracket) - return; -memset(common->optimized_cbracket, 1, re->top_bracket + 1); - -SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); -private_data_size = get_private_data_length(common, rootbacktrack.cc, ccend); -if (private_data_size < 0) - { - SLJIT_FREE(common->optimized_cbracket); - return; - } - -/* Checking flags and updating ovector_start. 
*/ -if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) - { - common->req_char_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } -if (mode != JIT_COMPILE) - { - common->start_used_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - if (mode == JIT_PARTIAL_SOFT_COMPILE) - { - common->hit_start = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } - } -if ((re->options & PCRE_FIRSTLINE) != 0) - { - common->first_line_end = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } - -/* Aligning ovector to even number of sljit words. */ -if ((common->ovector_start & sizeof(sljit_sw)) != 0) - common->ovector_start += sizeof(sljit_sw); - -SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); -common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); -private_data_size += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_sw); -if (private_data_size > SLJIT_MAX_LOCAL_SIZE) - { - SLJIT_FREE(common->optimized_cbracket); - return; - } -common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(int)); -if (!common->private_data_ptrs) - { - SLJIT_FREE(common->optimized_cbracket); - return; - } -memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int)); -set_private_data_ptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_sw), ccend); - -compiler = sljit_create_compiler(); -if (!compiler) - { - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - return; - } -common->compiler = compiler; - -/* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 1, 5, 5, private_data_size); - -/* Register init. 
*/ -reset_ovector(common, (re->top_bracket + 1) * 2); -if (common->req_char_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0); - -OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0); -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit)); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); -OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0); - -if (mode == JIT_PARTIAL_SOFT_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); - -/* Main part of the matching */ -if ((re->options & PCRE_ANCHORED) == 0) - { - mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); - /* Forward search if possible. 
*/ - if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) - { - if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0)) - { /* Do nothing */ } - else if ((re->flags & PCRE_FIRSTSET) != 0) - fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) != 0) - fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); - } - } -if (common->req_char_ptr != 0) - reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); - -/* Store the current STR_PTR in OVECTOR(0). */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); -/* Copy the limit of allowed recursions. */ -OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT); -if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); -/* Copy the beginning of the string. 
*/ -if (mode == JIT_PARTIAL_SOFT_COMPILE) - { - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - JUMPHERE(jump); - } -else if (mode == JIT_PARTIAL_HARD_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - -compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack); -if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - return; - } - -empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); -empty_match_found = LABEL(); - -common->acceptlabel = LABEL(); -if (common->accept != NULL) - set_jumps(common->accept, common->acceptlabel); - -/* This means we have a match. Update the ovector. */ -copy_ovector(common, re->top_bracket + 1); -common->quitlabel = LABEL(); -if (common->quit != NULL) - set_jumps(common->quit, common->quitlabel); -sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - -if (mode != JIT_COMPILE) - { - common->partialmatchlabel = LABEL(); - set_jumps(common->partialmatch, common->partialmatchlabel); - return_with_partial_match(common, common->quitlabel); - } - -empty_match_backtrack = LABEL(); -compile_backtrackingpath(common, rootbacktrack.top); -if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - return; - } - -SLJIT_ASSERT(rootbacktrack.prev == NULL); - -if (mode == JIT_PARTIAL_SOFT_COMPILE) - { - /* Update hit_start only in the first time. 
*/ - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0); - JUMPHERE(jump); - } - -/* Check we have remaining characters. */ -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - -if ((re->options & PCRE_ANCHORED) == 0) - { - if ((re->options & PCRE_FIRSTLINE) == 0) - { - if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) - { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1)); - CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop); - } - else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); - } - else - { - SLJIT_ASSERT(common->first_line_end != 0); - if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) - { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); - JUMPTO(SLJIT_C_ZERO, mainloop); - } - else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, mainloop); - } - } - -/* No more remaining characters. 
*/ -if (reqbyte_notfound != NULL) - JUMPHERE(reqbyte_notfound); - -if (mode == JIT_PARTIAL_SOFT_COMPILE) - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0, common->partialmatchlabel); - -OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); -JUMPTO(SLJIT_JUMP, common->quitlabel); - -flush_stubs(common); - -JUMPHERE(empty_match); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); -CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found); -JUMPTO(SLJIT_JUMP, empty_match_backtrack); - -common->currententry = common->entries; -while (common->currententry != NULL) - { - /* Might add new entries. */ - compile_recurse(common); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) - { - sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - return; - } - flush_stubs(common); - common->currententry = common->currententry->next; - } - -/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ -/* This is a (really) rare case. */ -set_jumps(common->stackalloc, LABEL()); -/* RETURN_ADDR is not a saved register. 
*/ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); -OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); - -sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); -jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); -OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit)); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); -sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); - -/* Allocation failed. */ -JUMPHERE(jump); -/* We break the return address cache here, but this is a really rare case. */ -OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); -JUMPTO(SLJIT_JUMP, common->quitlabel); - -/* Call limit reached. 
*/ -set_jumps(common->calllimit, LABEL()); -OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); -JUMPTO(SLJIT_JUMP, common->quitlabel); - -if (common->revertframes != NULL) - { - set_jumps(common->revertframes, LABEL()); - do_revertframes(common); - } -if (common->wordboundary != NULL) - { - set_jumps(common->wordboundary, LABEL()); - check_wordboundary(common); - } -if (common->anynewline != NULL) - { - set_jumps(common->anynewline, LABEL()); - check_anynewline(common); - } -if (common->hspace != NULL) - { - set_jumps(common->hspace, LABEL()); - check_hspace(common); - } -if (common->vspace != NULL) - { - set_jumps(common->vspace, LABEL()); - check_vspace(common); - } -if (common->casefulcmp != NULL) - { - set_jumps(common->casefulcmp, LABEL()); - do_casefulcmp(common); - } -if (common->caselesscmp != NULL) - { - set_jumps(common->caselesscmp, LABEL()); - do_caselesscmp(common); - } -#ifdef SUPPORT_UTF -#ifndef COMPILE_PCRE32 -if (common->utfreadchar != NULL) - { - set_jumps(common->utfreadchar, LABEL()); - do_utfreadchar(common); - } -#endif /* !COMPILE_PCRE32 */ -#ifdef COMPILE_PCRE8 -if (common->utfreadtype8 != NULL) - { - set_jumps(common->utfreadtype8, LABEL()); - do_utfreadtype8(common); - } -#endif /* COMPILE_PCRE8 */ -#endif /* SUPPORT_UTF */ -#ifdef SUPPORT_UCP -if (common->getucd != NULL) - { - set_jumps(common->getucd, LABEL()); - do_getucd(common); - } -#endif - -SLJIT_FREE(common->optimized_cbracket); -SLJIT_FREE(common->private_data_ptrs); -executable_func = sljit_generate_code(compiler); -executable_size = sljit_get_generated_code_size(compiler); -sljit_free_compiler(compiler); -if (executable_func == NULL) - return; - -/* Reuse the function descriptor if possible. 
*/ -if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) - functions = (executable_functions *)extra->executable_jit; -else - { - /* Note: If your memory-checker has flagged the allocation below as a - * memory leak, it is probably because you either forgot to call - * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or - * pcre16_extra) object, or you called said function after having - * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field - * of the object. (The function will only free the JIT data if the - * bit remains set, as the bit indicates that the pointer to the data - * is valid.) - */ - functions = SLJIT_MALLOC(sizeof(executable_functions)); - if (functions == NULL) - { - /* This case is highly unlikely since we just recently - freed a lot of memory. Although not impossible. */ - sljit_free_code(executable_func); - return; - } - memset(functions, 0, sizeof(executable_functions)); - functions->top_bracket = (re->top_bracket + 1) * 2; - extra->executable_jit = functions; - extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; - } - -functions->executable_funcs[mode] = executable_func; -functions->executable_sizes[mode] = executable_size; -} - -static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func) -{ -union { - void* executable_func; - jit_function call_executable_func; -} convert_executable_func; -pcre_uint8 local_space[MACHINE_STACK_SIZE]; -struct sljit_stack local_stack; - -local_stack.top = (sljit_sw)&local_space; -local_stack.base = local_stack.top; -local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; -local_stack.max_limit = local_stack.limit; -arguments->stack = &local_stack; -convert_executable_func.executable_func = executable_func; -return convert_executable_func.call_executable_func(arguments); -} - -int -PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, - int length, int start_offset, int options, int *offsets, int offsetcount) -{ 
-executable_functions *functions = (executable_functions *)extra_data->executable_jit; -union { - void* executable_func; - jit_function call_executable_func; -} convert_executable_func; -jit_arguments arguments; -int maxoffsetcount; -int retval; -int mode = JIT_COMPILE; - -if ((options & PCRE_PARTIAL_HARD) != 0) - mode = JIT_PARTIAL_HARD_COMPILE; -else if ((options & PCRE_PARTIAL_SOFT) != 0) - mode = JIT_PARTIAL_SOFT_COMPILE; - -if (functions->executable_funcs[mode] == NULL) - return PCRE_ERROR_JIT_BADOPTION; - -/* Sanity checks should be handled by pcre_exec. */ -arguments.str = subject + start_offset; -arguments.begin = subject; -arguments.end = subject + length; -arguments.mark_ptr = NULL; -/* JIT decreases this value less frequently than the interpreter. */ -arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit; -arguments.notbol = (options & PCRE_NOTBOL) != 0; -arguments.noteol = (options & PCRE_NOTEOL) != 0; -arguments.notempty = (options & PCRE_NOTEMPTY) != 0; -arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; -arguments.offsets = offsets; - -/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of -the output vector for storing captured strings, with the remainder used as -workspace. We don't need the workspace here. For compatibility, we limit the -number of captured strings in the same way as pcre_exec(), so that the user -gets the same result with and without JIT. 
*/ - -if (offsetcount != 2) - offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3; -maxoffsetcount = functions->top_bracket; -if (offsetcount > maxoffsetcount) - offsetcount = maxoffsetcount; -arguments.offsetcount = offsetcount; - -if (functions->callback) - arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); -else - arguments.stack = (struct sljit_stack *)functions->userdata; - -if (arguments.stack == NULL) - retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); -else - { - convert_executable_func.executable_func = functions->executable_funcs[mode]; - retval = convert_executable_func.call_executable_func(&arguments); - } - -if (retval * 2 > offsetcount) - retval = 0; -if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = arguments.mark_ptr; - -return retval; -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, - PCRE_SPTR subject, int length, int start_offset, int options, - int *offsets, int offsetcount, pcre_jit_stack *stack) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, - PCRE_SPTR16 subject, int length, int start_offset, int options, - int *offsets, int offsetcount, pcre16_jit_stack *stack) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, - PCRE_SPTR32 subject, int length, int start_offset, int options, - int *offsets, int offsetcount, pcre32_jit_stack *stack) -#endif -{ -pcre_uchar *subject_ptr = (pcre_uchar *)subject; -executable_functions *functions = (executable_functions *)extra_data->executable_jit; -union { - void* executable_func; - jit_function call_executable_func; -} convert_executable_func; -jit_arguments arguments; -int maxoffsetcount; -int retval; -int mode = JIT_COMPILE; - 
-SLJIT_UNUSED_ARG(argument_re); - -/* Plausibility checks */ -if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; - -if ((options & PCRE_PARTIAL_HARD) != 0) - mode = JIT_PARTIAL_HARD_COMPILE; -else if ((options & PCRE_PARTIAL_SOFT) != 0) - mode = JIT_PARTIAL_SOFT_COMPILE; - -if (functions->executable_funcs[mode] == NULL) - return PCRE_ERROR_JIT_BADOPTION; - -/* Sanity checks should be handled by pcre_exec. */ -arguments.stack = (struct sljit_stack *)stack; -arguments.str = subject_ptr + start_offset; -arguments.begin = subject_ptr; -arguments.end = subject_ptr + length; -arguments.mark_ptr = NULL; -/* JIT decreases this value less frequently than the interpreter. */ -arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit; -arguments.notbol = (options & PCRE_NOTBOL) != 0; -arguments.noteol = (options & PCRE_NOTEOL) != 0; -arguments.notempty = (options & PCRE_NOTEMPTY) != 0; -arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; -arguments.offsets = offsets; - -/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of -the output vector for storing captured strings, with the remainder used as -workspace. We don't need the workspace here. For compatibility, we limit the -number of captured strings in the same way as pcre_exec(), so that the user -gets the same result with and without JIT. 
*/ - -if (offsetcount != 2) - offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3; -maxoffsetcount = functions->top_bracket; -if (offsetcount > maxoffsetcount) - offsetcount = maxoffsetcount; -arguments.offsetcount = offsetcount; - -convert_executable_func.executable_func = functions->executable_funcs[mode]; -retval = convert_executable_func.call_executable_func(&arguments); - -if (retval * 2 > offsetcount) - retval = 0; -if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = arguments.mark_ptr; - -return retval; -} - -void -PRIV(jit_free)(void *executable_funcs) -{ -int i; -executable_functions *functions = (executable_functions *)executable_funcs; -for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) - { - if (functions->executable_funcs[i] != NULL) - sljit_free_code(functions->executable_funcs[i]); - } -SLJIT_FREE(functions); -} - -int -PRIV(jit_get_size)(void *executable_funcs) -{ -int i; -sljit_uw size = 0; -sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; -for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) - size += executable_sizes[i]; -return (int)size; -} - -const char* -PRIV(jit_get_target)(void) -{ -return sljit_get_platform_name(); -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL pcre_jit_stack * -pcre_jit_stack_alloc(int startsize, int maxsize) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL pcre16_jit_stack * -pcre16_jit_stack_alloc(int startsize, int maxsize) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL pcre32_jit_stack * -pcre32_jit_stack_alloc(int startsize, int maxsize) -#endif -{ -if (startsize < 1 || maxsize < 1) - return NULL; -if (startsize > maxsize) - startsize = maxsize; -startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); -maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); -return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize); -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL void -pcre_jit_stack_free(pcre_jit_stack *stack) -#elif 
defined COMPILE_PCRE16 -PCRE_EXP_DECL void -pcre16_jit_stack_free(pcre16_jit_stack *stack) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL void -pcre32_jit_stack_free(pcre32_jit_stack *stack) -#endif -{ -sljit_free_stack((struct sljit_stack *)stack); -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL void -pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL void -pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL void -pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) -#endif -{ -executable_functions *functions; -if (extra != NULL && - (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && - extra->executable_jit != NULL) - { - functions = (executable_functions *)extra->executable_jit; - functions->callback = callback; - functions->userdata = userdata; - } -} - -#else /* SUPPORT_JIT */ - -/* These are dummy functions to avoid linking errors when JIT support is not -being compiled. 
*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL pcre_jit_stack * -pcre_jit_stack_alloc(int startsize, int maxsize) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL pcre16_jit_stack * -pcre16_jit_stack_alloc(int startsize, int maxsize) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL pcre32_jit_stack * -pcre32_jit_stack_alloc(int startsize, int maxsize) -#endif -{ -(void)startsize; -(void)maxsize; -return NULL; -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL void -pcre_jit_stack_free(pcre_jit_stack *stack) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL void -pcre16_jit_stack_free(pcre16_jit_stack *stack) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL void -pcre32_jit_stack_free(pcre32_jit_stack *stack) -#endif -{ -(void)stack; -} - -#if defined COMPILE_PCRE8 -PCRE_EXP_DECL void -pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DECL void -pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DECL void -pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) -#endif -{ -(void)extra; -(void)callback; -(void)userdata; -} - -#endif - -/* End of pcre_jit_compile.c */ diff --git a/deps/libmagic/pcre/pcre_maketables.c b/deps/libmagic/pcre/pcre_maketables.c deleted file mode 100644 index 610a669..0000000 --- a/deps/libmagic/pcre/pcre_maketables.c +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_maketables(), which builds -character tables for PCRE in the current locale. The file is compiled on its -own as part of the PCRE library. 
However, it is also included in the -compilation of dftables.c, in which case the macro DFTABLES is defined. */ - - -#ifndef DFTABLES -# ifdef HAVE_CONFIG_H -# include "config.h" -# endif -# include "pcre_internal.h" -#endif - - -/************************************************* -* Create PCRE character tables * -*************************************************/ - -/* This function builds a set of character tables for use by PCRE and returns -a pointer to them. They are build using the ctype functions, and consequently -their contents will depend upon the current locale setting. When compiled as -part of the library, the store is obtained via PUBL(malloc)(), but when -compiled inside dftables, use malloc(). - -Arguments: none -Returns: pointer to the contiguous block of data -*/ - -#if defined COMPILE_PCRE8 -const unsigned char * -pcre_maketables(void) -#elif defined COMPILE_PCRE16 -const unsigned char * -pcre16_maketables(void) -#elif defined COMPILE_PCRE32 -const unsigned char * -pcre32_maketables(void) -#endif -{ -unsigned char *yield, *p; -int i; - -#ifndef DFTABLES -yield = (unsigned char*)(PUBL(malloc))(tables_length); -#else -yield = (unsigned char*)malloc(tables_length); -#endif - -if (yield == NULL) return NULL; -p = yield; - -/* First comes the lower casing table */ - -for (i = 0; i < 256; i++) *p++ = tolower(i); - -/* Next the case-flipping table */ - -for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); - -/* Then the character class tables. Don't try to be clever and save effort on -exclusive ones - in some locales things may be different. Note that the table -for "space" includes everything "isspace" gives, including VT in the default -locale. This makes it work for the POSIX class [:space:]. Note also that it is -possible for a character to be alnum or alpha without being lower or upper, -such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at -least under Debian Linux's locales as of 12/2005). 
So we must test for alnum -specially. */ - -memset(p, 0, cbit_length); -for (i = 0; i < 256; i++) - { - if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); - if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); - if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); - if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); - if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); - if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); - if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); - if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); - if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); - if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); - if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); - } -p += cbit_length; - -/* Finally, the character type table. In this, we exclude VT from the white -space chars, because Perl doesn't recognize it as such for \s and for comments -within regexes. */ - -for (i = 0; i < 256; i++) - { - int x = 0; - if (i != CHAR_VT && isspace(i)) x += ctype_space; - if (isalpha(i)) x += ctype_letter; - if (isdigit(i)) x += ctype_digit; - if (isxdigit(i)) x += ctype_xdigit; - if (isalnum(i) || i == '_') x += ctype_word; - - /* Note: strchr includes the terminating zero in the characters it considers. - In this instance, that is ok because we want binary zero to be flagged as a - meta-character, which in this sense is any character that terminates a run - of data characters. 
*/ - - if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; - *p++ = x; - } - -return yield; -} - -/* End of pcre_maketables.c */ diff --git a/deps/libmagic/pcre/pcre_newline.c b/deps/libmagic/pcre/pcre_newline.c deleted file mode 100644 index b8f5a4d..0000000 --- a/deps/libmagic/pcre/pcre_newline.c +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains internal functions for testing newlines when more than -one kind of newline is to be recognized. When a newline is found, its length is -returned. In principle, we could implement several newline "types", each -referring to a different set of newline characters. At present, PCRE supports -only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, -and NLTYPE_ANY. The full list of Unicode newline characters is taken from -http://unicode.org/unicode/reports/tr18/. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - - -/************************************************* -* Check for newline at given position * -*************************************************/ - -/* It is guaranteed that the initial value of ptr is less than the end of the -string that is being processed. - -Arguments: - ptr pointer to possible newline - type the newline type - endptr pointer to the end of the string - lenptr where to return the length - utf TRUE if in utf mode - -Returns: TRUE or FALSE -*/ - -BOOL -PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, - BOOL utf) -{ -pcre_uint32 c; -(void)utf; -#ifdef SUPPORT_UTF -if (utf) - { - GETCHAR(c, ptr); - } -else -#endif /* SUPPORT_UTF */ - c = *ptr; - -/* Note that this function is called only for ANY or ANYCRLF. 
*/ - -if (type == NLTYPE_ANYCRLF) switch(c) - { - case CHAR_LF: *lenptr = 1; return TRUE; - case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; - return TRUE; - default: return FALSE; - } - -/* NLTYPE_ANY */ - -else switch(c) - { -#ifdef EBCDIC - case CHAR_NEL: -#endif - case CHAR_LF: - case CHAR_VT: - case CHAR_FF: *lenptr = 1; return TRUE; - - case CHAR_CR: - *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; - return TRUE; - -#ifndef EBCDIC -#ifdef COMPILE_PCRE8 - case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; - case 0x2028: /* LS */ - case 0x2029: *lenptr = 3; return TRUE; /* PS */ -#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ - case CHAR_NEL: - case 0x2028: /* LS */ - case 0x2029: *lenptr = 1; return TRUE; /* PS */ -#endif /* COMPILE_PCRE8 */ -#endif /* Not EBCDIC */ - - default: return FALSE; - } -} - - - -/************************************************* -* Check for newline at previous position * -*************************************************/ - -/* It is guaranteed that the initial value of ptr is greater than the start of -the string that is being processed. - -Arguments: - ptr pointer to possible newline - type the newline type - startptr pointer to the start of the string - lenptr where to return the length - utf TRUE if in utf mode - -Returns: TRUE or FALSE -*/ - -BOOL -PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, - BOOL utf) -{ -pcre_uint32 c; -(void)utf; -ptr--; -#ifdef SUPPORT_UTF -if (utf) - { - BACKCHAR(ptr); - GETCHAR(c, ptr); - } -else -#endif /* SUPPORT_UTF */ - c = *ptr; - -/* Note that this function is called only for ANY or ANYCRLF. */ - -if (type == NLTYPE_ANYCRLF) switch(c) - { - case CHAR_LF: - *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; - return TRUE; - - case CHAR_CR: *lenptr = 1; return TRUE; - default: return FALSE; - } - -/* NLTYPE_ANY */ - -else switch(c) - { - case CHAR_LF: - *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 
2 : 1; - return TRUE; - -#ifdef EBCDIC - case CHAR_NEL: -#endif - case CHAR_VT: - case CHAR_FF: - case CHAR_CR: *lenptr = 1; return TRUE; - -#ifndef EBCDIC -#ifdef COMPILE_PCRE8 - case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; - case 0x2028: /* LS */ - case 0x2029: *lenptr = 3; return TRUE; /* PS */ -#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ - case CHAR_NEL: - case 0x2028: /* LS */ - case 0x2029: *lenptr = 1; return TRUE; /* PS */ -#endif /* COMPILE_PCRE8 */ -#endif /* NotEBCDIC */ - - default: return FALSE; - } -} - -/* End of pcre_newline.c */ diff --git a/deps/libmagic/pcre/pcre_ord2utf8.c b/deps/libmagic/pcre/pcre_ord2utf8.c deleted file mode 100644 index 95f1beb..0000000 --- a/deps/libmagic/pcre/pcre_ord2utf8.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This file contains a private PCRE function that converts an ordinal -character value into a UTF8 string. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define COMPILE_PCRE8 - -#include "pcre_internal.h" - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - -/* This function takes an integer value in the range 0 - 0x10ffff -and encodes it as a UTF-8 character in 1 to 4 pcre_uchars. 
- -Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 pcre_uchars long - -Returns: number of characters placed in the buffer -*/ - -unsigned -int -PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer) -{ -#ifdef SUPPORT_UTF - -register int i, j; - -for (i = 0; i < PRIV(utf8_table1_size); i++) - if ((int)cvalue <= PRIV(utf8_table1)[i]) break; -buffer += i; -for (j = i; j > 0; j--) - { - *buffer-- = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } -*buffer = PRIV(utf8_table2)[i] | cvalue; -return i + 1; - -#else - -(void)(cvalue); /* Keep compiler happy; this function won't ever be */ -(void)(buffer); /* called when SUPPORT_UTF is not defined. */ -return 0; - -#endif -} - -/* End of pcre_ord2utf8.c */ diff --git a/deps/libmagic/pcre/pcre_printint.c b/deps/libmagic/pcre/pcre_printint.c deleted file mode 100644 index 10b5754..0000000 --- a/deps/libmagic/pcre/pcre_printint.c +++ /dev/null @@ -1,766 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains a PCRE private debugging function for printing out the -internal form of a compiled regular expression, along with some supporting -local functions. This source file is used in two places: - -(1) It is #included by pcre_compile.c when it is compiled in debugging mode -(PCRE_DEBUG defined in pcre_internal.h). It is not included in production -compiles. In this case PCRE_INCLUDED is defined. - -(2) It is also compiled separately and linked with pcretest.c, which can be -asked to print out a compiled regex for debugging purposes. */ - -#ifndef PCRE_INCLUDED - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* For pcretest program. 
*/ -#define PRIV(name) name - -/* We have to include pcre_internal.h because we need the internal info for -displaying the results of pcre_study() and we also need to know about the -internal macros, structures, and other internal data values; pcretest has -"inside information" compared to a program that strictly follows the PCRE API. - -Although pcre_internal.h does itself include pcre.h, we explicitly include it -here before pcre_internal.h so that the PCRE_EXP_xxx macros get set -appropriately for an application, not for building PCRE. */ - -#include "pcre.h" -#include "pcre_internal.h" - -/* These are the funtions that are contained within. It doesn't seem worth -having a separate .h file just for this. */ - -#endif /* PCRE_INCLUDED */ - -#ifdef PCRE_INCLUDED -static /* Keep the following function as private. */ -#endif - -#if defined COMPILE_PCRE8 -void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); -#elif defined COMPILE_PCRE16 -void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); -#elif defined COMPILE_PCRE32 -void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths); -#endif - -/* Macro that decides whether a character should be output as a literal or in -hexadecimal. We don't use isprint() because that can vary from system to system -(even without the use of locales) and we want the output always to be the same, -for testing purposes. */ - -#ifdef EBCDIC -#define PRINTABLE(c) ((c) >= 64 && (c) < 255) -#else -#define PRINTABLE(c) ((c) >= 32 && (c) < 127) -#endif - -/* The table of operator names. */ - -static const char *priv_OP_names[] = { OP_NAME_LIST }; - -/* This table of operator lengths is not actually used by the working code, -but its size is needed for a check that ensures it is the correct size for the -number of opcodes (thus catching update omissions). 
*/ - -static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS }; - - - -/************************************************* -* Print single- or multi-byte character * -*************************************************/ - -static unsigned int -print_char(FILE *f, pcre_uchar *ptr, BOOL utf) -{ -pcre_uint32 c = *ptr; - -#ifndef SUPPORT_UTF - -(void)utf; /* Avoid compiler warning */ -if (PRINTABLE(c)) fprintf(f, "%c", (char)c); -else if (c <= 0x80) fprintf(f, "\\x%02x", c); -else fprintf(f, "\\x{%x}", c); -return 0; - -#else - -#if defined COMPILE_PCRE8 - -if (!utf || (c & 0xc0) != 0xc0) - { - if (PRINTABLE(c)) fprintf(f, "%c", (char)c); - else if (c < 0x80) fprintf(f, "\\x%02x", c); - else fprintf(f, "\\x{%02x}", c); - return 0; - } -else - { - int i; - int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ - int s = 6*a; - c = (c & PRIV(utf8_table3)[a]) << s; - for (i = 1; i <= a; i++) - { - /* This is a check for malformed UTF-8; it should only occur if the sanity - check has been turned off. Rather than swallow random bytes, just stop if - we hit a bad one. Print it with \X instead of \x as an indication. */ - - if ((ptr[i] & 0xc0) != 0x80) - { - fprintf(f, "\\X{%x}", c); - return i - 1; - } - - /* The byte is OK */ - - s -= 6; - c |= (ptr[i] & 0x3f) << s; - } - fprintf(f, "\\x{%x}", c); - return a; - } - -#elif defined COMPILE_PCRE16 - -if (!utf || (c & 0xfc00) != 0xd800) - { - if (PRINTABLE(c)) fprintf(f, "%c", (char)c); - else if (c <= 0x80) fprintf(f, "\\x%02x", c); - else fprintf(f, "\\x{%02x}", c); - return 0; - } -else - { - /* This is a check for malformed UTF-16; it should only occur if the sanity - check has been turned off. Rather than swallow a low surrogate, just stop if - we hit a bad one. Print it with \X instead of \x as an indication. 
*/ - - if ((ptr[1] & 0xfc00) != 0xdc00) - { - fprintf(f, "\\X{%x}", c); - return 0; - } - - c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; - fprintf(f, "\\x{%x}", c); - return 1; - } - -#elif defined COMPILE_PCRE32 - -if (!utf || (c & 0xfffff800u) != 0xd800u) - { - if (PRINTABLE(c)) fprintf(f, "%c", (char)c); - else if (c <= 0x80) fprintf(f, "\\x%02x", c); - else fprintf(f, "\\x{%x}", c); - return 0; - } -else - { - /* This is a check for malformed UTF-32; it should only occur if the sanity - check has been turned off. Rather than swallow a surrogate, just stop if - we hit one. Print it with \X instead of \x as an indication. */ - fprintf(f, "\\X{%x}", c); - return 0; - } - -#endif /* COMPILE_PCRE[8|16|32] */ - -#endif /* SUPPORT_UTF */ -} - -/************************************************* -* Print uchar string (regardless of utf) * -*************************************************/ - -static void -print_puchar(FILE *f, PCRE_PUCHAR ptr) -{ -while (*ptr != '\0') - { - register pcre_uint32 c = *ptr++; - if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); - } -} - -/************************************************* -* Find Unicode property name * -*************************************************/ - -static const char * -get_ucpname(unsigned int ptype, unsigned int pvalue) -{ -#ifdef SUPPORT_UCP -int i; -for (i = PRIV(utt_size) - 1; i >= 0; i--) - { - if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break; - } -return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??"; -#else -/* It gets harder and harder to shut off unwanted compiler warnings. */ -ptype = ptype * pvalue; -return (ptype == pvalue)? "??" : "??"; -#endif -} - - -/************************************************* -* Print Unicode property value * -*************************************************/ - -/* "Normal" properties can be printed from tables. 
The PT_CLIST property is a -pseudo-property that contains a pointer to a list of case-equivalent -characters. This is used only when UCP support is available and UTF mode is -selected. It should never occur otherwise, but just in case it does, have -something ready to print. */ - -static void -print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after) -{ -if (code[1] != PT_CLIST) - { - fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1], - code[2]), after); - } -else - { - const char *not = (*code == OP_PROP)? "" : "not "; -#ifndef SUPPORT_UCP - fprintf(f, "%s%sclist %d%s", before, not, code[2], after); -#else - const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2]; - fprintf (f, "%s%sclist", before, not); - while (*p < NOTACHAR) fprintf(f, " %04x", *p++); - fprintf(f, "%s", after); -#endif - } -} - - - - -/************************************************* -* Print compiled regex * -*************************************************/ - -/* Make this function work for a regex with integers either byte order. -However, we assume that what we are passed is a compiled regex. The -print_lengths flag controls whether offsets and lengths of items are printed. -They can be turned off from pcretest so that automatic tests on bytecode can be -written that do not depend on the value of LINK_SIZE. */ - -#ifdef PCRE_INCLUDED -static /* Keep the following function as private. 
*/ -#endif -#if defined COMPILE_PCRE8 -void -pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) -#elif defined COMPILE_PCRE16 -void -pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) -#elif defined COMPILE_PCRE32 -void -pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths) -#endif -{ -REAL_PCRE *re = (REAL_PCRE *)external_re; -pcre_uchar *codestart, *code; -BOOL utf; - -unsigned int options = re->options; -int offset = re->name_table_offset; -int count = re->name_count; -int size = re->name_entry_size; - -if (re->magic_number != MAGIC_NUMBER) - { - offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); - count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); - size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); - options = ((options << 24) & 0xff000000) | - ((options << 8) & 0x00ff0000) | - ((options >> 8) & 0x0000ff00) | - ((options >> 24) & 0x000000ff); - } - -code = codestart = (pcre_uchar *)re + offset + count * size; -/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ -utf = (options & PCRE_UTF8) != 0; - -for(;;) - { - pcre_uchar *ccode; - const char *flag = " "; - pcre_uint32 c; - unsigned int extra = 0; - - if (print_lengths) - fprintf(f, "%3d ", (int)(code - codestart)); - else - fprintf(f, " "); - - switch(*code) - { -/* ========================================================================== */ - /* These cases are never obeyed. This is a fudge that causes a compile- - time error if the vectors OP_names or OP_lengths, which are indexed - by opcode, are not the correct length. It seems to be the only way to do - such a check at compile time, as the sizeof() operator does not work in - the C preprocessor. 
*/ - - case OP_TABLE_LENGTH: - case OP_TABLE_LENGTH + - ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && - (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)): - break; -/* ========================================================================== */ - - case OP_END: - fprintf(f, " %s\n", priv_OP_names[*code]); - fprintf(f, "------------------------------------------------------------------\n"); - return; - - case OP_CHAR: - fprintf(f, " "); - do - { - code++; - code += 1 + print_char(f, code, utf); - } - while (*code == OP_CHAR); - fprintf(f, "\n"); - continue; - - case OP_CHARI: - fprintf(f, " /i "); - do - { - code++; - code += 1 + print_char(f, code, utf); - } - while (*code == OP_CHARI); - fprintf(f, "\n"); - continue; - - case OP_CBRA: - case OP_CBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); - else fprintf(f, " "); - fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE)); - break; - - case OP_BRA: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_ALT: - case OP_KET: - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_COND: - case OP_SCOND: - case OP_REVERSE: - if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); - else fprintf(f, " "); - fprintf(f, "%s", priv_OP_names[*code]); - break; - - case OP_CLOSE: - fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1)); - break; - - case OP_CREF: - case OP_NCREF: - fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]); - break; - - case OP_RREF: - c = GET2(code, 1); - if (c == RREF_ANY) - fprintf(f, " Cond recurse any"); - else - fprintf(f, " Cond recurse %d", c); - break; - - case OP_NRREF: - c = GET2(code, 1); - if (c == RREF_ANY) - fprintf(f, " Cond nrecurse any"); - else - fprintf(f, " Cond nrecurse %d", c); - break; - - case OP_DEF: - fprintf(f, " Cond def"); - break; - - case 
OP_STARI: - case OP_MINSTARI: - case OP_POSSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_POSQUERYI: - flag = "/i"; - /* Fall through */ - case OP_STAR: - case OP_MINSTAR: - case OP_POSSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_POSQUERY: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - fprintf(f, " %s ", flag); - if (*code >= OP_TYPESTAR) - { - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) - { - print_prop(f, code + 1, "", " "); - extra = 2; - } - else fprintf(f, "%s", priv_OP_names[code[1]]); - } - else extra = print_char(f, code+1, utf); - fprintf(f, "%s", priv_OP_names[*code]); - break; - - case OP_EXACTI: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_POSUPTOI: - flag = "/i"; - /* Fall through */ - case OP_EXACT: - case OP_UPTO: - case OP_MINUPTO: - case OP_POSUPTO: - fprintf(f, " %s ", flag); - extra = print_char(f, code + 1 + IMM2_SIZE, utf); - fprintf(f, "{"); - if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); - fprintf(f, "%d}", GET2(code,1)); - if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); - else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); - break; - - case OP_TYPEEXACT: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - { - print_prop(f, code + IMM2_SIZE + 1, " ", " "); - extra = 2; - } - else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]); - fprintf(f, "{"); - if (*code != OP_TYPEEXACT) fprintf(f, "0,"); - fprintf(f, "%d}", GET2(code,1)); - if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); - else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); - break; - - case OP_NOTI: - flag = "/i"; - /* Fall through */ - case OP_NOT: - 
fprintf(f, " %s [^", flag); - extra = print_char(f, code + 1, utf); - fprintf(f, "]"); - break; - - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPOSSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTPOSPLUSI: - case OP_NOTQUERYI: - case OP_NOTMINQUERYI: - case OP_NOTPOSQUERYI: - flag = "/i"; - /* Fall through */ - - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPOSSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTPOSPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - case OP_NOTPOSQUERY: - fprintf(f, " %s [^", flag); - extra = print_char(f, code + 1, utf); - fprintf(f, "]%s", priv_OP_names[*code]); - break; - - case OP_NOTEXACTI: - case OP_NOTUPTOI: - case OP_NOTMINUPTOI: - case OP_NOTPOSUPTOI: - flag = "/i"; - /* Fall through */ - - case OP_NOTEXACT: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTPOSUPTO: - fprintf(f, " %s [^", flag); - extra = print_char(f, code + 1 + IMM2_SIZE, utf); - fprintf(f, "]{"); - if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); - fprintf(f, "%d}", GET2(code,1)); - if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); - else - if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); - break; - - case OP_RECURSE: - if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); - else fprintf(f, " "); - fprintf(f, "%s", priv_OP_names[*code]); - break; - - case OP_REFI: - flag = "/i"; - /* Fall through */ - case OP_REF: - fprintf(f, " %s \\%d", flag, GET2(code,1)); - ccode = code + priv_OP_lengths[*code]; - goto CLASS_REF_REPEAT; - - case OP_CALLOUT: - fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2), - GET(code, 2 + LINK_SIZE)); - break; - - case OP_PROP: - case OP_NOTPROP: - print_prop(f, code, " ", ""); - break; - - /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no - harm in having this code always here, and it makes it less messy without - all those #ifdefs. 
*/ - - case OP_CLASS: - case OP_NCLASS: - case OP_XCLASS: - { - int i; - unsigned int min, max; - BOOL printmap; - pcre_uint8 *map; - - fprintf(f, " ["); - - if (*code == OP_XCLASS) - { - extra = GET(code, 1); - ccode = code + LINK_SIZE + 1; - printmap = (*ccode & XCL_MAP) != 0; - if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); - } - else - { - printmap = TRUE; - ccode = code + 1; - } - - /* Print a bit map */ - - if (printmap) - { - map = (pcre_uint8 *)ccode; - for (i = 0; i < 256; i++) - { - if ((map[i/8] & (1 << (i&7))) != 0) - { - int j; - for (j = i+1; j < 256; j++) - if ((map[j/8] & (1 << (j&7))) == 0) break; - if (i == '-' || i == ']') fprintf(f, "\\"); - if (PRINTABLE(i)) fprintf(f, "%c", i); - else fprintf(f, "\\x%02x", i); - if (--j > i) - { - if (j != i + 1) fprintf(f, "-"); - if (j == '-' || j == ']') fprintf(f, "\\"); - if (PRINTABLE(j)) fprintf(f, "%c", j); - else fprintf(f, "\\x%02x", j); - } - i = j; - } - } - ccode += 32 / sizeof(pcre_uchar); - } - - /* For an XCLASS there is always some additional data */ - - if (*code == OP_XCLASS) - { - pcre_uchar ch; - while ((ch = *ccode++) != XCL_END) - { - if (ch == XCL_PROP) - { - unsigned int ptype = *ccode++; - unsigned int pvalue = *ccode++; - fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); - } - else if (ch == XCL_NOTPROP) - { - unsigned int ptype = *ccode++; - unsigned int pvalue = *ccode++; - fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); - } - else - { - ccode += 1 + print_char(f, ccode, utf); - if (ch == XCL_RANGE) - { - fprintf(f, "-"); - ccode += 1 + print_char(f, ccode, utf); - } - } - } - } - - /* Indicate a non-UTF class which was created by negation */ - - fprintf(f, "]%s", (*code == OP_NCLASS)? 
" (neg)" : ""); - - /* Handle repeats after a class or a back reference */ - - CLASS_REF_REPEAT: - switch(*ccode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - fprintf(f, "%s", priv_OP_names[*ccode]); - extra += priv_OP_lengths[*ccode]; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - min = GET2(ccode,1); - max = GET2(ccode,1 + IMM2_SIZE); - if (max == 0) fprintf(f, "{%u,}", min); - else fprintf(f, "{%u,%u}", min, max); - if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); - extra += priv_OP_lengths[*ccode]; - break; - - /* Do nothing if it's not a repeat; this code stops picky compilers - warning about the lack of a default code path. */ - - default: - break; - } - } - break; - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - fprintf(f, " %s ", priv_OP_names[*code]); - print_puchar(f, code + 2); - extra += code[1]; - break; - - case OP_THEN: - fprintf(f, " %s", priv_OP_names[*code]); - break; - - case OP_CIRCM: - case OP_DOLLM: - flag = "/m"; - /* Fall through */ - - /* Anything else is just an item with no data, but possibly a flag. */ - - default: - fprintf(f, " %s %s", flag, priv_OP_names[*code]); - break; - } - - code += priv_OP_lengths[*code] + extra; - fprintf(f, "\n"); - } -} - -/* End of pcre_printint.src */ diff --git a/deps/libmagic/pcre/pcre_refcount.c b/deps/libmagic/pcre/pcre_refcount.c deleted file mode 100644 index 79efa90..0000000 --- a/deps/libmagic/pcre/pcre_refcount.c +++ /dev/null @@ -1,92 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_refcount(), which is an -auxiliary function that can be used to maintain a reference count in a compiled -pattern data block. This might be helpful in applications where the block is -shared by different users. 
*/ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Maintain reference count * -*************************************************/ - -/* The reference count is a 16-bit field, initialized to zero. It is not -possible to transfer a non-zero count from one host to a different host that -has a different byte order - though I can't see why anyone in their right mind -would ever want to do that! - -Arguments: - argument_re points to compiled code - adjust value to add to the count - -Returns: the (possibly updated) count value (a non-negative number), or - a negative error number -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_refcount(pcre *argument_re, int adjust) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre16_refcount(pcre16 *argument_re, int adjust) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre32_refcount(pcre32 *argument_re, int adjust) -#endif -{ -REAL_PCRE *re = (REAL_PCRE *)argument_re; -if (re == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; -if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; -re->ref_count = (-adjust > re->ref_count)? 0 : - (adjust + re->ref_count > 65535)? 65535 : - re->ref_count + adjust; -return re->ref_count; -} - -/* End of pcre_refcount.c */ diff --git a/deps/libmagic/pcre/pcre_string_utils.c b/deps/libmagic/pcre/pcre_string_utils.c deleted file mode 100644 index 94a5126..0000000 --- a/deps/libmagic/pcre/pcre_string_utils.c +++ /dev/null @@ -1,211 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that is used to match an extended -class. It is used by both pcre_exec() and pcre_def_exec(). 
*/ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -#ifndef COMPILE_PCRE8 - -/************************************************* -* Compare string utilities * -*************************************************/ - -/* The following two functions compares two strings. Basically an strcmp -for non 8 bit characters. - -Arguments: - str1 first string - str2 second string - -Returns: 0 if both string are equal (like strcmp), 1 otherwise -*/ - -int -PRIV(strcmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2) -{ -pcre_uchar c1; -pcre_uchar c2; - -while (*str1 != '\0' || *str2 != '\0') - { - c1 = *str1++; - c2 = *str2++; - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. */ -return 0; -} - -#ifdef COMPILE_PCRE32 - -int -PRIV(strcmp_uc_uc_utf)(const pcre_uchar *str1, const pcre_uchar *str2) -{ -pcre_uchar c1; -pcre_uchar c2; - -while (*str1 != '\0' || *str2 != '\0') - { - c1 = RAWUCHARINC(str1); - c2 = RAWUCHARINC(str2); - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. */ -return 0; -} - -#endif /* COMPILE_PCRE32 */ - -int -PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2) -{ -const pcre_uint8 *ustr2 = (pcre_uint8 *)str2; -pcre_uchar c1; -pcre_uchar c2; - -while (*str1 != '\0' || *ustr2 != '\0') - { - c1 = *str1++; - c2 = (pcre_uchar)*ustr2++; - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. */ -return 0; -} - -#ifdef COMPILE_PCRE32 - -int -PRIV(strcmp_uc_c8_utf)(const pcre_uchar *str1, const char *str2) -{ -const pcre_uint8 *ustr2 = (pcre_uint8 *)str2; -pcre_uchar c1; -pcre_uchar c2; - -while (*str1 != '\0' || *ustr2 != '\0') - { - c1 = RAWUCHARINC(str1); - c2 = (pcre_uchar)*ustr2++; - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. 
*/ -return 0; -} - -#endif /* COMPILE_PCRE32 */ - -/* The following two functions compares two, fixed length -strings. Basically an strncmp for non 8 bit characters. - -Arguments: - str1 first string - str2 second string - num size of the string - -Returns: 0 if both string are equal (like strcmp), 1 otherwise -*/ - -int -PRIV(strncmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2, unsigned int num) -{ -pcre_uchar c1; -pcre_uchar c2; - -while (num-- > 0) - { - c1 = *str1++; - c2 = *str2++; - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. */ -return 0; -} - -int -PRIV(strncmp_uc_c8)(const pcre_uchar *str1, const char *str2, unsigned int num) -{ -const pcre_uint8 *ustr2 = (pcre_uint8 *)str2; -pcre_uchar c1; -pcre_uchar c2; - -while (num-- > 0) - { - c1 = *str1++; - c2 = (pcre_uchar)*ustr2++; - if (c1 != c2) - return ((c1 > c2) << 1) - 1; - } -/* Both length and characters must be equal. */ -return 0; -} - -/* The following function returns with the length of -a zero terminated string. Basically an strlen for non 8 bit characters. - -Arguments: - str string - -Returns: length of the string -*/ - -unsigned int -PRIV(strlen_uc)(const pcre_uchar *str) -{ -unsigned int len = 0; -while (*str++ != 0) - len++; -return len; -} - -#endif /* !COMPILE_PCRE8 */ - -/* End of pcre_string_utils.c */ diff --git a/deps/libmagic/pcre/pcre_study.c b/deps/libmagic/pcre/pcre_study.c deleted file mode 100644 index 12d2a66..0000000 --- a/deps/libmagic/pcre/pcre_study.c +++ /dev/null @@ -1,1562 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. 
- - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_study(), along with local -supporting functions. 
*/ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -#define SET_BIT(c) start_bits[c/8] |= (1 << (c&7)) - -/* Returns from set_start_bits() */ - -enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; - - - -/************************************************* -* Find the minimum subject length for a group * -*************************************************/ - -/* Scan a parenthesized group and compute the minimum length of subject that -is needed to match it. This is a lower bound; it does not mean there is a -string of that length that matches. In UTF8 mode, the result is in characters -rather than bytes. - -Arguments: - code pointer to start of group (the bracket) - startcode pointer to start of the whole pattern - options the compiling options - int RECURSE depth - -Returns: the minimum length - -1 if \C in UTF-8 mode or (*ACCEPT) was encountered - -2 internal error (missing capturing bracket) - -3 internal error (opcode not listed) -*/ - -static int -find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, - int recurse_depth) -{ -int length = -1; -/* PCRE_UTF16 has the same value as PCRE_UTF8. */ -BOOL utf = (options & PCRE_UTF8) != 0; -BOOL had_recurse = FALSE; -register int branchlength = 0; -register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; - -if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) - { - int d, min; - pcre_uchar *cs, *ce; - register pcre_uchar op = *cc; - - switch (op) - { - case OP_COND: - case OP_SCOND: - - /* If there is only one branch in a condition, the implied branch has zero - length, so we don't add anything. This covers the DEFINE "condition" - automatically. 
*/ - - cs = cc + GET(cc, 1); - if (*cs != OP_ALT) - { - cc = cs + 1 + LINK_SIZE; - break; - } - - /* Otherwise we can fall through and treat it the same as any other - subpattern. */ - - case OP_CBRA: - case OP_SCBRA: - case OP_BRA: - case OP_SBRA: - case OP_CBRAPOS: - case OP_SCBRAPOS: - case OP_BRAPOS: - case OP_SBRAPOS: - case OP_ONCE: - case OP_ONCE_NC: - d = find_minlength(cc, startcode, options, recurse_depth); - if (d < 0) return d; - branchlength += d; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; - - /* ACCEPT makes things far too complicated; we have to give up. */ - - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - return -1; - - /* Reached end of a branch; if it's a ket it is the end of a nested - call. If it's ALT it is an alternation in a nested call. If it is END it's - the end of the outer call. All can be handled by the same code. If an - ACCEPT was previously encountered, use the length that was in force at that - time, and pass back the shortest ACCEPT length. 
*/ - - case OP_ALT: - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_END: - if (length < 0 || (!had_recurse && branchlength < length)) - length = branchlength; - if (op != OP_ALT) return length; - cc += 1 + LINK_SIZE; - branchlength = 0; - had_recurse = FALSE; - break; - - /* Skip over assertive subpatterns */ - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += GET(cc, 1); while (*cc == OP_ALT); - /* Fall through */ - - /* Skip over things that don't match chars */ - - case OP_REVERSE: - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_DEF: - case OP_CALLOUT: - case OP_SOD: - case OP_SOM: - case OP_EOD: - case OP_EODN: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - cc += PRIV(OP_lengths)[*cc]; - break; - - /* Skip over a subpattern that has a {0} or {0,x} quantifier */ - - case OP_BRAZERO: - case OP_BRAMINZERO: - case OP_BRAPOSZERO: - case OP_SKIPZERO: - cc += PRIV(OP_lengths)[*cc]; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; - - /* Handle literal characters and + repetitions */ - - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_PLUS: - case OP_PLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - branchlength++; - cc += 2; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - branchlength++; - cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2; - break; - - /* Handle exact repetitions. The count is already in characters, but we - need to skip over a multibyte character in UTF8 mode. 
*/ - - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - branchlength += GET2(cc,1); - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - case OP_TYPEEXACT: - branchlength += GET2(cc,1); - cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP - || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); - break; - - /* Handle single-char non-literal matchers */ - - case OP_PROP: - case OP_NOTPROP: - cc += 2; - /* Fall through */ - - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_EXTUNI: - case OP_HSPACE: - case OP_NOT_HSPACE: - case OP_VSPACE: - case OP_NOT_VSPACE: - branchlength++; - cc++; - break; - - /* "Any newline" might match two characters, but it also might match just - one. */ - - case OP_ANYNL: - branchlength += 1; - cc++; - break; - - /* The single-byte matcher means we can't proceed in UTF-8 mode. (In - non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever - appear, but leave the code, just in case.) */ - - case OP_ANYBYTE: -#ifdef SUPPORT_UTF - if (utf) return -1; -#endif - branchlength++; - cc++; - break; - - /* For repeated character types, we have to test for \p and \P, which have - an extra two bytes of parameters. 
*/ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSQUERY: - if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2; - cc += PRIV(OP_lengths)[op]; - break; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - if (cc[1 + IMM2_SIZE] == OP_PROP - || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; - cc += PRIV(OP_lengths)[op]; - break; - - /* Check a class for variable quantification */ - - case OP_CLASS: - case OP_NCLASS: -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - case OP_XCLASS: - /* The original code caused an unsigned overflow in 64 bit systems, - so now we use a conditional statement. */ - if (op == OP_XCLASS) - cc += GET(cc, 1); - else - cc += PRIV(OP_lengths)[OP_CLASS]; -#else - cc += PRIV(OP_lengths)[OP_CLASS]; -#endif - - switch (*cc) - { - case OP_CRPLUS: - case OP_CRMINPLUS: - branchlength++; - /* Fall through */ - - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - cc++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - branchlength += GET2(cc,1); - cc += 1 + 2 * IMM2_SIZE; - break; - - default: - branchlength++; - break; - } - break; - - /* Backreferences and subroutine calls are treated in the same way: we find - the minimum length for the subpattern. A recursion, however, causes an - a flag to be set that causes the length of this branch to be ignored. The - logic is that a recursion can only make sense if there is another - alternation that stops the recursing. That will provide the minimum length - (when no recursion happens). A backreference within the group that it is - referencing behaves in the same way. - - If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket - matches an empty string (by default it causes a matching failure), so in - that case we must set the minimum length to zero. 
*/ - - case OP_REF: - case OP_REFI: - if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) - { - ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); - if (cs == NULL) return -2; - do ce += GET(ce, 1); while (*ce == OP_ALT); - if (cc > cs && cc < ce) - { - d = 0; - had_recurse = TRUE; - } - else - { - d = find_minlength(cs, startcode, options, recurse_depth); - } - } - else d = 0; - cc += 1 + IMM2_SIZE; - - /* Handle repeated back references */ - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - min = 0; - cc++; - break; - - case OP_CRPLUS: - case OP_CRMINPLUS: - min = 1; - cc++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - min = GET2(cc, 1); - cc += 1 + 2 * IMM2_SIZE; - break; - - default: - min = 1; - break; - } - - branchlength += min * d; - break; - - /* We can easily detect direct recursion, but not mutual recursion. This is - caught by a recursion depth count. */ - - case OP_RECURSE: - cs = ce = (pcre_uchar *)startcode + GET(cc, 1); - do ce += GET(ce, 1); while (*ce == OP_ALT); - if ((cc > cs && cc < ce) || recurse_depth > 10) - had_recurse = TRUE; - else - { - branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); - } - cc += 1 + LINK_SIZE; - break; - - /* Anything else does not or need not match a character. We can get the - item's length from the table, but for those that can match zero occurrences - of a character, we must take special action for UTF-8 characters. As it - happens, the "NOT" versions of these opcodes are used at present only for - ASCII characters, so they could be omitted from this list. However, in - future that may change, so we include them here so as not to leave a - gotcha for a future maintainer. 
*/ - - case OP_UPTO: - case OP_UPTOI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - - case OP_STAR: - case OP_STARI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - - case OP_QUERY: - case OP_QUERYI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - - cc += PRIV(OP_lengths)[op]; -#ifdef SUPPORT_UTF - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - - /* Skip these, but we need to add in the name length. */ - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - cc += PRIV(OP_lengths)[op] + cc[1]; - break; - - /* The remaining opcodes are just skipped over. */ - - case OP_CLOSE: - case OP_COMMIT: - case OP_FAIL: - case OP_PRUNE: - case OP_SET_SOM: - case OP_SKIP: - case OP_THEN: - cc += PRIV(OP_lengths)[op]; - break; - - /* This should not occur: we list all opcodes explicitly so that when - new ones get added they are properly considered. */ - - default: - return -3; - } - } -/* Control never gets here */ -} - - - -/************************************************* -* Set a bit and maybe its alternate case * -*************************************************/ - -/* Given a character, set its first byte's bit in the table, and also the -corresponding bit for the other version of a letter if we are caseless. In -UTF-8 mode, for characters greater than 127, we can only do the caseless thing -when Unicode property support is available. 
- -Arguments: - start_bits points to the bit map - p points to the character - caseless the caseless flag - cd the block with char table pointers - utf TRUE for UTF-8 / UTF-16 / UTF-32 mode - -Returns: pointer after the character -*/ - -static const pcre_uchar * -set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, - compile_data *cd, BOOL utf) -{ -pcre_uint32 c = *p; - -#ifdef COMPILE_PCRE8 -SET_BIT(c); - -#ifdef SUPPORT_UTF -if (utf && c > 127) - { - GETCHARINC(c, p); -#ifdef SUPPORT_UCP - if (caseless) - { - pcre_uchar buff[6]; - c = UCD_OTHERCASE(c); - (void)PRIV(ord2utf)(c, buff); - SET_BIT(buff[0]); - } -#endif /* Not SUPPORT_UCP */ - return p; - } -#else /* Not SUPPORT_UTF */ -(void)(utf); /* Stops warning for unused parameter */ -#endif /* SUPPORT_UTF */ - -/* Not UTF-8 mode, or character is less than 127. */ - -if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); -return p + 1; -#endif /* COMPILE_PCRE8 */ - -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -if (c > 0xff) - { - c = 0xff; - caseless = FALSE; - } -SET_BIT(c); - -#ifdef SUPPORT_UTF -if (utf && c > 127) - { - GETCHARINC(c, p); -#ifdef SUPPORT_UCP - if (caseless) - { - c = UCD_OTHERCASE(c); - if (c > 0xff) - c = 0xff; - SET_BIT(c); - } -#endif /* SUPPORT_UCP */ - return p; - } -#else /* Not SUPPORT_UTF */ -(void)(utf); /* Stops warning for unused parameter */ -#endif /* SUPPORT_UTF */ - -if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); -return p + 1; -#endif -} - - - -/************************************************* -* Set bits for a positive character type * -*************************************************/ - -/* This function sets starting bits for a character type. In UTF-8 mode, we can -only do a direct setting for bytes less than 128, as otherwise there can be -confusion with bytes in the middle of UTF-8 characters. 
In a "traditional" -environment, the tables will only recognize ASCII characters anyway, but in at -least one Windows environment, some higher bytes bits were set in the tables. -So we deal with that case by considering the UTF-8 encoding. - -Arguments: - start_bits the starting bitmap - cbit type the type of character wanted - table_limit 32 for non-UTF-8; 16 for UTF-8 - cd the block with char table pointers - -Returns: nothing -*/ - -static void -set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, - compile_data *cd) -{ -register pcre_uint32 c; -for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 -if (table_limit == 32) return; -for (c = 128; c < 256; c++) - { - if ((cd->cbits[c/8] & (1 << (c&7))) != 0) - { - pcre_uchar buff[6]; - (void)PRIV(ord2utf)(c, buff); - SET_BIT(buff[0]); - } - } -#endif -} - - -/************************************************* -* Set bits for a negative character type * -*************************************************/ - -/* This function sets starting bits for a negative character type such as \D. -In UTF-8 mode, we can only do a direct setting for bytes less than 128, as -otherwise there can be confusion with bytes in the middle of UTF-8 characters. -Unlike in the positive case, where we can set appropriate starting bits for -specific high-valued UTF-8 characters, in this case we have to set the bits for -all high-valued characters. The lowest is 0xc2, but we overkill by starting at -0xc0 (192) for simplicity. 
- -Arguments: - start_bits the starting bitmap - cbit type the type of character wanted - table_limit 32 for non-UTF-8; 16 for UTF-8 - cd the block with char table pointers - -Returns: nothing -*/ - -static void -set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, - compile_data *cd) -{ -register pcre_uint32 c; -for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 -if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; -#endif -} - - - -/************************************************* -* Create bitmap of starting bytes * -*************************************************/ - -/* This function scans a compiled unanchored expression recursively and -attempts to build a bitmap of the set of possible starting bytes. As time goes -by, we may be able to get more clever at doing this. The SSB_CONTINUE return is -useful for parenthesized groups in patterns such as (a*)b where the group -provides some optional starting bytes but scanning must continue at the outer -level to find at least one mandatory byte. At the outermost level, this -function fails unless the result is SSB_DONE. - -Arguments: - code points to an expression - start_bits points to a 32-byte table, initialized to 0 - utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode - cd the block with char table pointers - -Returns: SSB_FAIL => Failed to find any starting bytes - SSB_DONE => Found mandatory starting bytes - SSB_CONTINUE => Found optional starting bytes - SSB_UNKNOWN => Hit an unrecognized opcode -*/ - -static int -set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, - compile_data *cd) -{ -register pcre_uint32 c; -int yield = SSB_DONE; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 -int table_limit = utf? 
16:32; -#else -int table_limit = 32; -#endif - -#if 0 -/* ========================================================================= */ -/* The following comment and code was inserted in January 1999. In May 2006, -when it was observed to cause compiler warnings about unused values, I took it -out again. If anybody is still using OS/2, they will have to put it back -manually. */ - -/* This next statement and the later reference to dummy are here in order to -trick the optimizer of the IBM C compiler for OS/2 into generating correct -code. Apparently IBM isn't going to fix the problem, and we would rather not -disable optimization (in this module it actually makes a big difference, and -the pcre module can use all the optimization it can get). */ - -volatile int dummy; -/* ========================================================================= */ -#endif - -do - { - BOOL try_next = TRUE; - const pcre_uchar *tcode = code + 1 + LINK_SIZE; - - if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; - - while (try_next) /* Loop for items in this branch */ - { - int rc; - - switch(*tcode) - { - /* If we reach something we don't understand, it means a new opcode has - been created that hasn't been added to this code. Hopefully this problem - will be discovered during testing. */ - - default: - return SSB_UNKNOWN; - - /* Fail for a valid opcode that implies no starting bits. 
*/ - - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - case OP_ALLANY: - case OP_ANY: - case OP_ANYBYTE: - case OP_CIRC: - case OP_CIRCM: - case OP_CLOSE: - case OP_COMMIT: - case OP_COND: - case OP_CREF: - case OP_DEF: - case OP_DOLL: - case OP_DOLLM: - case OP_END: - case OP_EOD: - case OP_EODN: - case OP_EXTUNI: - case OP_FAIL: - case OP_MARK: - case OP_NCREF: - case OP_NOT: - case OP_NOTEXACT: - case OP_NOTEXACTI: - case OP_NOTI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - case OP_NOTPROP: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_NOT_HSPACE: - case OP_NOT_VSPACE: - case OP_NRREF: - case OP_PROP: - case OP_PRUNE: - case OP_PRUNE_ARG: - case OP_RECURSE: - case OP_REF: - case OP_REFI: - case OP_REVERSE: - case OP_RREF: - case OP_SCOND: - case OP_SET_SOM: - case OP_SKIP: - case OP_SKIP_ARG: - case OP_SOD: - case OP_SOM: - case OP_THEN: - case OP_THEN_ARG: -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: -#endif - return SSB_FAIL; - - /* We can ignore word boundary tests. */ - - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - tcode++; - break; - - /* If we hit a bracket or a positive lookahead assertion, recurse to set - bits from within the subpattern. If it can't find anything, we have to - give up. If it finds some mandatory character(s), we are done for this - branch. Otherwise, carry on scanning after the subpattern. 
*/ - - case OP_BRA: - case OP_SBRA: - case OP_CBRA: - case OP_SCBRA: - case OP_BRAPOS: - case OP_SBRAPOS: - case OP_CBRAPOS: - case OP_SCBRAPOS: - case OP_ONCE: - case OP_ONCE_NC: - case OP_ASSERT: - rc = set_start_bits(tcode, start_bits, utf, cd); - if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; - if (rc == SSB_DONE) try_next = FALSE; else - { - do tcode += GET(tcode, 1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - } - break; - - /* If we hit ALT or KET, it means we haven't found anything mandatory in - this branch, though we might have found something optional. For ALT, we - continue with the next alternative, but we have to arrange that the final - result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, - return SSB_CONTINUE: if this is the top level, that indicates failure, - but after a nested subpattern, it causes scanning to continue. */ - - case OP_ALT: - yield = SSB_CONTINUE; - try_next = FALSE; - break; - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - return SSB_CONTINUE; - - /* Skip over callout */ - - case OP_CALLOUT: - tcode += 2 + 2*LINK_SIZE; - break; - - /* Skip over lookbehind and negative lookahead assertions */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do tcode += GET(tcode, 1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; - - /* BRAZERO does the bracket, but carries on. */ - - case OP_BRAZERO: - case OP_BRAMINZERO: - case OP_BRAPOSZERO: - rc = set_start_bits(++tcode, start_bits, utf, cd); - if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; -/* ========================================================================= - See the comment at the head of this function concerning the next line, - which was an old fudge for the benefit of OS/2. 
- dummy = 1; - ========================================================================= */ - do tcode += GET(tcode,1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; - - /* SKIPZERO skips the bracket. */ - - case OP_SKIPZERO: - tcode++; - do tcode += GET(tcode,1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; - - /* Single-char * or ? sets the bit and tries the next item */ - - case OP_STAR: - case OP_MINSTAR: - case OP_POSSTAR: - case OP_QUERY: - case OP_MINQUERY: - case OP_POSQUERY: - tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); - break; - - case OP_STARI: - case OP_MINSTARI: - case OP_POSSTARI: - case OP_QUERYI: - case OP_MINQUERYI: - case OP_POSQUERYI: - tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); - break; - - /* Single-char upto sets the bit and tries the next */ - - case OP_UPTO: - case OP_MINUPTO: - case OP_POSUPTO: - tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); - break; - - case OP_UPTOI: - case OP_MINUPTOI: - case OP_POSUPTOI: - tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); - break; - - /* At least one single char sets the bit and stops */ - - case OP_EXACT: - tcode += IMM2_SIZE; - /* Fall through */ - case OP_CHAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSPLUS: - (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); - try_next = FALSE; - break; - - case OP_EXACTI: - tcode += IMM2_SIZE; - /* Fall through */ - case OP_CHARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); - try_next = FALSE; - break; - - /* Special spacing and line-terminating items. These recognize specific - lists of characters. The difference between VSPACE and ANYNL is that the - latter can match the two-character CRLF sequence, but that is not - relevant for finding the first character, so their code here is - identical. 
*/ - - case OP_HSPACE: - SET_BIT(CHAR_HT); - SET_BIT(CHAR_SPACE); -#ifdef SUPPORT_UTF - if (utf) - { -#ifdef COMPILE_PCRE8 - SET_BIT(0xC2); /* For U+00A0 */ - SET_BIT(0xE1); /* For U+1680, U+180E */ - SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ - SET_BIT(0xE3); /* For U+3000 */ -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(0xA0); - SET_BIT(0xFF); /* For characters > 255 */ -#endif /* COMPILE_PCRE[8|16|32] */ - } - else -#endif /* SUPPORT_UTF */ - { -#ifndef EBCDIC - SET_BIT(0xA0); -#endif /* Not EBCDIC */ -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(0xFF); /* For characters > 255 */ -#endif /* COMPILE_PCRE[16|32] */ - } - try_next = FALSE; - break; - - case OP_ANYNL: - case OP_VSPACE: - SET_BIT(CHAR_LF); - SET_BIT(CHAR_VT); - SET_BIT(CHAR_FF); - SET_BIT(CHAR_CR); -#ifdef SUPPORT_UTF - if (utf) - { -#ifdef COMPILE_PCRE8 - SET_BIT(0xC2); /* For U+0085 */ - SET_BIT(0xE2); /* For U+2028, U+2029 */ -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(CHAR_NEL); - SET_BIT(0xFF); /* For characters > 255 */ -#endif /* COMPILE_PCRE[8|16|32] */ - } - else -#endif /* SUPPORT_UTF */ - { - SET_BIT(CHAR_NEL); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(0xFF); /* For characters > 255 */ -#endif - } - try_next = FALSE; - break; - - /* Single character types set the bits and stop. Note that if PCRE_UCP - is set, we do not see these op codes because \d etc are converted to - properties. Therefore, these apply in the case when only characters less - than 256 are recognized to match the types. */ - - case OP_NOT_DIGIT: - set_nottype_bits(start_bits, cbit_digit, table_limit, cd); - try_next = FALSE; - break; - - case OP_DIGIT: - set_type_bits(start_bits, cbit_digit, table_limit, cd); - try_next = FALSE; - break; - - /* The cbit_space table has vertical tab as whitespace; we have to - ensure it is set as not whitespace. 
Luckily, the code value is the same - (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */ - - case OP_NOT_WHITESPACE: - set_nottype_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] |= 0x08; - try_next = FALSE; - break; - - /* The cbit_space table has vertical tab as whitespace; we have to not - set it from the table. Luckily, the code value is the same (0x0b) in - ASCII and EBCDIC, so we can just adjust the appropriate bit. */ - - case OP_WHITESPACE: - c = start_bits[1]; /* Save in case it was already set */ - set_type_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] = (start_bits[1] & ~0x08) | c; - try_next = FALSE; - break; - - case OP_NOT_WORDCHAR: - set_nottype_bits(start_bits, cbit_word, table_limit, cd); - try_next = FALSE; - break; - - case OP_WORDCHAR: - set_type_bits(start_bits, cbit_word, table_limit, cd); - try_next = FALSE; - break; - - /* One or more character type fudges the pointer and restarts, knowing - it will hit a single character type and stop there. */ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - tcode++; - break; - - case OP_TYPEEXACT: - tcode += 1 + IMM2_SIZE; - break; - - /* Zero or more repeats of character types set the bits and then - try again. 
*/ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - tcode += IMM2_SIZE; /* Fall through */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - switch(tcode[1]) - { - default: - case OP_ANY: - case OP_ALLANY: - return SSB_FAIL; - - case OP_HSPACE: - SET_BIT(CHAR_HT); - SET_BIT(CHAR_SPACE); -#ifdef SUPPORT_UTF - if (utf) - { -#ifdef COMPILE_PCRE8 - SET_BIT(0xC2); /* For U+00A0 */ - SET_BIT(0xE1); /* For U+1680, U+180E */ - SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ - SET_BIT(0xE3); /* For U+3000 */ -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(0xA0); - SET_BIT(0xFF); /* For characters > 255 */ -#endif /* COMPILE_PCRE[8|16|32] */ - } - else -#endif /* SUPPORT_UTF */ -#ifndef EBCDIC - SET_BIT(0xA0); -#endif /* Not EBCDIC */ - break; - - case OP_ANYNL: - case OP_VSPACE: - SET_BIT(CHAR_LF); - SET_BIT(CHAR_VT); - SET_BIT(CHAR_FF); - SET_BIT(CHAR_CR); -#ifdef SUPPORT_UTF - if (utf) - { -#ifdef COMPILE_PCRE8 - SET_BIT(0xC2); /* For U+0085 */ - SET_BIT(0xE2); /* For U+2028, U+2029 */ -#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(CHAR_NEL); - SET_BIT(0xFF); /* For characters > 255 */ -#endif /* COMPILE_PCRE16 */ - } - else -#endif /* SUPPORT_UTF */ - SET_BIT(CHAR_NEL); - break; - - case OP_NOT_DIGIT: - set_nottype_bits(start_bits, cbit_digit, table_limit, cd); - break; - - case OP_DIGIT: - set_type_bits(start_bits, cbit_digit, table_limit, cd); - break; - - /* The cbit_space table has vertical tab as whitespace; we have to - ensure it gets set as not whitespace. Luckily, the code value is the - same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate - bit. */ - - case OP_NOT_WHITESPACE: - set_nottype_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] |= 0x08; - break; - - /* The cbit_space table has vertical tab as whitespace; we have to - avoid setting it. 
Luckily, the code value is the same (0x0b) in ASCII - and EBCDIC, so we can just adjust the appropriate bit. */ - - case OP_WHITESPACE: - c = start_bits[1]; /* Save in case it was already set */ - set_type_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] = (start_bits[1] & ~0x08) | c; - break; - - case OP_NOT_WORDCHAR: - set_nottype_bits(start_bits, cbit_word, table_limit, cd); - break; - - case OP_WORDCHAR: - set_type_bits(start_bits, cbit_word, table_limit, cd); - break; - } - - tcode += 2; - break; - - /* Character class where all the information is in a bit map: set the - bits and either carry on or not, according to the repeat count. If it was - a negative class, and we are operating with UTF-8 characters, any byte - with a value >= 0xc4 is a potentially valid starter because it starts a - character with a value > 255. */ - - case OP_NCLASS: -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 - if (utf) - { - start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ - memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ - } -#endif -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 - SET_BIT(0xFF); /* For characters > 255 */ -#endif - /* Fall through */ - - case OP_CLASS: - { - pcre_uint8 *map; - tcode++; - map = (pcre_uint8 *)tcode; - - /* In UTF-8 mode, the bits in a bit map correspond to character - values, not to byte values. However, the bit map we are constructing is - for byte values. So we have to do a conversion for characters whose - value is > 127. In fact, there are only two possible starting bytes for - characters in the range 128 - 255. */ - -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 - if (utf) - { - for (c = 0; c < 16; c++) start_bits[c] |= map[c]; - for (c = 128; c < 256; c++) - { - if ((map[c/8] && (1 << (c&7))) != 0) - { - int d = (c >> 6) | 0xc0; /* Set bit for this starter */ - start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ - c = (c & 0xc0) + 0x40 - 1; /* next relevant character. 
*/ - } - } - } - else -#endif - { - /* In non-UTF-8 mode, the two bit maps are completely compatible. */ - for (c = 0; c < 32; c++) start_bits[c] |= map[c]; - } - - /* Advance past the bit map, and act on what follows. For a zero - minimum repeat, continue; otherwise stop processing. */ - - tcode += 32 / sizeof(pcre_uchar); - switch (*tcode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - tcode++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; - else try_next = FALSE; - break; - - default: - try_next = FALSE; - break; - } - } - break; /* End of bitmap class handling */ - - } /* End of switch */ - } /* End of try_next loop */ - - code += GET(code, 1); /* Advance to next branch */ - } -while (*code == OP_ALT); -return yield; -} - - - - - -/************************************************* -* Study a compiled expression * -*************************************************/ - -/* This function is handed a compiled expression that it must study to produce -information that will speed up the matching. It returns a pcre[16]_extra block -which then gets handed back to pcre_exec(). 
- -Arguments: - re points to the compiled expression - options contains option bits - errorptr points to where to place error messages; - set NULL unless error - -Returns: pointer to a pcre[16]_extra block, with study_data filled in and - the appropriate flags set; - NULL on error or if no optimization possible -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION -pcre_study(const pcre *external_re, int options, const char **errorptr) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION -pcre16_study(const pcre16 *external_re, int options, const char **errorptr) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION -pcre32_study(const pcre32 *external_re, int options, const char **errorptr) -#endif -{ -int min; -BOOL bits_set = FALSE; -pcre_uint8 start_bits[32]; -PUBL(extra) *extra = NULL; -pcre_study_data *study; -const pcre_uint8 *tables; -pcre_uchar *code; -compile_data compile_block; -const REAL_PCRE *re = (const REAL_PCRE *)external_re; - -*errorptr = NULL; - -if (re == NULL || re->magic_number != MAGIC_NUMBER) - { - *errorptr = "argument is not a compiled regular expression"; - return NULL; - } - -if ((re->flags & PCRE_MODE) == 0) - { -#if defined COMPILE_PCRE8 - *errorptr = "argument not compiled in 8 bit mode"; -#elif defined COMPILE_PCRE16 - *errorptr = "argument not compiled in 16 bit mode"; -#elif defined COMPILE_PCRE32 - *errorptr = "argument not compiled in 32 bit mode"; -#endif - return NULL; - } - -if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) - { - *errorptr = "unknown or incorrect option bit(s) set"; - return NULL; - } - -code = (pcre_uchar *)re + re->name_table_offset + - (re->name_count * re->name_entry_size); - -/* For an anchored pattern, or an unanchored pattern that has a first char, or -a multiline pattern that matches only at "line starts", there is no point in -seeking a list of starting bytes. 
*/ - -if ((re->options & PCRE_ANCHORED) == 0 && - (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0) - { - int rc; - - /* Set the character tables in the block that is passed around */ - - tables = re->tables; - -#if defined COMPILE_PCRE8 - if (tables == NULL) - (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, - (void *)(&tables)); -#elif defined COMPILE_PCRE16 - if (tables == NULL) - (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, - (void *)(&tables)); -#elif defined COMPILE_PCRE32 - if (tables == NULL) - (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, - (void *)(&tables)); -#endif - - compile_block.lcc = tables + lcc_offset; - compile_block.fcc = tables + fcc_offset; - compile_block.cbits = tables + cbits_offset; - compile_block.ctypes = tables + ctypes_offset; - - /* See if we can find a fixed set of initial characters for the pattern. */ - - memset(start_bits, 0, 32 * sizeof(pcre_uint8)); - rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, - &compile_block); - bits_set = rc == SSB_DONE; - if (rc == SSB_UNKNOWN) - { - *errorptr = "internal error: opcode not recognized"; - return NULL; - } - } - -/* Find the minimum length of subject string. */ - -switch(min = find_minlength(code, code, re->options, 0)) - { - case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; - case -3: *errorptr = "internal error: opcode not recognized"; return NULL; - default: break; - } - -/* If a set of starting bytes has been identified, or if the minimum length is -greater than zero, or if JIT optimization has been requested, or if -PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a -pcre_study_data block. The study data is put in the latter, which is pointed to -by the former, which may also get additional data set later by the calling -program. At the moment, the size of pcre_study_data is fixed. 
We nevertheless -save it in a field for returning via the pcre_fullinfo() function so that if it -becomes variable in the future, we don't have to change that code. */ - -if (bits_set || min > 0 || (options & ( -#ifdef SUPPORT_JIT - PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | - PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE | -#endif - PCRE_STUDY_EXTRA_NEEDED)) != 0) - { - extra = (PUBL(extra) *)(PUBL(malloc)) - (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); - if (extra == NULL) - { - *errorptr = "failed to get memory"; - return NULL; - } - - study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra))); - extra->flags = PCRE_EXTRA_STUDY_DATA; - extra->study_data = study; - - study->size = sizeof(pcre_study_data); - study->flags = 0; - - /* Set the start bits always, to avoid unset memory errors if the - study data is written to a file, but set the flag only if any of the bits - are set, to save time looking when none are. */ - - if (bits_set) - { - study->flags |= PCRE_STUDY_MAPPED; - memcpy(study->start_bits, start_bits, sizeof(start_bits)); - } - else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8)); - -#ifdef PCRE_DEBUG - if (bits_set) - { - pcre_uint8 *ptr = start_bits; - int i; - - printf("Start bits:\n"); - for (i = 0; i < 32; i++) - printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n"); - } -#endif - - /* Always set the minlength value in the block, because the JIT compiler - makes use of it. However, don't set the bit unless the length is greater than - zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time - checking the zero case. */ - - if (min > 0) - { - study->flags |= PCRE_STUDY_MINLEN; - study->minlength = min; - } - else study->minlength = 0; - - /* If JIT support was compiled and requested, attempt the JIT compilation. - If no starting bytes were found, and the minimum length is zero, and JIT - compilation fails, abandon the extra block and return NULL, unless - PCRE_STUDY_EXTRA_NEEDED is set. 
*/ - -#ifdef SUPPORT_JIT - extra->executable_jit = NULL; - if ((options & PCRE_STUDY_JIT_COMPILE) != 0) - PRIV(jit_compile)(re, extra, JIT_COMPILE); - if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0) - PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE); - if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0) - PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE); - - if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 && - (options & PCRE_STUDY_EXTRA_NEEDED) == 0) - { -#if defined COMPILE_PCRE8 - pcre_free_study(extra); -#elif defined COMPILE_PCRE16 - pcre16_free_study(extra); -#elif defined COMPILE_PCRE32 - pcre32_free_study(extra); -#endif - extra = NULL; - } -#endif - } - -return extra; -} - - -/************************************************* -* Free the study data * -*************************************************/ - -/* This function frees the memory that was obtained by pcre_study(). - -Argument: a pointer to the pcre[16]_extra block -Returns: nothing -*/ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN void -pcre_free_study(pcre_extra *extra) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN void -pcre16_free_study(pcre16_extra *extra) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN void -pcre32_free_study(pcre32_extra *extra) -#endif -{ -if (extra == NULL) - return; -#ifdef SUPPORT_JIT -if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && - extra->executable_jit != NULL) - PRIV(jit_free)(extra->executable_jit); -#endif -PUBL(free)(extra); -} - -/* End of pcre_study.c */ diff --git a/deps/libmagic/pcre/pcre_tables.c b/deps/libmagic/pcre/pcre_tables.c deleted file mode 100644 index 34ee048..0000000 --- a/deps/libmagic/pcre/pcre_tables.c +++ /dev/null @@ -1,655 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as 
possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -#ifndef PCRE_INCLUDED - -/* This module contains some fixed tables that are used by more than one of the -PCRE code modules. 
The tables are also #included by the pcretest program, which -uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name -clashes with the library. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - -#endif /* PCRE_INCLUDED */ - -/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that -the definition is next to the definition of the opcodes in pcre_internal.h. */ - -const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; - -/* Tables of horizontal and vertical whitespace characters, suitable for -adding to classes. */ - -const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST }; -const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST }; - - - -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. */ - -#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ - || (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)) - -/* These tables are also required by pcretest in 16- or 32-bit mode. */ - -const int PRIV(utf8_table1)[] = - { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - -const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - -const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra bytes, indexed by the first byte masked with -0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. 
*/ - -const pcre_uint8 PRIV(utf8_table4)[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - -#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/ - -#ifdef SUPPORT_UTF - -/* Table to translate from particular type value to the general value. */ - -const pcre_uint32 PRIV(ucp_gentype)[] = { - ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ - ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ - ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ - ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */ - ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */ - ucp_P, ucp_P, /* Ps, Po */ - ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */ - ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */ -}; - -/* This table encodes the rules for finding the end of an extended grapheme -cluster. Every code point has a grapheme break property which is one of the -ucp_gbXX values defined in ucp.h. The 2-dimensional table is indexed by the -properties of two adjacent code points. The left property selects a word from -the table, and the right property selects a bit from that word like this: - - ucp_gbtable[left-property] & (1 << right-property) - -The value is non-zero if a grapheme break is NOT permitted between the relevant -two code points. The breaking rules are as follows: - -1. Break at the start and end of text (pretty obviously). - -2. Do not break between a CR and LF; otherwise, break before and after - controls. - -3. Do not break Hangul syllable sequences, the rules for which are: - - L may be followed by L, V, LV or LVT - LV or V may be followed by V or T - LVT or T may be followed by T - -4. Do not break before extending characters. - -The next two rules are only for extended grapheme clusters (but that's what we -are implementing). - -5. Do not break before SpacingMarks. - -6. Do not break after Prepend characters. - -7. Otherwise, break everywhere. 
-*/ - -const pcre_uint32 PRIV(ucp_gbtable[]) = { - (1< 0x10ffff is not permitted -PCRE_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted -PCRE_UTF8_ERR15 Overlong 2-byte sequence -PCRE_UTF8_ERR16 Overlong 3-byte sequence -PCRE_UTF8_ERR17 Overlong 4-byte sequence -PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur) -PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur) -PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character) -PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff -PCRE_UTF8_ERR22 Non-character - -Arguments: - string points to the string - length length of string, or -1 if the string is zero-terminated - errp pointer to an error position offset variable - -Returns: = 0 if the string is a valid UTF-8 string - > 0 otherwise, setting the offset of the bad character -*/ - -int -PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) -{ -#ifdef SUPPORT_UTF -register PCRE_PUCHAR p; - -if (length < 0) - { - for (p = string; *p != 0; p++); - length = (int)(p - string); - } - -for (p = string; length-- > 0; p++) - { - register pcre_uchar ab, c, d; - pcre_uint32 v = 0; - - c = *p; - if (c < 128) continue; /* ASCII character */ - - if (c < 0xc0) /* Isolated 10xx xxxx byte */ - { - *erroroffset = (int)(p - string); - return PCRE_UTF8_ERR20; - } - - if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */ - { - *erroroffset = (int)(p - string); - return PCRE_UTF8_ERR21; - } - - ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ - if (length < ab) - { - *erroroffset = (int)(p - string); /* Missing bytes */ - return ab - length; /* Codes ERR1 to ERR5 */ - } - length -= ab; /* Length remaining */ - - /* Check top bits in the second byte */ - - if (((d = *(++p)) & 0xc0) != 0x80) - { - *erroroffset = (int)(p - string) - 1; - return PCRE_UTF8_ERR6; - } - - /* For each length, check that the remaining bytes start with the 0x80 bit - set and not the 0x40 bit. 
Then check for an overlong sequence, and for the - excluded range 0xd800 to 0xdfff. */ - - switch (ab) - { - /* 2-byte character. No further bytes to check for 0x80. Check first byte - for for xx00 000x (overlong sequence). */ - - case 1: if ((c & 0x3e) == 0) - { - *erroroffset = (int)(p - string) - 1; - return PCRE_UTF8_ERR15; - } - break; - - /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes - for 1110 0000, xx0x xxxx (overlong sequence) or - 1110 1101, 1010 xxxx (0xd800 - 0xdfff) */ - - case 2: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR7; - } - if (c == 0xe0 && (d & 0x20) == 0) - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR16; - } - if (c == 0xed && d >= 0xa0) - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR14; - } - v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f); - break; - - /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2 - bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a - character greater than 0x0010ffff (f4 8f bf bf) */ - - case 3: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { - *erroroffset = (int)(p - string) - 3; - return PCRE_UTF8_ERR8; - } - if (c == 0xf0 && (d & 0x30) == 0) - { - *erroroffset = (int)(p - string) - 3; - return PCRE_UTF8_ERR17; - } - if (c > 0xf4 || (c == 0xf4 && d > 0x8f)) - { - *erroroffset = (int)(p - string) - 3; - return PCRE_UTF8_ERR13; - } - v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f); - break; - - /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be - rejected by the length test below. However, we do the appropriate tests - here so that overlong sequences get diagnosed, and also in case there is - ever an option for handling these larger code points. 
*/ - - /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for - 1111 1000, xx00 0xxx */ - - case 4: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { - *erroroffset = (int)(p - string) - 3; - return PCRE_UTF8_ERR8; - } - if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ - { - *erroroffset = (int)(p - string) - 4; - return PCRE_UTF8_ERR9; - } - if (c == 0xf8 && (d & 0x38) == 0) - { - *erroroffset = (int)(p - string) - 4; - return PCRE_UTF8_ERR18; - } - break; - - /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for - 1111 1100, xx00 00xx. */ - - case 5: - if ((*(++p) & 0xc0) != 0x80) /* Third byte */ - { - *erroroffset = (int)(p - string) - 2; - return PCRE_UTF8_ERR7; - } - if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ - { - *erroroffset = (int)(p - string) - 3; - return PCRE_UTF8_ERR8; - } - if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ - { - *erroroffset = (int)(p - string) - 4; - return PCRE_UTF8_ERR9; - } - if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */ - { - *erroroffset = (int)(p - string) - 5; - return PCRE_UTF8_ERR10; - } - if (c == 0xfc && (d & 0x3c) == 0) - { - *erroroffset = (int)(p - string) - 5; - return PCRE_UTF8_ERR19; - } - break; - } - - /* Character is valid under RFC 2279, but 4-byte and 5-byte characters are - excluded by RFC 3629. The pointer p is currently at the last byte of the - character. */ - - if (ab > 3) - { - *erroroffset = (int)(p - string) - ab; - return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12; - } - - /* Reject non-characters. The pointer p is currently at the last byte of the - character. 
*/ - if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef)) - { - *erroroffset = (int)(p - string) - ab; - return PCRE_UTF8_ERR22; - } - } - -#else /* Not SUPPORT_UTF */ -(void)(string); /* Keep picky compilers happy */ -(void)(length); -(void)(erroroffset); -#endif - -return PCRE_UTF8_ERR0; /* This indicates success */ -} - -/* End of pcre_valid_utf8.c */ diff --git a/deps/libmagic/pcre/pcre_version.c b/deps/libmagic/pcre/pcre_version.c deleted file mode 100644 index ae86ff2..0000000 --- a/deps/libmagic/pcre/pcre_version.c +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_version(), which returns a -string that identifies the PCRE version that is in use. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Return version string * -*************************************************/ - -/* These macros are the standard way of turning unquoted text into C strings. -They allow macros like PCRE_MAJOR to be defined without quotes, which is -convenient for user programs that want to test its value. */ - -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -/* A problem turned up with PCRE_PRERELEASE, which is defined empty for -production releases. Originally, it was used naively in this code: - - return XSTRING(PCRE_MAJOR) - "." XSTRING(PCRE_MINOR) - XSTRING(PCRE_PRERELEASE) - " " XSTRING(PCRE_DATE); - -However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of -STRING(). The C standard states: "If (before argument substitution) any -argument consists of no preprocessing tokens, the behavior is undefined." It -turns out the gcc treats this case as a single empty string - which is what we -really want - but Visual C grumbles about the lack of an argument for the -macro. Unfortunately, both are within their rights. 
To cope with both ways of -handling this, I had resort to some messy hackery that does a test at run time. -I could find no way of detecting that a macro is defined as an empty string at -pre-processor time. This hack uses a standard trick for avoiding calling -the STRING macro with an empty argument when doing the test. */ - -#if defined COMPILE_PCRE8 -PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION -pcre_version(void) -#elif defined COMPILE_PCRE16 -PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION -pcre16_version(void) -#elif defined COMPILE_PCRE32 -PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION -pcre32_version(void) -#endif -{ -return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)? - XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : - XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE); -} - -/* End of pcre_version.c */ diff --git a/deps/libmagic/pcre/pcre_xclass.c b/deps/libmagic/pcre/pcre_xclass.c deleted file mode 100644 index fa73cd8..0000000 --- a/deps/libmagic/pcre/pcre_xclass.c +++ /dev/null @@ -1,198 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that is used to match an extended -class. It is used by both pcre_exec() and pcre_def_exec(). */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Match character against an XCLASS * -*************************************************/ - -/* This function is called to match a character against an extended class that -might contain values > 255 and/or Unicode properties. - -Arguments: - c the character - data points to the flag byte of the XCLASS data - -Returns: TRUE if character matches, else FALSE -*/ - -BOOL -PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf) -{ -pcre_uchar t; -BOOL negated = (*data & XCL_NOT) != 0; - -(void)utf; -#ifdef COMPILE_PCRE8 -/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. 
*/ -utf = TRUE; -#endif - -/* Character values < 256 are matched against a bitmap, if one is present. If -not, we still carry on, because there may be ranges that start below 256 in the -additional data. */ - -if (c < 256) - { - if ((*data & XCL_MAP) != 0 && - (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0) - return !negated; /* char found */ - } - -/* First skip the bit map if present. Then match against the list of Unicode -properties or large chars or ranges that end with a large char. We won't ever -encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ - -if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar); - -while ((t = *data++) != XCL_END) - { - pcre_uint32 x, y; - if (t == XCL_SINGLE) - { -#ifdef SUPPORT_UTF - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - } - else -#endif - x = *data++; - if (c == x) return !negated; - } - else if (t == XCL_RANGE) - { -#ifdef SUPPORT_UTF - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - GETCHARINC(y, data); /* macro generates multiple statements */ - } - else -#endif - { - x = *data++; - y = *data++; - } - if (c >= x && c <= y) return !negated; - } - -#ifdef SUPPORT_UCP - else /* XCL_PROP & XCL_NOTPROP */ - { - const ucd_record *prop = GET_UCD(c); - - switch(*data) - { - case PT_ANY: - if (t == XCL_PROP) return !negated; - break; - - case PT_LAMP: - if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; - break; - - case PT_GC: - if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) - return !negated; - break; - - case PT_PC: - if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; - break; - - case PT_SC: - if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; - break; - - case PT_ALNUM: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) - 
return !negated; - break; - - case PT_SPACE: /* Perl space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) - == (t == XCL_PROP)) - return !negated; - break; - - case PT_PXSPACE: /* POSIX space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || - c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || - c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) - return !negated; - break; - - case PT_WORD: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) - == (t == XCL_PROP)) - return !negated; - break; - - /* This should never occur, but compilers may mutter if there is no - default. */ - - default: - return FALSE; - } - - data += 2; - } -#endif /* SUPPORT_UCP */ - } - -return negated; /* char did not match */ -} - -/* End of pcre_xclass.c */ diff --git a/deps/libmagic/pcre/pcreposix.c b/deps/libmagic/pcre/pcreposix.c deleted file mode 100644 index 15195c0..0000000 --- a/deps/libmagic/pcre/pcreposix.c +++ /dev/null @@ -1,419 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module is a wrapper that provides a POSIX API to the underlying PCRE -functions. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - - -/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for -compiling these functions. This must come before including pcreposix.h, where -they are set for an application (using these functions) if they have not -previously been set. */ - -#if defined(_WIN32) && !defined(PCRE_STATIC) -# define PCREPOSIX_EXP_DECL extern __declspec(dllexport) -# define PCREPOSIX_EXP_DEFN __declspec(dllexport) -#endif - -/* We include pcre.h before pcre_internal.h so that the PCRE library functions -are declared as "import" for Windows by defining PCRE_EXP_DECL as "import". 
-This is needed even though pcre_internal.h itself includes pcre.h, because it -does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */ - -#include "pcre.h" -#include "pcre_internal.h" -#include "pcreposix.h" - - -/* Table to translate PCRE compile time error codes into POSIX error codes. */ - -static const int eint[] = { - 0, /* no error */ - REG_EESCAPE, /* \ at end of pattern */ - REG_EESCAPE, /* \c at end of pattern */ - REG_EESCAPE, /* unrecognized character follows \ */ - REG_BADBR, /* numbers out of order in {} quantifier */ - /* 5 */ - REG_BADBR, /* number too big in {} quantifier */ - REG_EBRACK, /* missing terminating ] for character class */ - REG_ECTYPE, /* invalid escape sequence in character class */ - REG_ERANGE, /* range out of order in character class */ - REG_BADRPT, /* nothing to repeat */ - /* 10 */ - REG_BADRPT, /* operand of unlimited repeat could match the empty string */ - REG_ASSERT, /* internal error: unexpected repeat */ - REG_BADPAT, /* unrecognized character after (? 
*/ - REG_BADPAT, /* POSIX named classes are supported only within a class */ - REG_EPAREN, /* missing ) */ - /* 15 */ - REG_ESUBREG, /* reference to non-existent subpattern */ - REG_INVARG, /* erroffset passed as NULL */ - REG_INVARG, /* unknown option bit(s) set */ - REG_EPAREN, /* missing ) after comment */ - REG_ESIZE, /* parentheses nested too deeply */ - /* 20 */ - REG_ESIZE, /* regular expression too large */ - REG_ESPACE, /* failed to get memory */ - REG_EPAREN, /* unmatched parentheses */ - REG_ASSERT, /* internal error: code overflow */ - REG_BADPAT, /* unrecognized character after (?< */ - /* 25 */ - REG_BADPAT, /* lookbehind assertion is not fixed length */ - REG_BADPAT, /* malformed number or name after (?( */ - REG_BADPAT, /* conditional group contains more than two branches */ - REG_BADPAT, /* assertion expected after (?( */ - REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ - /* 30 */ - REG_ECTYPE, /* unknown POSIX class name */ - REG_BADPAT, /* POSIX collating elements are not supported */ - REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ - REG_BADPAT, /* spare error */ - REG_BADPAT, /* character value in \x{...} sequence is too large */ - /* 35 */ - REG_BADPAT, /* invalid condition (?(0) */ - REG_BADPAT, /* \C not allowed in lookbehind assertion */ - REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ - REG_BADPAT, /* number after (?C is > 255 */ - REG_BADPAT, /* closing ) for (?C expected */ - /* 40 */ - REG_BADPAT, /* recursive call could loop indefinitely */ - REG_BADPAT, /* unrecognized character after (?P */ - REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ - REG_BADPAT, /* two named subpatterns have the same name */ - REG_BADPAT, /* invalid UTF-8 string */ - /* 45 */ - REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ - REG_BADPAT, /* malformed \P or \p sequence */ - REG_BADPAT, /* unknown property name after \P or \p */ - REG_BADPAT, /* subpattern 
name is too long (maximum 32 characters) */ - REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ - /* 50 */ - REG_BADPAT, /* repeated subpattern is too long */ - REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ - REG_BADPAT, /* internal error: overran compiling workspace */ - REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ - REG_BADPAT, /* DEFINE group contains more than one branch */ - /* 55 */ - REG_BADPAT, /* repeating a DEFINE group is not allowed */ - REG_INVARG, /* inconsistent NEWLINE options */ - REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */ - REG_BADPAT, /* a numbered reference must not be zero */ - REG_BADPAT, /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */ - /* 60 */ - REG_BADPAT, /* (*VERB) not recognized */ - REG_BADPAT, /* number is too big */ - REG_BADPAT, /* subpattern name expected */ - REG_BADPAT, /* digit expected after (?+ */ - REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */ - /* 65 */ - REG_BADPAT, /* different names for subpatterns of the same number are not allowed */ - REG_BADPAT, /* (*MARK) must have an argument */ - REG_INVARG, /* this version of PCRE is not compiled with PCRE_UCP support */ - REG_BADPAT, /* \c must be followed by an ASCII character */ - REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */ - /* 70 */ - REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */ - REG_BADPAT, /* \N is not supported in a class */ - REG_BADPAT, /* too many forward references */ - REG_BADPAT, /* disallowed UTF-8/16/32 code point (>= 0xd800 && <= 0xdfff) */ - REG_BADPAT, /* invalid UTF-16 string (should not occur) */ - /* 75 */ - REG_BADPAT, /* overlong MARK name */ - REG_BADPAT, /* character value in \u.... 
sequence is too large */ - REG_BADPAT /* invalid UTF-32 string (should not occur) */ -}; - -/* Table of texts corresponding to POSIX error codes */ - -static const char *const pstring[] = { - "", /* Dummy for value 0 */ - "internal error", /* REG_ASSERT */ - "invalid repeat counts in {}", /* BADBR */ - "pattern error", /* BADPAT */ - "? * + invalid", /* BADRPT */ - "unbalanced {}", /* EBRACE */ - "unbalanced []", /* EBRACK */ - "collation error - not relevant", /* ECOLLATE */ - "bad class", /* ECTYPE */ - "bad escape sequence", /* EESCAPE */ - "empty expression", /* EMPTY */ - "unbalanced ()", /* EPAREN */ - "bad range inside []", /* ERANGE */ - "expression too big", /* ESIZE */ - "failed to get memory", /* ESPACE */ - "bad back reference", /* ESUBREG */ - "bad argument", /* INVARG */ - "match failed" /* NOMATCH */ -}; - - - - -/************************************************* -* Translate error code to string * -*************************************************/ - -PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION -regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) -{ -const char *message, *addmessage; -size_t length, addlength; - -message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? - "unknown error code" : pstring[errcode]; -length = strlen(message) + 1; - -addmessage = " at offset "; -addlength = (preg != NULL && (int)preg->re_erroffset != -1)? 
- strlen(addmessage) + 6 : 0; - -if (errbuf_size > 0) - { - if (addlength > 0 && errbuf_size >= length + addlength) - sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); - else - { - strncpy(errbuf, message, errbuf_size - 1); - errbuf[errbuf_size-1] = 0; - } - } - -return length + addlength; -} - - - - -/************************************************* -* Free store held by a regex * -*************************************************/ - -PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION -regfree(regex_t *preg) -{ -(PUBL(free))(preg->re_pcre); -} - - - - -/************************************************* -* Compile a regular expression * -*************************************************/ - -/* -Arguments: - preg points to a structure for recording the compiled expression - pattern the pattern to compile - cflags compilation flags - -Returns: 0 on success - various non-zero codes on failure -*/ - -PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION -regcomp(regex_t *preg, const char *pattern, int cflags) -{ -const char *errorptr; -int erroffset; -int errorcode; -int options = 0; -int re_nsub = 0; - -if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; -if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; -if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL; -if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE; -if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8; -if ((cflags & REG_UCP) != 0) options |= PCRE_UCP; -if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY; - -preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, - &erroffset, NULL); -preg->re_erroffset = erroffset; - -/* Safety: if the error code is too big for the translation vector (which -should not happen, but we all make mistakes), return REG_BADPAT. */ - -if (preg->re_pcre == NULL) - { - return (errorcode < (int)(sizeof(eint)/sizeof(const int)))? 
- eint[errorcode] : REG_BADPAT; - } - -(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT, - &re_nsub); -preg->re_nsub = (size_t)re_nsub; -return 0; -} - - - - -/************************************************* -* Match a regular expression * -*************************************************/ - -/* Unfortunately, PCRE requires 3 ints of working space for each captured -substring, so we have to get and release working store instead of just using -the POSIX structures as was done in earlier releases when PCRE needed only 2 -ints. However, if the number of possible capturing brackets is small, use a -block of store on the stack, to reduce the use of malloc/free. The threshold is -in a macro that can be changed at configure time. - -If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will -be set. When this is the case, the nmatch and pmatch arguments are ignored, and -the only result is yes/no/error. */ - -PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION -regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ -int rc, so, eo; -int options = 0; -int *ovector = NULL; -int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; -BOOL allocated_ovector = FALSE; -BOOL nosub = - (REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0; - -if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; -if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; -if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY; - -((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ - -/* When no string data is being returned, or no vector has been passed in which -to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding -the return data is large enough. 
*/ - -if (nosub || pmatch == NULL) nmatch = 0; - -else if (nmatch > 0) - { - if (nmatch <= POSIX_MALLOC_THRESHOLD) - { - ovector = &(small_ovector[0]); - } - else - { - if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE; - ovector = (int *)malloc(sizeof(int) * nmatch * 3); - if (ovector == NULL) return REG_ESPACE; - allocated_ovector = TRUE; - } - } - -/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. -The man page from OS X says "REG_STARTEND affects only the location of the -string, not how it is matched". That is why the "so" value is used to bump the -start location rather than being passed as a PCRE "starting offset". */ - -if ((eflags & REG_STARTEND) != 0) - { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - } -else - { - so = 0; - eo = (int)strlen(string); - } - -rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so), - 0, options, ovector, (int)(nmatch * 3)); - -if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */ - -/* Successful match */ - -if (rc >= 0) - { - size_t i; - if (!nosub) - { - for (i = 0; i < (size_t)rc; i++) - { - pmatch[i].rm_so = ovector[i*2]; - pmatch[i].rm_eo = ovector[i*2+1]; - } - if (allocated_ovector) free(ovector); - for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - } - return 0; - } - -/* Unsuccessful match */ - -if (allocated_ovector) free(ovector); -switch(rc) - { -/* ========================================================================== */ - /* These cases are never obeyed. This is a fudge that causes a compile-time - error if the vector eint, which is indexed by compile-time error number, is - not the correct length. It seems to be the only way to do such a check at - compile time, as the sizeof() operator does not work in the C preprocessor. - As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. 
*/ - - case 0: - case (sizeof(eint)/sizeof(int) == ERRCOUNT): - return REG_ASSERT; -/* ========================================================================== */ - - case PCRE_ERROR_NOMATCH: return REG_NOMATCH; - case PCRE_ERROR_NULL: return REG_INVARG; - case PCRE_ERROR_BADOPTION: return REG_INVARG; - case PCRE_ERROR_BADMAGIC: return REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; - case PCRE_ERROR_NOMEMORY: return REG_ESPACE; - case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE; - case PCRE_ERROR_BADUTF8: return REG_INVARG; - case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG; - case PCRE_ERROR_BADMODE: return REG_INVARG; - default: return REG_ASSERT; - } -} - -/* End of pcreposix.c */ diff --git a/deps/libmagic/pcre/pcreposix.h b/deps/libmagic/pcre/pcreposix.h deleted file mode 100644 index 4527f78..0000000 --- a/deps/libmagic/pcre/pcreposix.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -#ifndef _PCREPOSIX_H -#define _PCREPOSIX_H - -/* This is the header for the POSIX wrapper interface to the PCRE Perl- -Compatible Regular Expression library. It defines the things POSIX says should -be there. I hope. - - Copyright (c) 1997-2012 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* Have to include stdlib.h in order to ensure that size_t is defined. */ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Options, mostly defined by POSIX, but with some extras. 
*/ - -#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */ -#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */ -#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */ -#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */ -#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */ -#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */ -#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */ -#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ -#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */ -#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */ -#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */ - -/* This is not used by PCRE, but by defining it we make it easier -to slot PCRE into existing programs that make POSIX calls. */ - -#define REG_EXTENDED 0 - -/* Error values. Not all these are relevant or used by the wrapper. */ - -enum { - REG_ASSERT = 1, /* internal error ? */ - REG_BADBR, /* invalid repeat counts in {} */ - REG_BADPAT, /* pattern error */ - REG_BADRPT, /* ? * + invalid */ - REG_EBRACE, /* unbalanced {} */ - REG_EBRACK, /* unbalanced [] */ - REG_ECOLLATE, /* collation error - not relevant */ - REG_ECTYPE, /* bad class */ - REG_EESCAPE, /* bad escape sequence */ - REG_EMPTY, /* empty expression */ - REG_EPAREN, /* unbalanced () */ - REG_ERANGE, /* bad range inside [] */ - REG_ESIZE, /* expression too big */ - REG_ESPACE, /* failed to get memory */ - REG_ESUBREG, /* bad back reference */ - REG_INVARG, /* bad argument */ - REG_NOMATCH /* match failed */ -}; - - -/* The structure representing a compiled regular expression. */ - -typedef struct { - void *re_pcre; - size_t re_nsub; - size_t re_erroffset; -} regex_t; - -/* The structure in which a captured offset is returned. 
*/ - -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; - -/* When an application links to a PCRE DLL in Windows, the symbols that are -imported have to be identified as such. When building PCRE, the appropriate -export settings are needed, and are set in pcreposix.c before including this -file. */ - -#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL) -# define PCREPOSIX_EXP_DECL extern __declspec(dllimport) -# define PCREPOSIX_EXP_DEFN __declspec(dllimport) -#endif - -/* By default, we use the standard "extern" declarations. */ - -#ifndef PCREPOSIX_EXP_DECL -# ifdef __cplusplus -# define PCREPOSIX_EXP_DECL extern "C" -# define PCREPOSIX_EXP_DEFN extern "C" -# else -# define PCREPOSIX_EXP_DECL extern -# define PCREPOSIX_EXP_DEFN extern -# endif -#endif - -/* The functions */ - -PCREPOSIX_EXP_DECL int pcreposix_regcomp(regex_t *, const char *, int); -PCREPOSIX_EXP_DECL int pcreposix_regexec(const regex_t *, const char *, size_t, - regmatch_t *, int); -PCREPOSIX_EXP_DECL size_t pcreposix_regerror(int, const regex_t *, char *, size_t); -PCREPOSIX_EXP_DECL void pcreposix_regfree(regex_t *); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#define regcomp pcreposix_regcomp -#define regexec pcreposix_regexec -#define regerror pcreposix_regerror -#define regfree pcreposix_regfree - -#endif /* End of pcreposix.h */ diff --git a/deps/libmagic/pcre/regex.h b/deps/libmagic/pcre/regex.h deleted file mode 100644 index 5403ee2..0000000 --- a/deps/libmagic/pcre/regex.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _REGEX_H -#define _REGEX_H 1 - -# include -# include - -#endif /* _REGEX_H */ \ No newline at end of file diff --git a/deps/libmagic/pcre/sljit/sljitConfig.h b/deps/libmagic/pcre/sljit/sljitConfig.h deleted file mode 100644 index 68bc59d..0000000 --- a/deps/libmagic/pcre/sljit/sljitConfig.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg 
(hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _SLJIT_CONFIG_H_ -#define _SLJIT_CONFIG_H_ - -/* --------------------------------------------------------------------- */ -/* Custom defines */ -/* --------------------------------------------------------------------- */ - -/* Put your custom defines here. This empty section will never change - which helps maintaining patches (with diff / patch utilities). */ - -/* --------------------------------------------------------------------- */ -/* Architecture */ -/* --------------------------------------------------------------------- */ - -/* Architecture selection. 
*/ -/* #define SLJIT_CONFIG_X86_32 1 */ -/* #define SLJIT_CONFIG_X86_64 1 */ -/* #define SLJIT_CONFIG_ARM_V5 1 */ -/* #define SLJIT_CONFIG_ARM_V7 1 */ -/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ -/* #define SLJIT_CONFIG_PPC_32 1 */ -/* #define SLJIT_CONFIG_PPC_64 1 */ -/* #define SLJIT_CONFIG_MIPS_32 1 */ -/* #define SLJIT_CONFIG_SPARC_32 1 */ - -/* #define SLJIT_CONFIG_AUTO 1 */ -/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ - -/* --------------------------------------------------------------------- */ -/* Utilities */ -/* --------------------------------------------------------------------- */ - -/* Useful for thread-safe compiling of global functions. */ -#ifndef SLJIT_UTIL_GLOBAL_LOCK -/* Enabled by default */ -#define SLJIT_UTIL_GLOBAL_LOCK 1 -#endif - -/* Implements a stack like data structure (by using mmap / VirtualAlloc). */ -#ifndef SLJIT_UTIL_STACK -/* Enabled by default */ -#define SLJIT_UTIL_STACK 1 -#endif - -/* Single threaded application. Does not require any locks. */ -#ifndef SLJIT_SINGLE_THREADED -/* Disabled by default. */ -#define SLJIT_SINGLE_THREADED 0 -#endif - -/* --------------------------------------------------------------------- */ -/* Configuration */ -/* --------------------------------------------------------------------- */ - -/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should - define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */ -#ifndef SLJIT_STD_MACROS_DEFINED -/* Disabled by default. */ -#define SLJIT_STD_MACROS_DEFINED 0 -#endif - -/* Executable code allocation: - If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should - define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */ -#ifndef SLJIT_EXECUTABLE_ALLOCATOR -/* Enabled by default. */ -#define SLJIT_EXECUTABLE_ALLOCATOR 1 -#endif - -/* Debug checks (assertions, etc.). 
*/ -#ifndef SLJIT_DEBUG -/* Enabled by default */ -#define SLJIT_DEBUG 1 -#endif - -/* Verbose operations */ -#ifndef SLJIT_VERBOSE -/* Enabled by default */ -#define SLJIT_VERBOSE 1 -#endif - -/* See the beginning of sljitConfigInternal.h */ - -#endif diff --git a/deps/libmagic/pcre/sljit/sljitConfigInternal.h b/deps/libmagic/pcre/sljit/sljitConfigInternal.h deleted file mode 100644 index 2b6616e..0000000 --- a/deps/libmagic/pcre/sljit/sljitConfigInternal.h +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _SLJIT_CONFIG_INTERNAL_H_ -#define _SLJIT_CONFIG_INTERNAL_H_ - -/* - SLJIT defines the following macros depending on the target architecture: - - Feature detection (boolean) macros: - SLJIT_32BIT_ARCHITECTURE : 32 bit architecture - SLJIT_64BIT_ARCHITECTURE : 64 bit architecture - SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index - SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index - SLJIT_LITTLE_ENDIAN : little endian architecture - SLJIT_BIG_ENDIAN : big endian architecture - SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information - SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address - - Types and useful macros: - sljit_sb, sljit_ub : signed and unsigned 8 bit byte - sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_si, sljit_ui : signed and unsigned 32 bit integer type - sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) - sljit_s : single precision floating point value - sljit_d : double precision floating point value - SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT - SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) -*/ - -#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ - || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ - || (defined 
SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ - || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ - || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ - || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)) -#error "An architecture must be selected" -#endif - -/* Sanity check. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ - + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ - + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ - + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ - + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ - + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 -#error "Multiple architectures are selected" -#endif - -/* Auto select option (requires compiler support) */ -#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) - -#ifndef _WIN32 - -#if defined(__i386__) || defined(__i386) -#define SLJIT_CONFIG_X86_32 1 -#elif defined(__x86_64__) -#define SLJIT_CONFIG_X86_64 1 -#elif defined(__arm__) || defined(__ARM__) -#ifdef __thumb2__ -#define SLJIT_CONFIG_ARM_THUMB2 1 -#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) -#define SLJIT_CONFIG_ARM_V7 1 -#else -#define SLJIT_CONFIG_ARM_V5 1 -#endif -#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__)) -#define SLJIT_CONFIG_PPC_64 1 -#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) -#define SLJIT_CONFIG_PPC_32 1 -#elif defined(__mips__) -#define SLJIT_CONFIG_MIPS_32 1 -#elif defined(__sparc__) || defined(__sparc) -#define 
SLJIT_CONFIG_SPARC_32 1 -#else -/* Unsupported architecture */ -#define SLJIT_CONFIG_UNSUPPORTED 1 -#endif - -#else /* !_WIN32 */ - -#if defined(_M_X64) || defined(__x86_64__) -#define SLJIT_CONFIG_X86_64 1 -#elif defined(_ARM_) -#define SLJIT_CONFIG_ARM_V5 1 -#else -#define SLJIT_CONFIG_X86_32 1 -#endif - -#endif /* !WIN32 */ -#endif /* SLJIT_CONFIG_AUTO */ - -#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -#undef SLJIT_EXECUTABLE_ALLOCATOR -#endif - -#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) - -/* These libraries are needed for the macros below. */ -#include -#include - -#endif /* STD_MACROS_DEFINED */ - -/* General macros: - Note: SLJIT is designed to be independent from them as possible. - - In release mode (SLJIT_DEBUG is not defined) only the following macros are needed: -*/ - -#ifndef SLJIT_MALLOC -#define SLJIT_MALLOC(size) malloc(size) -#endif - -#ifndef SLJIT_FREE -#define SLJIT_FREE(ptr) free(ptr) -#endif - -#ifndef SLJIT_MEMMOVE -#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) -#endif - -#ifndef SLJIT_ZEROMEM -#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) -#endif - -#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) - -#if defined(__GNUC__) && (__GNUC__ >= 3) -#define SLJIT_LIKELY(x) __builtin_expect((x), 1) -#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) -#else -#define SLJIT_LIKELY(x) (x) -#define SLJIT_UNLIKELY(x) (x) -#endif - -#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ - -#ifndef SLJIT_INLINE -/* Inline functions. */ -#define SLJIT_INLINE __inline -#endif - -#ifndef SLJIT_CONST -/* Const variables. */ -#define SLJIT_CONST const -#endif - -#ifndef SLJIT_UNUSED_ARG -/* Unused arguments. */ -#define SLJIT_UNUSED_ARG(arg) (void)arg -#endif - -#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) -/* Static ABI functions. For all-in-one programs. */ - -#if defined(__GNUC__) -/* Disable unused warnings in gcc. 
*/ -#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused)) -#else -#define SLJIT_API_FUNC_ATTRIBUTE static -#endif - -#else -#define SLJIT_API_FUNC_ATTRIBUTE -#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ - -#ifndef SLJIT_CACHE_FLUSH - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -/* Not required to implement on archs with unified caches. */ -#define SLJIT_CACHE_FLUSH(from, to) - -#elif defined __APPLE__ - -/* Supported by all macs since Mac OS 10.5. - However, it does not work on non-jailbroken iOS devices, - although the compilation is successful. */ - -#define SLJIT_CACHE_FLUSH(from, to) \ - sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) - -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - -/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ -#define SLJIT_CACHE_FLUSH(from, to) \ - ppc_cache_flush((from), (to)) - -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - -/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ -#define SLJIT_CACHE_FLUSH(from, to) \ - sparc_cache_flush((from), (to)) - -#else - -/* Calls __ARM_NR_cacheflush on ARM-Linux. */ -#define SLJIT_CACHE_FLUSH(from, to) \ - __clear_cache((char*)(from), (char*)(to)) - -#endif - -#endif /* !SLJIT_CACHE_FLUSH */ - -/* 8 bit byte type. */ -typedef unsigned char sljit_ub; -typedef signed char sljit_sb; - -/* 16 bit half-word type. */ -typedef unsigned short int sljit_uh; -typedef signed short int sljit_sh; - -/* 32 bit integer type. */ -typedef unsigned int sljit_ui; -typedef signed int sljit_si; - -/* Machine word type. Can encapsulate a pointer. - 32 bit for 32 bit machines. - 64 bit for 64 bit machines. */ -#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -/* Just to have something. 
*/ -#define SLJIT_WORD_SHIFT 0 -typedef unsigned long int sljit_uw; -typedef long int sljit_sw; -#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_32BIT_ARCHITECTURE 1 -#define SLJIT_WORD_SHIFT 2 -typedef unsigned int sljit_uw; -typedef int sljit_sw; -#else -#define SLJIT_64BIT_ARCHITECTURE 1 -#define SLJIT_WORD_SHIFT 3 -#ifdef _WIN32 -typedef unsigned __int64 sljit_uw; -typedef __int64 sljit_sw; -#else -typedef unsigned long int sljit_uw; -typedef long int sljit_sw; -#endif -#endif - -typedef sljit_uw sljit_p; - -/* Floating point types. */ -typedef float sljit_s; -typedef double sljit_d; - -/* Shift for pointer sized data. */ -#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT - -/* Shift for double precision sized data. */ -#define SLJIT_DOUBLE_SHIFT 3 - -#ifndef SLJIT_W - -/* Defining long constants. */ -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) -#define SLJIT_W(w) (w##ll) -#else -#define SLJIT_W(w) (w) -#endif - -#endif /* !SLJIT_W */ - -#ifndef SLJIT_CALL - -/* ABI (Application Binary Interface) types. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - -#if defined(__GNUC__) - -#define SLJIT_CALL __attribute__ ((fastcall)) -#define SLJIT_X86_32_FASTCALL 1 - -#elif defined(_MSC_VER) - -#define SLJIT_CALL __fastcall -#define SLJIT_X86_32_FASTCALL 1 - -#elif defined(__BORLANDC__) - -#define SLJIT_CALL __msfastcall -#define SLJIT_X86_32_FASTCALL 1 - -#else /* Unknown compiler. */ - -/* The cdecl attribute is the default. */ -#define SLJIT_CALL - -#endif - -#else /* Non x86-32 architectures. */ - -#define SLJIT_CALL - -#endif /* SLJIT_CONFIG_X86_32 */ - -#endif /* !SLJIT_CALL */ - -#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) - -/* These macros are useful for the application. 
*/ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ - || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define SLJIT_BIG_ENDIAN 1 - -#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - -#ifdef __MIPSEL__ -#define SLJIT_LITTLE_ENDIAN 1 -#else -#define SLJIT_BIG_ENDIAN 1 -#endif - -#else -#define SLJIT_LITTLE_ENDIAN 1 -#endif - -#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ - -/* Sanity check. */ -#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) -#error "Exactly one endianness must be selected" -#endif - -#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) -#error "Exactly one endianness must be selected" -#endif - -#ifndef SLJIT_INDIRECT_CALL -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32 && defined _AIX) -/* It seems certain ppc compilers use an indirect addressing for functions - which makes things complicated. */ -#define SLJIT_INDIRECT_CALL 1 -#endif -#endif /* SLJIT_INDIRECT_CALL */ - -#ifndef SLJIT_RETURN_ADDRESS_OFFSET -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define SLJIT_RETURN_ADDRESS_OFFSET 8 -#else -#define SLJIT_RETURN_ADDRESS_OFFSET 0 -#endif -#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ - -#ifndef SLJIT_SSE2 - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -/* Turn on SSE2 support on x86. */ -#define SLJIT_SSE2 1 - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -/* Auto detect SSE2 support using CPUID. - On 64 bit x86 cpus, sse2 must be present. 
*/ -#define SLJIT_DETECT_SSE2 1 -#endif - -#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */ - -#endif /* !SLJIT_SSE2 */ - -#ifndef SLJIT_UNALIGNED - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_UNALIGNED 1 -#endif - -#endif /* !SLJIT_UNALIGNED */ - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) -SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); -#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) -#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) -#include -#endif - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - -#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP) - -/* SLJIT_HALT_PROCESS must halt the process. */ -#ifndef SLJIT_HALT_PROCESS -#include - -#define SLJIT_HALT_PROCESS() \ - abort(); -#endif /* !SLJIT_HALT_PROCESS */ - -#include - -#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */ - -/* Feel free to redefine these two macros. */ -#ifndef SLJIT_ASSERT - -#define SLJIT_ASSERT(x) \ - do { \ - if (SLJIT_UNLIKELY(!(x))) { \ - printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ - SLJIT_HALT_PROCESS(); \ - } \ - } while (0) - -#endif /* !SLJIT_ASSERT */ - -#ifndef SLJIT_ASSERT_STOP - -#define SLJIT_ASSERT_STOP() \ - do { \ - printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ - SLJIT_HALT_PROCESS(); \ - } while (0) - -#endif /* !SLJIT_ASSERT_STOP */ - -#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ - -/* Forcing empty, but valid statements. 
*/ -#undef SLJIT_ASSERT -#undef SLJIT_ASSERT_STOP - -#define SLJIT_ASSERT(x) \ - do { } while (0) -#define SLJIT_ASSERT_STOP() \ - do { } while (0) - -#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ - -#ifndef SLJIT_COMPILE_ASSERT - -/* Should be improved eventually. */ -#define SLJIT_COMPILE_ASSERT(x, description) \ - SLJIT_ASSERT(x) - -#endif /* !SLJIT_COMPILE_ASSERT */ - -#endif diff --git a/deps/libmagic/pcre/sljit/sljitExecAllocator.c b/deps/libmagic/pcre/sljit/sljitExecAllocator.c deleted file mode 100644 index 75a3899..0000000 --- a/deps/libmagic/pcre/sljit/sljitExecAllocator.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - This file contains a simple executable memory allocator - - It is assumed, that executable code blocks are usually medium (or sometimes - large) memory blocks, and the allocator is not too frequently called (less - optimized than other allocators). Thus, using it as a generic allocator is - not suggested. - - How does it work: - Memory is allocated in continuous memory areas called chunks by alloc_chunk() - Chunk format: - [ block ][ block ] ... [ block ][ block terminator ] - - All blocks and the block terminator is started with block_header. The block - header contains the size of the previous and the next block. These sizes - can also contain special values. - Block size: - 0 - The block is a free_block, with a different size member. - 1 - The block is a block terminator. - n - The block is used at the moment, and the value contains its size. - Previous block size: - 0 - This is the first block of the memory chunk. - n - The size of the previous block. - - Using these size values we can go forward or backward on the block chain. - The unused blocks are stored in a chain list pointed by free_blocks. This - list is useful if we need to find a suitable memory area when the allocator - is called. - - When a block is freed, the new free block is connected to its adjacent free - blocks if possible. 
- - [ free block ][ used block ][ free block ] - and "used block" is freed, the three blocks are connected together: - [ one big free block ] -*/ - -/* --------------------------------------------------------------------- */ -/* System (OS) functions */ -/* --------------------------------------------------------------------- */ - -/* 64 KByte. */ -#define CHUNK_SIZE 0x10000 - -/* - alloc_chunk / free_chunk : - * allocate executable system memory chunks - * the size is always divisible by CHUNK_SIZE - allocator_grab_lock / allocator_release_lock : - * make the allocator thread safe - * can be empty if the OS (or the application) does not support threading - * only the allocator requires this lock, sljit is fully thread safe - as it only uses local variables -*/ - -#ifdef _WIN32 - -static SLJIT_INLINE void* alloc_chunk(sljit_uw size) -{ - return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); -} - -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) -{ - SLJIT_UNUSED_ARG(size); - VirtualFree(chunk, 0, MEM_RELEASE); -} - -#else - -static SLJIT_INLINE void* alloc_chunk(sljit_uw size) -{ - void* retval; - -#ifdef MAP_ANON - retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); -#else - if (dev_zero < 0) { - if (open_dev_zero()) - return NULL; - } - retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0); -#endif - - return (retval != MAP_FAILED) ? 
retval : NULL; -} - -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) -{ - munmap(chunk, size); -} - -#endif - -/* --------------------------------------------------------------------- */ -/* Common functions */ -/* --------------------------------------------------------------------- */ - -#define CHUNK_MASK (~(CHUNK_SIZE - 1)) - -struct block_header { - sljit_uw size; - sljit_uw prev_size; -}; - -struct free_block { - struct block_header header; - struct free_block *next; - struct free_block *prev; - sljit_uw size; -}; - -#define AS_BLOCK_HEADER(base, offset) \ - ((struct block_header*)(((sljit_ub*)base) + offset)) -#define AS_FREE_BLOCK(base, offset) \ - ((struct free_block*)(((sljit_ub*)base) + offset)) -#define MEM_START(base) ((void*)(((sljit_ub*)base) + sizeof(struct block_header))) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) - -static struct free_block* free_blocks; -static sljit_uw allocated_size; -static sljit_uw total_size; - -static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) -{ - free_block->header.size = 0; - free_block->size = size; - - free_block->next = free_blocks; - free_block->prev = 0; - if (free_blocks) - free_blocks->prev = free_block; - free_blocks = free_block; -} - -static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) -{ - if (free_block->next) - free_block->next->prev = free_block->prev; - - if (free_block->prev) - free_block->prev->next = free_block->next; - else { - SLJIT_ASSERT(free_blocks == free_block); - free_blocks = free_block->next; - } -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) -{ - struct block_header *header; - struct block_header *next_header; - struct free_block *free_block; - sljit_uw chunk_size; - - allocator_grab_lock(); - if (size < sizeof(struct free_block)) - size = sizeof(struct free_block); - size = ALIGN_SIZE(size); - - free_block = free_blocks; - while (free_block) { - if 
(free_block->size >= size) { - chunk_size = free_block->size; - if (chunk_size > size + 64) { - /* We just cut a block from the end of the free block. */ - chunk_size -= size; - free_block->size = chunk_size; - header = AS_BLOCK_HEADER(free_block, chunk_size); - header->prev_size = chunk_size; - AS_BLOCK_HEADER(header, size)->prev_size = size; - } - else { - sljit_remove_free_block(free_block); - header = (struct block_header*)free_block; - size = chunk_size; - } - allocated_size += size; - header->size = size; - allocator_release_lock(); - return MEM_START(header); - } - free_block = free_block->next; - } - - chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; - header = (struct block_header*)alloc_chunk(chunk_size); - if (!header) { - allocator_release_lock(); - return NULL; - } - - chunk_size -= sizeof(struct block_header); - total_size += chunk_size; - - header->prev_size = 0; - if (chunk_size > size + 64) { - /* Cut the allocated space into a free and a used block. */ - allocated_size += size; - header->size = size; - chunk_size -= size; - - free_block = AS_FREE_BLOCK(header, size); - free_block->header.prev_size = size; - sljit_insert_free_block(free_block, chunk_size); - next_header = AS_BLOCK_HEADER(free_block, chunk_size); - } - else { - /* All space belongs to this allocation. */ - allocated_size += chunk_size; - header->size = chunk_size; - next_header = AS_BLOCK_HEADER(header, chunk_size); - } - next_header->size = 1; - next_header->prev_size = chunk_size; - allocator_release_lock(); - return MEM_START(header); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) -{ - struct block_header *header; - struct free_block* free_block; - - allocator_grab_lock(); - header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); - allocated_size -= header->size; - - /* Connecting free blocks together if possible. */ - - /* If header->prev_size == 0, free_block will equal to header. 
- In this case, free_block->header.size will be > 0. */ - free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); - if (SLJIT_UNLIKELY(!free_block->header.size)) { - free_block->size += header->size; - header = AS_BLOCK_HEADER(free_block, free_block->size); - header->prev_size = free_block->size; - } - else { - free_block = (struct free_block*)header; - sljit_insert_free_block(free_block, header->size); - } - - header = AS_BLOCK_HEADER(free_block, free_block->size); - if (SLJIT_UNLIKELY(!header->size)) { - free_block->size += ((struct free_block*)header)->size; - sljit_remove_free_block((struct free_block*)header); - header = AS_BLOCK_HEADER(free_block, free_block->size); - header->prev_size = free_block->size; - } - - /* The whole chunk is free. */ - if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { - /* If this block is freed, we still have (allocated_size / 2) free space. */ - if (total_size - free_block->size > (allocated_size * 3 / 2)) { - total_size -= free_block->size; - sljit_remove_free_block(free_block); - free_chunk(free_block, free_block->size + sizeof(struct block_header)); - } - } - - allocator_release_lock(); -} diff --git a/deps/libmagic/pcre/sljit/sljitLir.c b/deps/libmagic/pcre/sljit/sljitLir.c deleted file mode 100644 index 6979841..0000000 --- a/deps/libmagic/pcre/sljit/sljitLir.c +++ /dev/null @@ -1,1766 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "sljitLir.h" - -#define CHECK_ERROR() \ - do { \ - if (SLJIT_UNLIKELY(compiler->error)) \ - return compiler->error; \ - } while (0) - -#define CHECK_ERROR_PTR() \ - do { \ - if (SLJIT_UNLIKELY(compiler->error)) \ - return NULL; \ - } while (0) - -#define CHECK_ERROR_VOID() \ - do { \ - if (SLJIT_UNLIKELY(compiler->error)) \ - return; \ - } while (0) - -#define FAIL_IF(expr) \ - do { \ - if (SLJIT_UNLIKELY(expr)) \ - return compiler->error; \ - } while (0) - -#define PTR_FAIL_IF(expr) \ - do { \ - if (SLJIT_UNLIKELY(expr)) \ - return NULL; \ - } while (0) - -#define FAIL_IF_NULL(ptr) \ - do { \ - if (SLJIT_UNLIKELY(!(ptr))) { \ - compiler->error = SLJIT_ERR_ALLOC_FAILED; \ - return SLJIT_ERR_ALLOC_FAILED; \ - } \ - } while (0) - -#define PTR_FAIL_IF_NULL(ptr) \ - do { \ - if (SLJIT_UNLIKELY(!(ptr))) { \ - compiler->error = SLJIT_ERR_ALLOC_FAILED; \ - return NULL; \ - } \ - } while (0) - -#define PTR_FAIL_WITH_EXEC_IF(ptr) \ - do { \ - if (SLJIT_UNLIKELY(!(ptr))) { \ - compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ - return NULL; \ - } \ - } while (0) - -#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) - -#define GET_OPCODE(op) \ - ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) - -#define GET_FLAGS(op) \ - ((op) & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C)) - -#define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) - -#define TYPE_CAST_NEEDED(op) \ - (((op) >= SLJIT_MOV_UB && (op) <= SLJIT_MOV_SH) || ((op) >= SLJIT_MOVU_UB && (op) <= SLJIT_MOVU_SH)) - -#define BUF_SIZE 4096 - -#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) -#define ABUF_SIZE 2048 -#else -#define ABUF_SIZE 4096 -#endif - -/* Jump flags. */ -#define JUMP_LABEL 0x1 -#define JUMP_ADDR 0x2 -/* SLJIT_REWRITABLE_JUMP is 0x1000. 
*/ - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -# define PATCH_MB 0x4 -# define PATCH_MW 0x8 -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -# define PATCH_MD 0x10 -#endif -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -# define IS_BL 0x4 -# define PATCH_B 0x8 -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -# define CPOOL_SIZE 512 -#endif - -#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) -# define IS_COND 0x04 -# define IS_BL 0x08 - /* cannot be encoded as branch */ -# define B_TYPE0 0x00 - /* conditional + imm8 */ -# define B_TYPE1 0x10 - /* conditional + imm20 */ -# define B_TYPE2 0x20 - /* IT + imm24 */ -# define B_TYPE3 0x30 - /* imm11 */ -# define B_TYPE4 0x40 - /* imm24 */ -# define B_TYPE5 0x50 - /* BL + imm24 */ -# define BL_TYPE6 0x60 - /* 0xf00 cc code for branches */ -#endif - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -# define UNCOND_B 0x04 -# define PATCH_B 0x08 -# define ABSOLUTE_B 0x10 -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define IS_MOVABLE 0x04 -# define IS_JAL 0x08 -# define IS_BIT26_COND 0x10 -# define IS_BIT16_COND 0x20 - -# define IS_COND (IS_BIT26_COND | IS_BIT16_COND) - -# define PATCH_B 0x40 -# define PATCH_J 0x80 - - /* instruction types */ -# define MOVABLE_INS 0 - /* 1 - 31 last destination register */ - /* no destination (i.e: store) */ -# define UNMOVABLE_INS 32 - /* FPU status register */ -# define FCSR_FCC 33 -#endif - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -# define IS_MOVABLE 0x04 -# define IS_COND 0x08 -# define IS_CALL 0x10 - -# define PATCH_B 0x20 -# define PATCH_CALL 0x40 - - /* instruction types */ -# define MOVABLE_INS 0 - /* 1 - 31 last destination register */ - /* no destination (i.e: store) */ -# 
define UNMOVABLE_INS 32 - -# define DST_INS_MASK 0xff - - /* ICC_SET is the same as SET_FLAGS. */ -# define ICC_IS_SET (1 << 23) -# define FCC_IS_SET (1 << 24) -#endif - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1 -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw)) -#endif -#endif - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#ifdef _WIN64 -#define FIXED_LOCALS_OFFSET ((4 + 2) * sizeof(sljit_sw)) -#else -#define FIXED_LOCALS_OFFSET (sizeof(sljit_sw)) -#endif -#endif - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw)) -#else -#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw)) -#endif -#endif - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw)) -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_sw)) -#endif - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET (23 * sizeof(sljit_sw)) -#endif - -#if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET) - -#define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - (i) += compiler->locals_offset; - -#elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET) - -#define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - (i) += FIXED_LOCALS_OFFSET; - -#else - -#define ADJUST_LOCAL_OFFSET(p, i) - -#endif - -#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ - -/* Utils can still 
be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ -#include "sljitUtils.c" - -#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) -#include "sljitExecAllocator.c" -#endif - -#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2) -#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2 -#endif - -/* --------------------------------------------------------------------- */ -/* Public functions */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64))) -#define SLJIT_NEEDS_COMPILER_INIT 1 -static sljit_si compiler_initialized = 0; -/* A thread safe initialization. */ -static void init_compiler(void); -#endif - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) -{ - struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler)); - if (!compiler) - return NULL; - SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); - - SLJIT_COMPILE_ASSERT( - sizeof(sljit_sb) == 1 && sizeof(sljit_ub) == 1 - && sizeof(sljit_sh) == 2 && sizeof(sljit_uh) == 2 - && sizeof(sljit_si) == 4 && sizeof(sljit_ui) == 4 - && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) - && sizeof(sljit_p) <= sizeof(sljit_sw) - && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) - && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), - invalid_integer_types); - SLJIT_COMPILE_ASSERT(SLJIT_INT_OP == SLJIT_SINGLE_OP, - int_op_and_single_op_must_be_the_same); - SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_SINGLE_OP, - rewritable_jump_and_single_op_must_not_be_the_same); - - /* Only the non-zero members must be set. 
*/ - compiler->error = SLJIT_SUCCESS; - - compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); - compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); - - if (!compiler->buf || !compiler->abuf) { - if (compiler->buf) - SLJIT_FREE(compiler->buf); - if (compiler->abuf) - SLJIT_FREE(compiler->abuf); - SLJIT_FREE(compiler); - return NULL; - } - - compiler->buf->next = NULL; - compiler->buf->used_size = 0; - compiler->abuf->next = NULL; - compiler->abuf->used_size = 0; - - compiler->scratches = -1; - compiler->saveds = -1; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->args = -1; -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + CPOOL_SIZE * sizeof(sljit_ub)); - if (!compiler->cpool) { - SLJIT_FREE(compiler->buf); - SLJIT_FREE(compiler->abuf); - SLJIT_FREE(compiler); - return NULL; - } - compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE); - compiler->cpool_diff = 0xffffffff; -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - compiler->delay_slot = UNMOVABLE_INS; -#endif - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - compiler->delay_slot = UNMOVABLE_INS; -#endif - -#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) - if (!compiler_initialized) { - init_compiler(); - compiler_initialized = 1; - } -#endif - - return compiler; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - struct sljit_memory_fragment *curr; - - buf = compiler->buf; - while (buf) { - curr = buf; - buf = buf->next; - SLJIT_FREE(curr); - } - - buf = compiler->abuf; - while (buf) { - curr = buf; - buf = buf->next; - SLJIT_FREE(curr); - } - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - SLJIT_FREE(compiler->cpool); -#endif - SLJIT_FREE(compiler); -} - -#if (defined SLJIT_CONFIG_ARM_THUMB2 
&& SLJIT_CONFIG_ARM_THUMB2) -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) -{ - /* Remove thumb mode flag. */ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1)); -} -#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) -{ - /* Resolve indirection. */ - code = (void*)(*(sljit_uw*)code); - SLJIT_FREE_EXEC(code); -} -#else -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) -{ - SLJIT_FREE_EXEC(code); -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) -{ - if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { - jump->flags &= ~JUMP_ADDR; - jump->flags |= JUMP_LABEL; - jump->u.label = label; - } -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) -{ - if (SLJIT_LIKELY(!!jump)) { - SLJIT_ASSERT(jump->flags & SLJIT_REWRITABLE_JUMP); - - jump->flags &= ~JUMP_LABEL; - jump->flags |= JUMP_ADDR; - jump->u.target = target; - } -} - -/* --------------------------------------------------------------------- */ -/* Private functions */ -/* --------------------------------------------------------------------- */ - -static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) -{ - sljit_ub *ret; - struct sljit_memory_fragment *new_frag; - - SLJIT_ASSERT(size <= 256); - if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { - ret = compiler->buf->memory + compiler->buf->used_size; - compiler->buf->used_size += size; - return ret; - } - new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); - PTR_FAIL_IF_NULL(new_frag); - new_frag->next = compiler->buf; - compiler->buf = new_frag; - new_frag->used_size = size; - return new_frag->memory; -} - -static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) -{ - sljit_ub *ret; - struct sljit_memory_fragment *new_frag; - - SLJIT_ASSERT(size <= 256); - if 
(compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { - ret = compiler->abuf->memory + compiler->abuf->used_size; - compiler->abuf->used_size += size; - return ret; - } - new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); - PTR_FAIL_IF_NULL(new_frag); - new_frag->next = compiler->abuf; - compiler->abuf = new_frag; - new_frag->used_size = size; - return new_frag->memory; -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) -{ - CHECK_ERROR_PTR(); - -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - if (size <= 0 || size > 128) - return NULL; - size = (size + 7) & ~7; -#else - if (size <= 0 || size > 64) - return NULL; - size = (size + 3) & ~3; -#endif - return ensure_abuf(compiler, size); -} - -static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf = compiler->buf; - struct sljit_memory_fragment *prev = NULL; - struct sljit_memory_fragment *tmp; - - do { - tmp = buf->next; - buf->next = prev; - prev = buf; - buf = tmp; - } while (buf != NULL); - - compiler->buf = prev; -} - -static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) -{ - label->next = NULL; - label->size = compiler->size; - if (compiler->last_label) - compiler->last_label->next = label; - else - compiler->labels = label; - compiler->last_label = label; -} - -static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_si flags) -{ - jump->next = NULL; - jump->flags = flags; - if (compiler->last_jump) - compiler->last_jump->next = jump; - else - compiler->jumps = jump; - compiler->last_jump = jump; -} - -static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) -{ - const_->next = NULL; - const_->addr = compiler->size; - if (compiler->last_const) - compiler->last_const->next = const_; - else 
- compiler->consts = const_; - compiler->last_const = const_; -} - -#define ADDRESSING_DEPENDS_ON(exp, reg) \ - (((exp) & SLJIT_MEM) && (((exp) & 0xf) == reg || (((exp) >> 4) & 0xf) == reg)) - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) -#define FUNCTION_CHECK_OP() \ - SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ - switch (GET_OPCODE(op)) { \ - case SLJIT_NOT: \ - case SLJIT_CLZ: \ - case SLJIT_AND: \ - case SLJIT_OR: \ - case SLJIT_XOR: \ - case SLJIT_SHL: \ - case SLJIT_LSHR: \ - case SLJIT_ASHR: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C))); \ - break; \ - case SLJIT_NEG: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C))); \ - break; \ - case SLJIT_MUL: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C))); \ - break; \ - case SLJIT_CMPD: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ - break; \ - case SLJIT_ADD: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U))); \ - break; \ - case SLJIT_SUB: \ - break; \ - case SLJIT_ADDC: \ - case SLJIT_SUBC: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))); \ - break; \ - case SLJIT_BREAKPOINT: \ - case SLJIT_NOP: \ - case SLJIT_UMUL: \ - case SLJIT_SMUL: \ - case SLJIT_MOV: \ - case SLJIT_MOV_P: \ - case SLJIT_MOVU: \ - case SLJIT_MOVU_P: \ - /* Nothing allowed */ \ - SLJIT_ASSERT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - break; \ - default: \ - /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. 
*/ \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - break; \ - } - -#define FUNCTION_CHECK_IS_REG(r) \ - ((r) == SLJIT_UNUSED || \ - ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \ - ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) - -#define FUNCTION_CHECK_SRC(p, i) \ - SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ - if (FUNCTION_CHECK_IS_REG(p)) \ - SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \ - else if ((p) == SLJIT_IMM) \ - ; \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ - else if ((p) & SLJIT_MEM) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ - if ((p) & 0xf0) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ - SLJIT_ASSERT(!((i) & ~0x3)); \ - } \ - SLJIT_ASSERT(((p) >> 9) == 0); \ - } \ - else \ - SLJIT_ASSERT_STOP(); - -#define FUNCTION_CHECK_DST(p, i) \ - SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ - if (FUNCTION_CHECK_IS_REG(p)) \ - SLJIT_ASSERT((i) == 0); \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ - else if ((p) & SLJIT_MEM) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ - if ((p) & 0xf0) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ - SLJIT_ASSERT(!((i) & ~0x3)); \ - } \ - SLJIT_ASSERT(((p) >> 9) == 0); \ - } \ - else \ - SLJIT_ASSERT_STOP(); - -#define FUNCTION_FCHECK(p, i) \ - if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \ - SLJIT_ASSERT(i == 0); \ - else if ((p) & SLJIT_MEM) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ - if ((p) & 0xf0) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ - SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \ - } else \ - SLJIT_ASSERT((((p) >> 4) & 0xf) == 0); \ - SLJIT_ASSERT(((p) >> 
9) == 0); \ - } \ - else \ - SLJIT_ASSERT_STOP(); - -#define FUNCTION_CHECK_OP1() \ - if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ - SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & 0xf) != SLJIT_LOCALS_REG); \ - SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & 0xf) != SLJIT_LOCALS_REG); \ - if ((src & SLJIT_MEM) && (src & 0xf)) \ - SLJIT_ASSERT((dst & 0xf) != (src & 0xf) && ((dst >> 4) & 0xf) != (src & 0xf)); \ - } - -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - -SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) -{ - compiler->verbose = verbose; -} - -static char* reg_names[] = { - (char*)"", (char*)"t1", (char*)"t2", (char*)"t3", - (char*)"te1", (char*)"te2", (char*)"s1", (char*)"s2", - (char*)"s3", (char*)"se1", (char*)"se2", (char*)"lcr" -}; - -static char* freg_names[] = { - (char*)"", (char*)"float_r1", (char*)"float_r2", (char*)"float_r3", - (char*)"float_r4", (char*)"float_r5", (char*)"float_r6" -}; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#ifdef _WIN64 -# define SLJIT_PRINT_D "I64" -#else -# define SLJIT_PRINT_D "l" -#endif -#else -# define SLJIT_PRINT_D "" -#endif - -#define sljit_verbose_param(p, i) \ - if ((p) & SLJIT_IMM) \ - fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \ - else if ((p) & SLJIT_MEM) { \ - if ((p) & 0xf) { \ - if (i) { \ - if (((p) >> 4) & 0xf) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & 0xF], (i)); \ - } \ - else { \ - if (((p) >> 4) & 0xf) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & 0xF]); \ - } \ - } \ - else \ - fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - 
fprintf(compiler->verbose, "%s", reg_names[p]); -#define sljit_verbose_fparam(p, i) \ - if ((p) & SLJIT_MEM) { \ - if ((p) & 0xf) { \ - if (i) { \ - if (((p) >> 4) & 0xf) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & 0xF], (i)); \ - } \ - else { \ - if (((p) >> 4) & 0xF) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & 0xF]); \ - } \ - } \ - else \ - fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", freg_names[p]); - -static SLJIT_CONST char* op_names[] = { - /* op0 */ - (char*)"breakpoint", (char*)"nop", - (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv", - /* op1 */ - (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", - (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"mov.p", - (char*)"movu", (char*)"movu.ub", (char*)"movu.sb", (char*)"movu.uh", - (char*)"movu.sh", (char*)"movu.ui", (char*)"movu.si", (char*)"movu.p", - (char*)"not", (char*)"neg", (char*)"clz", - /* op2 */ - (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", - (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", - (char*)"shl", (char*)"lshr", (char*)"ashr", - /* fop1 */ - (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs", - /* fop2 */ - (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" -}; - -static char* jump_names[] = { - (char*)"c_equal", (char*)"c_not_equal", - (char*)"c_less", (char*)"c_greater_equal", - (char*)"c_greater", (char*)"c_less_equal", - (char*)"c_sig_less", (char*)"c_sig_greater_equal", - (char*)"c_sig_greater", (char*)"c_sig_less_equal", - (char*)"c_overflow", (char*)"c_not_overflow", - (char*)"c_mul_overflow", (char*)"c_mul_not_overflow", - (char*)"c_float_equal", (char*)"c_float_not_equal", - 
(char*)"c_float_less", (char*)"c_float_greater_equal", - (char*)"c_float_greater", (char*)"c_float_less_equal", - (char*)"c_float_unordered", (char*)"c_float_ordered", - (char*)"jump", (char*)"fast_call", - (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3" -}; - -#endif - -/* --------------------------------------------------------------------- */ -/* Arch dependent */ -/* --------------------------------------------------------------------- */ - -static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compiler) -{ -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - struct sljit_jump *jump; -#endif - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - - SLJIT_ASSERT(compiler->size > 0); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - jump = compiler->jumps; - while (jump) { - /* All jumps have target. */ - SLJIT_ASSERT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); - jump = jump->next; - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - - SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= saveds); - SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); -#endif -} - -static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= saveds); - SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); -#endif -} - -static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (op != SLJIT_UNUSED) { - SLJIT_ASSERT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); - FUNCTION_CHECK_SRC(src, srcw); - } - else - SLJIT_ASSERT(src == 0 && srcw == 0); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - if (op == SLJIT_UNUSED) - fprintf(compiler->verbose, " return\n"); - else { - fprintf(compiler->verbose, " return %s ", op_names[op]); - sljit_verbose_param(src, srcw); - fprintf(compiler->verbose, "\n"); - } - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_DST(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " fast_enter "); - sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_SRC(src, srcw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " fast_return "); - sljit_verbose_param(src, srcw); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - - SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) - || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]); -#endif -} - -static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); - FUNCTION_CHECK_SRC(src, srcw); - FUNCTION_CHECK_DST(dst, dstw); - FUNCTION_CHECK_OP1(); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_U) ? "" : ".u", - !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? 
"" : ".k"); - sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); - FUNCTION_CHECK_SRC(src1, src1w); - FUNCTION_CHECK_SRC(src2, src2w); - FUNCTION_CHECK_DST(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_U) ? "" : ".u", - !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? 
"" : ".k"); - sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, ", "); - sljit_verbose_param(src1, src1w); - fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg) -{ - SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS); -} - -static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(instruction); - SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT(instruction); -} - -static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); - FUNCTION_FCHECK(src, srcw); - FUNCTION_FCHECK(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d", - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? 
"" : ".s"); - sljit_verbose_fparam(dst, dstw); - fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src, srcw); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); - FUNCTION_FCHECK(src1, src1w); - FUNCTION_FCHECK(src2, src2w); - FUNCTION_FCHECK(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d"); - sljit_verbose_fparam(dst, dstw); - fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src1, src1w); - fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_label(struct sljit_compiler *compiler) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, "label:\n"); -#endif -} - -static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " jump%s<%s>\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); -#endif -} - -static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_C_SIG_LESS_EQUAL); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_SRC(src1, src1w); - FUNCTION_CHECK_SRC(src2, src2w); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %scmp%s<%s> ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); - sljit_verbose_param(src1, src1w); - fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_ORDERED); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_FCHECK(src1, src1w); - FUNCTION_FCHECK(src2, src2w); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %scmp%s<%s> ", (type & SLJIT_SINGLE_OP) ? "s" : "d", - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_fparam(src1, src1w); - fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - return; - } -#endif - - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_SRC(src, srcw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " ijump<%s> ", jump_names[type]); - sljit_verbose_param(src, srcw); - fprintf(compiler->verbose, "\n"); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(type); - - SLJIT_ASSERT(type >= SLJIT_C_EQUAL && type < SLJIT_JUMP); - SLJIT_ASSERT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI - || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); - SLJIT_ASSERT((op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C)) == 0); - SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (GET_OPCODE(op) < SLJIT_ADD) { - SLJIT_ASSERT(src == SLJIT_UNUSED && srcw == 0); - } else { - SLJIT_ASSERT(src == dst && srcw == dstw); - } - FUNCTION_CHECK_DST(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " op_flags<%s%s%s%s> ", !(op & SLJIT_INT_OP) ? "" : "i", - op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? 
"" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); - if (src != SLJIT_UNUSED) { - fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); - } - fprintf(compiler->verbose, ", <%s>\n", jump_names[type]); - } -#endif -} - -static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(offset); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_DST(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " local_base "); - sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); - } -#endif -} - -static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(init_value); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_DST(dst, dstw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " const "); - sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); - } -#endif -} - -static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - /* Return if don't need to do anything. */ - if (op == SLJIT_UNUSED) - return SLJIT_SUCCESS; - -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. 
*/ - if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) - return SLJIT_SUCCESS; -#else - if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P)) - return SLJIT_SUCCESS; -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); -} - -/* CPU description section */ - -#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) -#define SLJIT_CPUINFO_PART1 " 32bit (" -#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) -#define SLJIT_CPUINFO_PART1 " 64bit (" -#else -#error "Internal error: CPU type info missing" -#endif - -#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) -#define SLJIT_CPUINFO_PART2 "little endian + " -#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) -#define SLJIT_CPUINFO_PART2 "big endian + " -#else -#error "Internal error: CPU type info missing" -#endif - -#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) -#define SLJIT_CPUINFO_PART3 "unaligned)" -#else -#define SLJIT_CPUINFO_PART3 "aligned)" -#endif - -#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -# include "sljitNativeX86_common.c" -#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -# include "sljitNativeX86_common.c" -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -# include "sljitNativeARM_v5.c" -#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -# include "sljitNativeARM_v5.c" -#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) -# include "sljitNativeARM_Thumb2.c" -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -# include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -# include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_MIPS_32 && 
SLJIT_CONFIG_MIPS_32) -# include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -# include "sljitNativeSPARC_common.c" -#endif - -#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* Default compare for most architectures. */ - sljit_si flags, tmp_src, condition; - sljit_sw tmp_srcw; - - CHECK_ERROR_PTR(); - check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); - - condition = type & 0xff; - if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { - /* Immediate is prefered as second argument by most architectures. */ - switch (condition) { - case SLJIT_C_LESS: - condition = SLJIT_C_GREATER; - break; - case SLJIT_C_GREATER_EQUAL: - condition = SLJIT_C_LESS_EQUAL; - break; - case SLJIT_C_GREATER: - condition = SLJIT_C_LESS; - break; - case SLJIT_C_LESS_EQUAL: - condition = SLJIT_C_GREATER_EQUAL; - break; - case SLJIT_C_SIG_LESS: - condition = SLJIT_C_SIG_GREATER; - break; - case SLJIT_C_SIG_GREATER_EQUAL: - condition = SLJIT_C_SIG_LESS_EQUAL; - break; - case SLJIT_C_SIG_GREATER: - condition = SLJIT_C_SIG_LESS; - break; - case SLJIT_C_SIG_LESS_EQUAL: - condition = SLJIT_C_SIG_GREATER_EQUAL; - break; - } - type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP)); - tmp_src = src1; - src1 = src2; - src2 = tmp_src; - tmp_srcw = src1w; - src1w = src2w; - src2w = tmp_srcw; - } - - if (condition <= SLJIT_C_NOT_ZERO) - flags = SLJIT_SET_E; - else if (condition <= SLJIT_C_LESS_EQUAL) - flags = SLJIT_SET_U; - else - flags = SLJIT_SET_S; - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP), - SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && 
SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si flags, condition; - - check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); - - condition = type & 0xff; - flags = (condition <= SLJIT_C_FLOAT_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S; - if (type & SLJIT_SINGLE_OP) - flags |= SLJIT_SINGLE_OP; - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - sljit_emit_fop1(compiler, SLJIT_CMPD | flags, src1, src1w, src2, src2w); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); -} - -#endif - -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) -{ - CHECK_ERROR(); - check_sljit_get_local_base(compiler, dst, dstw, offset); - - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - if (offset != 0) - return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0); -} - -#endif - -#else /* SLJIT_CONFIG_UNSUPPORTED */ - -/* Empty function bodies for those machines, which are not (yet) supported. 
*/ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ - return "unsupported"; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) -{ - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT_STOP(); - return NULL; -} - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) -SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(verbose); - SLJIT_ASSERT_STOP(); -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) -{ - SLJIT_UNUSED_ARG(code); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - 
SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - SLJIT_ASSERT_STOP(); - return reg; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - SLJIT_UNUSED_ARG(compiler); - 
SLJIT_UNUSED_ARG(instruction); - SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - SLJIT_ASSERT_STOP(); - return 0; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, 
sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) -{ - SLJIT_UNUSED_ARG(jump); - SLJIT_UNUSED_ARG(label); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) -{ - SLJIT_UNUSED_ARG(jump); - SLJIT_UNUSED_ARG(target); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(type); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(offset); - SLJIT_ASSERT_STOP(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw initval) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(initval); - SLJIT_ASSERT_STOP(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ 
- SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_addr); - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_constant); - SLJIT_ASSERT_STOP(); -} - -#endif diff --git a/deps/libmagic/pcre/sljit/sljitLir.h b/deps/libmagic/pcre/sljit/sljitLir.h deleted file mode 100644 index 3171d15..0000000 --- a/deps/libmagic/pcre/sljit/sljitLir.h +++ /dev/null @@ -1,985 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _SLJIT_LIR_H_ -#define _SLJIT_LIR_H_ - -/* - ------------------------------------------------------------------------ - Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) - ------------------------------------------------------------------------ - - Short description - Advantages: - - The execution can be continued from any LIR instruction. In other - words, it is possible to jump to any label from anywhere, even from - a code fragment, which is compiled later, if both compiled code - shares the same context. See sljit_emit_enter for more details - - Supports self modifying code: target of (conditional) jump and call - instructions and some constant values can be dynamically modified - during runtime - - although it is not suggested to do it frequently - - can be used for inline caching: save an important value once - in the instruction stream - - since this feature limits the optimization possibilities, a - special flag must be passed at compile time when these - instructions are emitted - - A fixed stack space can be allocated for local variables - - The compiler is thread-safe - - The compiler is highly configurable through preprocessor macros. - You can disable unneeded features (multithreading in single - threaded applications), and you can use your own system functions - (including memory allocators). See sljitConfig.h - Disadvantages: - - No automatic register allocation, and temporary results are - not stored on the stack. 
(hence the name comes) - - Limited number of registers (only 6+4 integer registers, max 3+2 - scratch, max 3+2 saved and 6 floating point registers) - In practice: - - This approach is very effective for interpreters - - One of the saved registers typically points to a stack interface - - It can jump to any exception handler anytime (even if it belongs - to another function) - - Hot paths can be modified during runtime reflecting the changes - of the fastest execution path of the dynamic language - - SLJIT supports complex memory addressing modes - - mainly position and context independent code (except some cases) - - For valgrind users: - - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" -*/ - -#if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG) -#include "sljitConfig.h" -#endif - -/* The following header file defines useful macros for fine tuning -sljit based code generators. They are listed in the begining -of sljitConfigInternal.h */ - -#include "sljitConfigInternal.h" - -/* --------------------------------------------------------------------- */ -/* Error codes */ -/* --------------------------------------------------------------------- */ - -/* Indicates no error. */ -#define SLJIT_SUCCESS 0 -/* After the call of sljit_generate_code(), the error code of the compiler - is set to this value to avoid future sljit calls (in debug mode at least). - The complier should be freed after sljit_generate_code(). */ -#define SLJIT_ERR_COMPILED 1 -/* Cannot allocate non executable memory. */ -#define SLJIT_ERR_ALLOC_FAILED 2 -/* Cannot allocate executable memory. - Only for sljit_generate_code() */ -#define SLJIT_ERR_EX_ALLOC_FAILED 3 -/* return value for SLJIT_CONFIG_UNSUPPORTED empty architecture. 
*/ -#define SLJIT_ERR_UNSUPPORTED 4 - -/* --------------------------------------------------------------------- */ -/* Registers */ -/* --------------------------------------------------------------------- */ - -#define SLJIT_UNUSED 0 - -/* Scratch (temporary) registers whose may not preserve their values - across function calls. */ -#define SLJIT_SCRATCH_REG1 1 -#define SLJIT_SCRATCH_REG2 2 -#define SLJIT_SCRATCH_REG3 3 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_TEMPORARY_EREG1 4 -#define SLJIT_TEMPORARY_EREG2 5 - -/* Saved registers whose preserve their values across function calls. */ -#define SLJIT_SAVED_REG1 6 -#define SLJIT_SAVED_REG2 7 -#define SLJIT_SAVED_REG3 8 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_SAVED_EREG1 9 -#define SLJIT_SAVED_EREG2 10 - -/* Read-only register (cannot be the destination of an operation). - Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since - several ABIs has certain limitations about the stack layout. However - sljit_get_local_base() can be used to obtain the offset of a value - on the stack. */ -#define SLJIT_LOCALS_REG 11 - -/* Number of registers. */ -#define SLJIT_NO_TMP_REGISTERS 5 -#define SLJIT_NO_GEN_REGISTERS 5 -#define SLJIT_NO_REGISTERS 11 - -/* Return with machine word. */ - -#define SLJIT_RETURN_REG SLJIT_SCRATCH_REG1 - -/* x86 prefers specific registers for special purposes. In case of shift - by register it supports only SLJIT_SCRATCH_REG3 for shift argument - (which is the src2 argument of sljit_emit_op2). If another register is - used, sljit must exchange data between registers which cause a minor - slowdown. Other architectures has no such limitation. 
*/ - -#define SLJIT_PREF_SHIFT_REG SLJIT_SCRATCH_REG3 - -/* --------------------------------------------------------------------- */ -/* Floating point registers */ -/* --------------------------------------------------------------------- */ - -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - -/* Floating point operations are performed on double or - single precision values. */ - -#define SLJIT_FLOAT_REG1 1 -#define SLJIT_FLOAT_REG2 2 -#define SLJIT_FLOAT_REG3 3 -#define SLJIT_FLOAT_REG4 4 -#define SLJIT_FLOAT_REG5 5 -#define SLJIT_FLOAT_REG6 6 - -/* --------------------------------------------------------------------- */ -/* Main structures and functions */ -/* --------------------------------------------------------------------- */ - -struct sljit_memory_fragment { - struct sljit_memory_fragment *next; - sljit_uw used_size; - /* Must be aligned to sljit_sw. */ - sljit_ub memory[1]; -}; - -struct sljit_label { - struct sljit_label *next; - sljit_uw addr; - /* The maximum size difference. */ - sljit_uw size; -}; - -struct sljit_jump { - struct sljit_jump *next; - sljit_uw addr; - sljit_sw flags; - union { - sljit_uw target; - struct sljit_label* label; - } u; -}; - -struct sljit_const { - struct sljit_const *next; - sljit_uw addr; -}; - -struct sljit_compiler { - sljit_si error; - - struct sljit_label *labels; - struct sljit_jump *jumps; - struct sljit_const *consts; - struct sljit_label *last_label; - struct sljit_jump *last_jump; - struct sljit_const *last_const; - - struct sljit_memory_fragment *buf; - struct sljit_memory_fragment *abuf; - - /* Used local registers. */ - sljit_si scratches; - /* Used saved registers. */ - sljit_si saveds; - /* Local stack size. */ - sljit_si local_size; - /* Code size. */ - sljit_uw size; - /* For statistical purposes. 
*/ - sljit_uw executable_size; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si args; - sljit_si locals_offset; - sljit_si scratches_start; - sljit_si saveds_start; -#endif - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si mode32; -#endif - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si flags_saved; -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - /* Constant pool handling. */ - sljit_uw *cpool; - sljit_ub *cpool_unique; - sljit_uw cpool_diff; - sljit_uw cpool_fill; - /* Other members. */ - /* Contains pointer, "ldr pc, [...]" pairs. */ - sljit_uw patches; -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - /* Temporary fields. */ - sljit_uw shift_imm; - sljit_si cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) - sljit_si cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_sw imm; - sljit_si cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - sljit_si delay_slot; - sljit_si cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - sljit_si delay_slot; - sljit_si cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - FILE* verbose; -#endif - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - /* Local size passed to the functions. 
*/ - sljit_si logical_local_size; -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - sljit_si skip_checks; -#endif -}; - -/* --------------------------------------------------------------------- */ -/* Main functions */ -/* --------------------------------------------------------------------- */ - -/* Creates an sljit compiler. - Returns NULL if failed. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void); - -/* Free everything except the compiled machine code. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); - -/* Returns the current error code. If an error is occured, future sljit - calls which uses the same compiler argument returns early with the same - error code. Thus there is no need for checking the error after every - call, it is enough to do it before the code is compiled. Removing - these checks increases the performance of the compiling process. */ -static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } - -/* - Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, - and <= 128 bytes on 64 bit architectures. The memory area is owned by the - compiler, and freed by sljit_free_compiler. The returned pointer is - sizeof(sljit_sw) aligned. Excellent for allocating small blocks during - the compiling, and no need to worry about freeing them. The size is - enough to contain at most 16 pointers. If the size is outside of the range, - the function will return with NULL. However, this return value does not - indicate that there is no more memory (does not set the current error code - of the compiler to out-of-memory status). -*/ -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) -/* Passing NULL disables verbose. 
*/ -SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); -#endif - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code); - -/* - After the machine code generation is finished we can retrieve the allocated - executable memory size, although this area may not be fully filled with - instructions depending on some optimizations. This function is useful only - for statistical purposes. - - Before a successful code generation, this function returns with 0. -*/ -static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } - -/* Instruction generation. Returns with any error code. If there is no - error, they return with SLJIT_SUCCESS. */ - -/* - The executable code is basically a function call from the viewpoint of - the C language. The function calls must obey to the ABI (Application - Binary Interface) of the platform, which specify the purpose of machine - registers and stack handling among other things. The sljit_emit_enter - function emits the necessary instructions for setting up a new context - for the executable code and moves function arguments to the saved - registers. The number of arguments are specified in the "args" - parameter and the first argument goes to SLJIT_SAVED_REG1, the second - goes to SLJIT_SAVED_REG2 and so on. The number of scratch and - saved registers are passed in "scratches" and "saveds" arguments - respectively. Since the saved registers contains the arguments, - "args" must be less or equal than "saveds". The sljit_emit_enter - is also capable of allocating a stack space for local variables. The - "local_size" argument contains the size in bytes of this local area - and its staring address is stored in SLJIT_LOCALS_REG. However - the SLJIT_LOCALS_REG is not necessary the machine stack pointer. 
- The memory bytes between SLJIT_LOCALS_REG (inclusive) and - SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely - until the function returns. The stack space is uninitialized. - - Note: every call of sljit_emit_enter and sljit_set_context - overwrites the previous context. */ - -#define SLJIT_MAX_LOCAL_SIZE 65536 - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); - -/* The machine code has a context (which contains the local stack space size, - number of used registers, etc.) which initialized by sljit_emit_enter. Several - functions (like sljit_emit_return) requres this context to be able to generate - the appropriate code. However, some code fragments (like inline cache) may have - no normal entry point so their context is unknown for the compiler. Using the - function below we can specify their context. - - Note: every call of sljit_emit_enter and sljit_set_context overwrites - the previous context. */ - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); - -/* Return from machine code. The op argument can be SLJIT_UNUSED which means the - function does not return with anything or any opcode between SLJIT_MOV and - SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op - is SLJIT_UNUSED, otherwise see below the description about source and - destination arguments. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, - sljit_si src, sljit_sw srcw); - -/* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and - even the stack frame is passed to the callee. 
The return address is preserved in - dst/dstw by sljit_emit_fast_enter (the type of the value stored by this function - is sljit_p), and sljit_emit_fast_return can use this as a return value later. */ - -/* Note: only for sljit specific, non ABI compilant calls. Fast, since only a few machine - instructions are needed. Excellent for small uility functions, where saving registers - and setting up a new stack frame would cost too much performance. However, it is still - possible to return to the address of the caller (or anywhere else). */ - -/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */ - -/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, - since many architectures do clever branch prediction on call / return instruction pairs. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw); - -/* - Source and destination values for arithmetical instructions - imm - a simple immediate value (cannot be used as a destination) - reg - any of the registers (immediate argument must be 0) - [imm] - absolute immediate memory address - [reg+imm] - indirect memory address - [reg+(reg<addr; } -static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } -static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } - -/* Only the address is required to rewrite the code. 
*/ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr); -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant); - -/* --------------------------------------------------------------------- */ -/* Miscellaneous utility functions */ -/* --------------------------------------------------------------------- */ - -#define SLJIT_MAJOR_VERSION 0 -#define SLJIT_MINOR_VERSION 90 - -/* Get the human readable name of the platform. Can be useful on platforms - like ARM, where ARM and Thumb2 functions can be mixed, and - it is useful to know the type of the code generator. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void); - -/* Portable helper function to get an offset of a member. */ -#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) - -#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) -/* This global lock is useful to compile common functions. */ -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void); -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void); -#endif - -#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) - -/* The sljit_stack is a utiliy feature of sljit, which allocates a - writable memory region between base (inclusive) and limit (exclusive). - Both base and limit is a pointer, and base is always <= than limit. - This feature uses the "address space reserve" feature - of modern operating systems. Basically we don't need to allocate a - huge memory block in one step for the worst case, we can start with - a smaller chunk and extend it later. Since the address space is - reserved, the data never copied to other regions, thus it is safe - to store pointers here. */ - -/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more). - Note: stack growing should not happen in small steps: 4k, 16k or even - bigger growth is better. - Note: this structure may not be supported by all operating systems. 
- Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK - is not defined. */ - -struct sljit_stack { - /* User data, anything can be stored here. - Starting with the same value as base. */ - sljit_uw top; - /* These members are read only. */ - sljit_uw base; - sljit_uw limit; - sljit_uw max_limit; -}; - -/* Returns NULL if unsuccessful. - Note: limit and max_limit contains the size for stack allocation - Note: the top field is initialized to base. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit); -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack); - -/* Can be used to increase (allocate) or decrease (free) the memory area. - Returns with a non-zero value if unsuccessful. If new_limit is greater than - max_limit, it will fail. It is very easy to implement a stack data structure, - since the growth ratio can be added to the current limit, and sljit_stack_resize - will do all the necessary checks. The fields of the stack are not changed if - sljit_stack_resize fails. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit); - -#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ - -#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - -/* Get the entry address of a given function. */ -#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name) - -#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ - -/* All JIT related code should be placed in the same context (library, binary, etc.). */ - -#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name) - -/* For powerpc64, the function pointers point to a context descriptor. */ -struct sljit_function_context { - sljit_sw addr; - sljit_sw r2; - sljit_sw r11; -}; - -/* Fill the context arguments using the addr and the function. 
- If func_ptr is NULL, it will not be set to the address of context - If addr is NULL, the function address also comes from the func pointer. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func); - -#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ - -#endif /* _SLJIT_LIR_H_ */ diff --git a/deps/libmagic/pcre/sljit/sljitNativeARM_Thumb2.c b/deps/libmagic/pcre/sljit/sljitNativeARM_Thumb2.c deleted file mode 100644 index 0a60dc2..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeARM_Thumb2.c +++ /dev/null @@ -1,2008 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ - return "ARM-Thumb2" SLJIT_CPUINFO; -} - -/* Length of an instruction word. */ -typedef sljit_ui sljit_ins; - -/* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) - -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) - -/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 -}; - -#define COPY_BITS(src, from, to, bits) \ - ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) - -/* Thumb16 encodings. */ -#define RD3(rd) (reg_map[rd]) -#define RN3(rn) (reg_map[rn] << 3) -#define RM3(rm) (reg_map[rm] << 6) -#define RDN3(rdn) (reg_map[rdn] << 8) -#define IMM3(imm) (imm << 6) -#define IMM8(imm) (imm) - -/* Thumb16 helpers. */ -#define SET_REGS44(rd, rn) \ - ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) -#define IS_2_LO_REGS(reg1, reg2) \ - (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) -#define IS_3_LO_REGS(reg1, reg2, reg3) \ - (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) - -/* Thumb32 encodings. 
*/ -#define RD4(rd) (reg_map[rd] << 8) -#define RN4(rn) (reg_map[rn] << 16) -#define RM4(rm) (reg_map[rm]) -#define RT4(rt) (reg_map[rt] << 12) -#define DD4(dd) ((dd) << 12) -#define DN4(dn) ((dn) << 16) -#define DM4(dm) (dm) -#define IMM5(imm) \ - (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) -#define IMM12(imm) \ - (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -/* dot '.' changed to _ - I immediate form (possibly followed by number of immediate bits). */ -#define ADCI 0xf1400000 -#define ADCS 0x4140 -#define ADC_W 0xeb400000 -#define ADD 0x4400 -#define ADDS 0x1800 -#define ADDSI3 0x1c00 -#define ADDSI8 0x3000 -#define ADD_W 0xeb000000 -#define ADDWI 0xf2000000 -#define ADD_SP 0xb000 -#define ADD_W 0xeb000000 -#define ADD_WI 0xf1000000 -#define ANDI 0xf0000000 -#define ANDS 0x4000 -#define AND_W 0xea000000 -#define ASRS 0x4100 -#define ASRSI 0x1000 -#define ASR_W 0xfa40f000 -#define ASR_WI 0xea4f0020 -#define BICI 0xf0200000 -#define BKPT 0xbe00 -#define BLX 0x4780 -#define BX 0x4700 -#define CLZ 0xfab0f080 -#define CMPI 0x2800 -#define CMP_W 0xebb00f00 -#define EORI 0xf0800000 -#define EORS 0x4040 -#define EOR_W 0xea800000 -#define IT 0xbf00 -#define LSLS 0x4080 -#define LSLSI 0x0000 -#define LSL_W 0xfa00f000 -#define LSL_WI 0xea4f0000 -#define LSRS 0x40c0 -#define LSRSI 0x0800 -#define LSR_W 0xfa20f000 -#define LSR_WI 0xea4f0010 -#define MOV 0x4600 -#define MOVS 0x0000 -#define MOVSI 0x2000 -#define MOVT 0xf2c00000 -#define MOVW 0xf2400000 -#define MOV_W 0xea4f0000 -#define MOV_WI 0xf04f0000 -#define MUL 0xfb00f000 -#define MVNS 0x43c0 -#define MVN_W 0xea6f0000 -#define MVN_WI 0xf06f0000 -#define NOP 0xbf00 -#define ORNI 0xf0600000 -#define ORRI 0xf0400000 -#define ORRS 0x4300 -#define ORR_W 0xea400000 -#define POP 0xbd00 -#define POP_W 0xe8bd0000 -#define 
PUSH 0xb500 -#define PUSH_W 0xe92d0000 -#define RSB_WI 0xf1c00000 -#define RSBSI 0x4240 -#define SBCI 0xf1600000 -#define SBCS 0x4180 -#define SBC_W 0xeb600000 -#define SMULL 0xfb800000 -#define STR_SP 0x9000 -#define SUBS 0x1a00 -#define SUBSI3 0x1e00 -#define SUBSI8 0x3800 -#define SUB_W 0xeba00000 -#define SUBWI 0xf2a00000 -#define SUB_SP 0xb080 -#define SUB_WI 0xf1a00000 -#define SXTB 0xb240 -#define SXTB_W 0xfa4ff080 -#define SXTH 0xb200 -#define SXTH_W 0xfa0ff080 -#define TST 0x4200 -#define UMULL 0xfba00000 -#define UXTB 0xb2c0 -#define UXTB_W 0xfa5ff080 -#define UXTH 0xb280 -#define UXTH_W 0xfa1ff080 -#define VABS_F32 0xeeb00ac0 -#define VADD_F32 0xee300a00 -#define VCMP_F32 0xeeb40a40 -#define VDIV_F32 0xee800a00 -#define VMOV_F32 0xeeb00a40 -#define VMRS 0xeef1fa10 -#define VMUL_F32 0xee200a00 -#define VNEG_F32 0xeeb10a40 -#define VSTR_F32 0xed000a00 -#define VSUB_F32 0xee300a40 - -static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst) -{ - sljit_uh *ptr; - SLJIT_ASSERT(!(inst & 0xffff0000)); - - ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh)); - FAIL_IF(!ptr); - *ptr = inst; - compiler->size++; - return SLJIT_SUCCESS; -} - -static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst) -{ - sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins)); - FAIL_IF(!ptr); - *ptr++ = inst >> 16; - *ptr = inst; - compiler->size += 2; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) -{ - FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | - COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); - return push_inst32(compiler, MOVT | RD4(dst) | - COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); -} - -static SLJIT_INLINE void modify_imm32_const(sljit_uh* inst, sljit_uw new_imm) -{ - sljit_si dst = inst[1] & 0x0f00; 
- SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); - inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); - inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); - inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); - inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); -} - -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code) -{ - sljit_sw diff; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; - - if (jump->flags & JUMP_ADDR) { - /* Branch to ARM code is not optimized yet. */ - if (!(jump->u.target & 0x1)) - return 0; - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1; - } - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; - } - - if (jump->flags & IS_COND) { - SLJIT_ASSERT(!(jump->flags & IS_BL)); - if (diff <= 127 && diff >= -128) { - jump->flags |= B_TYPE1; - return 5; - } - if (diff <= 524287 && diff >= -524288) { - jump->flags |= B_TYPE2; - return 4; - } - /* +1 comes from the prefix IT instruction. 
*/ - diff--; - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= B_TYPE3; - return 3; - } - } - else if (jump->flags & IS_BL) { - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= BL_TYPE6; - return 3; - } - } - else { - if (diff <= 1023 && diff >= -1024) { - jump->flags |= B_TYPE4; - return 4; - } - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= B_TYPE5; - return 3; - } - } - - return 0; -} - -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush) -{ - sljit_uh* inst = (sljit_uh*)addr; - modify_imm32_const(inst, new_addr); - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 3); - } -} - -static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) -{ - sljit_si type = (jump->flags >> 4) & 0xf; - sljit_sw diff; - sljit_uh *jump_inst; - sljit_si s, j1, j2; - - if (SLJIT_UNLIKELY(type == 0)) { - inline_set_jump_addr(jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); - return; - } - - if (jump->flags & JUMP_ADDR) { - SLJIT_ASSERT(jump->u.target & 0x1); - diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1; - } - else - diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1; - jump_inst = (sljit_uh*)jump->addr; - - switch (type) { - case 1: - /* Encoding T1 of 'B' instruction */ - SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); - jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); - return; - case 2: - /* Encoding T3 of 'B' instruction */ - SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); - jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); - jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); - return; - case 3: - SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; - diff--; - type = 5; - break; - case 4: - /* 
Encoding T2 of 'B' instruction */ - SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); - jump_inst[0] = 0xe000 | (diff & 0x7ff); - return; - } - - SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); - - /* Really complex instruction form for branches. */ - s = (diff >> 23) & 0x1; - j1 = (~(diff >> 21) ^ s) & 0x1; - j2 = (~(diff >> 22) ^ s) & 0x1; - jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); - jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); - - /* The others have a common form. */ - if (type == 5) /* Encoding T4 of 'B' instruction */ - jump_inst[1] |= 0x9000; - else if (type == 6) /* Encoding T1 of 'BL' instruction */ - jump_inst[1] |= 0xd000; - else - SLJIT_ASSERT_STOP(); -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_uh *code; - sljit_uh *code_ptr; - sljit_uh *buf_ptr; - sljit_uh *buf_end; - sljit_uw half_count; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - - code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh)); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - half_count = 0; - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - - do { - buf_ptr = (sljit_uh*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 1); - do { - *code_ptr = *buf_ptr++; - /* These structures are ordered by their address. */ - SLJIT_ASSERT(!label || label->size >= half_count); - SLJIT_ASSERT(!jump || jump->addr >= half_count); - SLJIT_ASSERT(!const_ || const_->addr >= half_count); - if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; - label->size = code_ptr - code; - label = label->next; - } - if (jump && jump->addr == half_count) { - jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 
10 : 8); - code_ptr -= detect_jump_type(jump, code_ptr, code); - jump = jump->next; - } - if (const_ && const_->addr == half_count) { - const_->addr = (sljit_uw)code_ptr; - const_ = const_->next; - } - code_ptr ++; - half_count ++; - } while (buf_ptr < buf_end); - - buf = buf->next; - } while (buf); - - if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; - label->size = code_ptr - code; - label = label->next; - } - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); - - jump = compiler->jumps; - while (jump) { - set_jump_instruction(jump); - jump = jump->next; - } - - SLJIT_CACHE_FLUSH(code, code_ptr); - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size * sizeof(sljit_uh); - /* Set thumb mode flag. */ - return (void*)((sljit_uw)code | 0x1); -} - -#define INVALID_IMM 0x80000000 -static sljit_uw get_imm(sljit_uw imm) -{ - /* Thumb immediate form. */ - sljit_si counter; - - if (imm <= 0xff) - return imm; - - if ((imm & 0xffff) == (imm >> 16)) { - /* Some special cases. */ - if (!(imm & 0xff00)) - return (1 << 12) | (imm & 0xff); - if (!(imm & 0xff)) - return (2 << 12) | ((imm >> 8) & 0xff); - if ((imm & 0xff00) == ((imm & 0xff) << 8)) - return (3 << 12) | (imm & 0xff); - } - - /* Assembly optimization: count leading zeroes? */ - counter = 8; - if (!(imm & 0xffff0000)) { - counter += 16; - imm <<= 16; - } - if (!(imm & 0xff000000)) { - counter += 8; - imm <<= 8; - } - if (!(imm & 0xf0000000)) { - counter += 4; - imm <<= 4; - } - if (!(imm & 0xc0000000)) { - counter += 2; - imm <<= 2; - } - if (!(imm & 0x80000000)) { - counter += 1; - imm <<= 1; - } - /* Since imm >= 128, this must be true. */ - SLJIT_ASSERT(counter <= 31); - - if (imm & 0x00ffffff) - return INVALID_IMM; /* Cannot be encoded. 
*/ - - return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); -} - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) -{ - sljit_uw tmp; - - if (imm >= 0x10000) { - tmp = get_imm(imm); - if (tmp != INVALID_IMM) - return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); - tmp = get_imm(~imm); - if (tmp != INVALID_IMM) - return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); - } - - /* set low 16 bits, set hi 16 bits to 0. */ - FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | - COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); - - /* set hi 16 bit if needed. */ - if (imm >= 0x10000) - return push_inst32(compiler, MOVT | RD4(dst) | - COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); - return SLJIT_SUCCESS; -} - -#define ARG1_IMM 0x0010000 -#define ARG2_IMM 0x0020000 -#define KEEP_FLAGS 0x0040000 -#define SET_MULOV 0x0080000 -/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ -#define SET_FLAGS 0x0100000 -#define UNUSED_RETURN 0x0200000 -#define SLOW_DEST 0x0400000 -#define SLOW_SRC1 0x0800000 -#define SLOW_SRC2 0x1000000 - -static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2) -{ - /* dst must be register, TMP_REG1 - arg1 must be register, TMP_REG1, imm - arg2 must be register, TMP_REG2, imm */ - sljit_si reg; - sljit_uw imm, negated_imm; - - if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { - /* Both are immediates. */ - flags &= ~ARG1_IMM; - FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); - arg1 = TMP_REG1; - } - - if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; - imm = (flags & ARG2_IMM) ? 
arg2 : arg1; - - switch (flags & 0xffff) { - case SLJIT_MOV: - SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); - return load_immediate(compiler, dst, imm); - case SLJIT_NOT: - if (!(flags & SET_FLAGS)) - return load_immediate(compiler, dst, ~imm); - /* Since the flags should be set, we just fallback to the register mode. - Although I could do some clever things here, "NOT IMM" does not worth the efforts. */ - break; - case SLJIT_CLZ: - /* No form with immediate operand. */ - break; - case SLJIT_ADD: - negated_imm = (sljit_uw)-(sljit_sw)imm; - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { - if (imm <= 0x7) - return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); - if (negated_imm <= 0x7) - return push_inst16(compiler, SUBSI3 | IMM3(negated_imm) | RD3(dst) | RN3(reg)); - if (reg == dst) { - if (imm <= 0xff) - return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); - if (negated_imm <= 0xff) - return push_inst16(compiler, SUBSI8 | IMM8(negated_imm) | RDN3(dst)); - } - } - if (!(flags & SET_FLAGS)) { - if (imm <= 0xfff) - return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); - if (negated_imm <= 0xfff) - return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(negated_imm)); - } - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - break; - case SLJIT_ADDC: - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - break; - case SLJIT_SUB: - if (flags & ARG2_IMM) { - negated_imm = (sljit_uw)-(sljit_sw)imm; - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { - if (imm <= 0x7) - return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); - if (negated_imm <= 0x7) - return push_inst16(compiler, ADDSI3 | IMM3(negated_imm) | RD3(dst) | RN3(reg)); - if (reg == dst) { - if (imm <= 0xff) - return 
push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); - if (negated_imm <= 0xff) - return push_inst16(compiler, ADDSI8 | IMM8(negated_imm) | RDN3(dst)); - } - if (imm <= 0xff && (flags & UNUSED_RETURN)) - return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); - } - if (!(flags & SET_FLAGS)) { - if (imm <= 0xfff) - return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); - if (negated_imm <= 0xfff) - return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(negated_imm)); - } - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - } - else { - if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst)) - return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - } - break; - case SLJIT_SUBC: - if (flags & ARG2_IMM) { - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - } - break; - case SLJIT_MUL: - /* No form with immediate operand. */ - break; - case SLJIT_AND: - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - imm = get_imm(~((flags & ARG2_IMM) ? arg2 : arg1)); - if (imm != INVALID_IMM) - return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - break; - case SLJIT_OR: - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - imm = get_imm(~((flags & ARG2_IMM) ? 
arg2 : arg1)); - if (imm != INVALID_IMM) - return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - break; - case SLJIT_XOR: - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); - break; - case SLJIT_SHL: - if (flags & ARG2_IMM) { - imm &= 0x1f; - if (imm == 0) { - if (!(flags & SET_FLAGS)) - return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); - if (IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); - } - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); - return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); - } - break; - case SLJIT_LSHR: - if (flags & ARG2_IMM) { - imm &= 0x1f; - if (imm == 0) { - if (!(flags & SET_FLAGS)) - return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); - if (IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); - } - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); - return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); - } - break; - case SLJIT_ASHR: - if (flags & ARG2_IMM) { - imm &= 0x1f; - if (imm == 0) { - if (!(flags & SET_FLAGS)) - return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); - if (IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); - } - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) - return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); - return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); 
- } - break; - default: - SLJIT_ASSERT_STOP(); - break; - } - - if (flags & ARG2_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); - arg2 = TMP_REG2; - } - else { - FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); - arg1 = TMP_REG1; - } - } - - /* Both arguments are registers. */ - switch (flags & 0xffff) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: - case SLJIT_MOVU_P: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); - case SLJIT_MOV_UB: - case SLJIT_MOVU_UB: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_SB: - case SLJIT_MOVU_SB: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_UH: - case SLJIT_MOVU_UH: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_SH: - case SLJIT_MOVU_SH: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); - case SLJIT_NOT: - SLJIT_ASSERT(arg1 == TMP_REG1); - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); - case SLJIT_CLZ: - SLJIT_ASSERT(arg1 == TMP_REG1); - FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | 
RM4(arg2))); - if (flags & SET_FLAGS) { - if (reg_map[dst] <= 7) - return push_inst16(compiler, CMPI | RDN3(dst)); - return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst)); - } - return SLJIT_SUCCESS; - case SLJIT_ADD: - if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) - return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); - if (dst == arg1 && !(flags & SET_FLAGS)) - return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); - return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_ADDC: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_SUB: - if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) - return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); - return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_SUBC: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_MUL: - if (!(flags & SET_FLAGS)) - return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); - SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2); - FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); - /* cmp TMP_REG2, dst asr #31. 
*/ - return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); - case SLJIT_AND: - if (!(flags & KEEP_FLAGS)) { - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); - if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) - return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); - } - return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_OR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_XOR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_SHL: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_LSHR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - case SLJIT_ASHR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); - return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); - } - - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -#define STORE 0x01 -#define SIGNED 0x02 - -#define WORD_SIZE 0x00 -#define BYTE_SIZE 0x04 -#define HALF_SIZE 0x08 - -#define UPDATE 0x10 -#define ARG_TEST 0x20 - -#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) -#define OFFSET_CHECK(imm, 
shift) (!(argw & ~(imm << shift))) - -/* - 1st letter: - w = word - b = byte - h = half - - 2nd letter: - s = signed - u = unsigned - - 3rd letter: - l = load - s = store -*/ - -static SLJIT_CONST sljit_uw sljit_mem16[12] = { -/* w u l */ 0x5800 /* ldr */, -/* w u s */ 0x5000 /* str */, -/* w s l */ 0x5800 /* ldr */, -/* w s s */ 0x5000 /* str */, - -/* b u l */ 0x5c00 /* ldrb */, -/* b u s */ 0x5400 /* strb */, -/* b s l */ 0x5600 /* ldrsb */, -/* b s s */ 0x5400 /* strb */, - -/* h u l */ 0x5a00 /* ldrh */, -/* h u s */ 0x5200 /* strh */, -/* h s l */ 0x5e00 /* ldrsh */, -/* h s s */ 0x5200 /* strh */, -}; - -static SLJIT_CONST sljit_uw sljit_mem16_imm5[12] = { -/* w u l */ 0x6800 /* ldr imm5 */, -/* w u s */ 0x6000 /* str imm5 */, -/* w s l */ 0x6800 /* ldr imm5 */, -/* w s s */ 0x6000 /* str imm5 */, - -/* b u l */ 0x7800 /* ldrb imm5 */, -/* b u s */ 0x7000 /* strb imm5 */, -/* b s l */ 0x0000 /* not allowed */, -/* b s s */ 0x7000 /* strb imm5 */, - -/* h u l */ 0x8800 /* ldrh imm5 */, -/* h u s */ 0x8000 /* strh imm5 */, -/* h s l */ 0x0000 /* not allowed */, -/* h s s */ 0x8000 /* strh imm5 */, -}; - -#define MEM_IMM8 0xc00 -#define MEM_IMM12 0x800000 -static SLJIT_CONST sljit_uw sljit_mem32[12] = { -/* w u l */ 0xf8500000 /* ldr.w */, -/* w u s */ 0xf8400000 /* str.w */, -/* w s l */ 0xf8500000 /* ldr.w */, -/* w s s */ 0xf8400000 /* str.w */, - -/* b u l */ 0xf8100000 /* ldrb.w */, -/* b u s */ 0xf8000000 /* strb.w */, -/* b s l */ 0xf9100000 /* ldrsb.w */, -/* b s s */ 0xf8000000 /* strb.w */, - -/* h u l */ 0xf8300000 /* ldrh.w */, -/* h u s */ 0xf8200000 /* strsh.w */, -/* h s l */ 0xf9300000 /* ldrsh.w */, -/* h s s */ 0xf8200000 /* strsh.w */, -}; - -/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. 
*/ -static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) -{ - if (value >= 0) { - if (value <= 0xfff) - return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value); - } - else { - value = -value; - if (value <= 0xfff) - return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value); - } - return SLJIT_ERR_UNSUPPORTED; -} - -/* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - sljit_si tmp; - - SLJIT_ASSERT(arg & SLJIT_MEM); - - if (SLJIT_UNLIKELY(flags & UPDATE)) { - if ((arg & 0xf) && !(arg & 0xf0) && argw <= 0xff && argw >= -0xff) { - flags &= ~UPDATE; - arg &= 0xf; - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - - if (argw >= 0) - argw |= 0x200; - else { - argw = -argw; - } - SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff); - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw)); - return -1; - } - return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0; - } - - if (SLJIT_UNLIKELY(arg & 0xf0)) { - argw &= 0x3; - tmp = (arg >> 4) & 0xf; - arg &= 0xf; - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - - if (!argw && IS_3_LO_REGS(reg, arg, tmp)) - FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp))); - else - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4))); - return -1; - } - - if (!(arg & 0xf) || argw > 0xfff || argw < -0xff) - return (flags & ARG_TEST) ? 
SLJIT_SUCCESS : 0; - - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - - arg &= 0xf; - if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { - tmp = 3; - if (IS_WORD_SIZE(flags)) { - if (OFFSET_CHECK(0x1f, 2)) - tmp = 2; - } - else if (flags & BYTE_SIZE) - { - if (OFFSET_CHECK(0x1f, 0)) - tmp = 0; - } - else { - SLJIT_ASSERT(flags & HALF_SIZE); - if (OFFSET_CHECK(0x1f, 1)) - tmp = 1; - } - - if (tmp != 3) { - FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp)))); - return -1; - } - } - - /* SP based immediate. */ - if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { - FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); - return -1; - } - - if (argw >= 0) - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); - else - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw)); - return -1; -} - -/* see getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - /* Simple operation except for updates. */ - if ((arg & 0xf0) || !(next_arg & SLJIT_MEM)) - return 0; - - if (!(arg & 0xf)) { - if ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff) - return 1; - return 0; - } - - if (argw == next_argw) - return 1; - - if (arg == next_arg && ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff)) - return 1; - - return 0; -} - -/* Emit the necessary instructions. See can_cache above. 
*/ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - sljit_si tmp_r; - sljit_sw tmp; - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(next_arg & SLJIT_MEM)) { - next_arg = 0; - next_argw = 0; - } - - tmp_r = (flags & STORE) ? TMP_REG3 : reg; - - if (SLJIT_UNLIKELY(flags & UPDATE)) { - flags &= ~UPDATE; - /* Update only applies if a base register exists. */ - if (arg & 0xf) { - /* There is no caching here. */ - tmp = (arg & 0xf0) >> 4; - arg &= 0xf; - - if (!tmp) { - if (!(argw & ~0xfff)) { - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); - return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw)); - } - - if (compiler->cache_arg == SLJIT_MEM) { - if (argw == compiler->cache_argw) { - tmp = TMP_REG3; - argw = 0; - } - else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - tmp = TMP_REG3; - argw = 0; - } - } - - if (argw) { - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - compiler->cache_arg = SLJIT_MEM; - compiler->cache_argw = argw; - tmp = TMP_REG3; - argw = 0; - } - } - - argw &= 0x3; - if (!argw && IS_3_LO_REGS(reg, arg, tmp)) { - FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp))); - return push_inst16(compiler, ADD | SET_REGS44(arg, tmp)); - } - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4))); - return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(tmp) | (argw << 6)); - } - } - - SLJIT_ASSERT(!(arg & 0xf0)); - - if (compiler->cache_arg == arg) { - if (!((argw - compiler->cache_argw) & ~0xfff)) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | (argw - compiler->cache_argw)); - if (!((compiler->cache_argw - argw) & ~0xff)) - return 
push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); - } - } - - next_arg = (arg & 0xf) && (arg == next_arg); - arg &= 0xf; - if (arg && compiler->cache_arg == SLJIT_MEM && compiler->cache_argw == argw) - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); - - compiler->cache_argw = argw; - if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_arg = SLJIT_MEM | arg; - arg = 0; - } - else { - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - compiler->cache_arg = SLJIT_MEM; - - if (next_arg) { - FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg))); - compiler->cache_arg = SLJIT_MEM | arg; - arg = 0; - } - } - - if (arg) - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); -} - -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -/* --------------------------------------------------------------------- */ -/* Entry, 
exit */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size; - sljit_ins push; - - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - push = (1 << 4); - if (saveds >= 5) - push |= 1 << 11; - if (saveds >= 4) - push |= 1 << 10; - if (saveds >= 3) - push |= 1 << 8; - if (saveds >= 2) - push |= 1 << 7; - if (saveds >= 1) - push |= 1 << 6; - if (scratches >= 5) - push |= 1 << 5; - FAIL_IF(saveds >= 3 - ? push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | push)); - - /* Stack must be aligned to 8 bytes: */ - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; - if (local_size > 0) { - if (local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size)); - } - - if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1))); - if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2))); - if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3))); - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size; - - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = 
scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - sljit_ins pop; - - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - if (compiler->local_size > 0) { - if (compiler->local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size)); - } - - pop = (1 << 4); - if (compiler->saveds >= 5) - pop |= 1 << 11; - if (compiler->saveds >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 3) - pop |= 1 << 8; - if (compiler->saveds >= 2) - pop |= 1 << 7; - if (compiler->saveds >= 1) - pop |= 1 << 6; - if (compiler->scratches >= 5) - pop |= 1 << 5; - return compiler->saveds >= 3 - ? 
push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | pop); -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__GNUC__) -extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); -extern int __aeabi_idivmod(int numerator, int denominator); -#else -#error "Software divmod functions are needed" -#endif - -#ifdef __cplusplus -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - push_inst16(compiler, BKPT); - break; - case SLJIT_NOP: - push_inst16(compiler, NOP); - break; - case SLJIT_UMUL: - case SLJIT_SMUL: - return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 8) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 16) - | reg_map[SLJIT_SCRATCH_REG2]); - case SLJIT_UDIV: - case SLJIT_SDIV: - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); - FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); - } else if (compiler->scratches >= 3) - FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */)); -#if defined(__GNUC__) - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_UDIV ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); -#else -#error "Software divmod functions are needed" -#endif - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); - return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); - } else if (compiler->scratches >= 3) - return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */); - return SLJIT_SUCCESS; - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_r, flags; - sljit_si op_flags = GET_ALL_FLAGS(op); - - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_r = (dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REG3) ? dst : TMP_REG1; - - op = GET_OPCODE(op); - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - flags = WORD_SIZE; - break; - case SLJIT_MOV_UB: - flags = BYTE_SIZE; - if (src & SLJIT_IMM) - srcw = (sljit_ub)srcw; - break; - case SLJIT_MOV_SB: - flags = BYTE_SIZE | SIGNED; - if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; - break; - case SLJIT_MOV_UH: - flags = HALF_SIZE; - if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; - break; - case SLJIT_MOV_SH: - flags = HALF_SIZE | SIGNED; - if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; - break; - case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: - case SLJIT_MOVU_P: - flags = WORD_SIZE | UPDATE; - break; - case SLJIT_MOVU_UB: - flags = BYTE_SIZE | UPDATE; - if (src & SLJIT_IMM) - srcw = (sljit_ub)srcw; - break; - case SLJIT_MOVU_SB: - flags = BYTE_SIZE | SIGNED | UPDATE; - if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; - break; - case SLJIT_MOVU_UH: - flags = HALF_SIZE | UPDATE; - 
if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; - break; - case SLJIT_MOVU_SH: - flags = HALF_SIZE | SIGNED | UPDATE; - if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; - break; - default: - SLJIT_ASSERT_STOP(); - flags = 0; - break; - } - - if (src & SLJIT_IMM) - FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); - else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) - FAIL_IF(compiler->error); - else - FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); - } else { - if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG1, src); - dst_r = src; - } - - if (dst & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) - return compiler->error; - else - return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; - } - - if (op == SLJIT_NEG) { -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); - } - - flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? 
KEEP_FLAGS : 0); - if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw)) - FAIL_IF(compiler->error); - else - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); - src = TMP_REG2; - } - - if (src & SLJIT_IMM) - flags |= ARG2_IMM; - else - srcw = src; - - emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); - - if (dst & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) - return compiler->error; - else - return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_r, flags; - - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_r = (dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REG3) ? dst : TMP_REG1; - flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? 
KEEP_FLAGS : 0); - - if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw)) - flags |= SLOW_DEST; - - if (src1 & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC1; - } - if (src2 & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC2; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); - - if (src1 & SLJIT_MEM) - src1 = TMP_REG1; - if (src2 & SLJIT_MEM) - src2 = TMP_REG2; - - if (src1 & SLJIT_IMM) - flags |= ARG1_IMM; - else - src1w = src1; - if (src2 & SLJIT_IMM) - flags |= ARG2_IMM; - else - src2w = src2; - - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - - if (GET_OPCODE(op) == SLJIT_MUL && (op & SLJIT_SET_O)) - flags |= SET_MULOV; - - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); - - if (dst & SLJIT_MEM) { - if (!(flags & SLOW_DEST)) { - getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw); - return compiler->error; - } - return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - check_sljit_get_register_index(reg); - return 
reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 2 || size == 4); - - if (size == 2) - return push_inst16(compiler, *(sljit_uh*)instruction); - return push_inst32(compiler, *(sljit_ins*)instruction); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - return 1; -} - -#define FPU_LOAD (1 << 20) - -static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - sljit_sw tmp; - sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); - - SLJIT_ASSERT(arg & SLJIT_MEM); - - /* Fast loads and stores. */ - if (SLJIT_UNLIKELY(arg & 0xf0)) { - FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & 0xf) | RM4((arg & 0xf0) >> 4) | ((argw & 0x3) << 6))); - arg = SLJIT_MEM | TMP_REG2; - argw = 0; - } - - if ((arg & 0xf) && (argw & 0x3) == 0) { - if (!(argw & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(arg & 0xf) | DD4(reg) | (argw >> 2)); - if (!(-argw & ~0x3fc)) - return push_inst32(compiler, inst | RN4(arg & 0xf) | DD4(reg) | (-argw >> 2)); - } - - SLJIT_ASSERT(!(arg & 0xf0)); - if (compiler->cache_arg == arg) { - tmp = argw - compiler->cache_argw; - if (!(tmp & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2)); - if (!(-tmp & ~0x3fc)) - return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | 
DD4(reg)); - } - } - - if (arg & 0xf) { - if (emit_set_delta(compiler, TMP_REG1, arg & 0xf, argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); - } - imm = get_imm(argw & ~0x3fc); - if (imm != INVALID_IMM) { - FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & 0xf) | imm)); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); - } - imm = get_imm(-argw & ~0x3fc); - if (imm != INVALID_IMM) { - argw = -argw; - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & 0xf) | imm)); - return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); - } - } - - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - if (SLJIT_UNLIKELY(!(arg & 0xf))) - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - else { - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - if (arg & 0xf) - FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & 0xf)))); - } - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_r; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; - - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw); - src = TMP_FREG2; - } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src))); - return push_inst32(compiler, VMRS); - } - - dst_r = (dst > 
SLJIT_FLOAT_REG6) ? TMP_FREG1 : dst; - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw); - src = dst_r; - } - - switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_r) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); - break; - } - - if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_r; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; - - dst_r = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG1 : dst; - if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); - src1 = TMP_FREG1; - } - if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); - src2 = TMP_FREG2; - } - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); - break; - case SLJIT_SUBD: - FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); - break; - case SLJIT_MULD: - FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); - break; - case SLJIT_DIVD: - FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); - break; - } - - if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); - return SLJIT_SUCCESS; -} - -#undef FPU_LOAD - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - if (dst <= TMP_REG3) - return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); - - /* Memory. */ - if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw)) - return compiler->error; - /* TMP_REG3 is used for caching. 
*/ - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); - else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) - FAIL_IF(compiler->error); - else { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0)); - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2))); - } - } - else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); - return push_inst16(compiler, BLX | RN3(TMP_REG3)); -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -static sljit_uw get_cc(sljit_si type) -{ - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - case SLJIT_C_FLOAT_EQUAL: - return 0x0; - - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_FLOAT_NOT_EQUAL: - return 0x1; - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - return 0x3; - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - return 0x2; - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - return 0x8; - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - return 0x9; - - case SLJIT_C_SIG_LESS: - return 0xb; - - case SLJIT_C_SIG_GREATER_EQUAL: - return 0xa; - - case SLJIT_C_SIG_GREATER: - return 0xc; - - case SLJIT_C_SIG_LESS_EQUAL: - return 0xd; - - case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_UNORDERED: - return 
0x6; - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_ORDERED: - return 0x7; - - default: /* SLJIT_JUMP */ - return 0xe; - } -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - return label; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - struct sljit_jump *jump; - sljit_si cc; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - /* In ARM, we don't need to touch the arguments. */ - PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); - if (type < SLJIT_JUMP) { - jump->flags |= IS_COND; - cc = get_cc(type); - jump->flags |= cc << 8; - PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - } - - jump->addr = compiler->size; - if (type <= SLJIT_JUMP) - PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); - else { - jump->flags |= IS_BL; - PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); - } - - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - struct sljit_jump *jump; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - /* In ARM, we don't need to touch the arguments. 
*/ - if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; - - FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); - jump->addr = compiler->size; - FAIL_IF(push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1))); - } - else { - if (src <= TMP_REG3) - return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); - - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw)); - if (type >= SLJIT_FAST_CALL) - return push_inst16(compiler, BLX | RN3(TMP_REG1)); - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_si dst_r, flags = GET_ALL_FLAGS(op); - sljit_ins ins; - sljit_uw cc; - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - op = GET_OPCODE(op); - cc = get_cc(type); - dst_r = (dst <= TMP_REG3) ? dst : TMP_REG2; - - if (op < SLJIT_ADD) { - FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); - if (reg_map[dst_r] > 7) { - FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); - FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); - } else { - FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); - FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); - } - return dst_r == TMP_REG2 ? emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; - } - - ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI)); - if ((op == SLJIT_OR || op == SLJIT_XOR) && dst <= TMP_REG3 && dst == src) { - /* Does not change the other bits. 
*/ - FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1)); - if (flags & SLJIT_SET_E) { - /* The condition must always be set, even if the ORRI/EORI is not executed above. */ - if (reg_map[dst] <= 7) - return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst)); - } - return SLJIT_SUCCESS; - } - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } else if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - srcw = 0; - } - - FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0)); - if (dst_r == TMP_REG2) - FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0)); - - if (flags & SLJIT_SET_E) { - /* The condition must always be set, even if the ORR/EORI is not executed above. */ - if (reg_map[dst_r] <= 7) - return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - struct sljit_const *const_; - sljit_si dst_r; - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - - dst_r = (dst <= TMP_REG3) ? 
dst : TMP_REG1; - PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); - return const_; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ - inline_set_jump_addr(addr, new_addr, 1); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - sljit_uh* inst = (sljit_uh*)addr; - modify_imm32_const(inst, new_constant); - SLJIT_CACHE_FLUSH(inst, inst + 3); -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeARM_v5.c b/deps/libmagic/pcre/sljit/sljitNativeARM_v5.c deleted file mode 100644 index 23a45a4..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeARM_v5.c +++ /dev/null @@ -1,2515 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return "ARMv7" SLJIT_CPUINFO; -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - return "ARMv5" SLJIT_CPUINFO; -#else -#error "Internal error: Unknown ARM architecture" -#endif -} - -/* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) - -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) - -/* In ARM instruction words. - Cache lines are usually 32 byte aligned. */ -#define CONST_POOL_ALIGNMENT 8 -#define CONST_POOL_EMPTY 0xffffffff - -#define ALIGN_INSTRUCTION(ptr) \ - (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) -#define MAX_DIFFERENCE(max_diff) \ - (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) - -/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 -}; - -#define RM(rm) (reg_map[rm]) -#define RD(rd) (reg_map[rd] << 12) -#define RN(rn) (reg_map[rn] << 16) - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -/* The instruction includes the AL condition. - INST_NAME - CONDITIONAL remove this flag. */ -#define COND_MASK 0xf0000000 -#define CONDITIONAL 0xe0000000 -#define PUSH_POOL 0xff000000 - -/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */ -#define ADC_DP 0x5 -#define ADD_DP 0x4 -#define AND_DP 0x0 -#define B 0xea000000 -#define BIC_DP 0xe -#define BL 0xeb000000 -#define BLX 0xe12fff30 -#define BX 0xe12fff10 -#define CLZ 0xe16f0f10 -#define CMP_DP 0xa -#define BKPT 0xe1200070 -#define EOR_DP 0x1 -#define MOV_DP 0xd -#define MUL 0xe0000090 -#define MVN_DP 0xf -#define NOP 0xe1a00000 -#define ORR_DP 0xc -#define PUSH 0xe92d0000 -#define POP 0xe8bd0000 -#define RSB_DP 0x3 -#define RSC_DP 0x7 -#define SBC_DP 0x6 -#define SMULL 0xe0c00090 -#define SUB_DP 0x2 -#define UMULL 0xe0800090 -#define VABS_F32 0xeeb00ac0 -#define VADD_F32 0xee300a00 -#define VCMP_F32 0xeeb40a40 -#define VDIV_F32 0xee800a00 -#define VMOV_F32 0xeeb00a40 -#define VMRS 0xeef1fa10 -#define VMUL_F32 0xee200a00 -#define VNEG_F32 0xeeb10a40 -#define VSTR_F32 0xed000a00 -#define VSUB_F32 0xee300a40 - -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -/* Arm v7 specific instructions. */ -#define MOVW 0xe3000000 -#define MOVT 0xe3400000 -#define SXTB 0xe6af0070 -#define SXTH 0xe6bf0070 -#define UXTB 0xe6ef0070 -#define UXTH 0xe6ff0070 -#endif - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - -static sljit_si push_cpool(struct sljit_compiler *compiler) -{ - /* Pushing the constant pool into the instruction stream. 
*/ - sljit_uw* inst; - sljit_uw* cpool_ptr; - sljit_uw* cpool_end; - sljit_si i; - - /* The label could point the address after the constant pool. */ - if (compiler->last_label && compiler->last_label->size == compiler->size) - compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; - - SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!inst); - compiler->size++; - *inst = 0xff000000 | compiler->cpool_fill; - - for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!inst); - compiler->size++; - *inst = 0; - } - - cpool_ptr = compiler->cpool; - cpool_end = cpool_ptr + compiler->cpool_fill; - while (cpool_ptr < cpool_end) { - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!inst); - compiler->size++; - *inst = *cpool_ptr++; - } - compiler->cpool_diff = CONST_POOL_EMPTY; - compiler->cpool_fill = 0; - return SLJIT_SUCCESS; -} - -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) -{ - sljit_uw* ptr; - - if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) - FAIL_IF(push_cpool(compiler)); - - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!ptr); - compiler->size++; - *ptr = inst; - return SLJIT_SUCCESS; -} - -static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) -{ - sljit_uw* ptr; - sljit_uw cpool_index = CPOOL_SIZE; - sljit_uw* cpool_ptr; - sljit_uw* cpool_end; - sljit_ub* cpool_unique_ptr; - - if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) - FAIL_IF(push_cpool(compiler)); - else if (compiler->cpool_fill > 0) { - cpool_ptr = compiler->cpool; - cpool_end = cpool_ptr + compiler->cpool_fill; - cpool_unique_ptr = 
compiler->cpool_unique; - do { - if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { - cpool_index = cpool_ptr - compiler->cpool; - break; - } - cpool_ptr++; - cpool_unique_ptr++; - } while (cpool_ptr < cpool_end); - } - - if (cpool_index == CPOOL_SIZE) { - /* Must allocate a new entry in the literal pool. */ - if (compiler->cpool_fill < CPOOL_SIZE) { - cpool_index = compiler->cpool_fill; - compiler->cpool_fill++; - } - else { - FAIL_IF(push_cpool(compiler)); - cpool_index = 0; - compiler->cpool_fill = 1; - } - } - - SLJIT_ASSERT((inst & 0xfff) == 0); - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!ptr); - compiler->size++; - *ptr = inst | cpool_index; - - compiler->cpool[cpool_index] = literal; - compiler->cpool_unique[cpool_index] = 0; - if (compiler->cpool_diff == CONST_POOL_EMPTY) - compiler->cpool_diff = compiler->size; - return SLJIT_SUCCESS; -} - -static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) -{ - sljit_uw* ptr; - if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) - FAIL_IF(push_cpool(compiler)); - - SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!ptr); - compiler->size++; - *ptr = inst | compiler->cpool_fill; - - compiler->cpool[compiler->cpool_fill] = literal; - compiler->cpool_unique[compiler->cpool_fill] = 1; - compiler->cpool_fill++; - if (compiler->cpool_diff == CONST_POOL_EMPTY) - compiler->cpool_diff = compiler->size; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler) -{ - /* Place for at least two instruction (doesn't matter whether the first has a literal). 
*/ - if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) - return push_cpool(compiler); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler) -{ - /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ - SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); - return push_inst(compiler, BLX | RM(TMP_REG1)); -} - -static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) -{ - sljit_uw diff; - sljit_uw ind; - sljit_uw counter = 0; - sljit_uw* clear_const_pool = const_pool; - sljit_uw* clear_const_pool_end = const_pool + cpool_size; - - SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT); - /* Set unused flag for all literals in the constant pool. - I.e.: unused literals can belong to branches, which can be encoded as B or BL. - We can "compress" the constant pool by discarding these literals. */ - while (clear_const_pool < clear_const_pool_end) - *clear_const_pool++ = (sljit_uw)(-1); - - while (last_pc_patch < code_ptr) { - /* Data transfer instruction with Rn == r15. */ - if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { - diff = const_pool - last_pc_patch; - ind = (*last_pc_patch) & 0xfff; - - /* Must be a load instruction with immediate offset. 
*/ - SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); - if ((sljit_si)const_pool[ind] < 0) { - const_pool[ind] = counter; - ind = counter; - counter++; - } - else - ind = const_pool[ind]; - - SLJIT_ASSERT(diff >= 1); - if (diff >= 2 || ind > 0) { - diff = (diff + ind - 2) << 2; - SLJIT_ASSERT(diff <= 0xfff); - *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; - } - else - *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; - } - last_pc_patch++; - } - return counter; -} - -/* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */ -struct future_patch { - struct future_patch* next; - sljit_si index; - sljit_si value; -}; - -static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) -{ - sljit_si value; - struct future_patch *curr_patch, *prev_patch; - - /* Using the values generated by patch_pc_relative_loads. 
*/ - if (!*first_patch) - value = (sljit_si)cpool_start_address[cpool_current_index]; - else { - curr_patch = *first_patch; - prev_patch = 0; - while (1) { - if (!curr_patch) { - value = (sljit_si)cpool_start_address[cpool_current_index]; - break; - } - if ((sljit_uw)curr_patch->index == cpool_current_index) { - value = curr_patch->value; - if (prev_patch) - prev_patch->next = curr_patch->next; - else - *first_patch = curr_patch->next; - SLJIT_FREE(curr_patch); - break; - } - prev_patch = curr_patch; - curr_patch = curr_patch->next; - } - } - - if (value >= 0) { - if ((sljit_uw)value > cpool_current_index) { - curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch)); - if (!curr_patch) { - while (*first_patch) { - curr_patch = *first_patch; - *first_patch = (*first_patch)->next; - SLJIT_FREE(curr_patch); - } - return SLJIT_ERR_ALLOC_FAILED; - } - curr_patch->next = *first_patch; - curr_patch->index = value; - curr_patch->value = cpool_start_address[value]; - *first_patch = curr_patch; - } - cpool_start_address[value] = *buf_ptr; - } - return SLJIT_SUCCESS; -} - -#else - -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) -{ - sljit_uw* ptr; - - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); - FAIL_IF(!ptr); - compiler->size++; - *ptr = inst; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) -{ - FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); - return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); -} - -#endif - -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code) -{ - sljit_sw diff; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (jump->flags & IS_BL) - code_ptr--; - - if (jump->flags & JUMP_ADDR) - diff = 
((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)); - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); - } - - /* Branch to Thumb code has not been optimized yet. */ - if (diff & 0x3) - return 0; - - diff >>= 2; - if (jump->flags & IS_BL) { - if (diff <= 0x01ffffff && diff >= -0x02000000) { - *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); - jump->flags |= PATCH_B; - return 1; - } - } - else { - if (diff <= 0x01ffffff && diff >= -0x02000000) { - *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); - jump->flags |= PATCH_B; - } - } -#else - if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr); - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); - } - - /* Branch to Thumb code has not been optimized yet. */ - if (diff & 0x3) - return 0; - - diff >>= 2; - if (diff <= 0x01ffffff && diff >= -0x02000000) { - code_ptr -= 2; - *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); - jump->flags |= PATCH_B; - return 1; - } -#endif - return 0; -} - -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush) -{ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw*)addr; - sljit_uw *inst = (sljit_uw*)ptr[0]; - sljit_uw mov_pc = ptr[1]; - sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC); - sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2); - - if (diff <= 0x7fffff && diff >= -0x800000) { - /* Turn to branch. 
*/ - if (!bl) { - inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 1); - } - } else { - inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); - inst[1] = NOP; - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 2); - } - } - } else { - /* Get the position of the constant. */ - if (mov_pc & (1 << 23)) - ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; - else - ptr = inst + 1; - - if (*inst != mov_pc) { - inst[0] = mov_pc; - if (!bl) { - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 1); - } - } else { - inst[1] = BLX | RM(TMP_REG1); - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 2); - } - } - } - *ptr = new_addr; - } -#else - sljit_uw *inst = (sljit_uw*)addr; - SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); - inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); - inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 2); - } -#endif -} - -static sljit_uw get_imm(sljit_uw imm); - -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush) -{ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw*)addr; - sljit_uw *inst = (sljit_uw*)ptr[0]; - sljit_uw ldr_literal = ptr[1]; - sljit_uw src2; - - src2 = get_imm(new_constant); - if (src2) { - *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 1); - } - return; - } - - src2 = get_imm(~new_constant); - if (src2) { - *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 1); - } - return; - } - - if (ldr_literal & (1 << 23)) - ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2; - else - ptr = inst + 1; - - if (*inst != ldr_literal) { - *inst = ldr_literal; - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 1); - } - } - *ptr = new_constant; 
-#else - sljit_uw *inst = (sljit_uw*)addr; - SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); - inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); - inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); - if (flush) { - SLJIT_CACHE_FLUSH(inst, inst + 2); - } -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_uw *code; - sljit_uw *code_ptr; - sljit_uw *buf_ptr; - sljit_uw *buf_end; - sljit_uw size; - sljit_uw word_count; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw cpool_size; - sljit_uw cpool_skip_alignment; - sljit_uw cpool_current_index; - sljit_uw *cpool_start_address; - sljit_uw *last_pc_patch; - struct future_patch *first_patch; -#endif - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - - /* Second code generation pass. 
*/ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - size = compiler->size + (compiler->patches << 1); - if (compiler->cpool_fill > 0) - size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; -#else - size = compiler->size; -#endif - code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw)); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - cpool_size = 0; - cpool_skip_alignment = 0; - cpool_current_index = 0; - cpool_start_address = NULL; - first_patch = NULL; - last_pc_patch = code; -#endif - - code_ptr = code; - word_count = 0; - - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - - if (label && label->size == 0) { - label->addr = (sljit_uw)code; - label->size = 0; - label = label->next; - } - - do { - buf_ptr = (sljit_uw*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 2); - do { - word_count++; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (cpool_size > 0) { - if (cpool_skip_alignment > 0) { - buf_ptr++; - cpool_skip_alignment--; - } - else { - if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { - SLJIT_FREE_EXEC(code); - compiler->error = SLJIT_ERR_ALLOC_FAILED; - return NULL; - } - buf_ptr++; - if (++cpool_current_index >= cpool_size) { - SLJIT_ASSERT(!first_patch); - cpool_size = 0; - if (label && label->size == word_count) { - /* Points after the current instruction. */ - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - } - } - } - else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { -#endif - *code_ptr = *buf_ptr++; - /* These structures are ordered by their address. 
*/ - SLJIT_ASSERT(!label || label->size >= word_count); - SLJIT_ASSERT(!jump || jump->addr >= word_count); - SLJIT_ASSERT(!const_ || const_->addr >= word_count); - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (detect_jump_type(jump, code_ptr, code)) - code_ptr--; - jump->addr = (sljit_uw)code_ptr; -#else - jump->addr = (sljit_uw)(code_ptr - 2); - if (detect_jump_type(jump, code_ptr, code)) - code_ptr -= 2; -#endif - jump = jump->next; - } - if (label && label->size == word_count) { - /* code_ptr can be affected above. */ - label->addr = (sljit_uw)(code_ptr + 1); - label->size = (code_ptr + 1) - code; - label = label->next; - } - if (const_ && const_->addr == word_count) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - const_->addr = (sljit_uw)code_ptr; -#else - const_->addr = (sljit_uw)(code_ptr - 1); -#endif - const_ = const_->next; - } - code_ptr++; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - } - else { - /* Fortunately, no need to shift. */ - cpool_size = *buf_ptr++ & ~PUSH_POOL; - SLJIT_ASSERT(cpool_size > 0); - cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); - cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); - if (cpool_current_index > 0) { - /* Unconditional branch. 
*/ - *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = cpool_start_address + cpool_current_index; - } - cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; - cpool_current_index = 0; - last_pc_patch = code_ptr; - } -#endif - } while (buf_ptr < buf_end); - buf = buf->next; - } while (buf); - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - SLJIT_ASSERT(cpool_size == 0); - if (compiler->cpool_fill > 0) { - cpool_start_address = ALIGN_INSTRUCTION(code_ptr); - cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); - if (cpool_current_index > 0) - code_ptr = cpool_start_address + cpool_current_index; - - buf_ptr = compiler->cpool; - buf_end = buf_ptr + compiler->cpool_fill; - cpool_current_index = 0; - while (buf_ptr < buf_end) { - if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { - SLJIT_FREE_EXEC(code); - compiler->error = SLJIT_ERR_ALLOC_FAILED; - return NULL; - } - buf_ptr++; - cpool_current_index++; - } - SLJIT_ASSERT(!first_patch); - } -#endif - - jump = compiler->jumps; - while (jump) { - buf_ptr = (sljit_uw*)jump->addr; - - if (jump->flags & PATCH_B) { - if (!(jump->flags & JUMP_ADDR)) { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; - } - else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; - } - } - else if (jump->flags & SLJIT_REWRITABLE_JUMP) { -#if (defined 
SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - jump->addr = (sljit_uw)code_ptr; - code_ptr[0] = (sljit_uw)buf_ptr; - code_ptr[1] = *buf_ptr; - inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); - code_ptr += 2; -#else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); -#endif - } - else { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (jump->flags & IS_BL) - buf_ptr--; - if (*buf_ptr & (1 << 23)) - buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; - else - buf_ptr += 1; - *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; -#else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); -#endif - } - jump = jump->next; - } - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - const_ = compiler->consts; - while (const_) { - buf_ptr = (sljit_uw*)const_->addr; - const_->addr = (sljit_uw)code_ptr; - - code_ptr[0] = (sljit_uw)buf_ptr; - code_ptr[1] = *buf_ptr; - if (*buf_ptr & (1 << 23)) - buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; - else - buf_ptr += 1; - /* Set the value again (can be a simple constant). */ - inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0); - code_ptr += 2; - - const_ = const_->next; - } -#endif - - SLJIT_ASSERT(code_ptr - code <= (sljit_si)size); - - SLJIT_CACHE_FLUSH(code, code_ptr); - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = size * sizeof(sljit_uw); - return code; -} - -/* --------------------------------------------------------------------- */ -/* Entry, exit */ -/* --------------------------------------------------------------------- */ - -/* emit_op inp_flags. - WRITE_BACK must be the first, since it is a flag. 
*/ -#define WRITE_BACK 0x01 -#define ALLOW_IMM 0x02 -#define ALLOW_INV_IMM 0x04 -#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) -#define ARG_TEST 0x08 - -/* Creates an index in data_transfer_insts array. */ -#define WORD_DATA 0x00 -#define BYTE_DATA 0x10 -#define HALF_DATA 0x20 -#define SIGNED_DATA 0x40 -#define LOAD_DATA 0x80 - -#define EMIT_INSTRUCTION(inst) \ - FAIL_IF(push_inst(compiler, (inst))) - -/* Condition: AL. */ -#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ - (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) - -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size; - sljit_uw push; - - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - /* Push saved registers, temporary registers - stmdb sp!, {..., lr} */ - push = PUSH | (1 << 14); - if (scratches >= 5) - push |= 1 << 11; - if (scratches >= 4) - push |= 1 << 10; - if (saveds >= 5) - push |= 1 << 8; - if (saveds >= 4) - push |= 1 << 7; - if (saveds >= 3) - push |= 1 << 6; - if (saveds >= 2) - push |= 1 << 5; - if (saveds >= 1) - push |= 1 << 4; - EMIT_INSTRUCTION(push); - - /* Stack must be aligned to 8 bytes: */ - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; - if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 
0, SLJIT_IMM, local_size)); - - if (args >= 1) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))); - if (args >= 2) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); - if (args >= 3) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))); - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size; - - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - sljit_uw pop; - - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); - - pop = POP | (1 << 15); - /* Push saved registers, temporary registers - ldmia sp!, {..., pc} */ - if (compiler->scratches >= 5) - pop |= 1 << 11; - if (compiler->scratches >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 5) - pop |= 1 << 8; - if (compiler->saveds >= 4) - pop |= 1 << 7; - if (compiler->saveds >= 3) - pop |= 1 << 6; - if (compiler->saveds >= 2) - pop |= 1 << 5; - if (compiler->saveds >= 1) - pop |= 1 << 4; - - return 
push_inst(compiler, pop); -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -/* s/l - store/load (1 bit) - u/s - signed/unsigned (1 bit) - w/b/h/N - word/byte/half/NOT allowed (2 bit) - It contans 16 items, but not all are different. */ - -static sljit_sw data_transfer_insts[16] = { -/* s u w */ 0xe5000000 /* str */, -/* s u b */ 0xe5400000 /* strb */, -/* s u h */ 0xe10000b0 /* strh */, -/* s u N */ 0x00000000 /* not allowed */, -/* s s w */ 0xe5000000 /* str */, -/* s s b */ 0xe5400000 /* strb */, -/* s s h */ 0xe10000b0 /* strh */, -/* s s N */ 0x00000000 /* not allowed */, - -/* l u w */ 0xe5100000 /* ldr */, -/* l u b */ 0xe5500000 /* ldrb */, -/* l u h */ 0xe11000b0 /* ldrh */, -/* l u N */ 0x00000000 /* not allowed */, -/* l s w */ 0xe5100000 /* ldr */, -/* l s b */ 0xe11000d0 /* ldrsb */, -/* l s h */ 0xe11000f0 /* ldrsh */, -/* l s N */ 0x00000000 /* not allowed */, -}; - -#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \ - (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2)) -/* Normal ldr/str instruction. - Type2: ldrsb, ldrh, ldrsh */ -#define IS_TYPE1_TRANSFER(type) \ - (data_transfer_insts[(type) >> 4] & 0x04000000) -#define TYPE2_TRANSFER_IMM(imm) \ - (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) - -/* flags: */ - /* Arguments are swapped. */ -#define ARGS_SWAPPED 0x01 - /* Inverted immediate. */ -#define INV_IMM 0x02 - /* Source and destination is register. */ -#define REG_DEST 0x04 -#define REG_SOURCE 0x08 - /* One instruction is enough. */ -#define FAST_DEST 0x10 - /* Multiple instructions are required. */ -#define SLOW_DEST 0x20 -/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). 
*/ -#define SET_FLAGS (1 << 20) -/* dst: reg - src1: reg - src2: reg or imm (if allowed) - SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ -#define SRC2_IMM (1 << 25) - -#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))) - -#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2)) - -#define EMIT_SHIFT_INS_AND_RETURN(opcode) \ - SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ - if (compiler->shift_imm != 0x20) { \ - SLJIT_ASSERT(src1 == TMP_REG1); \ - SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ - if (compiler->shift_imm != 0) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \ - } \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? 
reg_map[src2] : reg_map[src1]))); - -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) -{ - sljit_sw mul_inst; - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (dst != src2) { - if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (op == SLJIT_MOV_UB) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); -#else - return push_inst(compiler, (op == SLJIT_MOV_UB ? 
UXTB : SXTB) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { - SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); -#else - return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { - SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - return SLJIT_SUCCESS; - - case SLJIT_NOT: - if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(!(flags & INV_IMM)); - SLJIT_ASSERT(!(src2 & SRC2_IMM)); - FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); - if (flags & SET_FLAGS) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); - return SLJIT_SUCCESS; - - case SLJIT_ADD: - SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP); - - case SLJIT_ADDC: - SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP); - - case SLJIT_SUB: - SLJIT_ASSERT(!(flags & INV_IMM)); - if (!(flags & 
ARGS_SWAPPED)) - EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP); - - case SLJIT_SUBC: - SLJIT_ASSERT(!(flags & INV_IMM)); - if (!(flags & ARGS_SWAPPED)) - EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP); - - case SLJIT_MUL: - SLJIT_ASSERT(!(flags & INV_IMM)); - SLJIT_ASSERT(!(src2 & SRC2_IMM)); - if (SLJIT_UNLIKELY(op & SLJIT_SET_O)) - mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12); - else - mul_inst = MUL | (reg_map[dst] << 16); - - if (dst != src2) - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2])); - else if (dst != src1) - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1])); - else { - /* Rm and Rd must not be the same register. */ - SLJIT_ASSERT(dst != TMP_REG1); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2]))); - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1])); - } - - if (!(op & SLJIT_SET_O)) - return SLJIT_SUCCESS; - - /* We need to use TMP_REG3. */ - compiler->cache_arg = 0; - compiler->cache_argw = 0; - /* cmp TMP_REG2, dst asr #31. 
*/ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0)); - - case SLJIT_AND: - if (!(flags & INV_IMM)) - EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP); - - case SLJIT_OR: - SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP); - - case SLJIT_XOR: - SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP); - - case SLJIT_SHL: - EMIT_SHIFT_INS_AND_RETURN(0); - - case SLJIT_LSHR: - EMIT_SHIFT_INS_AND_RETURN(1); - - case SLJIT_ASHR: - EMIT_SHIFT_INS_AND_RETURN(2); - } - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -#undef EMIT_DATA_PROCESS_INS_AND_RETURN -#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN -#undef EMIT_SHIFT_INS_AND_RETURN - -/* Tests whether the immediate can be stored in the 12 bit imm field. - Returns with 0 if not possible. */ -static sljit_uw get_imm(sljit_uw imm) -{ - sljit_si rol; - - if (imm <= 0xff) - return SRC2_IMM | imm; - - if (!(imm & 0xff000000)) { - imm <<= 8; - rol = 8; - } - else { - imm = (imm << 24) | (imm >> 8); - rol = 0; - } - - if (!(imm & 0xff000000)) { - imm <<= 8; - rol += 4; - } - - if (!(imm & 0xf0000000)) { - imm <<= 4; - rol += 2; - } - - if (!(imm & 0xc0000000)) { - imm <<= 2; - rol += 1; - } - - if (!(imm & 0x00ffffff)) - return SRC2_IMM | (imm >> 24) | (rol << 8); - else - return 0; -} - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive) -{ - sljit_uw mask; - sljit_uw imm1; - sljit_uw imm2; - sljit_si rol; - - /* Step1: Search a zero byte (8 continous zero bit). */ - mask = 0xff000000; - rol = 8; - while(1) { - if (!(imm & mask)) { - /* Rol imm by rol. */ - imm = (imm << rol) | (imm >> (32 - rol)); - /* Calculate arm rol. */ - rol = 4 + (rol >> 1); - break; - } - rol += 2; - mask >>= 2; - if (mask & 0x3) { - /* rol by 8. 
*/ - imm = (imm << 8) | (imm >> 24); - mask = 0xff00; - rol = 24; - while (1) { - if (!(imm & mask)) { - /* Rol imm by rol. */ - imm = (imm << rol) | (imm >> (32 - rol)); - /* Calculate arm rol. */ - rol = (rol >> 1) - 8; - break; - } - rol += 2; - mask >>= 2; - if (mask & 0x3) - return 0; - } - break; - } - } - - /* The low 8 bit must be zero. */ - SLJIT_ASSERT(!(imm & 0xff)); - - if (!(imm & 0xff000000)) { - imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); - imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); - } - else if (imm & 0xc0000000) { - imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); - imm <<= 8; - rol += 4; - - if (!(imm & 0xff000000)) { - imm <<= 8; - rol += 4; - } - - if (!(imm & 0xf0000000)) { - imm <<= 4; - rol += 2; - } - - if (!(imm & 0xc0000000)) { - imm <<= 2; - rol += 1; - } - - if (!(imm & 0x00ffffff)) - imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); - else - return 0; - } - else { - if (!(imm & 0xf0000000)) { - imm <<= 4; - rol += 2; - } - - if (!(imm & 0xc0000000)) { - imm <<= 2; - rol += 1; - } - - imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); - imm <<= 8; - rol += 4; - - if (!(imm & 0xf0000000)) { - imm <<= 4; - rol += 2; - } - - if (!(imm & 0xc0000000)) { - imm <<= 2; - rol += 1; - } - - if (!(imm & 0x00ffffff)) - imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); - else - return 0; - } - - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)); - return 1; -} -#endif - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm) -{ - sljit_uw tmp; - -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - if (!(imm & ~0xffff)) - return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); -#endif - - /* Create imm by 1 inst. 
*/ - tmp = get_imm(imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } - - tmp = get_imm(~imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - /* Create imm by 2 inst. */ - FAIL_IF(generate_int(compiler, reg, imm, 1)); - FAIL_IF(generate_int(compiler, reg, ~imm, 0)); - - /* Load integer. */ - return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm); -#else - return emit_imm(compiler, reg, imm); -#endif -} - -/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ -static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) -{ - if (value >= 0) { - value = get_imm(value); - if (value) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value)); - } - else { - value = get_imm(-value); - if (value) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value)); - } - return SLJIT_ERR_UNSUPPORTED; -} - -/* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - sljit_uw imm; - - if (arg & SLJIT_IMM) { - imm = get_imm(argw); - if (imm) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)); - return -1; - } - imm = get_imm(~argw); - if (imm) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)); - return -1; - } - return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; - } - - SLJIT_ASSERT(arg & SLJIT_MEM); - - /* Fast loads/stores. 
*/ - if (arg & 0xf) { - if (!(arg & 0xf0)) { - if (IS_TYPE1_TRANSFER(inp_flags)) { - if (argw >= 0 && argw <= 0xfff) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, argw)); - return -1; - } - if (argw < 0 && argw >= -0xfff) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, -argw)); - return -1; - } - } - else { - if (argw >= 0 && argw <= 0xff) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw))); - return -1; - } - if (argw < 0 && argw >= -0xff) { - if (inp_flags & ARG_TEST) - return 1; - argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw))); - return -1; - } - } - } - else if ((argw & 0x3) == 0 || IS_TYPE1_TRANSFER(inp_flags)) { - if (inp_flags & ARG_TEST) - return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, - RM((arg >> 4) & 0xf) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))); - return -1; - } - } - - return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - /* Immediate caching is not supported as it would be an operation on constant arguments. */ - if (arg & SLJIT_IMM) - return 0; - - /* Always a simple operation. */ - if (arg & 0xf0) - return 0; - - if (!(arg & 0xf)) { - /* Immediate access. 
*/ - if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) - return 1; - return 0; - } - - if (argw <= 0xfffff && argw >= -0xfffff) - return 0; - - if (argw == next_argw && (next_arg & SLJIT_MEM)) - return 1; - - if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) - return 1; - - return 0; -} - -#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \ - if (max_delta & 0xf00) \ - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \ - else \ - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm)))); - -#define TEST_WRITE_BACK() \ - if (inp_flags & WRITE_BACK) { \ - tmp_r = arg & 0xf; \ - if (reg == tmp_r) { \ - /* This can only happen for stores */ \ - /* since ldr reg, [reg, ...]! has no meaning */ \ - SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \ - reg = TMP_REG3; \ - } \ - } - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - sljit_si tmp_r; - sljit_sw max_delta; - sljit_sw sign; - sljit_uw imm; - - if (arg & SLJIT_IMM) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - return load_immediate(compiler, reg, argw); - } - - SLJIT_ASSERT(arg & SLJIT_MEM); - - tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; - max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff; - - if ((arg & 0xf) == SLJIT_UNUSED) { - /* Write back is not used. 
*/ - imm = (sljit_uw)(argw - compiler->cache_argw); - if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - if (imm <= (sljit_uw)max_delta) { - sign = 1; - argw = argw - compiler->cache_argw; - } - else { - sign = 0; - argw = compiler->cache_argw - argw; - } - - GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw); - return SLJIT_SUCCESS; - } - - /* With write back, we can create some sophisticated loads, but - it is hard to decide whether we should convert downward (0s) or upward (1s). */ - imm = (sljit_uw)(argw - next_argw); - if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; - } - - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0); - return SLJIT_SUCCESS; - } - - if (arg & 0xf0) { - SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); - if (inp_flags & WRITE_BACK) - tmp_r = arg & 0xf; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7))); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); - return SLJIT_SUCCESS; - } - - imm = (sljit_uw)(argw - compiler->cache_argw); - if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm); - return SLJIT_SUCCESS; - } - if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - imm = (sljit_uw)-(sljit_sw)imm; - GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm); - return SLJIT_SUCCESS; - } - - imm = get_imm(argw & ~max_delta); - if (imm) { - TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & 0xf, imm)); - GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & 
max_delta); - return SLJIT_SUCCESS; - } - - imm = get_imm(-argw & ~max_delta); - if (imm) { - argw = -argw; - TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & 0xf, imm)); - GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); - return SLJIT_SUCCESS; - } - - if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { - TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; - } - - if (argw == next_argw && (next_arg & SLJIT_MEM)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - - TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; - } - - imm = (sljit_uw)(argw - next_argw); - if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & 0xf])); - - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0); - return SLJIT_SUCCESS; - } - - if ((arg & 0xf) == tmp_r) { - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; - } - - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, reg_map[tmp_r] | (max_delta & 0xf00 ? 
SRC2_IMM : 0))); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ - - /* We prefers register and simple consts. */ - sljit_si dst_r; - sljit_si src1_r; - sljit_si src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; - sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - /* Destination check. */ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - dst_r = TMP_REG2; - } - else if (dst <= TMP_REG3) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - sugg_src2_r = dst_r; - } - else { - SLJIT_ASSERT(dst & SLJIT_MEM); - if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { - flags |= FAST_DEST; - dst_r = TMP_REG2; - } - else { - flags |= SLOW_DEST; - dst_r = 0; - } - } - - /* Source 1. 
*/ - if (src1 <= TMP_REG3) - src1_r = src1; - else if (src2 <= TMP_REG3) { - flags |= ARGS_SWAPPED; - src1_r = src2; - src2 = src1; - src2w = src1w; - } - else do { /* do { } while(0) is used because of breaks. */ - src1_r = 0; - if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) { - /* The second check will generate a hit. */ - src2_r = get_imm(src1w); - if (src2_r) { - flags |= ARGS_SWAPPED; - src1 = src2; - src1w = src2w; - break; - } - if (inp_flags & ALLOW_INV_IMM) { - src2_r = get_imm(~src1w); - if (src2_r) { - flags |= ARGS_SWAPPED | INV_IMM; - src1 = src2; - src1w = src2w; - break; - } - } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_r = get_imm(-src1w); - if (src2_r) { - /* Note: ARGS_SWAPPED is intentionally not applied! */ - src1 = src2; - src1w = src2w; - op = SLJIT_SUB | GET_ALL_FLAGS(op); - break; - } - } - } - - if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1_r = TMP_REG1; - } - } while (0); - - /* Source 2. */ - if (src2_r == 0) { - if (src2 <= TMP_REG3) { - src2_r = src2; - flags |= REG_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - dst_r = src2_r; - } - else do { /* do { } while(0) is used because of breaks. */ - if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) { - src2_r = get_imm(src2w); - if (src2_r) - break; - if (inp_flags & ALLOW_INV_IMM) { - src2_r = get_imm(~src2w); - if (src2_r) { - flags |= INV_IMM; - break; - } - } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_r = get_imm(-src2w); - if (src2_r) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - flags &= ~ARGS_SWAPPED; - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) { - src2_r = get_imm(-src2w); - if (src2_r) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); - flags &= ~ARGS_SWAPPED; - break; - } - } - } - - /* src2_r is 0. 
*/ - if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { - FAIL_IF(compiler->error); - src2_r = sugg_src2_r; - } - } while (0); - } - - /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero. - If they are zero, they must not be registers. */ - if (src1_r == 0 && src2_r == 0 && dst_r == 0) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); - flags |= ARGS_SWAPPED; - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } - src1_r = TMP_REG1; - src2_r = TMP_REG2; - } - else if (src1_r == 0 && src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - src1_r = TMP_REG1; - } - else if (src1_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - src1_r = TMP_REG1; - } - else if (src2_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); - src2_r = sugg_src2_r; - } - - if (dst_r == 0) - dst_r = TMP_REG2; - - if (src1_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); - src1_r = TMP_REG1; - } - - if (src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); - src2_r = sugg_src2_r; - } - - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); - - if (flags & (FAST_DEST | SLOW_DEST)) { - if (flags & FAST_DEST) - FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); - else - FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); - } - return 
SLJIT_SUCCESS; -} - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__GNUC__) -extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); -extern int __aeabi_idivmod(int numerator, int denominator); -#else -#error "Software divmod functions are needed" -#endif - -#ifdef __cplusplus -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - EMIT_INSTRUCTION(BKPT); - break; - case SLJIT_NOP: - EMIT_INSTRUCTION(NOP); - break; - case SLJIT_UMUL: - case SLJIT_SMUL: -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) - | reg_map[SLJIT_SCRATCH_REG2]); -#else - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) - | reg_map[TMP_REG1]); -#endif - case SLJIT_UDIV: - case SLJIT_SDIV: - if (compiler->scratches >= 3) - EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */); -#if defined(__GNUC__) - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_UDIV ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); -#else -#error "Software divmod functions are needed" -#endif - if (compiler->scratches >= 3) - return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); - return SLJIT_SUCCESS; - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); - - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); - - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); - - case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: - case SLJIT_MOVU_P: - return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_ub)srcw : srcw); - - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); - - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); - - case SLJIT_NOT: - return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); - - case SLJIT_CLZ: - return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - switch (GET_OPCODE(op)) { - case SLJIT_ADD: - case SLJIT_ADDC: - case SLJIT_SUB: - case SLJIT_SUBC: - case SLJIT_OR: - case SLJIT_XOR: - return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_MUL: - return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_AND: - return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SHL: - case SLJIT_LSHR: - case SLJIT_ASHR: - if (src2 & SLJIT_IMM) { - compiler->shift_imm = src2w & 0x1f; - return 
emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); - } - else { - compiler->shift_imm = 0x20; - return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); - } - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - check_sljit_get_register_index(reg); - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); - - return push_inst(compiler, *(sljit_uw*)instruction); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - -/* 0 - no fpu - 1 - vfp */ -static sljit_si arm_fpu_type = -1; - -static void init_compiler(void) -{ - if (arm_fpu_type != -1) - return; - - /* TODO: Only the OS can help to determine the correct fpu type. */ - arm_fpu_type = 1; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - if (arm_fpu_type == -1) - init_compiler(); - return arm_fpu_type; -} - -#else - -#define arm_fpu_type 1 - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - /* Always available. 
*/ - return 1; -} - -#endif - -#define FPU_LOAD (1 << 20) -#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs)) -#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16)) - -static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - sljit_sw tmp; - sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); - SLJIT_ASSERT(arg & SLJIT_MEM); - - if (SLJIT_UNLIKELY(arg & 0xf0)) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7))); - arg = SLJIT_MEM | TMP_REG1; - argw = 0; - } - - /* Fast loads and stores. */ - if ((arg & 0xf)) { - if (!(argw & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & 0xf, reg, argw >> 2)); - if (!(-argw & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & 0xf, reg, (-argw) >> 2)); - } - - if (compiler->cache_arg == arg) { - tmp = argw - compiler->cache_argw; - if (!(tmp & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2)); - if (!(-tmp & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); - } - } - - if (arg & 0xf) { - if (emit_set_delta(compiler, TMP_REG1, arg & 0xf, argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); - } - imm = get_imm(argw & ~0x3fc); - if (imm) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, imm)); - return push_inst(compiler, 
EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); - } - imm = get_imm(-argw & ~0x3fc); - if (imm) { - argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & 0xf, imm)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); - } - } - - compiler->cache_arg = arg; - compiler->cache_argw = argw; - if (arg & 0xf) { - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & 0xf, reg_map[TMP_REG1])); - } - else - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_fr; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; - - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw)); - dst = TMP_FREG1; - } - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw)); - src = TMP_FREG2; - } - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0)); - EMIT_INSTRUCTION(VMRS); - return SLJIT_SUCCESS; - } - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG1 : dst; - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw)); - src = dst_fr; - } - - switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_NEGD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_ABSD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_fr; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG1 : dst; - - if (src2 > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); - src2 = TMP_FREG2; - } - - if (src1 > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; - } - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); - break; - - case SLJIT_SUBD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); - break; - - case SLJIT_MULD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); - break; - - case SLJIT_DIVD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); - break; - } - - if (dst_fr == TMP_FREG1) - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw)); - - return SLJIT_SUCCESS; -} - -#undef FPU_LOAD -#undef EMIT_FPU_DATA_TRANSFER -#undef EMIT_FPU_OPERATION - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - if (dst <= TMP_REG3) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3))); - - /* Memory. */ - if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) - return compiler->error; - /* TMP_REG3 is used for caching. 
*/ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))); - else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) - FAIL_IF(compiler->error); - else { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))); - } - } - else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); - return push_inst(compiler, BLX | RM(TMP_REG3)); -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -static sljit_uw get_cc(sljit_si type) -{ - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - case SLJIT_C_FLOAT_EQUAL: - return 0x00000000; - - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_FLOAT_NOT_EQUAL: - return 0x10000000; - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - return 0x30000000; - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - return 0x20000000; - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - return 0x80000000; - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - return 0x90000000; - - case SLJIT_C_SIG_LESS: - return 0xb0000000; - - case SLJIT_C_SIG_GREATER_EQUAL: - return 0xa0000000; - - case SLJIT_C_SIG_GREATER: - 
return 0xc0000000; - - case SLJIT_C_SIG_LESS_EQUAL: - return 0xd0000000; - - case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_UNORDERED: - return 0x60000000; - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_ORDERED: - return 0x70000000; - - default: /* SLJIT_JUMP */ - return 0xe0000000; - } -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - return label; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - struct sljit_jump *jump; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - /* In ARM, we don't need to touch the arguments. */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (type >= SLJIT_FAST_CALL) - PTR_FAIL_IF(prepare_blx(compiler)); - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, - type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); - - if (jump->flags & SLJIT_REWRITABLE_JUMP) { - jump->addr = compiler->size; - compiler->patches++; - } - - if (type >= SLJIT_FAST_CALL) { - jump->flags |= IS_BL; - PTR_FAIL_IF(emit_blx(compiler)); - } - - if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - jump->addr = compiler->size; -#else - if (type >= SLJIT_FAST_CALL) - jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); - jump->addr = compiler->size; -#endif - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - struct sljit_jump *jump; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - /* In ARM, we don't need to touch the arguments. */ - if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (type >= SLJIT_FAST_CALL) - FAIL_IF(prepare_blx(compiler)); - FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); - if (type >= SLJIT_FAST_CALL) - FAIL_IF(emit_blx(compiler)); -#else - FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); -#endif - jump->addr = compiler->size; - } - else { - if (src <= TMP_REG3) - return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); - - SLJIT_ASSERT(src & SLJIT_MEM); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); - return push_inst(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG2)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_si dst_r, flags = GET_ALL_FLAGS(op); - sljit_uw cc, ins; - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - op = GET_OPCODE(op); - cc = get_cc(type); - dst_r = (dst <= TMP_REG3) ? dst : TMP_REG2; - - if (op < SLJIT_ADD) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc); - return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; - } - - ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); - if ((op == SLJIT_OR || op == SLJIT_XOR) && dst <= TMP_REG3 && dst == src) { - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc); - /* The condition must always be set, even if the ORR/EOR is not executed above. */ - return (flags & SLJIT_SET_E) ? 
push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; - } - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } else if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - srcw = 0; - } - - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)); - if (dst_r == TMP_REG2) - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0)); - - return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - struct sljit_const *const_; - sljit_si reg; - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - - reg = (dst <= TMP_REG3) ? 
dst : TMP_REG2; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value)); - compiler->patches++; -#else - PTR_FAIL_IF(emit_imm(compiler, reg, init_value)); -#endif - set_const(const_, compiler); - - if (reg == TMP_REG2 && dst != SLJIT_UNUSED) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return const_; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ - inline_set_jump_addr(addr, new_addr, 1); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - inline_set_const(addr, new_constant, 1); -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeMIPS_32.c b/deps/libmagic/pcre/sljit/sljitNativeMIPS_32.c deleted file mode 100644 index f8c2148..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeMIPS_32.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* mips 32-bit arch dependent functions. */ - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) -{ - if (!(imm & ~0xffff)) - return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); - - if (imm < 0 && imm >= SIMM_MIN) - return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); - - FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); - return (imm & 0xffff) ? 
push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; -} - -#define EMIT_LOGICAL(op_imm, op_norm) \ - if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_E) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ - } \ - else { \ - if (op & SLJIT_SET_E) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ - } - -#define EMIT_SHIFT(op_imm, op_norm) \ - if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_E) \ - FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ - FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ - } \ - else { \ - if (op & SLJIT_SET_E) \ - FAIL_IF(push_inst(compiler, op_norm | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ - FAIL_IF(push_inst(compiler, op_norm | S(src2) | T(src1) | D(dst), DR(dst))); \ - } - -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_sw src2) -{ - sljit_si overflow_ra = 0; - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (dst != src2) - return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); 
-#else - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); -#endif - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#else - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); -#endif - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); - return SLJIT_SUCCESS; - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); -#else - if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { - FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); - return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); - } - /* Nearly all instructions are unmovable in the following sequence. 
*/ - FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - /* Check zero. */ - FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ADDIU_W | SA(0) | T(dst) | IMM(-1), DR(dst))); - /* Loop for searching the highest bit. */ - FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), DR(dst))); - FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); - if (op & SLJIT_SET_E) - return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); -#endif - return SLJIT_SUCCESS; - - case SLJIT_ADD: - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, SRL | T(src1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); - if (src2 < 0) - FAIL_IF(push_inst(compiler, XORI | SA(TMP_EREG1) | TA(TMP_EREG1) | IMM(1), TMP_EREG1)); - } - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - if (op & SLJIT_SET_C) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); - else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); - } - } - /* dst may be the same as src1 or src2. 
*/ - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, SRL | T(dst) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - if (src2 < 0) - FAIL_IF(push_inst(compiler, XORI | SA(OVERFLOW_FLAG) | TA(OVERFLOW_FLAG) | IMM(1), OVERFLOW_FLAG)); - } - } - else { - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); - FAIL_IF(push_inst(compiler, SRL | TA(TMP_EREG1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); - if (src1 != dst) - overflow_ra = DR(src1); - else if (src2 != dst) - overflow_ra = DR(src2); - else { - /* Rare ocasion. */ - FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2)); - overflow_ra = TMP_EREG2; - } - } - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); - /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(overflow_ra) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - } - } - - /* a + b >= a | b (otherwise, the carry should be set to 1). 
*/ - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); - if (op & SLJIT_SET_O) - return push_inst(compiler, MOVN | SA(0) | TA(TMP_EREG1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); - return SLJIT_SUCCESS; - - case SLJIT_ADDC: - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); - } - } - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); - } else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); - } - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); - - FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - if (!(op & SLJIT_SET_C)) - return SLJIT_SUCCESS; - - /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */ - FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(TMP_EREG2) | IMM(1), TMP_EREG2)); - FAIL_IF(push_inst(compiler, AND | SA(TMP_EREG2) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); - /* Set carry flag. 
*/ - return push_inst(compiler, OR | SA(TMP_EREG2) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); - - case SLJIT_SUB: - if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_S | SLJIT_SET_U)) || src2 == SIMM_MIN)) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, SRL | T(src1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); - if (src2 < 0) - FAIL_IF(push_inst(compiler, XORI | SA(TMP_EREG1) | TA(TMP_EREG1) | IMM(1), TMP_EREG1)); - if (src1 != dst) - overflow_ra = DR(src1); - else { - /* Rare ocasion. */ - FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2)); - overflow_ra = TMP_EREG2; - } - } - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); - /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); - FAIL_IF(push_inst(compiler, SRL | TA(TMP_EREG1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); - if (src1 != dst) - overflow_ra = DR(src1); - else { - /* Rare ocasion. 
*/ - FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2)); - overflow_ra = TMP_EREG2; - } - } - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & (SLJIT_SET_U | SLJIT_SET_C)) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); - if (op & SLJIT_SET_U) - FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); - if (op & SLJIT_SET_S) { - FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); - FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); - } - /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C)) - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (op & SLJIT_SET_O) { - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(overflow_ra) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - return push_inst(compiler, MOVZ | SA(0) | TA(TMP_EREG1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); - } - return SLJIT_SUCCESS; - - case SLJIT_SUBC: - if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(-src2), TMP_EREG1)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); - /* dst may be the same as src1 or src2. 
*/ - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, MOVZ | SA(ULESS_FLAG) | T(dst) | DA(TMP_EREG1), TMP_EREG1)); - - FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, ADDU | SA(TMP_EREG1) | TA(0) | DA(ULESS_FLAG), ULESS_FLAG)); - - return SLJIT_SUCCESS; - - case SLJIT_MUL: - SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (!(op & SLJIT_SET_O)) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); -#else - FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - return push_inst(compiler, MFLO | D(dst), DR(dst)); -#endif - } - FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); - FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); - - case SLJIT_AND: - EMIT_LOGICAL(ANDI, AND); - return SLJIT_SUCCESS; - - case SLJIT_OR: - EMIT_LOGICAL(ORI, OR); - return SLJIT_SUCCESS; - - case SLJIT_XOR: - EMIT_LOGICAL(XORI, XOR); - return SLJIT_SUCCESS; - - case SLJIT_SHL: - EMIT_SHIFT(SLL, SLLV); - return SLJIT_SUCCESS; - - case SLJIT_LSHR: - EMIT_SHIFT(SRL, SRLV); - return SLJIT_SUCCESS; - - case SLJIT_ASHR: - EMIT_SHIFT(SRA, SRAV); - return SLJIT_SUCCESS; - } - - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) -{ - FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); - return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw 
new_addr) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeMIPS_common.c b/deps/libmagic/pcre/sljit/sljitNativeMIPS_common.c deleted file mode 100644 index 9559ec3..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeMIPS_common.c +++ /dev/null @@ -1,1881 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Latest MIPS architecture. */ -/* Automatically detect SLJIT_MIPS_32_64 */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return "MIPS" SLJIT_CPUINFO; -#else - return "MIPS III" SLJIT_CPUINFO; -#endif -} - -/* Length of an instruction word - Both for mips-32 and mips-64 */ -typedef sljit_ui sljit_ins; - -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) - -/* For position independent code, t9 must contain the function address. */ -#define PIC_ADDR_REG TMP_REG2 - -/* TMP_EREG1 is used mainly for literal encoding on 64 bit. */ -#define TMP_EREG1 15 -#define TMP_EREG2 24 -/* Floating point status register. */ -#define FCSR_REG 31 -/* Return address register. */ -#define RETURN_ADDR_REG 31 - -/* Flags are keept in volatile registers. */ -#define EQUAL_FLAG 7 -/* And carry flag as well. 
*/ -#define ULESS_FLAG 10 -#define UGREATER_FLAG 11 -#define LESS_FLAG 12 -#define GREATER_FLAG 13 -#define OVERFLOW_FLAG 14 - -#define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) - -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 -}; - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -#define S(s) (reg_map[s] << 21) -#define T(t) (reg_map[t] << 16) -#define D(d) (reg_map[d] << 11) -/* Absolute registers. */ -#define SA(s) ((s) << 21) -#define TA(t) ((t) << 16) -#define DA(d) ((d) << 11) -#define FT(t) ((t) << 16) -#define FS(s) ((s) << 11) -#define FD(d) ((d) << 6) -#define IMM(imm) ((imm) & 0xffff) -#define SH_IMM(imm) ((imm & 0x1f) << 6) - -#define DR(dr) (reg_map[dr]) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) (opcode) -/* S = (16 << 21) D = (17 << 21) */ -#define FMT_SD (16 << 21) - -#define ABS_fmt (HI(17) | FMT_SD | LO(5)) -#define ADD_fmt (HI(17) | FMT_SD | LO(0)) -#define ADDU (HI(0) | LO(33)) -#define ADDIU (HI(9)) -#define AND (HI(0) | LO(36)) -#define ANDI (HI(12)) -#define B (HI(4)) -#define BAL (HI(1) | (17 << 16)) -#define BC1F (HI(17) | (8 << 21)) -#define BC1T (HI(17) | (8 << 21) | (1 << 16)) -#define BEQ (HI(4)) -#define BGEZ (HI(1) | (1 << 16)) -#define BGTZ (HI(7)) -#define BLEZ (HI(6)) -#define BLTZ (HI(1) | (0 << 16)) -#define BNE (HI(5)) -#define BREAK (HI(0) | LO(13)) -#define CFC1 (HI(17) | (2 << 21)) -#define C_UN_fmt (HI(17) | FMT_SD | LO(49)) -#define C_UEQ_fmt (HI(17) | FMT_SD | LO(51)) -#define C_ULE_fmt (HI(17) | FMT_SD | LO(55)) -#define C_ULT_fmt (HI(17) | FMT_SD | LO(53)) -#define DIV (HI(0) | LO(26)) -#define DIVU (HI(0) | LO(27)) -#define DIV_fmt (HI(17) | FMT_SD | LO(3)) -#define J (HI(2)) -#define JAL (HI(3)) -#define JALR (HI(0) | LO(9)) -#define JR (HI(0) | LO(8)) -#define LD (HI(55)) -#define LUI 
(HI(15)) -#define LW (HI(35)) -#define MFHI (HI(0) | LO(16)) -#define MFLO (HI(0) | LO(18)) -#define MOV_fmt (HI(17) | FMT_SD | LO(6)) -#define MOVN (HI(0) | LO(11)) -#define MOVZ (HI(0) | LO(10)) -#define MUL_fmt (HI(17) | FMT_SD | LO(2)) -#define MULT (HI(0) | LO(24)) -#define MULTU (HI(0) | LO(25)) -#define NEG_fmt (HI(17) | FMT_SD | LO(7)) -#define NOP (HI(0) | LO(0)) -#define NOR (HI(0) | LO(39)) -#define OR (HI(0) | LO(37)) -#define ORI (HI(13)) -#define SD (HI(63)) -#define SLT (HI(0) | LO(42)) -#define SLTI (HI(10)) -#define SLTIU (HI(11)) -#define SLTU (HI(0) | LO(43)) -#define SLL (HI(0) | LO(0)) -#define SLLV (HI(0) | LO(4)) -#define SRL (HI(0) | LO(2)) -#define SRLV (HI(0) | LO(6)) -#define SRA (HI(0) | LO(3)) -#define SRAV (HI(0) | LO(7)) -#define SUB_fmt (HI(17) | FMT_SD | LO(1)) -#define SUBU (HI(0) | LO(35)) -#define SW (HI(43)) -#define XOR (HI(0) | LO(38)) -#define XORI (HI(14)) - -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) -#define CLZ (HI(28) | LO(32)) -#define MUL (HI(28) | LO(2)) -#define SEB (HI(31) | (16 << 6) | LO(32)) -#define SEH (HI(31) | (24 << 6) | LO(32)) -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define ADDU_W ADDU -#define ADDIU_W ADDIU -#define SLL_W SLL -#define SUBU_W SUBU -#else -#define ADDU_W DADDU -#define ADDIU_W DADDIU -#define SLL_W DSLL -#define SUBU_W DSUBU -#endif - -#define SIMM_MAX (0x7fff) -#define SIMM_MIN (-0x8000) -#define UIMM_MAX (0xffff) - -/* dest_reg is the absolute name of the register - Useful for reordering instructions in the delay slot. 
*/ -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) -{ - SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS - || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); - sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); - FAIL_IF(!ptr); - *ptr = ins; - compiler->size++; - compiler->delay_slot = delay_slot; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_ins invert_branch(sljit_si flags) -{ - return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16); -} - -static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) -{ - sljit_sw diff; - sljit_uw target_addr; - sljit_ins *inst; - sljit_ins saved_inst; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return code_ptr; - - if (jump->flags & JUMP_ADDR) - target_addr = jump->u.target; - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); - } - inst = (sljit_ins*)jump->addr; - if (jump->flags & IS_COND) - inst--; - - /* B instructions. */ - if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; - if (diff <= SIMM_MAX && diff >= SIMM_MIN) { - jump->flags |= PATCH_B; - - if (!(jump->flags & IS_COND)) { - inst[0] = inst[-1]; - inst[-1] = (jump->flags & IS_JAL) ? BAL : B; - jump->addr -= sizeof(sljit_ins); - return inst; - } - saved_inst = inst[0]; - inst[0] = inst[-1]; - inst[-1] = saved_inst ^ invert_branch(jump->flags); - jump->addr -= 2 * sizeof(sljit_ins); - return inst; - } - } - - diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2; - if (diff <= SIMM_MAX && diff >= SIMM_MIN) { - jump->flags |= PATCH_B; - - if (!(jump->flags & IS_COND)) { - inst[0] = (jump->flags & IS_JAL) ? 
BAL : B; - inst[1] = NOP; - return inst + 1; - } - inst[0] = inst[0] ^ invert_branch(jump->flags); - inst[1] = NOP; - jump->addr -= sizeof(sljit_ins); - return inst + 1; - } - - if (jump->flags & IS_COND) { - if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { - jump->flags |= PATCH_J; - inst[0] = (inst[0] & 0xffff0000) | 3; - inst[1] = NOP; - inst[2] = J; - inst[3] = NOP; - jump->addr += sizeof(sljit_ins); - return inst + 3; - } - return code_ptr; - } - - /* J instuctions. */ - if (jump->flags & IS_MOVABLE) { - if ((target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { - jump->flags |= PATCH_J; - inst[0] = inst[-1]; - inst[-1] = (jump->flags & IS_JAL) ? JAL : J; - jump->addr -= sizeof(sljit_ins); - return inst; - } - } - - if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { - jump->flags |= PATCH_J; - inst[0] = (jump->flags & IS_JAL) ? JAL : J; - inst[1] = NOP; - return inst + 1; - } - - return code_ptr; -} - -#ifdef __GNUC__ -static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr) -{ - SLJIT_CACHE_FLUSH(code, code_ptr); -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_ins *code; - sljit_ins *code_ptr; - sljit_ins *buf_ptr; - sljit_ins *buf_end; - sljit_uw word_count; - sljit_uw addr; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - word_count = 0; - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - do { - buf_ptr = (sljit_ins*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 2); - do { - *code_ptr = *buf_ptr++; - SLJIT_ASSERT(!label || label->size >= 
word_count); - SLJIT_ASSERT(!jump || jump->addr >= word_count); - SLJIT_ASSERT(!const_ || const_->addr >= word_count); - /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else -#error "Implementation required" -#endif - code_ptr = optimize_jump(jump, code_ptr, code); - jump = jump->next; - } - if (const_ && const_->addr == word_count) { - /* Just recording the address. */ - const_->addr = (sljit_uw)code_ptr; - const_ = const_->next; - } - code_ptr ++; - word_count ++; - } while (buf_ptr < buf_end); - - buf = buf->next; - } while (buf); - - if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); - - jump = compiler->jumps; - while (jump) { - do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; - - if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); - break; - } - if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)); - buf_ptr[0] |= (addr >> 2) & 0x03ffffff; - break; - } - - /* Set the fields of immediate loads. 
*/ -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); -#else -#error "Implementation required" -#endif - } while (0); - jump = jump->next; - } - - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size * sizeof(sljit_ins); -#ifndef __GNUC__ - SLJIT_CACHE_FLUSH(code, code_ptr); -#else - /* GCC workaround for invalid code generation with -O2. */ - sljit_cache_flush(code, code_ptr); -#endif - return code; -} - -/* --------------------------------------------------------------------- */ -/* Entry, exit */ -/* --------------------------------------------------------------------- */ - -/* Creates an index in data_transfer_insts array. */ -#define LOAD_DATA 0x01 -#define WORD_DATA 0x00 -#define BYTE_DATA 0x02 -#define HALF_DATA 0x04 -#define INT_DATA 0x06 -#define SIGNED_DATA 0x08 -/* Separates integer and floating point registers */ -#define GPR_REG 0x0f -#define DOUBLE_DATA 0x10 - -#define MEM_MASK 0x1f - -#define WRITE_BACK 0x00020 -#define ARG_TEST 0x00040 -#define ALT_KEEP_CACHE 0x00080 -#define CUMULATIVE_OP 0x00100 -#define LOGICAL_OP 0x00200 -#define IMM_OP 0x00400 -#define SRC2_IMM 0x00800 - -#define UNUSED_DEST 0x01000 -#define REG_DEST 0x02000 -#define REG1_SOURCE 0x04000 -#define REG2_SOURCE 0x08000 -#define SLOW_SRC1 0x10000 -#define SLOW_SRC2 0x20000 -#define SLOW_DEST 0x40000 - -/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. 
*/ -#define CHECK_FLAGS(list) \ - (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define STACK_STORE SW -#define STACK_LOAD LW -#else -#define STACK_STORE SD -#define STACK_LOAD LD -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#include "sljitNativeMIPS_32.c" -#else -#include "sljitNativeMIPS_64.c" -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_ins base; - - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size; - - if (local_size <= SIMM_MAX) { - /* Frequent case. 
*/ - FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG))); - base = S(SLJIT_LOCALS_REG); - } - else { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG))); - base = S(TMP_REG2); - local_size = 0; - } - - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - - if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3))); - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - CHECK_ERROR_VOID(); - 
check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); - compiler->local_size = (local_size + 15) & ~0xf; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - sljit_si local_size; - sljit_ins base; - - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - local_size = compiler->local_size; - if (local_size <= SIMM_MAX) - base = S(SLJIT_LOCALS_REG); - else { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); - base = S(TMP_REG1); - local_size = 0; - } - - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2))); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1))); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3))); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2))); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1))); - - 
FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); - if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS); - else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS); -} - -#undef STACK_STORE -#undef STACK_LOAD - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define ARCH_32_64(a, b) a -#else -#define ARCH_32_64(a, b) b -#endif - -static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { -/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), -/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), -/* u b s */ HI(40) /* sb */, -/* u b l */ HI(36) /* lbu */, -/* u h s */ HI(41) /* sh */, -/* u h l */ HI(37) /* lhu */, -/* u i s */ HI(43) /* sw */, -/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), - -/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), -/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), -/* s b s */ HI(40) /* sb */, -/* s b l */ HI(32) /* lb */, -/* s h s */ HI(41) /* sh */, -/* s h l */ HI(33) /* lh */, -/* s i s */ HI(43) /* sw */, -/* s i l */ HI(35) /* lw */, - -/* d s */ HI(61) /* sdc1 */, -/* d l */ HI(53) /* ldc1 */, -/* s s */ HI(57) /* swc1 */, -/* s l */ HI(49) /* lwc1 */, -}; - -#undef ARCH_32_64 - -/* reg_ar is an absoulute register! */ - -/* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) -{ - SLJIT_ASSERT(arg & SLJIT_MEM); - - if ((!(flags & WRITE_BACK) || !(arg & 0xf)) && !(arg & 0xf0) && argw <= SIMM_MAX && argw >= SIMM_MIN) { - /* Works for both absoulte and relative addresses. 
*/ - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) - | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); - return -1; - } - return 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - - /* Simple operation except for updates. */ - if (arg & 0xf0) { - argw &= 0x3; - next_argw &= 0x3; - if (argw && argw == next_argw && (arg == next_arg || (arg & 0xf0) == (next_arg & 0xf0))) - return 1; - return 0; - } - - if (arg == next_arg) { - if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) - return 1; - return 0; - } - - return 0; -} - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - sljit_si tmp_ar, base, delay_slot; - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(next_arg & SLJIT_MEM)) { - next_arg = 0; - next_argw = 0; - } - - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { - tmp_ar = reg_ar; - delay_slot = reg_ar; - } else { - tmp_ar = DR(TMP_REG1); - delay_slot = MOVABLE_INS; - } - base = arg & 0xf; - - if (SLJIT_UNLIKELY(arg & 0xf0)) { - argw &= 0x3; - if ((flags & WRITE_BACK) && reg_ar == DR(base)) { - SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); - FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - reg_ar = DR(TMP_REG1); - } - - /* Using the cache. 
*/ - if (argw == compiler->cache_argw) { - if (!(flags & WRITE_BACK)) { - if (arg == compiler->cache_arg) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) { - if (arg == next_arg && argw == (next_argw & 0x3)) { - compiler->cache_arg = arg; - compiler->cache_argw = argw; - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - } - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); - } - } - else { - if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) { - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); - } - } - } - - if (SLJIT_UNLIKELY(argw)) { - compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); - compiler->cache_argw = argw; - FAIL_IF(push_inst(compiler, SLL_W | T((arg >> 4) & 0xf) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); - } - - if (!(flags & WRITE_BACK)) { - if (arg == next_arg && argw == (next_argw & 0x3)) { - compiler->cache_arg = arg; - compiler->cache_argw = argw; - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); - tmp_ar = DR(TMP_REG3); - } - else - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); - } - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? 
((arg >> 4) & 0xf) : TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); - } - - if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { - /* Update only applies if a base register exists. */ - if (reg_ar == DR(base)) { - SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS)); - if (argw) - return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)); - return SLJIT_SUCCESS; - } - FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - reg_ar = DR(TMP_REG1); - } - - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (argw) - FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base))); - } - else { - if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { - if (argw != compiler->cache_argw) { - FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - compiler->cache_argw = argw; - } - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); - } - else { - compiler->cache_arg = SLJIT_MEM; - compiler->cache_argw = argw; - FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); - } - } - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); - } - - if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { - if (argw != compiler->cache_argw) { - FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - compiler->cache_argw = argw; - } - return 
push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - } - - if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { - if (argw != compiler->cache_argw) - FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - } - else { - compiler->cache_arg = SLJIT_MEM; - FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); - } - compiler->cache_argw = argw; - - if (!base) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - - if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { - compiler->cache_arg = arg; - FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - } - - FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); -} - -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw 
src1w, - sljit_si src2, sljit_sw src2w) -{ - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ - sljit_si dst_r = TMP_REG2; - sljit_si src1_r; - sljit_sw src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; - - if (!(flags & ALT_KEEP_CACHE)) { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - } - - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - if (GET_FLAGS(op)) - flags |= UNUSED_DEST; - } - else if (dst <= TMP_REG3) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - sugg_src2_r = dst_r; - } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) - flags |= SLOW_DEST; - - if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w) { - if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN)) - || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) { - flags |= SRC2_IMM; - src2_r = src2w; - } - } - if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { - if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) - || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { - flags |= SRC2_IMM; - src2_r = src1w; - - /* And swap arguments. */ - src1 = src2; - src1w = src2w; - src2 = SLJIT_IMM; - /* src2w = src2_r unneeded. */ - } - } - } - - /* Source 1. */ - if (src1 <= TMP_REG3) { - src1_r = src1; - flags |= REG1_SOURCE; - } - else if (src1 & SLJIT_IMM) { - if (src1w) { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); - src1_r = TMP_REG1; - } - else - src1_r = 0; - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC1; - src1_r = TMP_REG1; - } - - /* Source 2. 
*/ - if (src2 <= TMP_REG3) { - src2_r = src2; - flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - dst_r = src2_r; - } - else if (src2 & SLJIT_IMM) { - if (!(flags & SRC2_IMM)) { - if (src2w) { - FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); - src2_r = sugg_src2_r; - } - else { - src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) - dst_r = 0; - } - } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC2; - src2_r = sugg_src2_r; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); - - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); - - if (dst & SLJIT_MEM) { - if (!(flags & SLOW_DEST)) { - getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); - return compiler->error; - } - return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - return 
push_inst(compiler, BREAK, UNMOVABLE_INS); - case SLJIT_NOP: - return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_UMUL: - case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); - case SLJIT_UDIV: - case SLJIT_SDIV: -#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#endif - - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_ub)srcw : srcw); - - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); - - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); - - case SLJIT_MOVU: - case SLJIT_MOVU_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); - - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); - - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_sh)srcw : srcw); - - case SLJIT_NOT: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); - - case SLJIT_CLZ: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - } - - return SLJIT_SUCCESS; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#endif - - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - switch (GET_OPCODE(op)) { - case SLJIT_ADD: - case SLJIT_ADDC: - return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SUB: - case SLJIT_SUBC: - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_MUL: - return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SHL: - case SLJIT_LSHR: - case SLJIT_ASHR: -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - if (src2 & SLJIT_IMM) - src2w &= 0x1f; -#else - SLJIT_ASSERT_STOP(); -#endif - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - } - - return SLJIT_SUCCESS; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - 
check_sljit_get_register_index(reg); - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); - - return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ -#if (defined SLJIT_QEMU && SLJIT_QEMU) - /* Qemu says fir is 0 by default. */ - return 1; -#elif defined(__GNUC__) - sljit_sw fir; - asm ("cfc1 %0, $0" : "=r"(fir)); - return (fir >> 22) & 0x1; -#else -#error "FIR check is not implemented for this architecture" -#endif -} - -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) -#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8)) - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_fr; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; - - /* src and dst are swapped. 
*/ - if (op & SLJIT_SET_E) { - FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); - } - if (op & SLJIT_SET_S) { - /* Mixing the instructions for the two checks. */ - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); - } - return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC); - } - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG1 : (dst << 1); - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; - } - else - src <<= 1; - - switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_fr, flags = 0; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG2 : (dst << 1); - - if (src1 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1 = TMP_FREG1; - } else - flags |= SLOW_SRC1; - } - else - src1 <<= 1; - - if (src2 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { - FAIL_IF(compiler->error); - src2 = TMP_FREG2; - } else - flags |= SLOW_SRC2; - } - else - src2 <<= 1; - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - - if (flags & SLOW_SRC1) - src1 = TMP_FREG1; - if (flags & SLOW_SRC2) - src2 = TMP_FREG2; - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); - break; - - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); - break; - - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); - break; - - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG2) - FAIL_IF(emit_op_mem2(compiler, 
FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); - - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - if (dst <= TMP_REG3) - return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst)); - - /* Memory. */ - return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) - FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); - else if (src & SLJIT_MEM) - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); - else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw)); - - FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); - return push_inst(compiler, NOP, UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct 
sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - compiler->delay_slot = UNMOVABLE_INS; - return label; -} - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define JUMP_LENGTH 4 -#else -#error "Implementation required" -#endif - -#define BR_Z(src) \ - inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \ - flags = IS_BIT26_COND; \ - delay_check = src; - -#define BR_NZ(src) \ - inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \ - flags = IS_BIT26_COND; \ - delay_check = src; - -#define BR_T() \ - inst = BC1T | JUMP_LENGTH; \ - flags = IS_BIT16_COND; \ - delay_check = FCSR_FCC; - -#define BR_F() \ - inst = BC1F | JUMP_LENGTH; \ - flags = IS_BIT16_COND; \ - delay_check = FCSR_FCC; - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - struct sljit_jump *jump; - sljit_ins inst; - sljit_si flags = 0; - sljit_si delay_check = UNMOVABLE_INS; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - BR_NZ(EQUAL_FLAG); - break; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_EQUAL: - BR_Z(EQUAL_FLAG); - break; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - BR_Z(ULESS_FLAG); - break; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - BR_NZ(ULESS_FLAG); - break; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - BR_Z(UGREATER_FLAG); - break; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - BR_NZ(UGREATER_FLAG); - break; - case SLJIT_C_SIG_LESS: - BR_Z(LESS_FLAG); - break; - case SLJIT_C_SIG_GREATER_EQUAL: - BR_NZ(LESS_FLAG); - break; - case SLJIT_C_SIG_GREATER: - BR_Z(GREATER_FLAG); - break; - case SLJIT_C_SIG_LESS_EQUAL: - BR_NZ(GREATER_FLAG); - break; - 
case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: - BR_Z(OVERFLOW_FLAG); - break; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - BR_NZ(OVERFLOW_FLAG); - break; - case SLJIT_C_FLOAT_UNORDERED: - BR_F(); - break; - case SLJIT_C_FLOAT_ORDERED: - BR_T(); - break; - default: - /* Not conditional branch. */ - inst = 0; - break; - } - - jump->flags |= flags; - if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) - jump->flags |= IS_MOVABLE; - - if (inst) - PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); - - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - if (type <= SLJIT_JUMP) { - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - } else { - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - /* Cannot be optimized out if type is >= CALL0. */ - jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); - PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - jump->addr = compiler->size; - /* A NOP if type < CALL1. 
*/ - PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS)); - } - return jump; -} - -#define RESOLVE_IMM1() \ - if (src1 & SLJIT_IMM) { \ - if (src1w) { \ - PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ - src1 = TMP_REG1; \ - } \ - else \ - src1 = 0; \ - } - -#define RESOLVE_IMM2() \ - if (src2 & SLJIT_IMM) { \ - if (src2w) { \ - PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ - src2 = TMP_REG2; \ - } \ - else \ - src2 = 0; \ - } - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - struct sljit_jump *jump; - sljit_si flags; - sljit_ins inst; - - CHECK_ERROR_PTR(); - check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; - if (src1 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); - src1 = TMP_REG1; - } - if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); - src2 = TMP_REG2; - } - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - if (type <= SLJIT_C_NOT_EQUAL) { - RESOLVE_IMM1(); - RESOLVE_IMM2(); - jump->flags |= IS_BIT26_COND; - if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) - jump->flags |= IS_MOVABLE; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); - } - else if (type >= SLJIT_C_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { - inst = NOP; - if ((src1 & SLJIT_IMM) && (src1w == 0)) { - RESOLVE_IMM2(); - switch (type) { - case SLJIT_C_SIG_LESS: - inst = BLEZ; - jump->flags |= IS_BIT26_COND; - break; - case SLJIT_C_SIG_GREATER_EQUAL: - inst = BGTZ; - jump->flags |= IS_BIT26_COND; - break; - case SLJIT_C_SIG_GREATER: - inst = BGEZ; - jump->flags |= IS_BIT16_COND; - break; - case SLJIT_C_SIG_LESS_EQUAL: - inst = BLTZ; - jump->flags |= IS_BIT16_COND; - break; - } - src1 = src2; - } - else { - RESOLVE_IMM1(); - switch (type) { - case SLJIT_C_SIG_LESS: - inst = BGEZ; - jump->flags |= IS_BIT16_COND; - break; - case SLJIT_C_SIG_GREATER_EQUAL: - inst = BLTZ; - jump->flags |= IS_BIT16_COND; - break; - case SLJIT_C_SIG_GREATER: - inst = BLEZ; - jump->flags |= IS_BIT26_COND; - break; - case SLJIT_C_SIG_LESS_EQUAL: - inst = BGTZ; - jump->flags |= IS_BIT26_COND; - break; - } - } - PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); - } - else { - if (type == SLJIT_C_LESS || type == SLJIT_C_GREATER_EQUAL || type == SLJIT_C_SIG_LESS || type == SLJIT_C_SIG_GREATER_EQUAL) { - RESOLVE_IMM1(); - if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); - else { - RESOLVE_IMM2(); - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); - } - type = (type == SLJIT_C_LESS || type == SLJIT_C_SIG_LESS) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; - } - else { - RESOLVE_IMM2(); - if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? 
SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); - else { - RESOLVE_IMM1(); - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); - } - type = (type == SLJIT_C_GREATER || type == SLJIT_C_SIG_GREATER) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; - } - - jump->flags |= IS_BIT26_COND; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); - } - - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - return jump; -} - -#undef RESOLVE_IMM1 -#undef RESOLVE_IMM2 - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - struct sljit_jump *jump; - sljit_ins inst; - sljit_si if_true; - - CHECK_ERROR_PTR(); - check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (src1 > SLJIT_FLOAT_REG6) { - PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - src1 = TMP_FREG1; - } - else - src1 <<= 1; - - if (src2 > SLJIT_FLOAT_REG6) { - PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); - src2 = TMP_FREG2; - } - else - src2 <<= 1; - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - jump->flags |= IS_BIT16_COND; - - switch (type & 0xff) { - case SLJIT_C_FLOAT_EQUAL: - inst = C_UEQ_fmt; - if_true = 1; - break; - case SLJIT_C_FLOAT_NOT_EQUAL: - inst = C_UEQ_fmt; - if_true = 0; - break; - case SLJIT_C_FLOAT_LESS: - inst = C_ULT_fmt; - if_true = 1; - break; - case SLJIT_C_FLOAT_GREATER_EQUAL: - 
inst = C_ULT_fmt; - if_true = 0; - break; - case SLJIT_C_FLOAT_GREATER: - inst = C_ULE_fmt; - if_true = 0; - break; - case SLJIT_C_FLOAT_LESS_EQUAL: - inst = C_ULE_fmt; - if_true = 1; - break; - case SLJIT_C_FLOAT_UNORDERED: - inst = C_UN_fmt; - if_true = 1; - break; - case SLJIT_C_FLOAT_ORDERED: - default: /* Make compilers happy. */ - inst = C_UN_fmt; - if_true = 0; - break; - } - - PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS)); - /* Intentionally the other opcode. */ - PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS)); - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - return jump; -} - -#undef JUMP_LENGTH -#undef BR_Z -#undef BR_NZ -#undef BR_T -#undef BR_F - -#undef FLOAT_DATA -#undef FMT - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - sljit_si src_r = TMP_REG2; - struct sljit_jump *jump = NULL; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) { - if (DR(src) != 4) - src_r = src; - else - FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - } - - if (type >= SLJIT_CALL0) { - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - if (src & (SLJIT_IMM | SLJIT_MEM)) { - if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); - else { - SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - } - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - /* We need an extra instruction in any case. 
*/ - return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS); - } - - /* Register input. */ - if (type >= SLJIT_CALL1) - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4)); - FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); - } - - if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); - jump->u.target = srcw; - - if (compiler->delay_slot != UNMOVABLE_INS) - jump->flags |= IS_MOVABLE; - - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - } - else if (src & SLJIT_MEM) - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - - FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS)); - if (jump) - jump->addr = compiler->size; - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_si sugg_dst_ar, dst_ar; - sljit_si flags = GET_ALL_FLAGS(op); - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - ADJUST_LOCAL_OFFSET(dst, dstw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - op = GET_OPCODE(op); - sugg_dst_ar = DR((op < SLJIT_ADD && dst <= TMP_REG3) ? 
dst : TMP_REG2); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } - - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_NOT_EQUAL: - FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; - break; - case SLJIT_C_LESS: - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_LESS: - case SLJIT_C_FLOAT_GREATER_EQUAL: - dst_ar = ULESS_FLAG; - break; - case SLJIT_C_GREATER: - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_GREATER: - case SLJIT_C_FLOAT_LESS_EQUAL: - dst_ar = UGREATER_FLAG; - break; - case SLJIT_C_SIG_LESS: - case SLJIT_C_SIG_GREATER_EQUAL: - dst_ar = LESS_FLAG; - break; - case SLJIT_C_SIG_GREATER: - case SLJIT_C_SIG_LESS_EQUAL: - dst_ar = GREATER_FLAG; - break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_NOT_OVERFLOW: - dst_ar = OVERFLOW_FLAG; - break; - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; - type ^= 0x1; /* Flip type bit for the XORI below. 
*/ - break; - case SLJIT_C_FLOAT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - dst_ar = EQUAL_FLAG; - break; - - case SLJIT_C_FLOAT_UNORDERED: - case SLJIT_C_FLOAT_ORDERED: - FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); - FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); - FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; - break; - - default: - SLJIT_ASSERT_STOP(); - dst_ar = sugg_dst_ar; - break; - } - - if (type & 0x1) { - FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; - } - - if (op >= SLJIT_ADD) { - if (DR(TMP_REG2) != dst_ar) - FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); - } - - if (dst & SLJIT_MEM) - return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); - - if (sugg_dst_ar != dst_ar) - return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - struct sljit_const *const_; - sljit_si reg; - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - - reg = (dst <= TMP_REG3) ? 
dst : TMP_REG2; - - PTR_FAIL_IF(emit_const(compiler, reg, init_value)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); - return const_; -} diff --git a/deps/libmagic/pcre/sljit/sljitNativePPC_32.c b/deps/libmagic/pcre/sljit/sljitNativePPC_32.c deleted file mode 100644 index 0bd35a6..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativePPC_32.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* ppc 32-bit arch dependent functions. 
*/ - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) -{ - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); - - if (!(imm & ~0xffff)) - return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); - - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); - return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; -} - -#define INS_CLEAR_LEFT(dst, src, from) \ - (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) - -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) -{ - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - SLJIT_ASSERT(src1 == TMP_REG1); - if (dst != src2) - return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); - } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - else { - SLJIT_ASSERT(dst == src2); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) - return push_inst(compiler, EXTSH | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); - } - else { - SLJIT_ASSERT(dst == src2); - } - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - return 
push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); - - case SLJIT_ADD: - if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM2) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM4) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); - return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); - } - if (!(flags & ALT_SET_FLAGS)) - return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); - return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); - - case SLJIT_ADDC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); - return push_inst(compiler, MTXER | S(0)); - } - return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); - - case SLJIT_SUB: - if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); - } - if (flags & (ALT_FORM2 | ALT_FORM3)) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) - FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); - if (flags & ALT_FORM3) - return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); - return SLJIT_SUCCESS; - } - if (flags & (ALT_FORM4 | ALT_FORM5)) { - if (flags & ALT_FORM4) - FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); - if (flags & ALT_FORM5) - FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); - return SLJIT_SUCCESS; - } - if (!(flags & ALT_SET_FLAGS)) - return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); - if (flags & ALT_FORM6) - FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); - return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - - case SLJIT_SUBC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); - return push_inst(compiler, MTXER | S(0)); - } - return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); - - case SLJIT_MUL: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); - } - return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); - - case SLJIT_AND: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); - } - return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_OR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); - } - if (flags & 
ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); - return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); - } - return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_XOR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); - return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); - } - return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_SHL: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - compiler->imm &= 0x1f; - return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); - } - return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_LSHR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - compiler->imm &= 0x1f; - return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); - } - return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_ASHR: - if (flags & ALT_FORM3) - FAIL_IF(push_inst(compiler, MFXER | D(0))); - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - compiler->imm &= 0x1f; - FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); - } - else - FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | 
A(dst) | B(src2))); - return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; - } - - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) -{ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); - return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} diff --git a/deps/libmagic/pcre/sljit/sljitNativePPC_64.c b/deps/libmagic/pcre/sljit/sljitNativePPC_64.c deleted file mode 100644 index 8eaeb41..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativePPC_64.c +++ /dev/null @@ -1,421 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* ppc 64-bit arch dependent functions. */ - -#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) -#define ASM_SLJIT_CLZ(src, dst) \ - __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) -#elif defined(__xlc__) -#error "Please enable GCC syntax for inline assembly statements" -#else -#error "Must implement count leading zeroes" -#endif - -#define RLDI(dst, src, sh, mb, type) \ - (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) - -#define PUSH_RLDICR(reg, shift) \ - push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1)) - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) -{ - sljit_uw tmp; - sljit_uw shift; - sljit_uw tmp2; - sljit_uw shift2; - - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); - - if (!(imm & ~0xffff)) - return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); - - if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) { - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); - return (imm & 0xffff) ? 
push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; - } - - /* Count leading zeroes. */ - tmp = (imm >= 0) ? imm : ~imm; - ASM_SLJIT_CLZ(tmp, shift); - SLJIT_ASSERT(shift > 0); - shift--; - tmp = (imm << shift); - - if ((tmp & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); - shift += 15; - return PUSH_RLDICR(reg, shift); - } - - if ((tmp & ~0xffffffff00000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); - shift += 31; - return PUSH_RLDICR(reg, shift); - } - - /* Cut out the 16 bit from immediate. */ - shift += 15; - tmp2 = imm & ((1ul << (63 - shift)) - 1); - - if (tmp2 <= 0xffff) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); - FAIL_IF(PUSH_RLDICR(reg, shift)); - return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); - } - - if (tmp2 <= 0xffffffff) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); - FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); - return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; - } - - ASM_SLJIT_CLZ(tmp2, shift2); - tmp2 <<= shift2; - - if ((tmp2 & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); - shift2 += 15; - shift += (63 - shift2); - FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); - return PUSH_RLDICR(reg, shift2); - } - - /* The general version. */ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); - FAIL_IF(PUSH_RLDICR(reg, 31)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); - return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); -} - -/* Simplified mnemonics: clrldi. 
*/ -#define INS_CLEAR_LEFT(dst, src, from) \ - (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5)) - -/* Sign extension for integer operations. */ -#define UN_EXTS() \ - if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ - FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ - src2 = TMP_REG2; \ - } - -#define BIN_EXTS() \ - if (flags & ALT_SIGN_EXT) { \ - if (flags & REG1_SOURCE) { \ - FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ - src1 = TMP_REG1; \ - } \ - if (flags & REG2_SOURCE) { \ - FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ - src2 = TMP_REG2; \ - } \ - } - -#define BIN_IMM_EXTS() \ - if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ - FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ - src1 = TMP_REG1; \ - } - -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) -{ - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_P: - SLJIT_ASSERT(src1 == TMP_REG1); - if (dst != src2) - return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SI) - return push_inst(compiler, EXTSW | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); - } - else { - SLJIT_ASSERT(dst == src2); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); - } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - else { - SLJIT_ASSERT(dst == 
src2); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) - return push_inst(compiler, EXTSH | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); - } - else { - SLJIT_ASSERT(dst == src2); - } - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1); - UN_EXTS(); - return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - UN_EXTS(); - return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1); - if (flags & ALT_FORM1) - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); - return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); - - case SLJIT_ADD: - if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM2) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - BIN_IMM_EXTS(); - return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); - } - if (flags & ALT_FORM4) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ - FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); - return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); - } - if (!(flags & ALT_SET_FLAGS)) - return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); - BIN_EXTS(); - return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); - - case SLJIT_ADDC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); - return push_inst(compiler, MTXER | S(0)); - } - BIN_EXTS(); - return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); - - case SLJIT_SUB: - if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); - } - if (flags & (ALT_FORM2 | ALT_FORM3)) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) - FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); - if (flags & ALT_FORM3) - return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); - return SLJIT_SUCCESS; - } - if (flags & (ALT_FORM4 | ALT_FORM5)) { - if (flags & ALT_FORM4) - FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); - if (flags & ALT_FORM5) - return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); - return SLJIT_SUCCESS; - } - if (!(flags & ALT_SET_FLAGS)) - return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); - BIN_EXTS(); - if (flags & ALT_FORM6) - FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | B(src2))); - return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - - case SLJIT_SUBC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); - return push_inst(compiler, MTXER | S(0)); - } - BIN_EXTS(); - return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); - - case SLJIT_MUL: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); - } - BIN_EXTS(); - if (flags & ALT_FORM2) - return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1)); - - case SLJIT_AND: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); - } - return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_OR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); - return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); - } - return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_XOR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); - } - if (flags & ALT_FORM2) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, XORIS | S(src1) | A(dst) | 
compiler->imm); - } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); - return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); - } - return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_SHL: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) { - compiler->imm &= 0x1f; - return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); - } - else { - compiler->imm &= 0x3f; - return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); - } - } - return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_LSHR: - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) { - compiler->imm &= 0x1f; - return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); - } - else { - compiler->imm &= 0x3f; - return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); - } - } - return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_ASHR: - if (flags & ALT_FORM3) - FAIL_IF(push_inst(compiler, MFXER | D(0))); - if (flags & ALT_FORM1) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) { - compiler->imm &= 0x1f; - FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); - } - else { - compiler->imm &= 0x3f; - FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4))); - } - } - else - FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2))); - return (flags & ALT_FORM3) ? 
push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; - } - - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) -{ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); - FAIL_IF(PUSH_RLDICR(reg, 31)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); - return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 5); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff); - SLJIT_CACHE_FLUSH(inst, inst + 5); -} diff --git a/deps/libmagic/pcre/sljit/sljitNativePPC_common.c b/deps/libmagic/pcre/sljit/sljitNativePPC_common.c deleted file mode 100644 index f7c75a7..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativePPC_common.c +++ /dev/null @@ -1,2014 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ - return "PowerPC" SLJIT_CPUINFO; -} - -/* Length of an instruction word. - Both for ppc-32 and ppc-64. */ -typedef sljit_ui sljit_ins; - -#ifdef _AIX -#include -#endif - -static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) -{ -#ifdef _AIX - _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from)); -#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) -# if defined(_ARCH_PWR) || defined(_ARCH_PWR2) - /* Cache flush for POWER architecture. */ - while (from < to) { - __asm__ volatile ( - "clf 0, %0\n" - "dcs\n" - : : "r"(from) - ); - from++; - } - __asm__ volatile ( "ics" ); -# elif defined(_ARCH_COM) && !defined(_ARCH_PPC) -# error "Cache flush is not implemented for PowerPC/POWER common mode." 
-# else - /* Cache flush for PowerPC architecture. */ - while (from < to) { - __asm__ volatile ( - "dcbf 0, %0\n" - "sync\n" - "icbi 0, %0\n" - : : "r"(from) - ); - from++; - } - __asm__ volatile ( "isync" ); -# endif -# ifdef __xlc__ -# warning "This file may fail to compile if -qfuncsect is used" -# endif -#elif defined(__xlc__) -#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc" -#else -#error "This platform requires a cache flush implementation." -#endif /* _AIX */ -} - -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define ZERO_REG (SLJIT_NO_REGISTERS + 4) - -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) - -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31 -}; - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ -#define D(d) (reg_map[d] << 21) -#define S(s) (reg_map[s] << 21) -#define A(a) (reg_map[a] << 16) -#define B(b) (reg_map[b] << 11) -#define C(c) (reg_map[c] << 6) -#define FD(fd) ((fd) << 21) -#define FA(fa) ((fa) << 16) -#define FB(fb) ((fb) << 11) -#define FC(fc) ((fc) << 6) -#define IMM(imm) ((imm) & 0xffff) -#define CRD(d) ((d) << 21) - -/* Instruction bit sections. - OE and Rc flag (see ALT_SET_FLAGS). */ -#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS)) -/* Rc flag (see ALT_SET_FLAGS). 
*/ -#define RC(flags) ((flags & ALT_SET_FLAGS) >> 10) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) ((opcode) << 1) - -#define ADD (HI(31) | LO(266)) -#define ADDC (HI(31) | LO(10)) -#define ADDE (HI(31) | LO(138)) -#define ADDI (HI(14)) -#define ADDIC (HI(13)) -#define ADDIS (HI(15)) -#define ADDME (HI(31) | LO(234)) -#define AND (HI(31) | LO(28)) -#define ANDI (HI(28)) -#define ANDIS (HI(29)) -#define Bx (HI(18)) -#define BCx (HI(16)) -#define BCCTR (HI(19) | LO(528) | (3 << 11)) -#define BLR (HI(19) | LO(16) | (0x14 << 21)) -#define CNTLZD (HI(31) | LO(58)) -#define CNTLZW (HI(31) | LO(26)) -#define CMP (HI(31) | LO(0)) -#define CMPI (HI(11)) -#define CMPL (HI(31) | LO(32)) -#define CMPLI (HI(10)) -#define CROR (HI(19) | LO(449)) -#define DIVD (HI(31) | LO(489)) -#define DIVDU (HI(31) | LO(457)) -#define DIVW (HI(31) | LO(491)) -#define DIVWU (HI(31) | LO(459)) -#define EXTSB (HI(31) | LO(954)) -#define EXTSH (HI(31) | LO(922)) -#define EXTSW (HI(31) | LO(986)) -#define FABS (HI(63) | LO(264)) -#define FADD (HI(63) | LO(21)) -#define FADDS (HI(59) | LO(21)) -#define FCMPU (HI(63) | LO(0)) -#define FDIV (HI(63) | LO(18)) -#define FDIVS (HI(59) | LO(18)) -#define FMR (HI(63) | LO(72)) -#define FMUL (HI(63) | LO(25)) -#define FMULS (HI(59) | LO(25)) -#define FNEG (HI(63) | LO(40)) -#define FSUB (HI(63) | LO(20)) -#define FSUBS (HI(59) | LO(20)) -#define LD (HI(58) | 0) -#define LWZ (HI(32)) -#define MFCR (HI(31) | LO(19)) -#define MFLR (HI(31) | LO(339) | 0x80000) -#define MFXER (HI(31) | LO(339) | 0x10000) -#define MTCTR (HI(31) | LO(467) | 0x90000) -#define MTLR (HI(31) | LO(467) | 0x80000) -#define MTXER (HI(31) | LO(467) | 0x10000) -#define MULHD (HI(31) | LO(73)) -#define MULHDU (HI(31) | LO(9)) -#define MULHW (HI(31) | LO(75)) -#define MULHWU (HI(31) | LO(11)) -#define MULLD (HI(31) | LO(233)) -#define MULLI (HI(7)) -#define MULLW (HI(31) | LO(235)) -#define NEG (HI(31) | LO(104)) -#define NOP (HI(24)) -#define NOR (HI(31) | LO(124)) -#define OR 
(HI(31) | LO(444)) -#define ORI (HI(24)) -#define ORIS (HI(25)) -#define RLDICL (HI(30)) -#define RLWINM (HI(21)) -#define SLD (HI(31) | LO(27)) -#define SLW (HI(31) | LO(24)) -#define SRAD (HI(31) | LO(794)) -#define SRADI (HI(31) | LO(413 << 1)) -#define SRAW (HI(31) | LO(792)) -#define SRAWI (HI(31) | LO(824)) -#define SRD (HI(31) | LO(539)) -#define SRW (HI(31) | LO(536)) -#define STD (HI(62) | 0) -#define STDU (HI(62) | 1) -#define STDUX (HI(31) | LO(181)) -#define STW (HI(36)) -#define STWU (HI(37)) -#define STWUX (HI(31) | LO(183)) -#define SUBF (HI(31) | LO(40)) -#define SUBFC (HI(31) | LO(8)) -#define SUBFE (HI(31) | LO(136)) -#define SUBFIC (HI(8)) -#define XOR (HI(31) | LO(316)) -#define XORI (HI(26)) -#define XORIS (HI(27)) - -#define SIMM_MAX (0x7fff) -#define SIMM_MIN (-0x8000) -#define UIMM_MAX (0xffff) - -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) -{ - sljit_sw* ptrs; - if (func_ptr) - *func_ptr = (void*)context; - ptrs = (sljit_sw*)func; - context->addr = addr ? 
addr : ptrs[0]; - context->r2 = ptrs[1]; - context->r11 = ptrs[2]; -} -#endif - -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) -{ - sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); - FAIL_IF(!ptr); - *ptr = ins; - compiler->size++; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) -{ - sljit_sw diff; - sljit_uw target_addr; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; - - if (jump->flags & JUMP_ADDR) - target_addr = jump->u.target; - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); - } - diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l; - - if (jump->flags & UNCOND_B) { - if (diff <= 0x01ffffff && diff >= -0x02000000) { - jump->flags |= PATCH_B; - return 1; - } - if (target_addr <= 0x03ffffff) { - jump->flags |= PATCH_B | ABSOLUTE_B; - return 1; - } - } - else { - if (diff <= 0x7fff && diff >= -0x8000) { - jump->flags |= PATCH_B; - return 1; - } - if (target_addr <= 0xffff) { - jump->flags |= PATCH_B | ABSOLUTE_B; - return 1; - } - } - return 0; -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_ins *code; - sljit_ins *code_ptr; - sljit_ins *buf_ptr; - sljit_ins *buf_end; - sljit_uw word_count; - sljit_uw addr; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); -#else - compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); -#endif -#endif - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * 
sizeof(sljit_ins)); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - word_count = 0; - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - do { - buf_ptr = (sljit_ins*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 2); - do { - *code_ptr = *buf_ptr++; - SLJIT_ASSERT(!label || label->size >= word_count); - SLJIT_ASSERT(!jump || jump->addr >= word_count); - SLJIT_ASSERT(!const_ || const_->addr >= word_count); - /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else - jump->addr = (sljit_uw)(code_ptr - 6); -#endif - if (optimize_jump(jump, code_ptr, code)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - code_ptr[-3] = code_ptr[0]; - code_ptr -= 3; -#else - code_ptr[-6] = code_ptr[0]; - code_ptr -= 6; -#endif - } - jump = jump->next; - } - if (const_ && const_->addr == word_count) { - /* Just recording the address. */ - const_->addr = (sljit_uw)code_ptr; - const_ = const_->next; - } - code_ptr ++; - word_count ++; - } while (buf_ptr < buf_end); - - buf = buf->next; - } while (buf); - - if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); -#else - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); -#endif - - jump = compiler->jumps; - while (jump) { - do { - addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; - if (jump->flags & PATCH_B) { - if (jump->flags & UNCOND_B) { - if (!(jump->flags & ABSOLUTE_B)) { - addr = addr - jump->addr; - SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); - } - else { - SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); - } - } - else { - if (!(jump->flags & ABSOLUTE_B)) { - addr = addr - jump->addr; - SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); - } - else { - addr = addr & ~0x3l; - SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); - } - - } - break; - } - /* Set the fields of immediate loads. */ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); -#else - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); -#endif - } while (0); - jump = jump->next; - } - - SLJIT_CACHE_FLUSH(code, code_ptr); - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size * sizeof(sljit_ins); - -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (((sljit_sw)code_ptr) & 0x4) - code_ptr++; - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); - return code_ptr; -#else - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); - return code_ptr; -#endif -#else - 
return code; -#endif -} - -/* --------------------------------------------------------------------- */ -/* Entry, exit */ -/* --------------------------------------------------------------------- */ - -/* inp_flags: */ - -/* Creates an index in data_transfer_insts array. */ -#define LOAD_DATA 0x01 -#define INDEXED 0x02 -#define WRITE_BACK 0x04 -#define WORD_DATA 0x00 -#define BYTE_DATA 0x08 -#define HALF_DATA 0x10 -#define INT_DATA 0x18 -#define SIGNED_DATA 0x20 -/* Separates integer and floating point registers */ -#define GPR_REG 0x3f -#define DOUBLE_DATA 0x40 - -#define MEM_MASK 0x7f - -/* Other inp_flags. */ - -#define ARG_TEST 0x000100 -/* Integer opertion and set flags -> requires exts on 64 bit systems. */ -#define ALT_SIGN_EXT 0x000200 -/* This flag affects the RC() and OERC() macros. */ -#define ALT_SET_FLAGS 0x000400 -#define ALT_KEEP_CACHE 0x000800 -#define ALT_FORM1 0x010000 -#define ALT_FORM2 0x020000 -#define ALT_FORM3 0x040000 -#define ALT_FORM4 0x080000 -#define ALT_FORM5 0x100000 -#define ALT_FORM6 0x200000 - -/* Source and destination is register. */ -#define REG_DEST 0x000001 -#define REG1_SOURCE 0x000002 -#define REG2_SOURCE 0x000004 -/* getput_arg_fast returned true. */ -#define FAST_DEST 0x000008 -/* Multiple instructions are required. */ -#define SLOW_DEST 0x000010 -/* -ALT_SIGN_EXT 0x000200 -ALT_SET_FLAGS 0x000400 -ALT_FORM1 0x010000 -... 
-ALT_FORM6 0x200000 */ - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#include "sljitNativePPC_32.c" -#else -#include "sljitNativePPC_64.c" -#endif - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#define STACK_STORE STW -#define STACK_LOAD LWZ -#else -#define STACK_STORE STD -#define STACK_LOAD LD -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - FAIL_IF(push_inst(compiler, MFLR | D(0))); - FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) )); - - FAIL_IF(push_inst(compiler, ADDI | D(ZERO_REG) | A(0) | 0)); - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, 
OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3))); - -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); - } -#else - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); - } -#endif - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct 
sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); - } - - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)))); - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); - - FAIL_IF(push_inst(compiler, MTLR | S(0))); - FAIL_IF(push_inst(compiler, BLR)); - - return SLJIT_SUCCESS; -} - -#undef STACK_STORE -#undef STACK_LOAD - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -/* i/x - immediate/indexed form - n/w - no write-back / write-back (1 bit) - s/l - store/load (1 bit) - u/s - signed/unsigned (1 bit) - 
w/b/h/i - word/byte/half/int allowed (2 bit) - It contans 32 items, but not all are different. */ - -/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ -#define ADDR_MODE2 0x10000 -/* 64-bit only: there is no lwau instruction. */ -#define UPDATE_REQ 0x20000 - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#define ARCH_32_64(a, b) a -#define INST_CODE_AND_DST(inst, flags, reg) \ - ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) -#else -#define ARCH_32_64(a, b) b -#define INST_CODE_AND_DST(inst, flags, reg) \ - (((inst) & ~(ADDR_MODE2 | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) -#endif - -static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { - -/* -------- Unsigned -------- */ - -/* Word. */ - -/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), -/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), -/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), - -/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), -/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), -/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), - -/* Byte. */ - -/* u b n i s */ HI(38) /* stb */, -/* u b n i l */ HI(34) /* lbz */, -/* u b n x s */ HI(31) | LO(215) /* stbx */, -/* u b n x l */ HI(31) | LO(87) /* lbzx */, - -/* u b w i s */ HI(39) /* stbu */, -/* u b w i l */ HI(35) /* lbzu */, -/* u b w x s */ HI(31) | LO(247) /* stbux */, -/* u b w x l */ HI(31) | LO(119) /* lbzux */, - -/* Half. 
*/ - -/* u h n i s */ HI(44) /* sth */, -/* u h n i l */ HI(40) /* lhz */, -/* u h n x s */ HI(31) | LO(407) /* sthx */, -/* u h n x l */ HI(31) | LO(279) /* lhzx */, - -/* u h w i s */ HI(45) /* sthu */, -/* u h w i l */ HI(41) /* lhzu */, -/* u h w x s */ HI(31) | LO(439) /* sthux */, -/* u h w x l */ HI(31) | LO(311) /* lhzux */, - -/* Int. */ - -/* u i n i s */ HI(36) /* stw */, -/* u i n i l */ HI(32) /* lwz */, -/* u i n x s */ HI(31) | LO(151) /* stwx */, -/* u i n x l */ HI(31) | LO(23) /* lwzx */, - -/* u i w i s */ HI(37) /* stwu */, -/* u i w i l */ HI(33) /* lwzu */, -/* u i w x s */ HI(31) | LO(183) /* stwux */, -/* u i w x l */ HI(31) | LO(55) /* lwzux */, - -/* -------- Signed -------- */ - -/* Word. */ - -/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), -/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), -/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), - -/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), -/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), -/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), - -/* Byte. */ - -/* s b n i s */ HI(38) /* stb */, -/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */, -/* s b n x s */ HI(31) | LO(215) /* stbx */, -/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, - -/* s b w i s */ HI(39) /* stbu */, -/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */, -/* s b w x s */ HI(31) | LO(247) /* stbux */, -/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */, - -/* Half. 
*/ - -/* s h n i s */ HI(44) /* sth */, -/* s h n i l */ HI(42) /* lha */, -/* s h n x s */ HI(31) | LO(407) /* sthx */, -/* s h n x l */ HI(31) | LO(343) /* lhax */, - -/* s h w i s */ HI(45) /* sthu */, -/* s h w i l */ HI(43) /* lhau */, -/* s h w x s */ HI(31) | LO(439) /* sthux */, -/* s h w x l */ HI(31) | LO(375) /* lhaux */, - -/* Int. */ - -/* s i n i s */ HI(36) /* stw */, -/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */), -/* s i n x s */ HI(31) | LO(151) /* stwx */, -/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), - -/* s i w i s */ HI(37) /* stwu */, -/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */), -/* s i w x s */ HI(31) | LO(183) /* stwux */, -/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), - -/* -------- Double -------- */ - -/* d n i s */ HI(54) /* stfd */, -/* d n i l */ HI(50) /* lfd */, -/* d n x s */ HI(31) | LO(727) /* stfdx */, -/* d n x l */ HI(31) | LO(599) /* lfdx */, - -/* s n i s */ HI(52) /* stfs */, -/* s n i l */ HI(48) /* lfs */, -/* s n x s */ HI(31) | LO(663) /* stfsx */, -/* s n x l */ HI(31) | LO(535) /* lfsx */, - -}; - -#undef ARCH_32_64 - -/* Simple cases, (no caching is required). 
*/ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - sljit_ins inst; -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_si tmp_reg; -#endif - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(arg & 0xf)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (inp_flags & ARG_TEST) - return 1; - - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); - return -1; - } -#else - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - if (argw <= SIMM_MAX && argw >= SIMM_MIN && - (!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) { - if (inp_flags & ARG_TEST) - return 1; - - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); - return -1; - } -#endif - return 0; - } - - if (!(arg & 0xf0)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (inp_flags & ARG_TEST) - return 1; - - inst = data_transfer_insts[inp_flags & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); - return -1; - } -#else - inst = data_transfer_insts[inp_flags & MEM_MASK]; - if (argw <= SIMM_MAX && argw >= SIMM_MIN && (!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) { - if (inp_flags & ARG_TEST) - return 1; - - if ((inp_flags & WRITE_BACK) && (inst & UPDATE_REQ)) { - tmp_reg = (inp_flags & LOAD_DATA) ? 
(arg & 0xf) : TMP_REG3; - if (push_inst(compiler, ADDI | D(tmp_reg) | A(arg & 0xf) | IMM(argw))) - return -1; - arg = tmp_reg | SLJIT_MEM; - argw = 0; - } - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); - return -1; - } -#endif - } - else if (!(argw & 0x3)) { - if (inp_flags & ARG_TEST) - return 1; - inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); - return -1; - } - return 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those operator always - uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - - if (!(arg & 0xf)) - return (next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX); - - if (arg & 0xf0) - return ((arg & 0xf0) == (next_arg & 0xf0) && (argw & 0x3) == (next_argw & 0x3)); - - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (arg == next_arg && (next_argw >= SIMM_MAX && next_argw <= SIMM_MIN)) - return 1; - } - - if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) - return 1; - - return 0; -} - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define ADJUST_CACHED_IMM(imm) \ - if ((inst & ADDR_MODE2) && (imm & 0x3)) { \ - /* Adjust cached value. Fortunately this is really a rare case */ \ - compiler->cache_argw += imm & 0x3; \ - FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \ - imm &= ~0x3; \ - } -#else -#define ADJUST_CACHED_IMM(imm) -#endif - -/* Emit the necessary instructions. See can_cache above. 
*/ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - sljit_si tmp_r; - sljit_ins inst; - - SLJIT_ASSERT(arg & SLJIT_MEM); - - tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1; - /* Special case for "mov reg, [reg, ... ]". */ - if ((arg & 0xf) == tmp_r) - tmp_r = TMP_REG1; - - if (!(arg & 0xf)) { - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - if ((compiler->cache_arg & SLJIT_IMM) && (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= SIMM_MAX || ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= SIMM_MAX)) { - argw = argw - compiler->cache_argw; - ADJUST_CACHED_IMM(argw); - SLJIT_ASSERT(!(inst & UPDATE_REQ)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); - } - - if ((next_arg & SLJIT_MEM) && (argw - next_argw <= SIMM_MAX || next_argw - argw <= SIMM_MAX)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; - } - - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r)); - } - - if (SLJIT_UNLIKELY(arg & 0xf0)) { - argw &= 0x3; - /* Otherwise getput_arg_fast would capture it. 
*/ - SLJIT_ASSERT(argw); - - if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg && argw == compiler->cache_argw) - tmp_r = TMP_REG3; - else { - if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) { - compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); - compiler->cache_argw = argw; - tmp_r = TMP_REG3; - } -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); -#else - FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1))); -#endif - } - inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); - } - - inst = data_transfer_insts[inp_flags & MEM_MASK]; - - if (compiler->cache_arg == arg && ((sljit_uw)argw - (sljit_uw)compiler->cache_argw <= SIMM_MAX || (sljit_uw)compiler->cache_argw - (sljit_uw)argw <= SIMM_MAX)) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - argw = argw - compiler->cache_argw; - ADJUST_CACHED_IMM(argw); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); - } - - if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { - inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); - } - - if (argw == next_argw && (next_arg & SLJIT_MEM)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - - inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); - } - - if (arg == next_arg 
&& !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & 0xf))); - - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3)); - } - - /* Get the indexed version instead of the normal one. */ - inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); -} - -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ - sljit_si dst_r; - sljit_si src1_r; - sljit_si src2_r; - sljit_si sugg_src2_r = TMP_REG2; - sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); - - if (!(input_flags & ALT_KEEP_CACHE)) { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - } - - /* Destination check. 
*/ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - dst_r = TMP_REG2; - } - else if (dst <= ZERO_REG) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - sugg_src2_r = dst_r; - } - else { - SLJIT_ASSERT(dst & SLJIT_MEM); - if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) { - flags |= FAST_DEST; - dst_r = TMP_REG2; - } - else { - flags |= SLOW_DEST; - dst_r = 0; - } - } - - /* Source 1. */ - if (src1 <= ZERO_REG) { - src1_r = src1; - flags |= REG1_SOURCE; - } - else if (src1 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1_r = TMP_REG1; - } - else - src1_r = 0; - - /* Source 2. */ - if (src2 <= ZERO_REG) { - src2_r = src2; - flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - dst_r = src2_r; - } - else if (src2 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { - FAIL_IF(compiler->error); - src2_r = sugg_src2_r; - } - else - src2_r = 0; - - /* src1_r, src2_r and dst_r can be zero (=unprocessed). - All arguments are complex addressing modes, and it is a binary operator. 
*/ - if (src1_r == 0 && src2_r == 0 && dst_r == 0) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } - src1_r = TMP_REG1; - src2_r = TMP_REG2; - } - else if (src1_r == 0 && src2_r == 0) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - src1_r = TMP_REG1; - } - else if (src1_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - src1_r = TMP_REG1; - } - else if (src2_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); - src2_r = sugg_src2_r; - } - - if (dst_r == 0) - dst_r = TMP_REG2; - - if (src1_r == 0) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); - src1_r = TMP_REG1; - } - - if (src2_r == 0) { - FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); - src2_r = sugg_src2_r; - } - - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); - - if (flags & (FAST_DEST | SLOW_DEST)) { - if (flags & FAST_DEST) - FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw)); - else - FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0)); - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - switch (GET_OPCODE(op)) { - case SLJIT_BREAKPOINT: - case SLJIT_NOP: - return push_inst(compiler, NOP); - break; - case SLJIT_UMUL: - case SLJIT_SMUL: - 
FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); -#else - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); -#endif - case SLJIT_UDIV: - case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) { - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); - } - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); -#else - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? 
DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); -#endif - } - - return SLJIT_SUCCESS; -} - -#define EMIT_MOV(type, type_flags, type_cast) \ - emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; - sljit_si op_flags = GET_ALL_FLAGS(op); - - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - op = GET_OPCODE(op); - if ((src & SLJIT_IMM) && srcw == 0) - src = ZERO_REG; - - if (op_flags & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); - - if (op_flags & SLJIT_INT_OP) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { - if (src <= ZERO_REG && src == dst) { - if (!TYPE_CAST_NEEDED(op)) - return SLJIT_SUCCESS; - } -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) - op = SLJIT_MOV_UI; - if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) - op = SLJIT_MOVU_UI; - if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) - op = SLJIT_MOV_SI; - if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) - op = SLJIT_MOVU_SI; -#endif - } -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - else { - /* Most operations expect sign extended arguments. 
*/ - flags |= INT_DATA | SIGNED_DATA; - if (src & SLJIT_IMM) - srcw = (sljit_si)srcw; - } -#endif - } - - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_P: -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: -#endif - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - case SLJIT_MOV_UI: - return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui)); - - case SLJIT_MOV_SI: - return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si)); -#endif - - case SLJIT_MOV_UB: - return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub)); - - case SLJIT_MOV_SB: - return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb)); - - case SLJIT_MOV_UH: - return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh)); - - case SLJIT_MOV_SH: - return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh)); - - case SLJIT_MOVU: - case SLJIT_MOVU_P: -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: -#endif - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - case SLJIT_MOVU_UI: - return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui)); - - case SLJIT_MOVU_SI: - return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si)); -#endif - - case SLJIT_MOVU_UB: - return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub)); - - case SLJIT_MOVU_SB: - return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb)); - - case SLJIT_MOVU_UH: - return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh)); - - case SLJIT_MOVU_SH: - return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh)); - - case SLJIT_NOT: - return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return 
emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_CLZ: -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); -#else - return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); -#endif - } - - return SLJIT_SUCCESS; -} - -#undef EMIT_MOV - -#define TEST_SL_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) - -#define TEST_UL_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define TEST_SH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= SLJIT_W(0x7fffffff) && (srcw) >= SLJIT_W(-0x80000000)) -#else -#define TEST_SH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) -#endif - -#define TEST_UH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define TEST_ADD_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && (srcw) <= SLJIT_W(0x7fff7fff) && (srcw) >= SLJIT_W(-0x80000000)) -#else -#define TEST_ADD_IMM(src, srcw) \ - ((src) & SLJIT_IMM) -#endif - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define TEST_UI_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) -#else -#define TEST_UI_IMM(src, srcw) \ - ((src) & SLJIT_IMM) -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si flags = GET_FLAGS(op) ? 
ALT_SET_FLAGS : 0; - - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - if ((src1 & SLJIT_IMM) && src1w == 0) - src1 = ZERO_REG; - if ((src2 & SLJIT_IMM) && src2w == 0) - src2 = ZERO_REG; - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) { - /* Most operations expect sign extended arguments. */ - flags |= INT_DATA | SIGNED_DATA; - if (src1 & SLJIT_IMM) - src1w = (sljit_si)(src1w); - if (src2 & SLJIT_IMM) - src2w = (sljit_si)(src2w); - if (GET_FLAGS(op)) - flags |= ALT_SIGN_EXT; - } -#endif - if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); - if (src2 == TMP_REG2) - flags |= ALT_KEEP_CACHE; - - switch (GET_OPCODE(op)) { - case SLJIT_ADD: - if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); - } - if (TEST_SH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); - } - /* Range between -1 and -32768 is covered above. 
*/ - if (TEST_ADD_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_ADD_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_ADDC: - return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SUB: - if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); - } - if (TEST_SH_IMM(src2, -src2w)) { - compiler->imm = ((-src2w) >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - /* Range between -1 and -32768 is covered above. 
*/ - if (TEST_ADD_IMM(src2, -src2w)) { - compiler->imm = -src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); - } - } - if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { - if (!(op & SLJIT_SET_U)) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); - } - if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) { - compiler->imm = src2w; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); - } - if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) { - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - } - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 
0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SUBC: - return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_MUL: -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) - flags |= ALT_FORM2; -#endif - if (!GET_FLAGS(op)) { - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - /* Commutative unsigned operations. */ - if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { - if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_UL_IMM(src1, src1w)) { - compiler->imm = src1w; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); - } - if (TEST_UH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_UH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { - if (TEST_UI_IMM(src2, src2w)) { - compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_UI_IMM(src1, src1w)) { - compiler->imm = src1w; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); 
- } - } - return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_ASHR: - if (op & SLJIT_KEEP_FLAGS) - flags |= ALT_FORM3; - /* Fall through. */ - case SLJIT_SHL: - case SLJIT_LSHR: -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) - flags |= ALT_FORM2; -#endif - if (src2 & SLJIT_IMM) { - compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - check_sljit_get_register_index(reg); - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); - - return push_inst(compiler, *(sljit_ins*)instruction); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - /* Always available. */ - return 1; -} - -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? 
single : double) - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_fr; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - - return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); - } - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? TMP_FREG1 : dst; - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; - } - - switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src))); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src))); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src))); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_fr, flags = 0; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_fr = (dst > 
SLJIT_FLOAT_REG6) ? TMP_FREG2 : dst; - - if (src1 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1 = TMP_FREG1; - } else - flags |= ALT_FORM1; - } - - if (src2 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { - FAIL_IF(compiler->error); - src2 = TMP_FREG2; - } else - flags |= ALT_FORM2; - } - - if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - } - } - else if (flags & ALT_FORM1) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - else if (flags & ALT_FORM2) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - - if (flags & ALT_FORM1) - src1 = TMP_FREG1; - if (flags & ALT_FORM2) - src2 = TMP_FREG2; - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2))); - break; - - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2))); - break; - - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); - break; - - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2))); - break; - } - - if (dst_fr == TMP_FREG2) - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), 
TMP_FREG2, dst, dstw, 0, 0)); - - return SLJIT_SUCCESS; -} - -#undef FLOAT_DATA -#undef SELECT_FOP - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - if (dst <= ZERO_REG) - return push_inst(compiler, MFLR | D(dst)); - - /* Memory. */ - FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2))); - return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= ZERO_REG) - FAIL_IF(push_inst(compiler, MTLR | S(src))); - else { - if (src & SLJIT_MEM) - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); - FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2))); - } - return push_inst(compiler, BLR); -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct 
sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - return label; -} - -static sljit_ins get_bo_bi_flags(sljit_si type) -{ - switch (type) { - case SLJIT_C_EQUAL: - return (12 << 21) | (2 << 16); - - case SLJIT_C_NOT_EQUAL: - return (4 << 21) | (2 << 16); - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - return (12 << 21) | ((4 + 0) << 16); - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - return (4 << 21) | ((4 + 0) << 16); - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - return (12 << 21) | ((4 + 1) << 16); - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - return (4 << 21) | ((4 + 1) << 16); - - case SLJIT_C_SIG_LESS: - return (12 << 21) | (0 << 16); - - case SLJIT_C_SIG_GREATER_EQUAL: - return (4 << 21) | (0 << 16); - - case SLJIT_C_SIG_GREATER: - return (12 << 21) | (1 << 16); - - case SLJIT_C_SIG_LESS_EQUAL: - return (4 << 21) | (1 << 16); - - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: - return (12 << 21) | (3 << 16); - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - return (4 << 21) | (3 << 16); - - case SLJIT_C_FLOAT_EQUAL: - return (12 << 21) | ((4 + 2) << 16); - - case SLJIT_C_FLOAT_NOT_EQUAL: - return (4 << 21) | ((4 + 2) << 16); - - case SLJIT_C_FLOAT_UNORDERED: - return (12 << 21) | ((4 + 3) << 16); - - case SLJIT_C_FLOAT_ORDERED: - return (4 << 21) | ((4 + 3) << 16); - - default: - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); - return (20 << 21); - } -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - struct sljit_jump *jump; - sljit_ins bo_bi_flags; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - bo_bi_flags = get_bo_bi_flags(type & 0xff); - if (!bo_bi_flags) - return NULL; - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - /* In PPC, we 
don't need to touch the arguments. */ - if (type >= SLJIT_JUMP) - jump->flags |= UNCOND_B; - - PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_REG1))); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - struct sljit_jump *jump = NULL; - sljit_si src_r; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= ZERO_REG) - src_r = src; - else if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR | UNCOND_B); - jump->u.target = srcw; - - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - src_r = TMP_REG2; - } - else { - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - src_r = TMP_REG2; - } - - FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); - if (jump) - jump->addr = compiler->size; - return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); -} - -/* Get a bit from CR, all other bits are zeroed. 
*/ -#define GET_CR_BIT(bit, dst) \ - FAIL_IF(push_inst(compiler, MFCR | D(dst))); \ - FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); - -#define INVERT_BIT(dst) \ - FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_si reg, input_flags; - sljit_si flags = GET_ALL_FLAGS(op); - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - ADJUST_LOCAL_OFFSET(dst, dstw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - op = GET_OPCODE(op); - reg = (op < SLJIT_ADD && dst <= ZERO_REG) ? dst : TMP_REG2; - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - input_flags = (flags & SLJIT_INT_OP) ? 
INT_DATA : WORD_DATA; -#else - input_flags = WORD_DATA; -#endif - FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } - - switch (type) { - case SLJIT_C_EQUAL: - GET_CR_BIT(2, reg); - break; - - case SLJIT_C_NOT_EQUAL: - GET_CR_BIT(2, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - GET_CR_BIT(4 + 0, reg); - break; - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - GET_CR_BIT(4 + 0, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - GET_CR_BIT(4 + 1, reg); - break; - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - GET_CR_BIT(4 + 1, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_SIG_LESS: - GET_CR_BIT(0, reg); - break; - - case SLJIT_C_SIG_GREATER_EQUAL: - GET_CR_BIT(0, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_SIG_GREATER: - GET_CR_BIT(1, reg); - break; - - case SLJIT_C_SIG_LESS_EQUAL: - GET_CR_BIT(1, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: - GET_CR_BIT(3, reg); - break; - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - GET_CR_BIT(3, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_FLOAT_EQUAL: - GET_CR_BIT(4 + 2, reg); - break; - - case SLJIT_C_FLOAT_NOT_EQUAL: - GET_CR_BIT(4 + 2, reg); - INVERT_BIT(reg); - break; - - case SLJIT_C_FLOAT_UNORDERED: - GET_CR_BIT(4 + 3, reg); - break; - - case SLJIT_C_FLOAT_ORDERED: - GET_CR_BIT(4 + 3, reg); - INVERT_BIT(reg); - break; - - default: - SLJIT_ASSERT_STOP(); - break; - } - - if (op < SLJIT_ADD) { -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op == SLJIT_MOV) - input_flags = WORD_DATA; - else { - op = SLJIT_MOV_UI; - input_flags = INT_DATA; - } -#else - op = SLJIT_MOV; - input_flags = WORD_DATA; -#endif - return (reg == TMP_REG2) ? 
emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS; - } - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, op | flags, dst, dstw, src, srcw, TMP_REG2, 0); -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - struct sljit_const *const_; - sljit_si reg; - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - - reg = (dst <= ZERO_REG) ? dst : TMP_REG2; - - PTR_FAIL_IF(emit_const(compiler, reg, init_value)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); - return const_; -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeSPARC_32.c b/deps/libmagic/pcre/sljit/sljitNativeSPARC_32.c deleted file mode 100644 index 80479bf..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeSPARC_32.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw imm) -{ - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); - - FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); - return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; -} - -#define ARG2(flags, src2) ((flags & SRC2_IMM) ? 
IMM(src2) : S2(src2)) - -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_sw src2) -{ - SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); - - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - case SLJIT_MOV_P: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (dst != src2) - return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_UB) - return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); - return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); - return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. 
*/ - FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS))); - FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); - - /* Loop. */ - FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); - FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS)); - - case SLJIT_ADD: - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_ADDC: - return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_SUB: - return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_SUBC: - return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_MUL: - FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - if (!(flags & SET_FLAGS)) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4))); - return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS); - - case SLJIT_AND: - return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & 
SET_FLAGS)); - - case SLJIT_OR: - return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_XOR: - return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); - - case SLJIT_SHL: - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - - case SLJIT_LSHR: - FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - - case SLJIT_ASHR: - FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - } - - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) -{ - FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); - return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff); - inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - sljit_ins *inst = (sljit_ins*)addr; - - inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); - inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} diff --git 
a/deps/libmagic/pcre/sljit/sljitNativeSPARC_common.c b/deps/libmagic/pcre/sljit/sljitNativeSPARC_common.c deleted file mode 100644 index c6522be..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeSPARC_common.c +++ /dev/null @@ -1,1348 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ - return "SPARC" SLJIT_CPUINFO; -} - -/* Length of an instruction word - Both for sparc-32 and sparc-64 */ -typedef sljit_ui sljit_ins; - -static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) -{ - if (SLJIT_UNLIKELY(from == to)) - return; - - do { - __asm__ volatile ( - "flush %0\n" - : : "r"(from) - ); - /* Operates at least on doubleword. */ - from += 2; - } while (from < to); - - if (from == to) { - /* Flush the last word. */ - to --; - __asm__ volatile ( - "flush %0\n" - : : "r"(to) - ); - } -} - -/* TMP_REG2 is not used by getput_arg */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define LINK_REG (SLJIT_NO_REGISTERS + 5) - -#define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) - -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15 -}; - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -#define D(d) (reg_map[d] << 25) -#define DA(d) ((d) << 25) -#define S1(s1) (reg_map[s1] << 14) -#define S2(s2) (reg_map[s2]) -#define S1A(s1) ((s1) << 14) -#define S2A(s2) (s2) -#define IMM_ARG 0x2000 -#define DOP(op) ((op) << 5) -#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) - -#define DR(dr) (reg_map[dr]) -#define OPC1(opcode) ((opcode) << 30) -#define OPC2(opcode) ((opcode) << 22) -#define OPC3(opcode) ((opcode) << 19) -#define SET_FLAGS OPC3(0x10) - -#define ADD (OPC1(0x2) | OPC3(0x00)) -#define ADDC (OPC1(0x2) | OPC3(0x08)) -#define AND (OPC1(0x2) | OPC3(0x01)) -#define ANDN (OPC1(0x2) | OPC3(0x05)) -#define CALL (OPC1(0x1)) -#define FABSS (OPC1(0x2) | OPC3(0x34) | DOP(0x09)) -#define FADDD (OPC1(0x2) | OPC3(0x34) | DOP(0x42)) -#define 
FADDS (OPC1(0x2) | OPC3(0x34) | DOP(0x41)) -#define FCMPD (OPC1(0x2) | OPC3(0x35) | DOP(0x52)) -#define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) -#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) -#define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) -#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) -#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) -#define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) -#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) -#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) -#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) -#define JMPL (OPC1(0x2) | OPC3(0x38)) -#define NOP (OPC1(0x0) | OPC2(0x04)) -#define OR (OPC1(0x2) | OPC3(0x02)) -#define ORN (OPC1(0x2) | OPC3(0x06)) -#define RDY (OPC1(0x2) | OPC3(0x28) | S1A(0)) -#define RESTORE (OPC1(0x2) | OPC3(0x3d)) -#define SAVE (OPC1(0x2) | OPC3(0x3c)) -#define SETHI (OPC1(0x0) | OPC2(0x04)) -#define SLL (OPC1(0x2) | OPC3(0x25)) -#define SLLX (OPC1(0x2) | OPC3(0x25) | (1 << 12)) -#define SRA (OPC1(0x2) | OPC3(0x27)) -#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) -#define SRL (OPC1(0x2) | OPC3(0x26)) -#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) -#define SUB (OPC1(0x2) | OPC3(0x04)) -#define SUBC (OPC1(0x2) | OPC3(0x0c)) -#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) -#define WRY (OPC1(0x2) | OPC3(0x30) | DA(0)) -#define XOR (OPC1(0x2) | OPC3(0x03)) -#define XNOR (OPC1(0x2) | OPC3(0x07)) - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define MAX_DISP (0x1fffff) -#define MIN_DISP (-0x200000) -#define DISP_MASK (0x3fffff) - -#define BICC (OPC1(0x0) | OPC2(0x2)) -#define FBFCC (OPC1(0x0) | OPC2(0x6)) -#define SLL_W SLL -#define SDIV (OPC1(0x2) | OPC3(0x0f)) -#define SMUL (OPC1(0x2) | OPC3(0x0b)) -#define UDIV (OPC1(0x2) | OPC3(0x0e)) -#define UMUL (OPC1(0x2) | OPC3(0x0a)) -#else -#define SLL_W SLLX -#endif - -#define SIMM_MAX (0x0fff) -#define SIMM_MIN (-0x1000) - -/* dest_reg is the absolute name of the register - Useful for reordering instructions in the delay 
slot. */ -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) -{ - sljit_ins *ptr; - SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS - || (delay_slot & DST_INS_MASK) == MOVABLE_INS - || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f)); - ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); - FAIL_IF(!ptr); - *ptr = ins; - compiler->size++; - compiler->delay_slot = delay_slot; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) -{ - sljit_sw diff; - sljit_uw target_addr; - sljit_ins *inst; - sljit_ins saved_inst; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return code_ptr; - - if (jump->flags & JUMP_ADDR) - target_addr = jump->u.target; - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); - } - inst = (sljit_ins*)jump->addr; - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (jump->flags & IS_CALL) { - /* Call is always patchable on sparc 32. */ - jump->flags |= PATCH_CALL; - if (jump->flags & IS_MOVABLE) { - inst[0] = inst[-1]; - inst[-1] = CALL; - jump->addr -= sizeof(sljit_ins); - return inst; - } - inst[0] = CALL; - inst[1] = NOP; - return inst + 1; - } -#else - /* Both calls and BPr instructions shall not pass this point. 
*/ -#error "Implementation required" -#endif - - if (jump->flags & IS_COND) - inst--; - - if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2; - if (diff <= MAX_DISP && diff >= MIN_DISP) { - jump->flags |= PATCH_B; - inst--; - if (jump->flags & IS_COND) { - saved_inst = inst[0]; - inst[0] = inst[1] ^ (1 << 28); - inst[1] = saved_inst; - } else { - inst[1] = inst[0]; - inst[0] = BICC | DA(0x8); - } - jump->addr = (sljit_uw)inst; - return inst + 1; - } - } - - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; - if (diff <= MAX_DISP && diff >= MIN_DISP) { - jump->flags |= PATCH_B; - if (jump->flags & IS_COND) - inst[0] ^= (1 << 28); - else - inst[0] = BICC | DA(0x8); - inst[1] = NOP; - jump->addr = (sljit_uw)inst; - return inst + 1; - } - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_ins *code; - sljit_ins *code_ptr; - sljit_ins *buf_ptr; - sljit_ins *buf_end; - sljit_uw word_count; - sljit_uw addr; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - word_count = 0; - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - do { - buf_ptr = (sljit_ins*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 2); - do { - *code_ptr = *buf_ptr++; - SLJIT_ASSERT(!label || label->size >= word_count); - SLJIT_ASSERT(!jump || jump->addr >= word_count); - SLJIT_ASSERT(!const_ || const_->addr >= word_count); - /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - /* Just recording the address. 
*/ - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else - jump->addr = (sljit_uw)(code_ptr - 6); -#endif - code_ptr = optimize_jump(jump, code_ptr, code); - jump = jump->next; - } - if (const_ && const_->addr == word_count) { - /* Just recording the address. */ - const_->addr = (sljit_uw)code_ptr; - const_ = const_->next; - } - code_ptr ++; - word_count ++; - } while (buf_ptr < buf_end); - - buf = buf->next; - } while (buf); - - if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - SLJIT_ASSERT(code_ptr - code <= (sljit_si)compiler->size); - - jump = compiler->jumps; - while (jump) { - do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; - - if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - jump->addr) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); - buf_ptr[0] = CALL | (addr & 0x3fffffff); - break; - } - if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - jump->addr) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); - buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); - break; - } - - /* Set the fields of immediate loads. 
*/ -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff); - buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff); -#else -#error "Implementation required" -#endif - } while (0); - jump = jump->next; - } - - - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size * sizeof(sljit_ins); - SLJIT_CACHE_FLUSH(code, code_ptr); - return code; -} - -/* --------------------------------------------------------------------- */ -/* Entry, exit */ -/* --------------------------------------------------------------------- */ - -/* Creates an index in data_transfer_insts array. */ -#define LOAD_DATA 0x01 -#define WORD_DATA 0x00 -#define BYTE_DATA 0x02 -#define HALF_DATA 0x04 -#define INT_DATA 0x06 -#define SIGNED_DATA 0x08 -/* Separates integer and floating point registers */ -#define GPR_REG 0x0f -#define DOUBLE_DATA 0x10 - -#define MEM_MASK 0x1f - -#define WRITE_BACK 0x00020 -#define ARG_TEST 0x00040 -#define ALT_KEEP_CACHE 0x00080 -#define CUMULATIVE_OP 0x00100 -#define IMM_OP 0x00200 -#define SRC2_IMM 0x00400 - -#define REG_DEST 0x00800 -#define REG2_SOURCE 0x01000 -#define SLOW_SRC1 0x02000 -#define SLOW_SRC2 0x04000 -#define SLOW_DEST 0x08000 - -/* SET_FLAGS (0x10 << 19) also belong here! 
*/ - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#include "sljitNativeSPARC_32.c" -#else -#include "sljitNativeSPARC_64.c" -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - local_size += 23 * sizeof(sljit_sw); - local_size = (local_size + 7) & ~0x7; - compiler->local_size = local_size; - - if (local_size <= SIMM_MAX) { - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS)); - } - else { - FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS)); - } - - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3))); - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - local_size += 23 * sizeof(sljit_sw); - compiler->local_size = (local_size + 7) & ~0x7; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, 
sljit_si op, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - if (op != SLJIT_MOV || !(src <= TMP_REG3)) { - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - src = SLJIT_SCRATCH_REG1; - } - - FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define ARCH_32_64(a, b) a -#else -#define ARCH_32_64(a, b) b -#endif - -static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { -/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), -/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), -/* u b s */ OPC1(3) | OPC3(0x05) /* stb */, -/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */, -/* u h s */ OPC1(3) | OPC3(0x06) /* sth */, -/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */, -/* u i s */ OPC1(3) | OPC3(0x04) /* stw */, -/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */, - -/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), -/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), -/* s b s */ OPC1(3) | OPC3(0x05) /* stb */, -/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */, -/* s h s */ OPC1(3) | OPC3(0x06) /* sth */, -/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */, -/* s i s */ OPC1(3) | OPC3(0x04) /* stw */, -/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */), - -/* d s */ OPC1(3) | OPC3(0x27), -/* d l */ OPC1(3) | OPC3(0x23), -/* s s */ OPC1(3) | OPC3(0x24), -/* s l */ OPC1(3) | OPC3(0x20), -}; - -#undef ARCH_32_64 - -/* Can perform an operation using at most 1 instruction. 
*/ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - SLJIT_ASSERT(arg & SLJIT_MEM); - - if (!(flags & WRITE_BACK) || !(arg & 0xf)) { - if ((!(arg & 0xf0) && argw <= SIMM_MAX && argw >= SIMM_MIN) - || ((arg & 0xf0) && (argw & 0x3) == 0)) { - /* Works for both absoulte and relative addresses (immediate case). */ - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] - | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)) - | S1(arg & 0xf) | ((arg & 0xf0) ? S2((arg >> 4) & 0xf) : IMM(argw)), - ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); - return -1; - } - } - return 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - - /* Simple operation except for updates. */ - if (arg & 0xf0) { - argw &= 0x3; - SLJIT_ASSERT(argw); - next_argw &= 0x3; - if ((arg & 0xf0) == (next_arg & 0xf0) && argw == next_argw) - return 1; - return 0; - } - - if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) - return 1; - return 0; -} - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) -{ - sljit_si base, arg2, delay_slot; - sljit_ins dest; - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(next_arg & SLJIT_MEM)) { - next_arg = 0; - next_argw = 0; - } - - base = arg & 0xf; - if (SLJIT_UNLIKELY(arg & 0xf0)) { - argw &= 0x3; - SLJIT_ASSERT(argw != 0); - - /* Using the cache. 
*/ - if (((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) && (argw == compiler->cache_argw)) - arg2 = TMP_REG3; - else { - if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) { - compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); - compiler->cache_argw = argw; - arg2 = TMP_REG3; - } - else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && (reg << 4) != (arg & 0xf0)) - arg2 = reg; - else /* It must be a mov operation, so tmp1 must be free to use. */ - arg2 = TMP_REG1; - FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1((arg >> 4) & 0xf) | IMM_ARG | argw, DR(arg2))); - } - } - else { - /* Using the cache. */ - if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { - if (argw != compiler->cache_argw) { - FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - compiler->cache_argw = argw; - } - arg2 = TMP_REG3; - } else { - if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) { - compiler->cache_arg = SLJIT_MEM; - compiler->cache_argw = argw; - arg2 = TMP_REG3; - } - else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base) - arg2 = reg; - else /* It must be a mov operation, so tmp1 must be free to use. */ - arg2 = TMP_REG1; - FAIL_IF(load_immediate(compiler, arg2, argw)); - } - } - - dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)); - delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? 
DR(reg) : MOVABLE_INS; - if (!base) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); - if (!(flags & WRITE_BACK)) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot)); - return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base)); -} - -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ - sljit_si dst_r = TMP_REG2; - sljit_si src1_r; - sljit_sw src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; - - if (!(flags & ALT_KEEP_CACHE)) { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - } - - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - } - else if (dst <= TMP_REG3) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - sugg_src2_r = dst_r; - } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) - flags |= SLOW_DEST; - - if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w) { - if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) { - flags |= SRC2_IMM; - src2_r = src2w; - } - } - if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { - if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) { - flags |= SRC2_IMM; - src2_r = src1w; - - /* And swap arguments. */ - src1 = src2; - src1w = src2w; - src2 = SLJIT_IMM; - /* src2w = src2_r unneeded. */ - } - } - } - - /* Source 1. */ - if (src1 <= TMP_REG3) - src1_r = src1; - else if (src1 & SLJIT_IMM) { - if (src1w) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - else - src1_r = 0; - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC1; - src1_r = TMP_REG1; - } - - /* Source 2. 
*/ - if (src2 <= TMP_REG3) { - src2_r = src2; - flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) - dst_r = src2_r; - } - else if (src2 & SLJIT_IMM) { - if (!(flags & SRC2_IMM)) { - if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else { - src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) - dst_r = 0; - } - } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC2; - src2_r = sugg_src2_r; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); - - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); - - if (dst & SLJIT_MEM) { - if (!(flags & SLOW_DEST)) { - getput_arg_fast(compiler, flags, dst_r, dst, dstw); - return compiler->error; - } - return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - return push_inst(compiler, TA, UNMOVABLE_INS); - case 
SLJIT_NOP: - return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_UMUL: - case SLJIT_SMUL: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); -#else -#error "Implementation required" -#endif - case SLJIT_UDIV: - case SLJIT_SDIV: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (op == SLJIT_UDIV) - FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); - else { - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); - } - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - return SLJIT_SUCCESS; -#else -#error "Implementation required" -#endif - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si flags = GET_FLAGS(op) ? 
SET_FLAGS : 0; - - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); - - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); - - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); - - case SLJIT_MOVU: - case SLJIT_MOVU_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); - - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_sb)srcw : srcw); - - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); - - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); - - case SLJIT_NOT: - case SLJIT_CLZ: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; - - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_ADD: - case SLJIT_ADDC: - case SLJIT_MUL: - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SUB: - case SLJIT_SUBC: - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SHL: - case SLJIT_LSHR: - case SLJIT_ASHR: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (src2 & SLJIT_IMM) - src2w &= 0x1f; -#else - SLJIT_ASSERT_STOP(); -#endif - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - check_sljit_get_register_index(reg); - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler 
*compiler, - void *instruction, sljit_si size) -{ - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); - - return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ - return 1; -} - -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_fr; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; - - return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS); - } - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG1 : (dst << 1); - - if (src > SLJIT_FLOAT_REG6) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; - } - else - src <<= 1; - - switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) { - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (!(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - } - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_fr, flags = 0; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_fr = (dst > SLJIT_FLOAT_REG6) ? 
TMP_FREG2 : (dst << 1); - - if (src1 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1 = TMP_FREG1; - } else - flags |= SLOW_SRC1; - } - else - src1 <<= 1; - - if (src2 > SLJIT_FLOAT_REG6) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { - FAIL_IF(compiler->error); - src2 = TMP_FREG2; - } else - flags |= SLOW_SRC2; - } - else - src2 <<= 1; - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - - if (flags & SLOW_SRC1) - src1 = TMP_FREG1; - if (flags & SLOW_SRC2) - src2 = TMP_FREG2; - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); - break; - - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); - break; - - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); - break; - - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); - break; - } - - if (dst_fr 
== TMP_FREG2) - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); - - return SLJIT_SUCCESS; -} - -#undef FLOAT_DATA -#undef SELECT_FOP - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - if (dst <= TMP_REG3) - return push_inst(compiler, OR | D(dst) | S1(0) | S2(LINK_REG), DR(dst)); - - /* Memory. */ - return emit_op_mem(compiler, WORD_DATA, LINK_REG, dst, dstw); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) - FAIL_IF(push_inst(compiler, OR | D(LINK_REG) | S1(0) | S2(src), DR(LINK_REG))); - else if (src & SLJIT_MEM) - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, LINK_REG, src, srcw)); - else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, LINK_REG, srcw)); - - FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(LINK_REG) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, NOP, UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return 
compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - compiler->delay_slot = UNMOVABLE_INS; - return label; -} - -static sljit_ins get_cc(sljit_si type) -{ - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - return DA(0x1); - - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - return DA(0x9); - - case SLJIT_C_LESS: - return DA(0x5); - - case SLJIT_C_GREATER_EQUAL: - return DA(0xd); - - case SLJIT_C_GREATER: - return DA(0xc); - - case SLJIT_C_LESS_EQUAL: - return DA(0x4); - - case SLJIT_C_SIG_LESS: - return DA(0x3); - - case SLJIT_C_SIG_GREATER_EQUAL: - return DA(0xb); - - case SLJIT_C_SIG_GREATER: - return DA(0xa); - - case SLJIT_C_SIG_LESS_EQUAL: - return DA(0x2); - - case SLJIT_C_OVERFLOW: - return DA(0x7); - - case SLJIT_C_NOT_OVERFLOW: - return DA(0xf); - - case SLJIT_C_FLOAT_EQUAL: - return DA(0x9); - - case SLJIT_C_FLOAT_NOT_EQUAL: /* Unordered. */ - return DA(0x1); - - case SLJIT_C_FLOAT_LESS: - return DA(0x4); - - case SLJIT_C_FLOAT_GREATER_EQUAL: /* Unordered. */ - return DA(0xc); - - case SLJIT_C_FLOAT_LESS_EQUAL: - return DA(0xd); - - case SLJIT_C_FLOAT_GREATER: /* Unordered. 
*/ - return DA(0x5); - - case SLJIT_C_FLOAT_UNORDERED: - return DA(0x7); - - case SLJIT_C_FLOAT_ORDERED: - return DA(0xf); - - default: - SLJIT_ASSERT_STOP(); - return DA(0x8); - } -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - struct sljit_jump *jump; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - if (type < SLJIT_C_FLOAT_EQUAL) { - jump->flags |= IS_COND; - if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) - jump->flags |= IS_MOVABLE; -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); -#else -#error "Implementation required" -#endif - } - else if (type < SLJIT_JUMP) { - jump->flags |= IS_COND; - if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) - jump->flags |= IS_MOVABLE; -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); -#else -#error "Implementation required" -#endif - } else { - if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) - jump->flags |= IS_MOVABLE; - if (type >= SLJIT_FAST_CALL) - jump->flags |= IS_CALL; - } - - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? 
LINK_REG : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - struct sljit_jump *jump = NULL; - sljit_si src_r; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (src <= TMP_REG3) - src_r = src; - else if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; - if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) - jump->flags |= IS_MOVABLE; - if (type >= SLJIT_FAST_CALL) - jump->flags |= IS_CALL; - - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - src_r = TMP_REG2; - } - else { - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); - src_r = TMP_REG2; - } - - FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? LINK_REG : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); - if (jump) - jump->addr = compiler->size; - return push_inst(compiler, NOP, UNMOVABLE_INS); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - ADJUST_LOCAL_OFFSET(dst, dstw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - op = GET_OPCODE(op); - reg = (op < SLJIT_ADD && dst <= TMP_REG3) ? 
dst : TMP_REG2; - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } - - if (type < SLJIT_C_FLOAT_EQUAL) - FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); - else - FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); - - FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); - - if (op >= SLJIT_ADD) - return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); - - return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; -#else -#error "Implementation required" -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - sljit_si reg; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - - reg = (dst <= TMP_REG3) ? dst : TMP_REG2; - - PTR_FAIL_IF(emit_const(compiler, reg, init_value)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - - return const_; -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeX86_32.c b/deps/libmagic/pcre/sljit/sljitNativeX86_32.c deleted file mode 100644 index 03a595b..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeX86_32.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* x86 32-bit arch dependent functions. 
*/ - -static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm) -{ - sljit_ub *inst; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); - FAIL_IF(!inst); - INC_SIZE(1 + sizeof(sljit_sw)); - *inst++ = opcode; - *(sljit_sw*)inst = imm; - return SLJIT_SUCCESS; -} - -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) -{ - if (type == SLJIT_JUMP) { - *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { - *code_ptr++ = CALL_i32; - jump->addr++; - } - else { - *code_ptr++ = GROUP_0F; - *code_ptr++ = get_jump_code(type); - jump->addr += 2; - } - - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MW; - else - *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4); - code_ptr += 4; - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size; - sljit_si locals_offset; - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; - compiler->args = args; - compiler->flags_saved = 0; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); -#else - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? 
(2 + args * 3) : 0); -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - PUSH_REG(reg_map[TMP_REGISTER]); -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[TMP_REGISTER] << 3) | 0x4 /* esp */; - } -#endif - if (saveds > 2) - PUSH_REG(reg_map[SLJIT_SAVED_REG3]); - if (saveds > 1) - PUSH_REG(reg_map[SLJIT_SAVED_REG2]); - if (saveds > 0) - PUSH_REG(reg_map[SLJIT_SAVED_REG1]); - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3]; - } - if (args > 1) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2]; - } - if (args > 2) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */; - *inst++ = 0x24; - *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. 
*/ - } -#else - if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REGISTER]; - *inst++ = sizeof(sljit_sw) * 2; - } - if (args > 1) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REGISTER]; - *inst++ = sizeof(sljit_sw) * 3; - } - if (args > 2) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REGISTER]; - *inst++ = sizeof(sljit_sw) * 4; - } -#endif - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words); - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; - local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); - - compiler->local_size = local_size; -#ifdef _WIN32 - if (local_size > 1024) { -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); -#else - local_size -= FIXED_LOCALS_OFFSET; - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); - FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); -#endif - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); - } -#endif - - SLJIT_ASSERT(local_size > 0); - return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); -} - -SLJIT_API_FUNC_ATTRIBUTE void 
sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si locals_offset; - - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; - compiler->args = args; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; - compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - sljit_si size; - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - SLJIT_ASSERT(compiler->args >= 0); - - compiler->flags_saved = 0; - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - SLJIT_ASSERT(compiler->local_size > 0); - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); - - size = 2 + (compiler->saveds <= 3 ? 
compiler->saveds : 3); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - size += 2; -#else - if (compiler->args > 0) - size += 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - - if (compiler->saveds > 0) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds > 1) - POP_REG(reg_map[SLJIT_SAVED_REG2]); - if (compiler->saveds > 2) - POP_REG(reg_map[SLJIT_SAVED_REG3]); - POP_REG(reg_map[TMP_REGISTER]); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - RET_I16(sizeof(sljit_sw)); - else - RET(); -#else - RET(); -#endif - - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -/* Size contains the flags as well. */ -static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, - /* The register or immediate operand. */ - sljit_si a, sljit_sw imma, - /* The general operand (not immediate). */ - sljit_si b, sljit_sw immb) -{ - sljit_ub *inst; - sljit_ub *buf_ptr; - sljit_si flags = size & ~0xf; - sljit_si inst_size; - - /* Both cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); - /* Size flags not allowed for typed instructions. */ - SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); - /* Both size flags cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - /* SSE2 and immediate is not possible. 
*/ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif - - size &= 0xf; - inst_size = size; - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; -#endif - if (flags & EX86_PREF_66) - inst_size++; - - /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. */ - if (b & SLJIT_MEM) { - if ((b & 0x0f) == SLJIT_UNUSED) - inst_size += sizeof(sljit_sw); - else if (immb != 0 && !(b & 0xf0)) { - /* Immediate operand. */ - if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_sb); - else - inst_size += sizeof(sljit_sw); - } - - if ((b & 0xf) == SLJIT_LOCALS_REG && !(b & 0xf0)) - b |= SLJIT_LOCALS_REG << 4; - - if ((b & 0xf0) != SLJIT_UNUSED) - inst_size += 1; /* SIB byte. */ - } - - /* Calculate size of a. */ - if (a & SLJIT_IMM) { - if (flags & EX86_BIN_INS) { - if (imma <= 127 && imma >= -128) { - inst_size += 1; - flags |= EX86_BYTE_ARG; - } else - inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { - imma &= 0x1f; - if (imma != 1) { - inst_size ++; - flags |= EX86_BYTE_ARG; - } - } else if (flags & EX86_BYTE_ARG) - inst_size++; - else if (flags & EX86_HALF_ARG) - inst_size += sizeof(short); - else - inst_size += sizeof(sljit_sw); - } - else - SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); - PTR_FAIL_IF(!inst); - - /* Encoding the byte. */ - INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (flags & EX86_PREF_F2) - *inst++ = 0xf2; - if (flags & EX86_PREF_F3) - *inst++ = 0xf3; -#endif - if (flags & EX86_PREF_66) - *inst++ = 0x66; - - buf_ptr = inst + size; - - /* Encode mod/rm byte. 
*/ - if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) - *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - - if ((a & SLJIT_IMM) || (a == 0)) - *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) - *buf_ptr = reg_map[a] << 3; - else - *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_map[a] << 3; -#endif - } - else { - if (a & SLJIT_IMM) { - if (imma == 1) - *inst = GROUP_SHIFT_1; - else - *inst = GROUP_SHIFT_N; - } else - *inst = GROUP_SHIFT_CL; - *buf_ptr = 0; - } - - if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_map[b]; -#endif - else if ((b & 0x0f) != SLJIT_UNUSED) { - if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { - if (immb != 0) { - if (immb <= 127 && immb >= -128) - *buf_ptr |= 0x40; - else - *buf_ptr |= 0x80; - } - - if ((b & 0xf0) == SLJIT_UNUSED) - *buf_ptr++ |= reg_map[b & 0x0f]; - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & 0x0f] | (reg_map[(b >> 4) & 0x0f] << 3); - } - - if (immb != 0) { - if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ - else { - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_sw); - } - } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & 0x0f] | (reg_map[(b >> 4) & 0x0f] << 3) | (immb << 6); - } - } - else { - *buf_ptr++ |= 0x05; - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_sw); - } - - if (a & SLJIT_IMM) { - if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; - else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; - else if (!(flags & EX86_SHIFT_INS)) - *(sljit_sw*)buf_ptr = imma; - } - - return !(flags & EX86_SHIFT_INS) ? 
inst : (inst + 1); -} - -/* --------------------------------------------------------------------- */ -/* Call / return instructions */ -/* --------------------------------------------------------------------- */ - -static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) -{ - sljit_ub *inst; - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); - FAIL_IF(!inst); - INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); - - if (type >= SLJIT_CALL3) - PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]); - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1]; -#else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); - FAIL_IF(!inst); - INC_SIZE(4 * (type - SLJIT_CALL0)); - - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; - *inst++ = 0; - if (type >= SLJIT_CALL2) { - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; - *inst++ = sizeof(sljit_sw); - } - if (type >= SLJIT_CALL3) { - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; - *inst++ = 2 * sizeof(sljit_sw); - } -#endif - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - dst = TMP_REGISTER; - - if (dst <= TMP_REGISTER) { - /* Unused dest is possible here. 
*/ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - - INC_SIZE(1); - POP_REG(reg_map[dst]); - return SLJIT_SUCCESS; - } - - /* Memory. */ - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!inst); - *inst++ = POP_rm; - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - CHECK_EXTRA_REGS(src, srcw, (void)0); - - if (src <= TMP_REGISTER) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); - FAIL_IF(!inst); - - INC_SIZE(1 + 1); - PUSH_REG(reg_map[src]); - } - else if (src & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_FF; - *inst |= PUSH_rm; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - } - else { - /* SLJIT_IMM. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); - FAIL_IF(!inst); - - INC_SIZE(5 + 1); - *inst++ = PUSH_i32; - *(sljit_sw*)inst = srcw; - inst += sizeof(sljit_sw); - } - - RET(); - return SLJIT_SUCCESS; -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeX86_64.c b/deps/libmagic/pcre/sljit/sljitNativeX86_64.c deleted file mode 100644 index 28f04fd..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeX86_64.c +++ /dev/null @@ -1,810 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* x86 64-bit arch dependent functions. */ - -static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) -{ - sljit_ub *inst; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); - FAIL_IF(!inst); - INC_SIZE(2 + sizeof(sljit_sw)); - *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); - *(sljit_sw*)inst = imm; - return SLJIT_SUCCESS; -} - -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) -{ - if (type < SLJIT_JUMP) { - /* Invert type. 
*/ - *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; - *code_ptr++ = 10 + 3; - } - - SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first); - *code_ptr++ = REX_W | REX_B; - *code_ptr++ = MOV_r_i32 + 1; - jump->addr = (sljit_uw)code_ptr; - - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MD; - else - *(sljit_sw*)code_ptr = jump->u.target; - - code_ptr += sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); - - return code_ptr; -} - -static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type) -{ - sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si)); - - if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) { - *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32; - *(sljit_sw*)code_ptr = delta; - } - else { - SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); - *code_ptr++ = REX_W | REX_B; - *code_ptr++ = MOV_r_i32 + 1; - *(sljit_sw*)code_ptr = addr; - code_ptr += sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); - } - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si size, pushed_size; - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; - compiler->flags_saved = 0; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - size = saveds; - /* Including the return address saved by the call instruction. 
*/ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifndef _WIN64 - if (saveds >= 2) - size += saveds - 1; -#else - if (saveds >= 4) - size += saveds - 3; - if (scratches >= 5) { - size += (5 - 4) * 2; - pushed_size += sizeof(sljit_sw); - } -#endif - size += args * 3; - if (size > 0) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - if (saveds >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); - } - if (saveds >= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); - } - if (saveds >= 3) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (saveds >= 2) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (saveds >= 1) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg); - PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]); - } -#ifdef _WIN64 - if (scratches >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif - -#ifndef _WIN64 - if (args > 0) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */; - } - if (args > 1) { - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */; - } - if (args > 2) { - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] 
<< 3) | 0x2 /* rdx */; - } -#else - if (args > 0) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */; - } - if (args > 1) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */; - } - if (args > 2) { - *inst++ = REX_W | REX_B; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */; - } -#endif - } - - local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; - compiler->local_size = local_size; -#ifdef _WIN64 - if (local_size > 1024) { - /* Allocate stack for the callback, which grows the stack. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si))); - FAIL_IF(!inst); - INC_SIZE(4 + (3 + sizeof(sljit_si))); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | 4; - /* Pushed size must be divisible by 8. */ - SLJIT_ASSERT(!(pushed_size & 0x7)); - if (pushed_size & 0x8) { - *inst++ = 5 * sizeof(sljit_sw); - local_size -= 5 * sizeof(sljit_sw); - } else { - *inst++ = 4 * sizeof(sljit_sw); - local_size -= 4 * sizeof(sljit_sw); - } - /* Second instruction */ - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg); - *inst++ = REX_W; - *inst++ = MOV_rm_i32; - *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1]; - *(sljit_si*)inst = local_size; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); - } -#endif - SLJIT_ASSERT(local_size > 0); - if (local_size <= 127) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | 4; - *inst++ = local_size; - } - else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!inst); - INC_SIZE(7); - 
*inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | SUB | 4; - *(sljit_si*)inst = local_size; - inst += sizeof(sljit_si); - } -#ifdef _WIN64 - /* Save xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247429; -#endif - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) -{ - sljit_si pushed_size; - - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - /* Including the return address saved by the call instruction. */ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifdef _WIN64 - if (scratches >= 5) - pushed_size += sizeof(sljit_sw); -#endif - compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) -{ - sljit_si size; - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); - - compiler->flags_saved = 0; - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - -#ifdef _WIN64 - /* Restore xmm6 with MOVAPS instruction. 
*/ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247428; -#endif - SLJIT_ASSERT(compiler->local_size > 0); - if (compiler->local_size <= 127) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | ADD | 4; - *inst = compiler->local_size; - } - else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!inst); - INC_SIZE(7); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | ADD | 4; - *(sljit_si*)inst = compiler->local_size; - } - - size = 1 + compiler->saveds; -#ifndef _WIN64 - if (compiler->saveds >= 2) - size += compiler->saveds - 1; -#else - if (compiler->saveds >= 4) - size += compiler->saveds - 3; - if (compiler->scratches >= 5) - size += (5 - 4) * 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - -#ifdef _WIN64 - if (compiler->scratches >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif - if (compiler->saveds >= 1) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds >= 2) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (compiler->saveds >= 3) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (compiler->saveds >= 4) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); - } - if (compiler->saveds >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); - } - - RET(); - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm) -{ - sljit_ub *inst; - sljit_si length = 1 + (rex ? 
1 : 0) + sizeof(sljit_si); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + length); - FAIL_IF(!inst); - INC_SIZE(length); - if (rex) - *inst++ = rex; - *inst++ = opcode; - *(sljit_si*)inst = imm; - return SLJIT_SUCCESS; -} - -static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, - /* The register or immediate operand. */ - sljit_si a, sljit_sw imma, - /* The general operand (not immediate). */ - sljit_si b, sljit_sw immb) -{ - sljit_ub *inst; - sljit_ub *buf_ptr; - sljit_ub rex = 0; - sljit_si flags = size & ~0xf; - sljit_si inst_size; - - /* The immediate operand must be 32 bit. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); - /* Both cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); - /* Size flags not allowed for typed instructions. */ - SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); - /* Both size flags cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - /* SSE2 and immediate is not possible. 
*/ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif - - size &= 0xf; - inst_size = size; - - if ((b & SLJIT_MEM) && !(b & 0xf0) && NOT_HALFWORD(immb)) { - if (emit_load_imm64(compiler, TMP_REG3, immb)) - return NULL; - immb = 0; - if (b & 0xf) - b |= TMP_REG3 << 4; - else - b |= TMP_REG3; - } - - if (!compiler->mode32 && !(flags & EX86_NO_REXW)) - rex |= REX_W; - else if (flags & EX86_REX) - rex |= REX; - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; -#endif - if (flags & EX86_PREF_66) - inst_size++; - - /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. */ - if (b & SLJIT_MEM) { - if ((b & 0x0f) == SLJIT_UNUSED) - inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */ - else { - if (reg_map[b & 0x0f] >= 8) - rex |= REX_B; - if (immb != 0 && !(b & 0xf0)) { - /* Immediate operand. */ - if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_sb); - else - inst_size += sizeof(sljit_si); - } - } - - if ((b & 0xf) == SLJIT_LOCALS_REG && !(b & 0xf0)) - b |= SLJIT_LOCALS_REG << 4; - - if ((b & 0xf0) != SLJIT_UNUSED) { - inst_size += 1; /* SIB byte. */ - if (reg_map[(b >> 4) & 0x0f] >= 8) - rex |= REX_X; - } - } -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2) && reg_map[b] >= 8) - rex |= REX_B; -#else - else if (reg_map[b] >= 8) - rex |= REX_B; -#endif - - if (a & SLJIT_IMM) { - if (flags & EX86_BIN_INS) { - if (imma <= 127 && imma >= -128) { - inst_size += 1; - flags |= EX86_BYTE_ARG; - } else - inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { - imma &= compiler->mode32 ? 
0x1f : 0x3f; - if (imma != 1) { - inst_size ++; - flags |= EX86_BYTE_ARG; - } - } else if (flags & EX86_BYTE_ARG) - inst_size++; - else if (flags & EX86_HALF_ARG) - inst_size += sizeof(short); - else - inst_size += sizeof(sljit_si); - } - else { - SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); - /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (!(flags & EX86_SSE2) && reg_map[a] >= 8) - rex |= REX_R; -#else - if (reg_map[a] >= 8) - rex |= REX_R; -#endif - } - - if (rex) - inst_size++; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); - PTR_FAIL_IF(!inst); - - /* Encoding the byte. */ - INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (flags & EX86_PREF_F2) - *inst++ = 0xf2; - if (flags & EX86_PREF_F3) - *inst++ = 0xf3; -#endif - if (flags & EX86_PREF_66) - *inst++ = 0x66; - if (rex) - *inst++ = rex; - buf_ptr = inst + size; - - /* Encode mod/rm byte. */ - if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) - *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - - if ((a & SLJIT_IMM) || (a == 0)) - *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) - *buf_ptr = reg_lmap[a] << 3; - else - *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_lmap[a] << 3; -#endif - } - else { - if (a & SLJIT_IMM) { - if (imma == 1) - *inst = GROUP_SHIFT_1; - else - *inst = GROUP_SHIFT_N; - } else - *inst = GROUP_SHIFT_CL; - *buf_ptr = 0; - } - - if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? 
reg_lmap[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_lmap[b]; -#endif - else if ((b & 0x0f) != SLJIT_UNUSED) { - if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { - if (immb != 0) { - if (immb <= 127 && immb >= -128) - *buf_ptr |= 0x40; - else - *buf_ptr |= 0x80; - } - - if ((b & 0xf0) == SLJIT_UNUSED) - *buf_ptr++ |= reg_lmap[b & 0x0f]; - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3); - } - - if (immb != 0) { - if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ - else { - *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_si); - } - } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3) | (immb << 6); - } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = 0x25; - *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_si); - } - - if (a & SLJIT_IMM) { - if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; - else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; - else if (!(flags & EX86_SHIFT_INS)) - *(sljit_si*)buf_ptr = imma; - } - - return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); -} - -/* --------------------------------------------------------------------- */ -/* Call / return instructions */ -/* --------------------------------------------------------------------- */ - -static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) -{ - sljit_ub *inst; - -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!inst); - INC_SIZE((type < SLJIT_CALL3) ? 
3 : 6); - if (type >= SLJIT_CALL3) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; - } - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!inst); - INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); - if (type >= SLJIT_CALL3) { - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; - } - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; -#endif - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) -{ - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); - ADJUST_LOCAL_OFFSET(dst, dstw); - - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - dst = TMP_REGISTER; - - if (dst <= TMP_REGISTER) { - if (reg_map[dst] < 8) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - POP_REG(reg_lmap[dst]); - return SLJIT_SUCCESS; - } - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - INC_SIZE(2); - *inst++ = REX_B; - POP_REG(reg_lmap[dst]); - return SLJIT_SUCCESS; - } - - /* REX_W is not necessary (src is not immediate). 
*/ - compiler->mode32 = 1; - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!inst); - *inst++ = POP_rm; - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) -{ - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) { - FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw)); - src = TMP_REGISTER; - } - - if (src <= TMP_REGISTER) { - if (reg_map[src] < 8) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); - FAIL_IF(!inst); - - INC_SIZE(1 + 1); - PUSH_REG(reg_lmap[src]); - } - else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); - FAIL_IF(!inst); - - INC_SIZE(2 + 1); - *inst++ = REX_B; - PUSH_REG(reg_lmap[src]); - } - } - else if (src & SLJIT_MEM) { - /* REX_W is not necessary (src is not immediate). */ - compiler->mode32 = 1; - inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_FF; - *inst |= PUSH_rm; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - } - else { - SLJIT_ASSERT(IS_HALFWORD(srcw)); - /* SLJIT_IMM. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); - FAIL_IF(!inst); - - INC_SIZE(5 + 1); - *inst++ = PUSH_i32; - *(sljit_si*)inst = srcw; - inst += sizeof(sljit_si); - } - - RET(); - return SLJIT_SUCCESS; -} - - -/* --------------------------------------------------------------------- */ -/* Extend input */ -/* --------------------------------------------------------------------- */ - -static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - sljit_si dst_r; - - compiler->mode32 = 0; - - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. 
*/ - - if (src & SLJIT_IMM) { - if (dst <= TMP_REGISTER) { - if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; - } - return emit_load_imm64(compiler, dst, srcw); - } - compiler->mode32 = 1; - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - compiler->mode32 = 0; - return SLJIT_SUCCESS; - } - - dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - if ((dst & SLJIT_MEM) && (src <= TMP_REGISTER)) - dst_r = src; - else { - if (sign) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = MOVSXD_r_rm; - } else { - compiler->mode32 = 1; - FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); - compiler->mode32 = 0; - } - } - - if (dst & SLJIT_MEM) { - compiler->mode32 = 1; - inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_r; - compiler->mode32 = 0; - } - - return SLJIT_SUCCESS; -} diff --git a/deps/libmagic/pcre/sljit/sljitNativeX86_common.c b/deps/libmagic/pcre/sljit/sljitNativeX86_common.c deleted file mode 100644 index ab98a03..0000000 --- a/deps/libmagic/pcre/sljit/sljitNativeX86_common.c +++ /dev/null @@ -1,2836 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) -{ - return "x86" SLJIT_CPUINFO; -} - -/* - 32b register indexes: - 0 - EAX - 1 - ECX - 2 - EDX - 3 - EBX - 4 - none - 5 - EBP - 6 - ESI - 7 - EDI -*/ - -/* - 64b register indexes: - 0 - RAX - 1 - RCX - 2 - RDX - 3 - RBX - 4 - none - 5 - RBP - 6 - RSI - 7 - RDI - 8 - R8 - From now on REX prefix is required - 9 - R9 - 10 - R10 - 11 - R11 - 12 - R12 - 13 - R13 - 14 - R14 - 15 - R15 -*/ - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - -/* Last register + 1. 
*/ -#define TMP_REGISTER (SLJIT_NO_REGISTERS + 1) - -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 -}; - -#define CHECK_EXTRA_REGS(p, w, do) \ - if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \ - w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ - do; \ - } \ - else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ - w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ - do; \ - } - -#else /* SLJIT_CONFIG_X86_32 */ - -/* Last register + 1. */ -#define TMP_REGISTER (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) - -/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present - Note: avoid to use r12 and r13 for memory addessing - therefore r12 is better for SAVED_EREG than SAVED_REG. */ -#ifndef _WIN64 -/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 -}; -/* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 -}; -#else -/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 -}; -/* low-map. reg_map & 0x7. 
*/ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 -}; -#endif - -#define REX_W 0x48 -#define REX_R 0x44 -#define REX_X 0x42 -#define REX_B 0x41 -#define REX 0x40 - -#define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll) -#define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll) - -#define CHECK_EXTRA_REGS(p, w, do) - -#endif /* SLJIT_CONFIG_X86_32 */ - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define TMP_FREG (0) -#endif - -/* Size flags for emit_x86_instruction: */ -#define EX86_BIN_INS 0x0010 -#define EX86_SHIFT_INS 0x0020 -#define EX86_REX 0x0040 -#define EX86_NO_REXW 0x0080 -#define EX86_BYTE_ARG 0x0100 -#define EX86_HALF_ARG 0x0200 -#define EX86_PREF_66 0x0400 - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define EX86_SSE2 0x0800 -#define EX86_PREF_F2 0x1000 -#define EX86_PREF_F3 0x2000 -#endif - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -#define ADD (/* BINARY */ 0 << 3) -#define ADD_EAX_i32 0x05 -#define ADD_r_rm 0x03 -#define ADD_rm_r 0x01 -#define ADDSD_x_xm 0x58 -#define ADC (/* BINARY */ 2 << 3) -#define ADC_EAX_i32 0x15 -#define ADC_r_rm 0x13 -#define ADC_rm_r 0x11 -#define AND (/* BINARY */ 4 << 3) -#define AND_EAX_i32 0x25 -#define AND_r_rm 0x23 -#define AND_rm_r 0x21 -#define ANDPD_x_xm 0x54 -#define BSR_r_rm (/* GROUP_0F */ 0xbd) -#define CALL_i32 0xe8 -#define CALL_rm (/* GROUP_FF */ 2 << 3) -#define CDQ 0x99 -#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) -#define CMP (/* BINARY */ 7 << 3) -#define CMP_EAX_i32 0x3d -#define CMP_r_rm 0x3b -#define CMP_rm_r 0x39 -#define DIV (/* GROUP_F7 */ 6 << 3) -#define DIVSD_x_xm 0x5e -#define INT3 0xcc -#define IDIV (/* GROUP_F7 */ 7 << 3) -#define IMUL (/* GROUP_F7 */ 5 << 3) -#define IMUL_r_rm (/* GROUP_0F */ 0xaf) -#define IMUL_r_rm_i8 0x6b -#define IMUL_r_rm_i32 0x69 -#define JE_i8 0x74 
-#define JMP_i8 0xeb -#define JMP_i32 0xe9 -#define JMP_rm (/* GROUP_FF */ 4 << 3) -#define LEA_r_m 0x8d -#define MOV_r_rm 0x8b -#define MOV_r_i32 0xb8 -#define MOV_rm_r 0x89 -#define MOV_rm_i32 0xc7 -#define MOV_rm8_i8 0xc6 -#define MOV_rm8_r8 0x88 -#define MOVSD_x_xm 0x10 -#define MOVSD_xm_x 0x11 -#define MOVSXD_r_rm 0x63 -#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) -#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) -#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) -#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) -#define MUL (/* GROUP_F7 */ 4 << 3) -#define MULSD_x_xm 0x59 -#define NEG_rm (/* GROUP_F7 */ 3 << 3) -#define NOP 0x90 -#define NOT_rm (/* GROUP_F7 */ 2 << 3) -#define OR (/* BINARY */ 1 << 3) -#define OR_r_rm 0x0b -#define OR_EAX_i32 0x0d -#define OR_rm_r 0x09 -#define POP_r 0x58 -#define POP_rm 0x8f -#define POPF 0x9d -#define PUSH_i32 0x68 -#define PUSH_r 0x50 -#define PUSH_rm (/* GROUP_FF */ 6 << 3) -#define PUSHF 0x9c -#define RET_near 0xc3 -#define RET_i16 0xc2 -#define SBB (/* BINARY */ 3 << 3) -#define SBB_EAX_i32 0x1d -#define SBB_r_rm 0x1b -#define SBB_rm_r 0x19 -#define SAR (/* SHIFT */ 7 << 3) -#define SHL (/* SHIFT */ 4 << 3) -#define SHR (/* SHIFT */ 5 << 3) -#define SUB (/* BINARY */ 5 << 3) -#define SUB_EAX_i32 0x2d -#define SUB_r_rm 0x2b -#define SUB_rm_r 0x29 -#define SUBSD_x_xm 0x5c -#define TEST_EAX_i32 0xa9 -#define TEST_rm_r 0x85 -#define UCOMISD_x_xm 0x2e -#define XCHG_EAX_r 0x90 -#define XCHG_r_rm 0x87 -#define XOR (/* BINARY */ 6 << 3) -#define XOR_EAX_i32 0x35 -#define XOR_r_rm 0x33 -#define XOR_rm_r 0x31 -#define XORPD_x_xm 0x57 - -#define GROUP_0F 0x0f -#define GROUP_F7 0xf7 -#define GROUP_FF 0xff -#define GROUP_BINARY_81 0x81 -#define GROUP_BINARY_83 0x83 -#define GROUP_SHIFT_1 0xd1 -#define GROUP_SHIFT_N 0xc1 -#define GROUP_SHIFT_CL 0xd3 - -#define MOD_REG 0xc0 -#define MOD_DISP8 0x40 - -#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) - -#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) -#define POP_REG(r) (*inst++ = (POP_r + (r))) 
-#define RET() (*inst++ = (RET_near)) -#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) -/* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) - -/* Multithreading does not affect these static variables, since they store - built-in CPU features. Therefore they can be overwritten by different threads - if they detect the CPU features in the same time. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) -static sljit_si cpu_has_sse2 = -1; -#endif -static sljit_si cpu_has_cmov = -1; - -#if defined(_MSC_VER) && (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#if _MSC_VER >= 1400 -#include -#else -#error "MSVC does not support inline assembly in 64 bit mode" -#endif -#endif /* _MSC_VER && SLJIT_CONFIG_X86_64 */ - -static void get_cpu_features(void) -{ - sljit_ui features; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) - /* AT&T syntax. */ - __asm__ ( - "pushl %%ebx\n" - "movl $0x1, %%eax\n" - "cpuid\n" - "popl %%ebx\n" - "movl %%edx, %0\n" - : "=g" (features) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined(_MSC_VER) || defined(__BORLANDC__) - /* Intel syntax. */ - __asm { - mov eax, 1 - push ebx - cpuid - pop ebx - mov features, edx - } -#else -# error "SLJIT_DETECT_SSE2 is not implemented for this C compiler" -#endif - -#else /* SLJIT_CONFIG_X86_32 */ - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) - /* AT&T syntax. 
*/ - __asm__ ( - "pushq %%rbx\n" - "movl $0x1, %%eax\n" - "cpuid\n" - "popq %%rbx\n" - "movl %%edx, %0\n" - : "=g" (features) - : - : "%rax", "%rcx", "%rdx" - ); -#elif defined(_MSC_VER) && _MSC_VER >= 1400 - int CPUInfo[4]; - - __cpuid(CPUInfo, 1); - features = (sljit_ui)CPUInfo[3]; -#else - __asm { - mov eax, 1 - push rbx - cpuid - pop rbx - mov features, edx - } -#endif - -#endif /* SLJIT_CONFIG_X86_32 */ - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - cpu_has_sse2 = (features >> 26) & 0x1; -#endif - cpu_has_cmov = (features >> 15) & 0x1; -} - -static sljit_ub get_jump_code(sljit_si type) -{ - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_EQUAL: - return 0x84 /* je */; - - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - return 0x85 /* jne */; - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - return 0x82 /* jc */; - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - return 0x83 /* jae */; - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - return 0x87 /* jnbe */; - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - return 0x86 /* jbe */; - - case SLJIT_C_SIG_LESS: - return 0x8c /* jl */; - - case SLJIT_C_SIG_GREATER_EQUAL: - return 0x8d /* jnl */; - - case SLJIT_C_SIG_GREATER: - return 0x8f /* jnle */; - - case SLJIT_C_SIG_LESS_EQUAL: - return 0x8e /* jle */; - - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: - return 0x80 /* jo */; - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - return 0x81 /* jno */; - - case SLJIT_C_FLOAT_UNORDERED: - return 0x8a /* jp */; - - case SLJIT_C_FLOAT_ORDERED: - return 0x8b /* jpo */; - } - return 0; -} - -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type); -#endif - -static sljit_ub* 
generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type) -{ - sljit_si short_jump; - sljit_uw label_addr; - - if (jump->flags & JUMP_LABEL) - label_addr = (sljit_uw)(code + jump->u.label->size); - else - label_addr = jump->u.target; - short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((sljit_sw)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_sw)(label_addr - (jump->addr + 1)) < -0x80000000ll) - return generate_far_jump_code(jump, code_ptr, type); -#endif - - if (type == SLJIT_JUMP) { - if (short_jump) - *code_ptr++ = JMP_i8; - else - *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { - short_jump = 0; - *code_ptr++ = CALL_i32; - jump->addr++; - } - else if (short_jump) { - *code_ptr++ = get_jump_code(type) - 0x10; - jump->addr++; - } - else { - *code_ptr++ = GROUP_0F; - *code_ptr++ = get_jump_code(type); - jump->addr += 2; - } - - if (short_jump) { - jump->flags |= PATCH_MB; - code_ptr += sizeof(sljit_sb); - } else { - jump->flags |= PATCH_MW; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code_ptr += sizeof(sljit_sw); -#else - code_ptr += sizeof(sljit_si); -#endif - } - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_ub *code; - sljit_ub *code_ptr; - sljit_ub *buf_ptr; - sljit_ub *buf_end; - sljit_ub len; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - - CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); - reverse_buf(compiler); - - /* Second code generation pass. 
*/ - code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - do { - buf_ptr = buf->memory; - buf_end = buf_ptr + buf->used_size; - do { - len = *buf_ptr++; - if (len > 0) { - /* The code is already generated. */ - SLJIT_MEMMOVE(code_ptr, buf_ptr, len); - code_ptr += len; - buf_ptr += len; - } - else { - if (*buf_ptr >= 4) { - jump->addr = (sljit_uw)code_ptr; - if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); - else - code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); - jump = jump->next; - } - else if (*buf_ptr == 0) { - label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; - label = label->next; - } - else if (*buf_ptr == 1) { - const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); - const_ = const_->next; - } - else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *code_ptr++ = (*buf_ptr == 2) ? 
CALL_i32 : JMP_i32; - buf_ptr++; - *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); - code_ptr += sizeof(sljit_sw); - buf_ptr += sizeof(sljit_sw) - 1; -#else - code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); - buf_ptr += sizeof(sljit_sw); -#endif - } - buf_ptr++; - } - } while (buf_ptr < buf_end); - SLJIT_ASSERT(buf_ptr == buf_end); - buf = buf->next; - } while (buf); - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - - jump = compiler->jumps; - while (jump) { - if (jump->flags & PATCH_MB) { - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127); - *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))); - } else if (jump->flags & PATCH_MW) { - if (jump->flags & JUMP_LABEL) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))); -#else - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll); - *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))); -#endif - } - else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))); -#else - SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll); - *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si))); -#endif - } - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - else if (jump->flags & PATCH_MD) - *(sljit_sw*)jump->addr = jump->u.label->addr; -#endif - - jump = jump->next; - } - - /* 
Maybe we waste some space because of short jumps. */ - SLJIT_ASSERT(code_ptr <= code + compiler->size); - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size; - return (void*)code; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -static sljit_si emit_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); - -static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); - -static sljit_si emit_mov(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw); - -static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) -{ - sljit_ub *inst; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); -#else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - INC_SIZE(6); - *inst++ = REX_W; -#endif - *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ - *inst++ = 0x64; - *inst++ = 0x24; - *inst++ = (sljit_ub)sizeof(sljit_sw); - *inst++ = PUSHF; - compiler->flags_saved = 1; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags) -{ - sljit_ub *inst; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = POPF; -#else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - INC_SIZE(6); - *inst++ = POPF; - *inst++ = REX_W; -#endif - 
*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ - *inst++ = 0x64; - *inst++ = 0x24; - *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); - compiler->flags_saved = keep_flags; - return SLJIT_SUCCESS; -} - -#ifdef _WIN32 -#include - -static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) -{ - /* Workaround for calling the internal _chkstk() function on Windows. - This function touches all 4k pages belongs to the requested stack space, - which size is passed in local_size. This is necessary on Windows where - the stack can only grow in 4k steps. However, this function just burn - CPU cycles if the stack is large enough, but you don't know it in advance. - I think this is a bad design even if it has some reasons. */ - alloca(local_size); -} - -#endif - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#include "sljitNativeX86_32.c" -#else -#include "sljitNativeX86_64.c" -#endif - -static sljit_si emit_mov(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - - if (dst == SLJIT_UNUSED) { - /* No destination, doesn't need to setup flags. */ - if (src & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!inst); - *inst = MOV_r_rm; - } - return SLJIT_SUCCESS; - } - if (src <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_r; - return SLJIT_SUCCESS; - } - if (src & SLJIT_IMM) { - if (dst <= TMP_REGISTER) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); -#else - if (!compiler->mode32) { - if (NOT_HALFWORD(srcw)) - return emit_load_imm64(compiler, dst, srcw); - } - else - return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? 
REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); -#endif - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (!compiler->mode32 && NOT_HALFWORD(srcw)) { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_r; - return SLJIT_SUCCESS; - } -#endif - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; - } - if (dst <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); - FAIL_IF(!inst); - *inst = MOV_r_rm; - return SLJIT_SUCCESS; - } - - /* Memory to memory move. Requires two instruction. */ - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!inst); - *inst = MOV_r_rm; - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_r; - return SLJIT_SUCCESS; -} - -#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ - FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) -{ - sljit_ub *inst; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si size; -#endif - - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - switch (GET_OPCODE(op)) { - case SLJIT_BREAKPOINT: - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = INT3; - break; - case SLJIT_NOP: - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = NOP; - break; - case SLJIT_UMUL: - case SLJIT_SMUL: - case SLJIT_UDIV: - case SLJIT_SDIV: - compiler->flags_saved = 0; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#ifdef _WIN64 - SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] == 2 - && reg_map[TMP_REGISTER] > 7, - invalid_register_assignment_for_div_mul); -#else - SLJIT_COMPILE_ASSERT( - 
reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] < 7 - && reg_map[TMP_REGISTER] == 2, - invalid_register_assignment_for_div_mul); -#endif - compiler->mode32 = op & SLJIT_INT_OP; -#endif - - op = GET_OPCODE(op); - if (op == SLJIT_UDIV) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0); - inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0); -#else - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); -#endif - FAIL_IF(!inst); - *inst = XOR_r_rm; - } - - if (op == SLJIT_SDIV) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0); -#endif - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = CDQ; -#else - if (compiler->mode32) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = CDQ; - } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - INC_SIZE(2); - *inst++ = REX_W; - *inst = CDQ; - } -#endif - } - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - INC_SIZE(2); - *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_SCRATCH_REG2]); -#else -#ifdef _WIN64 - size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; -#else - size = (!compiler->mode32) ? 3 : 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); -#ifdef _WIN64 - if (!compiler->mode32) - *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); - else if (op >= SLJIT_UDIV) - *inst++ = REX_B; - *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? 
reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_SCRATCH_REG2]); -#else - if (!compiler->mode32) - *inst++ = REX_W; - *inst++ = GROUP_F7; - *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2]; -#endif -#endif - switch (op) { - case SLJIT_UMUL: - *inst |= MUL; - break; - case SLJIT_SMUL: - *inst |= IMUL; - break; - case SLJIT_UDIV: - *inst |= DIV; - break; - case SLJIT_SDIV: - *inst |= IDIV; - break; - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REGISTER, 0); -#endif - break; - } - - return SLJIT_SUCCESS; -} - -#define ENCODE_PREFIX(prefix) \ - do { \ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ - FAIL_IF(!inst); \ - INC_SIZE(1); \ - *inst = (prefix); \ - } while (0) - -static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - sljit_si dst_r; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si work_r; -#endif - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 0; -#endif - - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. */ - - if (src & SLJIT_IMM) { - if (dst <= TMP_REGISTER) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); -#else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; -#endif - } - inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_i8; - return SLJIT_SUCCESS; - } - - dst_r = (dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; - - if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg_map[src] >= 4) { - SLJIT_ASSERT(dst_r == TMP_REGISTER); - EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); - } else - dst_r = src; -#else - dst_r = src; -#endif - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - else if (src <= TMP_REGISTER && reg_map[src] >= 4) { - /* src, dst are registers. */ - SLJIT_ASSERT(dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REGISTER); - if (reg_map[dst] < 4) { - if (dst != src) - EMIT_MOV(compiler, dst, 0, src, 0); - inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; - } - else { - if (dst != src) - EMIT_MOV(compiler, dst, 0, src, 0); - if (sign) { - /* shl reg, 24 */ - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!inst); - *inst |= SHL; - /* sar reg, 24 */ - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!inst); - *inst |= SAR; - } - else { - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); - FAIL_IF(!inst); - *(inst + 1) |= AND; - } - } - return SLJIT_SUCCESS; - } -#endif - else { - /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; - } - - if (dst & SLJIT_MEM) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst_r == TMP_REGISTER) { - /* Find a non-used register, whose reg_map[src] < 4. 
*/ - if ((dst & 0xf) == SLJIT_SCRATCH_REG1) { - if ((dst & 0xf0) == (SLJIT_SCRATCH_REG2 << 4)) - work_r = SLJIT_SCRATCH_REG3; - else - work_r = SLJIT_SCRATCH_REG2; - } - else { - if ((dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4)) - work_r = SLJIT_SCRATCH_REG1; - else if ((dst & 0xf) == SLJIT_SCRATCH_REG2) - work_r = SLJIT_SCRATCH_REG3; - else - work_r = SLJIT_SCRATCH_REG2; - } - - if (work_r == SLJIT_SCRATCH_REG1) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); - } - else { - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } - - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_r8; - - if (work_r == SLJIT_SCRATCH_REG1) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); - } - else { - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } - } - else { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_r8; - } -#else - inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_r8; -#endif - } - - return SLJIT_SUCCESS; -} - -static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - sljit_si dst_r; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 0; -#endif - - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. 
*/ - - if (src & SLJIT_IMM) { - if (dst <= TMP_REGISTER) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); -#else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; -#endif - } - inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; - } - - dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) - dst_r = src; - else { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; - } - - if (dst & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_r; - } - - return SLJIT_SUCCESS; -} - -static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= opcode; - return SLJIT_SUCCESS; - } - if (dst == src && dstw == srcw) { - /* Same input and output */ - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= opcode; - return SLJIT_SUCCESS; - } - if (dst <= TMP_REGISTER) { - EMIT_MOV(compiler, dst, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= opcode; - return SLJIT_SUCCESS; - } - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= 
opcode; - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - return SLJIT_SUCCESS; -} - -static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst = OR_r_rm; - return SLJIT_SUCCESS; - } - if (dst <= TMP_REGISTER) { - EMIT_MOV(compiler, dst, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; - inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); - FAIL_IF(!inst); - *inst = OR_r_rm; - return SLJIT_SUCCESS; - } - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst = OR_r_rm; - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - return SLJIT_SUCCESS; -} - -static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - sljit_si dst_r; - - SLJIT_UNUSED_ARG(op_flags); - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - /* Just set the zero flag. */ - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); -#else - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 
63 : 31, TMP_REGISTER, 0); -#endif - FAIL_IF(!inst); - *inst |= SHR; - return SLJIT_SUCCESS; - } - - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, srcw); - src = TMP_REGISTER; - srcw = 0; - } - - inst = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = BSR_r_rm; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst <= TMP_REGISTER) - dst_r = dst; - else { - /* Find an unused temporary register. */ - if ((dst & 0xf) != SLJIT_SCRATCH_REG1 && (dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4)) - dst_r = SLJIT_SCRATCH_REG1; - else if ((dst & 0xf) != SLJIT_SCRATCH_REG2 && (dst & 0xf0) != (SLJIT_SCRATCH_REG2 << 4)) - dst_r = SLJIT_SCRATCH_REG2; - else - dst_r = SLJIT_SCRATCH_REG3; - EMIT_MOV(compiler, dst, dstw, dst_r, 0); - } - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); -#else - dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REG2; - compiler->mode32 = 0; - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); - compiler->mode32 = op_flags & SLJIT_INT_OP; -#endif - - if (cpu_has_cmov == -1) - get_cpu_features(); - - if (cpu_has_cmov) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = CMOVNE_r_rm; - } else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - - *inst++ = JE_i8; - *inst++ = 2; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REGISTER]; -#else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - - *inst++ = JE_i8; - *inst++ = 3; - *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REGISTER] >= 8 ? 
REX_B : 0); - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REGISTER]; -#endif - } - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); -#else - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); -#endif - FAIL_IF(!inst); - *(inst + 1) |= XOR; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } -#else - if (dst & SLJIT_MEM) - EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); -#endif - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_ub* inst; - sljit_si update = 0; - sljit_si op_flags = GET_ALL_FLAGS(op); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si dst_is_ereg = 0; - sljit_si src_is_ereg = 0; -#else -# define src_is_ereg 0 -#endif - - CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); - CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op_flags & SLJIT_INT_OP; -#endif - - op = GET_OPCODE(op); - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 0; -#endif - - if (op_flags & SLJIT_INT_OP) { - if (src <= TMP_REGISTER && src == dst) { - if (!TYPE_CAST_NEEDED(op)) - return SLJIT_SUCCESS; - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) - op = SLJIT_MOV_UI; - if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) - op = SLJIT_MOVU_UI; - if (op == 
SLJIT_MOV_UI && (src & SLJIT_IMM)) - op = SLJIT_MOV_SI; - if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) - op = SLJIT_MOVU_SI; -#endif - } - - SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); - if (op >= SLJIT_MOVU) { - update = 1; - op -= 8; - } - - if (src & SLJIT_IMM) { - switch (op) { - case SLJIT_MOV_UB: - srcw = (sljit_ub)srcw; - break; - case SLJIT_MOV_SB: - srcw = (sljit_sb)srcw; - break; - case SLJIT_MOV_UH: - srcw = (sljit_uh)srcw; - break; - case SLJIT_MOV_SH: - srcw = (sljit_sh)srcw; - break; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - case SLJIT_MOV_UI: - srcw = (sljit_ui)srcw; - break; - case SLJIT_MOV_SI: - srcw = (sljit_si)srcw; - break; -#endif - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (SLJIT_UNLIKELY(dst_is_ereg)) - return emit_mov(compiler, dst, dstw, src, srcw); -#endif - } - - if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) { - inst = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); - FAIL_IF(!inst); - *inst = LEA_r_m; - src &= SLJIT_MEM | 0xf; - srcw = 0; - } - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { - SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); - dst = TMP_REGISTER; - } -#endif - - switch (op) { - case SLJIT_MOV: - case SLJIT_MOV_P: -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: -#endif - FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); - break; - case SLJIT_MOV_UB: - FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); - break; - case SLJIT_MOV_SB: - FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); - break; - case SLJIT_MOV_UH: - FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); - break; - case SLJIT_MOV_SH: - FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, 
srcw)); - break; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - case SLJIT_MOV_UI: - FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); - break; - case SLJIT_MOV_SI: - FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); - break; -#endif - } - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER) - return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0); -#endif - - if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) { - inst = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); - FAIL_IF(!inst); - *inst = LEA_r_m; - } - return SLJIT_SUCCESS; - } - - if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) - compiler->flags_saved = 0; - - switch (op) { - case SLJIT_NOT: - if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) - return emit_not_with_flags(compiler, dst, dstw, src, srcw); - return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); - - case SLJIT_NEG: - if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); - - case SLJIT_CLZ: - if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - return emit_clz(compiler, op_flags, dst, dstw, src, srcw); - } - - return SLJIT_SUCCESS; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -# undef src_is_ereg -#endif -} - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - if (IS_HALFWORD(immw) || compiler->mode32) { \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); \ - } \ - else { \ - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ - FAIL_IF(!inst); \ - *inst = (op_mr); \ 
- } - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) - -#else - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) - -#endif - -static sljit_si emit_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } - - if (dst == src1 && dstw == src1w) { - if (src2 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { -#else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { -#endif - BINARY_EAX_IMM(op_eax_imm, src2w); - } - else { - BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); - } - } - else if (dst <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - else if (src2 <= TMP_REGISTER) { - /* Special exception for sljit_emit_op_flags. 
*/ - inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - else { - EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - return SLJIT_SUCCESS; - } - - /* Only for cumulative operations. */ - if (dst == src2 && dstw == src2w) { - if (src1 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { -#else - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) { -#endif - BINARY_EAX_IMM(op_eax_imm, src1w); - } - else { - BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); - } - } - else if (dst <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); - FAIL_IF(!inst); - *inst = op_rm; - } - else if (src1 <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - else { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - return SLJIT_SUCCESS; - } - - /* General version. */ - if (dst <= TMP_REGISTER) { - EMIT_MOV(compiler, dst, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, dst, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - } - else { - /* This version requires less memory writing. 
*/ - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - } - - return SLJIT_SUCCESS; -} - -static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } - - if (dst == src1 && dstw == src1w) { - if (src2 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { -#else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { -#endif - BINARY_EAX_IMM(op_eax_imm, src2w); - } - else { - BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); - } - } - else if (dst <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - else if (src2 <= TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - else { - EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!inst); - *inst = op_mr; - } - return SLJIT_SUCCESS; - } - - /* General version. 
*/ - if (dst <= TMP_REGISTER && dst != src2) { - EMIT_MOV(compiler, dst, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, dst, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - } - else { - /* This version requires less memory writing. */ - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - } - - return SLJIT_SUCCESS; -} - -static sljit_si emit_mul(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - sljit_si dst_r; - - dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - /* Register destination. */ - if (dst_r == src1 && !(src2 & SLJIT_IMM)) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = IMUL_r_rm; - } - else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = IMUL_r_rm; - } - else if (src1 & SLJIT_IMM) { - if (src2 & SLJIT_IMM) { - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); - src2 = dst_r; - src2w = 0; - } - - if (src1w <= 127 && src1w >= -128) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i8; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = (sljit_sb)src1w; - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - else { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - 
*(sljit_sw*)inst = src1w; - } -#else - else if (IS_HALFWORD(src1w)) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *(sljit_si*)inst = (sljit_si)src1w; - } - else { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); - if (dst_r != src2) - EMIT_MOV(compiler, dst_r, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = IMUL_r_rm; - } -#endif - } - else if (src2 & SLJIT_IMM) { - /* Note: src1 is NOT immediate. */ - - if (src2w <= 127 && src2w >= -128) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i8; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = (sljit_sb)src2w; - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - else { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *(sljit_sw*)inst = src2w; - } -#else - else if (IS_HALFWORD(src2w)) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!inst); - *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *(sljit_si*)inst = (sljit_si)src2w; - } - else { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); - if (dst_r != src1) - EMIT_MOV(compiler, dst_r, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = IMUL_r_rm; - } -#endif - } - else { - /* Neither argument is immediate. 
*/ - if (ADDRESSING_DEPENDS_ON(src2, dst_r)) - dst_r = TMP_REGISTER; - EMIT_MOV(compiler, dst_r, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = IMUL_r_rm; - } - - if (dst_r == TMP_REGISTER) - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - - return SLJIT_SUCCESS; -} - -static sljit_si emit_lea_binary(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - sljit_si dst_r, done = 0; - - /* These cases better be left to handled by normal way. */ - if (dst == src1 && dstw == src1w) - return SLJIT_ERR_UNSUPPORTED; - if (dst == src2 && dstw == src2w) - return SLJIT_ERR_UNSUPPORTED; - - dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - if (src1 <= TMP_REGISTER) { - if (src2 <= TMP_REGISTER || src2 == TMP_REGISTER) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); - FAIL_IF(!inst); - *inst = LEA_r_m; - done = 1; - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); -#else - if (src2 & SLJIT_IMM) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); -#endif - FAIL_IF(!inst); - *inst = LEA_r_m; - done = 1; - } - } - else if (src2 <= TMP_REGISTER) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); -#else - if (src1 & SLJIT_IMM) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); -#endif - FAIL_IF(!inst); - *inst = LEA_r_m; - done = 1; - } - } - - if (done) { - if (dst_r == TMP_REGISTER) - return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0); - return SLJIT_SUCCESS; - 
} - return SLJIT_ERR_UNSUPPORTED; -} - -static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { -#else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { -#endif - BINARY_EAX_IMM(CMP_EAX_i32, src2w); - return SLJIT_SUCCESS; - } - - if (src1 <= TMP_REGISTER) { - if (src2 & SLJIT_IMM) { - BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = CMP_r_rm; - } - return SLJIT_SUCCESS; - } - - if (src2 <= TMP_REGISTER && !(src1 & SLJIT_IMM)) { - inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!inst); - *inst = CMP_rm_r; - return SLJIT_SUCCESS; - } - - if (src2 & SLJIT_IMM) { - if (src1 & SLJIT_IMM) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - src1 = TMP_REGISTER; - src1w = 0; - } - BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); - } - else { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = CMP_r_rm; - } - return SLJIT_SUCCESS; -} - -static sljit_si emit_test_binary(struct sljit_compiler *compiler, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { -#else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { -#endif - BINARY_EAX_IMM(TEST_EAX_i32, src2w); - return SLJIT_SUCCESS; - } - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == 
SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { -#else - if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { -#endif - BINARY_EAX_IMM(TEST_EAX_i32, src1w); - return SLJIT_SUCCESS; - } - - if (src1 <= TMP_REGISTER) { - if (src2 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (IS_HALFWORD(src2w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!inst); - *inst = GROUP_F7; - } - else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } -#else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!inst); - *inst = GROUP_F7; -#endif - } - else { - inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } - return SLJIT_SUCCESS; - } - - if (src2 <= TMP_REGISTER) { - if (src1 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (IS_HALFWORD(src1w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); - FAIL_IF(!inst); - *inst = GROUP_F7; - } - else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } -#else - inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); - FAIL_IF(!inst); - *inst = GROUP_F7; -#endif - } - else { - inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } - return SLJIT_SUCCESS; - } - - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - if (src2 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (IS_HALFWORD(src2w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 
0); - FAIL_IF(!inst); - *inst = GROUP_F7; - } - else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } -#else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst = GROUP_F7; -#endif - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!inst); - *inst = TEST_rm_r; - } - return SLJIT_SUCCESS; -} - -static sljit_si emit_shift(struct sljit_compiler *compiler, - sljit_ub mode, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_ub* inst; - - if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { - if (dst == src1 && dstw == src1w) { - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); - FAIL_IF(!inst); - *inst |= mode; - return SLJIT_SUCCESS; - } - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst |= mode; - return SLJIT_SUCCESS; - } - if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst |= mode; - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - return SLJIT_SUCCESS; - } - if (dst <= TMP_REGISTER) { - EMIT_MOV(compiler, dst, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); - FAIL_IF(!inst); - *inst |= mode; - return SLJIT_SUCCESS; - } - - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst |= mode; - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - 
return SLJIT_SUCCESS; - } - - if (dst == SLJIT_PREF_SHIFT_REG) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst |= mode; - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - } - else if (dst <= TMP_REGISTER && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { - if (src1 != dst) - EMIT_MOV(compiler, dst, 0, src1, src1w); - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0); - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); - FAIL_IF(!inst); - *inst |= mode; - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - } - else { - /* This case is really difficult, since ecx itself may used for - addressing, and we must ensure to work even in that case. */ - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); -#else - /* [esp+0] contains the flags. 
*/ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); -#endif - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!inst); - *inst |= mode; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); -#else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw)); -#endif - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - } - - return SLJIT_SUCCESS; -} - -static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, - sljit_ub mode, sljit_si set_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - /* The CPU does not set flags if the shift count is 0. */ - if (src2 & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) - return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); -#else - if ((src2w & 0x1f) != 0) - return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); -#endif - if (!set_flags) - return emit_mov(compiler, dst, dstw, src1, src1w); - /* OR dst, src, 0 */ - return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, - dst, dstw, src1, src1w, SLJIT_IMM, 0); - } - - if (!set_flags) - return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); - - if (!(dst <= TMP_REGISTER)) - FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); - - FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); - - if (dst <= TMP_REGISTER) - return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si 
src2, sljit_sw src2w) -{ - CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - CHECK_EXTRA_REGS(src1, src1w, (void)0); - CHECK_EXTRA_REGS(src2, src2w, (void)0); -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op & SLJIT_INT_OP; -#endif - - if (GET_OPCODE(op) >= SLJIT_MUL) { - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - } - - switch (GET_OPCODE(op)) { - case SLJIT_ADD: - if (!GET_FLAGS(op)) { - if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) - return compiler->error; - } - else - compiler->flags_saved = 0; - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_ADDC: - if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. 
*/ - FAIL_IF(emit_restore_flags(compiler, 1)); - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) - FAIL_IF(emit_save_flags(compiler)); - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; - return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_SUB: - if (!GET_FLAGS(op)) { - if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) - return compiler->error; - } - else - compiler->flags_saved = 0; - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - if (dst == SLJIT_UNUSED) - return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_SUBC: - if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ - FAIL_IF(emit_restore_flags(compiler, 1)); - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) - FAIL_IF(emit_save_flags(compiler)); - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; - return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_MUL: - return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); - case SLJIT_AND: - if (dst == SLJIT_UNUSED) - return emit_test_binary(compiler, src1, src1w, src2, src2w); - return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_OR: - return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_XOR: - return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_SHL: - return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_LSHR: - return 
emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), - dst, dstw, src1, src1w, src2, src2w); - case SLJIT_ASHR: - return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), - dst, dstw, src1, src1w, src2, src2w); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) -{ - check_sljit_get_register_index(reg); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2 - || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2) - return -1; -#endif - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) -{ - sljit_ub *inst; - - CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size > 0 && size < 16); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); - SLJIT_MEMMOVE(inst, instruction, size); - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - -/* Alignment + 2 * 16 bytes. */ -static sljit_si sse2_data[3 + (4 + 4) * 2]; -static sljit_si *sse2_buffer; - -static void init_compiler(void) -{ - sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf); - /* Single precision constants. */ - sse2_buffer[0] = 0x80000000; - sse2_buffer[4] = 0x7fffffff; - /* Double precision constants. 
*/ - sse2_buffer[8] = 0; - sse2_buffer[9] = 0x80000000; - sse2_buffer[12] = 0xffffffff; - sse2_buffer[13] = 0x7fffffff; -} - -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) -{ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - if (cpu_has_sse2 == -1) - get_cpu_features(); - return cpu_has_sse2; -#else /* SLJIT_DETECT_SSE2 */ - return 1; -#endif /* SLJIT_DETECT_SSE2 */ -#else /* SLJIT_SSE2 */ - return 0; -#endif -} - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - -static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) -{ - sljit_ub *inst; - - inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = opcode; - return SLJIT_SUCCESS; -} - -static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) -{ - sljit_ub *inst; - - inst = emit_x86_instruction(compiler, 2 | (pref66 ? 
EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = opcode; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, - sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) -{ - return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); -} - -static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, - sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) -{ - return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - sljit_si dst_r; - - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 1; -#endif - - if (GET_OPCODE(op) == SLJIT_CMPD) { - compiler->flags_saved = 0; - if (dst <= SLJIT_FLOAT_REG6) - dst_r = dst; - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw)); - } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); - } - - if (op == SLJIT_MOVD) { - if (dst <= SLJIT_FLOAT_REG6) - return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); - if (src <= SLJIT_FLOAT_REG6) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src); - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw)); - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); - } - - if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG6) { - dst_r = dst; - if (dst != src) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); - } - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); - } - - switch (GET_OPCODE(op)) { - case SLJIT_NEGD: - 
FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); - break; - - case SLJIT_ABSD: - FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); - break; - } - - if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - sljit_si dst_r; - - CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 1; -#endif - - if (dst <= SLJIT_FLOAT_REG6) { - dst_r = dst; - if (dst == src1) - ; /* Do nothing here. */ - else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) { - /* Swap arguments. 
*/ - src2 = src1; - src2w = src1w; - } - else if (dst != src2) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w)); - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); - } - } - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); - } - - switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); - break; - - case SLJIT_SUBD: - FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); - break; - - case SLJIT_MULD: - FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); - break; - - case SLJIT_DIVD: - FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); - break; - } - - if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); - return SLJIT_SUCCESS; -} - -#else - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. */ - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. 
*/ - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -#endif - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - sljit_ub *inst; - struct sljit_label *label; - - CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); - - /* We should restore the flags before the label, - since other taken jumps has their own flags as well. */ - if (SLJIT_UNLIKELY(compiler->flags_saved)) - PTR_FAIL_IF(emit_restore_flags(compiler, 0)); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - - inst = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!inst); - - *inst++ = 0; - *inst++ = 0; - - return label; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) -{ - sljit_ub *inst; - struct sljit_jump *jump; - - CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); - - if (SLJIT_UNLIKELY(compiler->flags_saved)) { - if ((type & 0xff) <= SLJIT_JUMP) - PTR_FAIL_IF(emit_restore_flags(compiler, 0)); - compiler->flags_saved = 0; - } - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF_NULL(jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - if (type >= SLJIT_CALL1) - PTR_FAIL_IF(call_with_args(compiler, type)); - - /* Worst case size. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; -#else - compiler->size += (type >= SLJIT_JUMP) ? 
(10 + 3) : (2 + 10 + 3); -#endif - - inst = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF_NULL(inst); - - *inst++ = 0; - *inst++ = type + 4; - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) -{ - sljit_ub *inst; - struct sljit_jump *jump; - - CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); - - CHECK_EXTRA_REGS(src, srcw, (void)0); - - if (SLJIT_UNLIKELY(compiler->flags_saved)) { - if (type <= SLJIT_JUMP) - FAIL_IF(emit_restore_flags(compiler, 0)); - compiler->flags_saved = 0; - } - - if (type >= SLJIT_CALL1) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_SCRATCH_REG3) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); - src = TMP_REGISTER; - } - if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3) - srcw += sizeof(sljit_sw); -#endif -#endif -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_SCRATCH_REG3) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); - src = TMP_REGISTER; - } -#endif - FAIL_IF(call_with_args(compiler, type)); - } - - if (src == SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF_NULL(jump); - set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; - - /* Worst case size. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += 5; -#else - compiler->size += 10 + 3; -#endif - - inst = (sljit_ub*)ensure_buf(compiler, 2); - FAIL_IF_NULL(inst); - - *inst++ = 0; - *inst++ = type + 4; - } - else { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - /* REX_W is not necessary (src is not immediate). 
*/ - compiler->mode32 = 1; -#endif - inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_FF; - *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; - } - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) -{ - sljit_ub *inst; - sljit_ub cond_set = 0; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si reg; -#else - /* CHECK_EXTRA_REGS migh overwrite these values. */ - sljit_si dst_save = dst; - sljit_sw dstw_save = dstw; -#endif - - CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - - ADJUST_LOCAL_OFFSET(dst, dstw); - CHECK_EXTRA_REGS(dst, dstw, (void)0); - if (SLJIT_UNLIKELY(compiler->flags_saved)) - FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); - - /* setcc = jcc + 0x10. */ - cond_set = get_jump_code(type) + 0x10; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); - FAIL_IF(!inst); - INC_SIZE(4 + 4); - /* Set low register to conditional flag. */ - *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; - *inst++ = GROUP_0F; - *inst++ = cond_set; - *inst++ = MOD_REG | reg_lmap[reg]; - *inst++ = REX_W | (reg_map[reg] <= 7 ? 
0 : (REX_B | REX_R)); - *inst++ = GROUP_0F; - *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; - - if (reg != TMP_REGISTER) - return SLJIT_SUCCESS; - - if (GET_OPCODE(op) < SLJIT_ADD) { - compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; - return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0); - } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0); -#else /* SLJIT_CONFIG_X86_64 */ - if (GET_OPCODE(op) < SLJIT_ADD && dst <= TMP_REGISTER) { - if (reg_map[dst] <= 4) { - /* Low byte is accessible. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); - FAIL_IF(!inst); - INC_SIZE(3 + 3); - /* Set low byte to conditional flag. */ - *inst++ = GROUP_0F; - *inst++ = cond_set; - *inst++ = MOD_REG | reg_map[dst]; - - *inst++ = GROUP_0F; - *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; - return SLJIT_SUCCESS; - } - - /* Low byte is not accessible. */ - if (cpu_has_cmov == -1) - get_cpu_features(); - - if (cpu_has_cmov) { - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1); - /* a xor reg, reg operation would overwrite the flags. */ - EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); - FAIL_IF(!inst); - INC_SIZE(3); - - *inst++ = GROUP_0F; - /* cmovcc = setcc - 0x50. */ - *inst++ = cond_set - 0x50; - *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER]; - return SLJIT_SUCCESS; - } - - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); - FAIL_IF(!inst); - INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; - /* Set al to conditional flag. 
*/ - *inst++ = GROUP_0F; - *inst++ = cond_set; - *inst++ = MOD_REG | 0 /* eax */; - - *inst++ = GROUP_0F; - *inst++ = MOVZX_r_rm8; - *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; - *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; - return SLJIT_SUCCESS; - } - - /* Set TMP_REGISTER to the bit. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); - FAIL_IF(!inst); - INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; - /* Set al to conditional flag. */ - *inst++ = GROUP_0F; - *inst++ = cond_set; - *inst++ = MOD_REG | 0 /* eax */; - - *inst++ = GROUP_0F; - *inst++ = MOVZX_r_rm8; - *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; - - *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; - - if (GET_OPCODE(op) < SLJIT_ADD) - return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0); -#endif /* SLJIT_CONFIG_X86_64 */ -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) -{ - CHECK_ERROR(); - check_sljit_get_local_base(compiler, dst, dstw, offset); - ADJUST_LOCAL_OFFSET(dst, dstw); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 0; -#endif - - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (NOT_HALFWORD(offset)) { - FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset)); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED); - return compiler->error; -#else - return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0); -#endif - } -#endif - - if (offset != 0) - 
return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0); -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) -{ - sljit_ub *inst; - struct sljit_const *const_; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si reg; -#endif - - CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); - ADJUST_LOCAL_OFFSET(dst, dstw); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = 0; - reg = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; - - if (emit_load_imm64(compiler, reg, init_value)) - return NULL; -#else - if (dst == SLJIT_UNUSED) - dst = TMP_REGISTER; - - if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) - return NULL; -#endif - - inst = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!inst); - - *inst++ = 0; - *inst++ = 1; - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (reg == TMP_REGISTER && dst != SLJIT_UNUSED) - if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0)) - return NULL; -#endif - - return const_; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) -{ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)addr = new_addr - (addr + 4); -#else - *(sljit_uw*)addr = new_addr; -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) -{ - *(sljit_sw*)addr = new_constant; -} diff --git a/deps/libmagic/pcre/sljit/sljitUtils.c b/deps/libmagic/pcre/sljit/sljitUtils.c deleted file mode 100644 index 1f023fa..0000000 --- a/deps/libmagic/pcre/sljit/sljitUtils.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Stack-less Just-In-Time 
compiler - * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* ------------------------------------------------------------------------ */ -/* Locks */ -/* ------------------------------------------------------------------------ */ - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) - -#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) - -static SLJIT_INLINE void allocator_grab_lock(void) -{ - /* Always successful. 
*/ -} - -static SLJIT_INLINE void allocator_release_lock(void) -{ - /* Always successful. */ -} - -#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ - -#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) -{ - /* Always successful. */ -} - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) -{ - /* Always successful. */ -} - -#endif /* SLJIT_UTIL_GLOBAL_LOCK */ - -#elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */ - -#include "windows.h" - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) - -static HANDLE allocator_mutex = 0; - -static SLJIT_INLINE void allocator_grab_lock(void) -{ - /* No idea what to do if an error occures. Static mutexes should never fail... */ - if (!allocator_mutex) - allocator_mutex = CreateMutex(NULL, TRUE, NULL); - else - WaitForSingleObject(allocator_mutex, INFINITE); -} - -static SLJIT_INLINE void allocator_release_lock(void) -{ - ReleaseMutex(allocator_mutex); -} - -#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ - -#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) - -static HANDLE global_mutex = 0; - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) -{ - /* No idea what to do if an error occures. Static mutexes should never fail... 
*/ - if (!global_mutex) - global_mutex = CreateMutex(NULL, TRUE, NULL); - else - WaitForSingleObject(global_mutex, INFINITE); -} - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) -{ - ReleaseMutex(global_mutex); -} - -#endif /* SLJIT_UTIL_GLOBAL_LOCK */ - -#else /* _WIN32 */ - -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) - -#include - -static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER; - -static SLJIT_INLINE void allocator_grab_lock(void) -{ - pthread_mutex_lock(&allocator_mutex); -} - -static SLJIT_INLINE void allocator_release_lock(void) -{ - pthread_mutex_unlock(&allocator_mutex); -} - -#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ - -#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) - -#include - -static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) -{ - pthread_mutex_lock(&global_mutex); -} - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) -{ - pthread_mutex_unlock(&global_mutex); -} - -#endif /* SLJIT_UTIL_GLOBAL_LOCK */ - -#endif /* _WIN32 */ - -/* ------------------------------------------------------------------------ */ -/* Stack */ -/* ------------------------------------------------------------------------ */ - -#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) - -#ifdef _WIN32 -#include "windows.h" -#else -/* Provides mmap function. */ -#include -/* For detecting the page size. */ -#include - -#ifndef MAP_ANON - -#include - -/* Some old systems does not have MAP_ANON. 
*/ -static sljit_si dev_zero = -1; - -#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) - -static SLJIT_INLINE sljit_si open_dev_zero(void) -{ - dev_zero = open("/dev/zero", O_RDWR); - return dev_zero < 0; -} - -#else /* SLJIT_SINGLE_THREADED */ - -#include - -static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; - -static SLJIT_INLINE sljit_si open_dev_zero(void) -{ - pthread_mutex_lock(&dev_zero_mutex); - dev_zero = open("/dev/zero", O_RDWR); - pthread_mutex_unlock(&dev_zero_mutex); - return dev_zero < 0; -} - -#endif /* SLJIT_SINGLE_THREADED */ - -#endif - -#endif - -#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */ - -#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) - -/* Planning to make it even more clever in the future. */ -static sljit_sw sljit_page_align = 0; - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit) -{ - struct sljit_stack *stack; - union { - void *ptr; - sljit_uw uw; - } base; -#ifdef _WIN32 - SYSTEM_INFO si; -#endif - - if (limit > max_limit || limit < 1) - return NULL; - -#ifdef _WIN32 - if (!sljit_page_align) { - GetSystemInfo(&si); - sljit_page_align = si.dwPageSize - 1; - } -#else - if (!sljit_page_align) { - sljit_page_align = sysconf(_SC_PAGESIZE); - /* Should never happen. */ - if (sljit_page_align < 0) - sljit_page_align = 4096; - sljit_page_align--; - } -#endif - - /* Align limit and max_limit. 
*/ - max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; - - stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack)); - if (!stack) - return NULL; - -#ifdef _WIN32 - base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); - if (!base.ptr) { - SLJIT_FREE(stack); - return NULL; - } - stack->base = base.uw; - stack->limit = stack->base; - stack->max_limit = stack->base + max_limit; - if (sljit_stack_resize(stack, stack->base + limit)) { - sljit_free_stack(stack); - return NULL; - } -#else -#ifdef MAP_ANON - base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); -#else - if (dev_zero < 0) { - if (open_dev_zero()) { - SLJIT_FREE(stack); - return NULL; - } - } - base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); -#endif - if (base.ptr == MAP_FAILED) { - SLJIT_FREE(stack); - return NULL; - } - stack->base = base.uw; - stack->limit = stack->base + limit; - stack->max_limit = stack->base + max_limit; -#endif - stack->top = stack->base; - return stack; -} - -#undef PAGE_ALIGN - -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack) -{ -#ifdef _WIN32 - VirtualFree((void*)stack->base, 0, MEM_RELEASE); -#else - munmap((void*)stack->base, stack->max_limit - stack->base); -#endif - SLJIT_FREE(stack); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit) -{ - sljit_uw aligned_old_limit; - sljit_uw aligned_new_limit; - - if ((new_limit > stack->max_limit) || (new_limit < stack->base)) - return -1; -#ifdef _WIN32 - aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; - aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; - if (aligned_new_limit != aligned_old_limit) { - if (aligned_new_limit > aligned_old_limit) { - if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE)) - return -1; - } - 
else { - if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT)) - return -1; - } - } - stack->limit = new_limit; - return 0; -#else - if (new_limit >= stack->limit) { - stack->limit = new_limit; - return 0; - } - aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; - aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; - /* If madvise is available, we release the unnecessary space. */ -#if defined(POSIX_MADV_DONTNEED) - if (aligned_new_limit < aligned_old_limit) - posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED); -#elif defined(MADV_DONTNEED) - if (aligned_new_limit < aligned_old_limit) - madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED); -#endif - stack->limit = new_limit; - return 0; -#endif -} - -#endif /* SLJIT_UTIL_STACK */ - -#endif diff --git a/deps/libmagic/pcre/ucp.h b/deps/libmagic/pcre/ucp.h deleted file mode 100644 index 2103910..0000000 --- a/deps/libmagic/pcre/ucp.h +++ /dev/null @@ -1,197 +0,0 @@ -/************************************************* -* Unicode Property Table handler * -*************************************************/ - -#ifndef _UCP_H -#define _UCP_H - -/* This file contains definitions of the property values that are returned by -the UCD access macros. New values that are added for new releases of Unicode -should always be at the end of each enum, for backwards compatibility. - -IMPORTANT: Note also that the specific numeric values of the enums have to be -the same as the values that are generated by the maint/MultiStage2.py script, -where the equivalent property descriptive names are listed in vectors. */ - -/* These are the general character categories. 
*/ - -enum { - ucp_C, /* Other */ - ucp_L, /* Letter */ - ucp_M, /* Mark */ - ucp_N, /* Number */ - ucp_P, /* Punctuation */ - ucp_S, /* Symbol */ - ucp_Z /* Separator */ -}; - -/* These are the particular character categories. */ - -enum { - ucp_Cc, /* Control */ - ucp_Cf, /* Format */ - ucp_Cn, /* Unassigned */ - ucp_Co, /* Private use */ - ucp_Cs, /* Surrogate */ - ucp_Ll, /* Lower case letter */ - ucp_Lm, /* Modifier letter */ - ucp_Lo, /* Other letter */ - ucp_Lt, /* Title case letter */ - ucp_Lu, /* Upper case letter */ - ucp_Mc, /* Spacing mark */ - ucp_Me, /* Enclosing mark */ - ucp_Mn, /* Non-spacing mark */ - ucp_Nd, /* Decimal number */ - ucp_Nl, /* Letter number */ - ucp_No, /* Other number */ - ucp_Pc, /* Connector punctuation */ - ucp_Pd, /* Dash punctuation */ - ucp_Pe, /* Close punctuation */ - ucp_Pf, /* Final punctuation */ - ucp_Pi, /* Initial punctuation */ - ucp_Po, /* Other punctuation */ - ucp_Ps, /* Open punctuation */ - ucp_Sc, /* Currency symbol */ - ucp_Sk, /* Modifier symbol */ - ucp_Sm, /* Mathematical symbol */ - ucp_So, /* Other symbol */ - ucp_Zl, /* Line separator */ - ucp_Zp, /* Paragraph separator */ - ucp_Zs /* Space separator */ -}; - -/* These are grapheme break properties. Note that the code for processing them -assumes that the values are less than 16. If more values are added that take -the number to 16 or more, the code will have to be rewritten. */ - -enum { - ucp_gbCR, /* 0 */ - ucp_gbLF, /* 1 */ - ucp_gbControl, /* 2 */ - ucp_gbExtend, /* 3 */ - ucp_gbPrepend, /* 4 */ - ucp_gbSpacingMark, /* 5 */ - ucp_gbL, /* 6 Hangul syllable type L */ - ucp_gbV, /* 7 Hangul syllable type V */ - ucp_gbT, /* 8 Hangul syllable type T */ - ucp_gbLV, /* 9 Hangul syllable type LV */ - ucp_gbLVT, /* 10 Hangul syllable type LVT */ - ucp_gbRegionalIndicator, /* 11 */ - ucp_gbOther /* 12 */ -}; - -/* These are the script identifications. 
*/ - -enum { - ucp_Arabic, - ucp_Armenian, - ucp_Bengali, - ucp_Bopomofo, - ucp_Braille, - ucp_Buginese, - ucp_Buhid, - ucp_Canadian_Aboriginal, - ucp_Cherokee, - ucp_Common, - ucp_Coptic, - ucp_Cypriot, - ucp_Cyrillic, - ucp_Deseret, - ucp_Devanagari, - ucp_Ethiopic, - ucp_Georgian, - ucp_Glagolitic, - ucp_Gothic, - ucp_Greek, - ucp_Gujarati, - ucp_Gurmukhi, - ucp_Han, - ucp_Hangul, - ucp_Hanunoo, - ucp_Hebrew, - ucp_Hiragana, - ucp_Inherited, - ucp_Kannada, - ucp_Katakana, - ucp_Kharoshthi, - ucp_Khmer, - ucp_Lao, - ucp_Latin, - ucp_Limbu, - ucp_Linear_B, - ucp_Malayalam, - ucp_Mongolian, - ucp_Myanmar, - ucp_New_Tai_Lue, - ucp_Ogham, - ucp_Old_Italic, - ucp_Old_Persian, - ucp_Oriya, - ucp_Osmanya, - ucp_Runic, - ucp_Shavian, - ucp_Sinhala, - ucp_Syloti_Nagri, - ucp_Syriac, - ucp_Tagalog, - ucp_Tagbanwa, - ucp_Tai_Le, - ucp_Tamil, - ucp_Telugu, - ucp_Thaana, - ucp_Thai, - ucp_Tibetan, - ucp_Tifinagh, - ucp_Ugaritic, - ucp_Yi, - /* New for Unicode 5.0: */ - ucp_Balinese, - ucp_Cuneiform, - ucp_Nko, - ucp_Phags_Pa, - ucp_Phoenician, - /* New for Unicode 5.1: */ - ucp_Carian, - ucp_Cham, - ucp_Kayah_Li, - ucp_Lepcha, - ucp_Lycian, - ucp_Lydian, - ucp_Ol_Chiki, - ucp_Rejang, - ucp_Saurashtra, - ucp_Sundanese, - ucp_Vai, - /* New for Unicode 5.2: */ - ucp_Avestan, - ucp_Bamum, - ucp_Egyptian_Hieroglyphs, - ucp_Imperial_Aramaic, - ucp_Inscriptional_Pahlavi, - ucp_Inscriptional_Parthian, - ucp_Javanese, - ucp_Kaithi, - ucp_Lisu, - ucp_Meetei_Mayek, - ucp_Old_South_Arabian, - ucp_Old_Turkic, - ucp_Samaritan, - ucp_Tai_Tham, - ucp_Tai_Viet, - /* New for Unicode 6.0.0: */ - ucp_Batak, - ucp_Brahmi, - ucp_Mandaic, - /* New for Unicode 6.1.0: */ - ucp_Chakma, - ucp_Meroitic_Cursive, - ucp_Meroitic_Hieroglyphs, - ucp_Miao, - ucp_Sharada, - ucp_Sora_Sompeng, - ucp_Takri -}; - -#endif - -/* End of ucp.h */ diff --git a/deps/libmagic/src/apprentice.c b/deps/libmagic/src/apprentice.c index 42657fb..8197f65 100644 --- a/deps/libmagic/src/apprentice.c +++ 
b/deps/libmagic/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.249 2016/05/17 21:43:07 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.262 2017/08/28 13:39:18 christos Exp $") #endif /* lint */ #include "magic.h" @@ -411,11 +411,11 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) { struct mlist *ml; - mlp->map = idx == 0 ? map : NULL; + mlp->map = NULL; if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) return -1; - ml->map = NULL; + ml->map = idx == 0 ? map : NULL; ml->magic = map->magic[idx]; ml->nmagic = map->nmagic[idx]; @@ -454,6 +454,8 @@ apprentice_1(struct magic_set *ms, const char *fn, int action) #ifndef COMPILE_ONLY map = apprentice_map(ms, fn); + if (map == (struct magic_map *)-1) + return -1; if (map == NULL) { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "using regular magic file `%s'", fn); @@ -465,7 +467,7 @@ apprentice_1(struct magic_set *ms, const char *fn, int action) for (i = 0; i < MAGIC_SETS; i++) { if (add_mlist(ms->mlist[i], map, i) == -1) { file_oomem(ms, sizeof(*ml)); - goto fail; + return -1; } } @@ -479,12 +481,6 @@ apprentice_1(struct magic_set *ms, const char *fn, int action) } } return 0; -fail: - for (i = 0; i < MAGIC_SETS; i++) { - mlist_free(ms->mlist[i]); - ms->mlist[i] = NULL; - } - return -1; #else return 0; #endif /* COMPILE_ONLY */ @@ -556,8 +552,10 @@ apprentice_unmap(struct magic_map *map) break; case MAP_TYPE_MALLOC: for (i = 0; i < MAGIC_SETS; i++) { - if ((char *)map->magic[i] >= (char *)map->p && - (char *)map->magic[i] < (char *)map->p + map->len) + void *b = map->magic[i]; + void *p = map->p; + if (CAST(char *, b) >= CAST(char *, p) && + CAST(char *, b) <= CAST(char *, p) + map->len) continue; free(map->magic[i]); } @@ -597,7 +595,7 @@ mlist_free(struct mlist *mlist) ml = mlist->next; for (ml = mlist->next; (next = ml->next) != NULL; ml = next) { if (ml->map) - apprentice_unmap(ml->map); + apprentice_unmap(CAST(struct 
magic_map *, ml->map)); free(ml); if (ml == mlist) break; @@ -617,8 +615,7 @@ buffer_apprentice(struct magic_set *ms, struct magic **bufs, if (nbufs == 0) return -1; - if (ms->mlist[0] != NULL) - file_reset(ms); + (void)file_reset(ms, 0); init_file_tables(); @@ -661,8 +658,7 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) int file_err, errs = -1; size_t i; - if (ms->mlist[0] != NULL) - file_reset(ms); + (void)file_reset(ms, 0); if ((fn = magic_getpath(fn, action)) == NULL) return -1; @@ -785,6 +781,59 @@ nonmagic(const char *str) return rv == 0 ? 1 : rv; /* Return at least 1 */ } + +private size_t +typesize(int type) +{ + switch (type) { + case FILE_BYTE: + return 1; + + case FILE_SHORT: + case FILE_LESHORT: + case FILE_BESHORT: + return 2; + + case FILE_LONG: + case FILE_LELONG: + case FILE_BELONG: + case FILE_MELONG: + return 4; + + case FILE_DATE: + case FILE_LEDATE: + case FILE_BEDATE: + case FILE_MEDATE: + case FILE_LDATE: + case FILE_LELDATE: + case FILE_BELDATE: + case FILE_MELDATE: + case FILE_FLOAT: + case FILE_BEFLOAT: + case FILE_LEFLOAT: + return 4; + + case FILE_QUAD: + case FILE_BEQUAD: + case FILE_LEQUAD: + case FILE_QDATE: + case FILE_LEQDATE: + case FILE_BEQDATE: + case FILE_QLDATE: + case FILE_LEQLDATE: + case FILE_BEQLDATE: + case FILE_QWDATE: + case FILE_LEQWDATE: + case FILE_BEQWDATE: + case FILE_DOUBLE: + case FILE_BEDOUBLE: + case FILE_LEDOUBLE: + return 8; + default: + return (size_t)~0; + } +} + /* * Get weight of this magic entry, for sorting purposes. 
*/ @@ -792,7 +841,7 @@ private size_t apprentice_magic_strength(const struct magic *m) { #define MULT 10 - size_t v, val = 2 * MULT; /* baseline strength */ + size_t ts, v, val = 2 * MULT; /* baseline strength */ switch (m->type) { case FILE_DEFAULT: /* make sure this sorts last */ @@ -801,41 +850,13 @@ apprentice_magic_strength(const struct magic *m) return 0; case FILE_BYTE: - val += 1 * MULT; - break; - case FILE_SHORT: case FILE_LESHORT: case FILE_BESHORT: - val += 2 * MULT; - break; - case FILE_LONG: case FILE_LELONG: case FILE_BELONG: case FILE_MELONG: - val += 4 * MULT; - break; - - case FILE_PSTRING: - case FILE_STRING: - val += m->vallen * MULT; - break; - - case FILE_BESTRING16: - case FILE_LESTRING16: - val += m->vallen * MULT / 2; - break; - - case FILE_SEARCH: - val += m->vallen * MAX(MULT / m->vallen, 1); - break; - - case FILE_REGEX: - v = nonmagic(m->value.s); - val += v * MAX(MULT / v, 1); - break; - case FILE_DATE: case FILE_LEDATE: case FILE_BEDATE: @@ -847,9 +868,6 @@ apprentice_magic_strength(const struct magic *m) case FILE_FLOAT: case FILE_BEFLOAT: case FILE_LEFLOAT: - val += 4 * MULT; - break; - case FILE_QUAD: case FILE_BEQUAD: case FILE_LEQUAD: @@ -865,7 +883,29 @@ apprentice_magic_strength(const struct magic *m) case FILE_DOUBLE: case FILE_BEDOUBLE: case FILE_LEDOUBLE: - val += 8 * MULT; + ts = typesize(m->type); + if (ts == (size_t)~0) + abort(); + val += ts * MULT; + break; + + case FILE_PSTRING: + case FILE_STRING: + val += m->vallen * MULT; + break; + + case FILE_BESTRING16: + case FILE_LESTRING16: + val += m->vallen * MULT / 2; + break; + + case FILE_SEARCH: + val += m->vallen * MAX(MULT / m->vallen, 1); + break; + + case FILE_REGEX: + v = nonmagic(m->value.s); + val += v * MAX(MULT / v, 1); break; case FILE_INDIRECT: @@ -1323,6 +1363,8 @@ apprentice_load(struct magic_set *ms, const char *fn, int action) goto out; } while ((d = readdir(dir)) != NULL) { + if (d->d_name[0] == '.') + continue; if (asprintf(&mfn, "%s/%s", fn, d->d_name) 
< 0) { file_oomem(ms, strlen(fn) + strlen(d->d_name) + 2); @@ -1884,10 +1926,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, if (m->flag & INDIR) { m->in_type = FILE_LONG; m->in_offset = 0; + m->in_op = 0; /* - * read [.lbs][+-]nnnnn) + * read [.,lbs][+-]nnnnn) */ - if (*l == '.') { + if (*l == '.' || *l == ',') { + if (*l == ',') + m->in_op |= FILE_OPSIGNED; l++; switch (*l) { case 'l': @@ -1939,7 +1984,6 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, l++; } - m->in_op = 0; if (*l == '~') { m->in_op |= FILE_OPINVERSE; l++; @@ -2298,7 +2342,7 @@ parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line) return parse_extra(ms, me, line, CAST(off_t, offsetof(struct magic, ext)), - sizeof(m->ext), "EXTENSION", ",!+-/", 0); + sizeof(m->ext), "EXTENSION", ",!+-/@", 0); } /* @@ -2359,6 +2403,8 @@ check_format_type(const char *ptr, int type, const char **estr) ptr++; if (*ptr == '.') ptr++; + if (*ptr == '#') + ptr++; #define CHECKLEN() do { \ for (len = cnt = 0; isdigit((unsigned char)*ptr); ptr++, cnt++) \ len = len * 10 + (*ptr - '0'); \ @@ -2624,9 +2670,46 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) default: if (m->reln != 'x') { char *ep; + uint64_t ull; errno = 0; - m->value.q = file_signextend(ms, m, - (uint64_t)strtoull(*p, &ep, 0)); + ull = (uint64_t)strtoull(*p, &ep, 0); + m->value.q = file_signextend(ms, m, ull); + if (*p == ep) { + file_magwarn(ms, "Unparseable number `%s'", *p); + } else { + size_t ts = typesize(m->type); + uint64_t x; + const char *q; + + if (ts == (size_t)~0) { + file_magwarn(ms, "Expected numeric type got `%s'", + type_tbl[m->type].name); + } + for (q = *p; isspace((unsigned char)*q); q++) + continue; + if (*q == '-') + ull = -(int64_t)ull; + switch (ts) { + case 1: + x = ull & ~0xffULL; + break; + case 2: + x = ull & ~0xffffULL; + break; + case 4: + x = ull & ~0xffffffffULL; + break; + case 8: + x = 0; + break; + default: + abort(); + } 
+ if (x) { + file_magwarn(ms, "Overflow for numeric type `%s' value %#" PRIx64, + type_tbl[m->type].name, ull); + } + } if (errno == 0) { *p = ep; eatsize(p); @@ -2935,6 +3018,7 @@ apprentice_map(struct magic_set *ms, const char *fn) struct stat st; char *dbname = NULL; struct magic_map *map; + struct magic_map *rv = NULL; fd = -1; if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { @@ -2983,8 +3067,10 @@ apprentice_map(struct magic_set *ms, const char *fn) (void)close(fd); fd = -1; - if (check_buffer(ms, map, dbname) != 0) + if (check_buffer(ms, map, dbname) != 0) { + rv = (struct magic_map *)-1; goto error; + } #ifdef QUICK if (mprotect(map->p, (size_t)st.st_size, PROT_READ) == -1) { file_error(ms, errno, "cannot mprotect `%s'", dbname); @@ -3000,7 +3086,7 @@ apprentice_map(struct magic_set *ms, const char *fn) (void)close(fd); apprentice_unmap(map); free(dbname); - return NULL; + return rv; } private int @@ -3156,7 +3242,7 @@ mkdbname(struct magic_set *ms, const char *fn, int strip) return NULL; /* Compatibility with old code that looked in .mime */ - if (strstr(p, ".mime") != NULL) + if (strstr(fn, ".mime") != NULL) ms->flags &= MAGIC_MIME_TYPE; return buf; } @@ -3275,22 +3361,35 @@ file_pstring_get_length(const struct magic *m, const char *ss) { size_t len = 0; const unsigned char *s = (const unsigned char *)ss; + unsigned int s3, s2, s1, s0; switch (m->str_flags & PSTRING_LEN) { case PSTRING_1_LE: len = *s; break; case PSTRING_2_LE: - len = (s[1] << 8) | s[0]; + s0 = s[0]; + s1 = s[1]; + len = (s1 << 8) | s0; break; case PSTRING_2_BE: - len = (s[0] << 8) | s[1]; + s0 = s[0]; + s1 = s[1]; + len = (s0 << 8) | s1; break; case PSTRING_4_LE: - len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; + s0 = s[0]; + s1 = s[1]; + s2 = s[2]; + s3 = s[3]; + len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0; break; case PSTRING_4_BE: - len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; + s0 = s[0]; + s1 = s[1]; + s2 = s[2]; + s3 = s[3]; + len = (s0 
<< 24) | (s1 << 16) | (s2 << 8) | s3; break; default: abort(); /* Impossible */ diff --git a/deps/libmagic/src/ascmagic.c b/deps/libmagic/src/ascmagic.c index baa95ab..85a973e 100644 --- a/deps/libmagic/src/ascmagic.c +++ b/deps/libmagic/src/ascmagic.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.95 2016/05/03 16:10:37 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.97 2016/06/27 20:56:25 christos Exp $") #endif /* lint */ #include "magic.h" @@ -144,8 +144,10 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, (size_t)(utf8_end - utf8_buf), NULL, NULL, TEXTTEST, text)) == 0) rv = -1; - if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) - return rv == -1 ? 0 : 1; + if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) { + rv = rv == -1 ? 0 : 1; + goto done; + } } if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) return 0; diff --git a/deps/libmagic/src/cdf.c b/deps/libmagic/src/cdf.c index 22ad306..0bb22e6 100644 --- a/deps/libmagic/src/cdf.c +++ b/deps/libmagic/src/cdf.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: cdf.c,v 1.82 2016/06/01 22:25:25 christos Exp $") +FILE_RCSID("@(#)$File: cdf.c,v 1.106 2017/04/30 17:05:02 christos Exp $") #endif #include @@ -44,6 +44,7 @@ FILE_RCSID("@(#)$File: cdf.c,v 1.82 2016/06/01 22:25:25 christos Exp $") #endif #include // XXX: change by mscdex +//#include #include #include #include @@ -80,6 +81,34 @@ static union { CDF_TOLE8(CAST(uint64_t, x)))) #define CDF_GETUINT32(x, y) cdf_getuint32(x, y) +#define CDF_MALLOC(n) cdf_malloc(__FILE__, __LINE__, (n)) +#define CDF_REALLOC(p, n) cdf_realloc(__FILE__, __LINE__, (p), (n)) +#define CDF_CALLOC(n, u) cdf_calloc(__FILE__, __LINE__, (n), (u)) + + +static void * +cdf_malloc(const char *file __attribute__((__unused__)), + size_t line __attribute__((__unused__)), size_t n) +{ + DPRINTF(("%s,%zu: %s %zu\n", file, line, __func__, n)); + return malloc(n); +} + +static void * +cdf_realloc(const 
char *file __attribute__((__unused__)), + size_t line __attribute__((__unused__)), void *p, size_t n) +{ + DPRINTF(("%s,%zu: %s %zu\n", file, line, __func__, n)); + return realloc(p, n); +} + +static void * +cdf_calloc(const char *file __attribute__((__unused__)), + size_t line __attribute__((__unused__)), size_t n, size_t u) +{ + DPRINTF(("%s,%zu: %s %zu %zu\n", file, line, __func__, n, u)); + return calloc(n, u); +} /* * swap a short @@ -266,7 +295,7 @@ cdf_unpack_dir(cdf_directory_t *d, char *buf) CDF_UNPACK(d->d_unused0); } -static int +int cdf_zero_stream(cdf_stream_t *scn) { scn->sst_len = 0; @@ -340,18 +369,18 @@ cdf_read_header(const cdf_info_t *info, cdf_header_t *h) cdf_unpack_header(h, buf); cdf_swap_header(h); if (h->h_magic != CDF_MAGIC) { - DPRINTF(("Bad magic 0x%" INT64_T_FORMAT "x != 0x%" + DPRINTF(("Bad magic %#" INT64_T_FORMAT "x != %#" INT64_T_FORMAT "x\n", (unsigned long long)h->h_magic, (unsigned long long)CDF_MAGIC)); goto out; } if (h->h_sec_size_p2 > 20) { - DPRINTF(("Bad sector size 0x%u\n", h->h_sec_size_p2)); + DPRINTF(("Bad sector size %hu\n", h->h_sec_size_p2)); goto out; } if (h->h_short_sec_size_p2 > 20) { - DPRINTF(("Bad short sector size 0x%u\n", + DPRINTF(("Bad short sector size %hu\n", h->h_short_sec_size_p2)); goto out; } @@ -408,7 +437,7 @@ cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat) if (h->h_master_sat[i] == CDF_SECID_FREE) break; -#define CDF_SEC_LIMIT (UINT32_MAX / (4 * ss)) +#define CDF_SEC_LIMIT (UINT32_MAX / (8 * ss)) if ((nsatpersec > 0 && h->h_num_sectors_in_master_sat > CDF_SEC_LIMIT / nsatpersec) || i > CDF_SEC_LIMIT) { @@ -421,7 +450,7 @@ cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat) sat->sat_len = h->h_num_sectors_in_master_sat * nsatpersec + i; DPRINTF(("sat_len = %" SIZE_T_FORMAT "u ss = %" SIZE_T_FORMAT "u\n", sat->sat_len, ss)); - if ((sat->sat_tab = CAST(cdf_secid_t *, calloc(sat->sat_len, ss))) + if ((sat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(sat->sat_len, 
ss))) == NULL) return -1; @@ -435,7 +464,7 @@ cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat) } } - if ((msa = CAST(cdf_secid_t *, calloc(1, ss))) == NULL) + if ((msa = CAST(cdf_secid_t *, CDF_CALLOC(1, ss))) == NULL) goto out1; mid = h->h_secid_first_sector_in_master_sat; @@ -527,13 +556,16 @@ cdf_read_long_sector_chain(const cdf_info_t *info, const cdf_header_t *h, ssize_t nr; scn->sst_tab = NULL; scn->sst_len = cdf_count_chain(sat, sid, ss); - scn->sst_dirlen = len; + scn->sst_dirlen = MAX(h->h_min_size_standard_stream, len); scn->sst_ss = ss; + if (sid == CDF_SECID_END_OF_CHAIN || len == 0) + return cdf_zero_stream(scn); + if (scn->sst_len == (size_t)-1) goto out; - scn->sst_tab = calloc(scn->sst_len, ss); + scn->sst_tab = CDF_CALLOC(scn->sst_len, ss); if (scn->sst_tab == NULL) return cdf_zero_stream(scn); @@ -579,7 +611,7 @@ cdf_read_short_sector_chain(const cdf_header_t *h, if (scn->sst_len == (size_t)-1) goto out; - scn->sst_tab = calloc(scn->sst_len, ss); + scn->sst_tab = CDF_CALLOC(scn->sst_len, ss); if (scn->sst_tab == NULL) return cdf_zero_stream(scn); @@ -637,11 +669,11 @@ cdf_read_dir(const cdf_info_t *info, const cdf_header_t *h, dir->dir_len = ns * nd; dir->dir_tab = CAST(cdf_directory_t *, - calloc(dir->dir_len, sizeof(dir->dir_tab[0]))); + CDF_CALLOC(dir->dir_len, sizeof(dir->dir_tab[0]))); if (dir->dir_tab == NULL) return -1; - if ((buf = CAST(char *, malloc(ss))) == NULL) { + if ((buf = CAST(char *, CDF_MALLOC(ss))) == NULL) { free(dir->dir_tab); return -1; } @@ -687,7 +719,7 @@ cdf_read_ssat(const cdf_info_t *info, const cdf_header_t *h, if (ssat->sat_len == (size_t)-1) goto out; - ssat->sat_tab = CAST(cdf_secid_t *, calloc(ssat->sat_len, ss)); + ssat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(ssat->sat_len, ss)); if (ssat->sat_tab == NULL) goto out1; @@ -731,22 +763,25 @@ cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h, break; /* If the it is not there, just fake it; some docs don't have it */ - if (i == 
dir->dir_len) + if (i == dir->dir_len) { + DPRINTF(("Cannot find root storage dir\n")); goto out; + } d = &dir->dir_tab[i]; *root = d; /* If the it is not there, just fake it; some docs don't have it */ - if (d->d_stream_first_sector < 0) + if (d->d_stream_first_sector < 0) { + DPRINTF(("No first secror in dir\n")); goto out; + } return cdf_read_long_sector_chain(info, h, sat, d->d_stream_first_sector, d->d_size, scn); out: scn->sst_tab = NULL; (void)cdf_zero_stream(scn); - errno = EFTYPE; - return -1; + return 0; } static int @@ -758,6 +793,15 @@ cdf_namecmp(const char *d, const uint16_t *s, size_t l) return 0; } +int +cdf_read_doc_summary_info(const cdf_info_t *info, const cdf_header_t *h, + const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst, + const cdf_dir_t *dir, cdf_stream_t *scn) +{ + return cdf_read_user_stream(info, h, sat, ssat, sst, dir, + "\05DocumentSummaryInformation", scn); +} + int cdf_read_summary_info(const cdf_info_t *info, const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst, @@ -796,13 +840,107 @@ cdf_find_stream(const cdf_dir_t *dir, const char *name, int type) == 0) break; if (i > 0) - return i; + return CAST(int, i); DPRINTF(("Cannot find type %d `%s'\n", type, name)); errno = ESRCH; return 0; } +#define CDF_SHLEN_LIMIT (UINT32_MAX / 8) +#define CDF_PROP_LIMIT (UINT32_MAX / (8 * sizeof(cdf_property_info_t))) + +static const void * +cdf_offset(const void *p, size_t l) +{ + return CAST(const void *, CAST(const uint8_t *, p) + l); +} + +static const uint8_t * +cdf_get_property_info_pos(const cdf_stream_t *sst, const cdf_header_t *h, + const uint8_t *p, const uint8_t *e, size_t i) +{ + size_t tail = (i << 1) + 1; + size_t ofs; + const uint8_t *q; + + if (p >= e) { + DPRINTF(("Past end %p < %p\n", e, p)); + return NULL; + } + if (cdf_check_stream_offset(sst, h, p, (tail + 1) * sizeof(uint32_t), + __LINE__) == -1) + return NULL; + ofs = CDF_GETUINT32(p, tail); + q = CAST(const uint8_t *, 
cdf_offset(CAST(const void *, p), + ofs - 2 * sizeof(uint32_t))); + + if (q < p) { + DPRINTF(("Wrapped around %p < %p\n", q, p)); + return NULL; + } + + if (q >= e) { + DPRINTF(("Ran off the end %p >= %p\n", q, e)); + return NULL; + } + return q; +} + +static cdf_property_info_t * +cdf_grow_info(cdf_property_info_t **info, size_t *maxcount, size_t incr) +{ + cdf_property_info_t *inp; + size_t newcount = *maxcount + incr; + + if (newcount > CDF_PROP_LIMIT) { + DPRINTF(("exceeded property limit %zu > %zu\n", + newcount, CDF_PROP_LIMIT)); + goto out; + } + inp = CAST(cdf_property_info_t *, + CDF_REALLOC(*info, newcount * sizeof(*inp))); + if (inp == NULL) + goto out; + + *info = inp; + *maxcount = newcount; + return inp; +out: + free(*info); + *maxcount = 0; + *info = NULL; + return NULL; +} + +static int +cdf_copy_info(cdf_property_info_t *inp, const void *p, const void *e, + size_t len) +{ + if (inp->pi_type & CDF_VECTOR) + return 0; + + if ((size_t)(CAST(const char *, e) - CAST(const char *, p)) < len) + return 0; + + (void)memcpy(&inp->pi_val, p, len); + + switch (len) { + case 2: + inp->pi_u16 = CDF_TOLE2(inp->pi_u16); + break; + case 4: + inp->pi_u32 = CDF_TOLE4(inp->pi_u32); + break; + case 8: + inp->pi_u64 = CDF_TOLE8(inp->pi_u64); + break; + default: + abort(); + } + return 1; +} + int cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h, uint32_t offs, cdf_property_info_t **info, size_t *count, size_t *maxcount) @@ -810,92 +948,69 @@ cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h, const cdf_section_header_t *shp; cdf_section_header_t sh; const uint8_t *p, *q, *e; - int16_t s16; - int32_t s32; - uint32_t u32; - int64_t s64; - uint64_t u64; - cdf_timestamp_t tp; - size_t i, o, o4, nelements, j; + size_t i, o4, nelements, j, slen, left; cdf_property_info_t *inp; if (offs > UINT32_MAX / 4) { errno = EFTYPE; goto out; } - shp = CAST(const cdf_section_header_t *, (const void *) - ((const char *)sst->sst_tab + offs)); + shp = 
CAST(const cdf_section_header_t *, + cdf_offset(sst->sst_tab, offs)); if (cdf_check_stream_offset(sst, h, shp, sizeof(*shp), __LINE__) == -1) goto out; sh.sh_len = CDF_TOLE4(shp->sh_len); -#define CDF_SHLEN_LIMIT (UINT32_MAX / 8) if (sh.sh_len > CDF_SHLEN_LIMIT) { errno = EFTYPE; goto out; } - sh.sh_properties = CDF_TOLE4(shp->sh_properties); -#define CDF_PROP_LIMIT (UINT32_MAX / (4 * sizeof(*inp))) - if (sh.sh_properties > CDF_PROP_LIMIT) + + if (cdf_check_stream_offset(sst, h, shp, sh.sh_len, __LINE__) == -1) goto out; + + sh.sh_properties = CDF_TOLE4(shp->sh_properties); DPRINTF(("section len: %u properties %u\n", sh.sh_len, sh.sh_properties)); - if (*maxcount) { - if (*maxcount > CDF_PROP_LIMIT) - goto out; - *maxcount += sh.sh_properties; - inp = CAST(cdf_property_info_t *, - realloc(*info, *maxcount * sizeof(*inp))); - } else { - *maxcount = sh.sh_properties; - inp = CAST(cdf_property_info_t *, - malloc(*maxcount * sizeof(*inp))); - } + if (sh.sh_properties > CDF_PROP_LIMIT) + goto out; + inp = cdf_grow_info(info, maxcount, sh.sh_properties); if (inp == NULL) - goto out1; - *info = inp; + goto out; inp += *count; *count += sh.sh_properties; - p = CAST(const uint8_t *, (const void *) - ((const char *)(const void *)sst->sst_tab + - offs + sizeof(sh))); - e = CAST(const uint8_t *, (const void *) - (((const char *)(const void *)shp) + sh.sh_len)); - if (cdf_check_stream_offset(sst, h, e, 0, __LINE__) == -1) + p = CAST(const uint8_t *, cdf_offset(sst->sst_tab, offs + sizeof(sh))); + e = CAST(const uint8_t *, cdf_offset(shp, sh.sh_len)); + if (p >= e || cdf_check_stream_offset(sst, h, e, 0, __LINE__) == -1) goto out; + for (i = 0; i < sh.sh_properties; i++) { - size_t tail = (i << 1) + 1; - size_t ofs; - if (cdf_check_stream_offset(sst, h, p, tail * sizeof(uint32_t), - __LINE__) == -1) - goto out; - ofs = CDF_GETUINT32(p, tail); - q = (const uint8_t *)(const void *) - ((const char *)(const void *)p + ofs - - 2 * sizeof(uint32_t)); - if (q < p) { - DPRINTF(("Wrapped 
around %p < %p\n", q, p)); + if ((q = cdf_get_property_info_pos(sst, h, p, e, i)) == NULL) goto out; - } - if (q > e) { - DPRINTF(("Ran of the end %p > %p\n", q, e)); + inp[i].pi_id = CDF_GETUINT32(p, i << 1); + left = CAST(size_t, e - q); + if (left < sizeof(uint32_t)) { + DPRINTF(("short info (no type)_\n")); goto out; } - inp[i].pi_id = CDF_GETUINT32(p, i << 1); inp[i].pi_type = CDF_GETUINT32(q, 0); - DPRINTF(("%" SIZE_T_FORMAT "u) id=%x type=%x offs=0x%tx,0x%x\n", + DPRINTF(("%" SIZE_T_FORMAT "u) id=%#x type=%#x offs=%#tx,%#x\n", i, inp[i].pi_id, inp[i].pi_type, q - p, offs)); if (inp[i].pi_type & CDF_VECTOR) { + if (left < sizeof(uint32_t) * 2) { + DPRINTF(("missing CDF_VECTOR length\n")); + goto out; + } nelements = CDF_GETUINT32(q, 1); if (nelements == 0) { DPRINTF(("CDF_VECTOR with nelements == 0\n")); goto out; } - o = 2; + slen = 2; } else { nelements = 1; - o = 1; + slen = 1; } - o4 = o * sizeof(uint32_t); + o4 = slen * sizeof(uint32_t); if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED)) goto unknown; switch (inp[i].pi_type & CDF_TYPEMASK) { @@ -903,109 +1018,83 @@ cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h, case CDF_EMPTY: break; case CDF_SIGNED16: - if (inp[i].pi_type & CDF_VECTOR) + if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int16_t))) goto unknown; - (void)memcpy(&s16, &q[o4], sizeof(s16)); - inp[i].pi_s16 = CDF_TOLE2(s16); break; case CDF_SIGNED32: - if (inp[i].pi_type & CDF_VECTOR) - goto unknown; - (void)memcpy(&s32, &q[o4], sizeof(s32)); - inp[i].pi_s32 = CDF_TOLE4((uint32_t)s32); - break; case CDF_BOOL: case CDF_UNSIGNED32: - if (inp[i].pi_type & CDF_VECTOR) + case CDF_FLOAT: + if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int32_t))) goto unknown; - (void)memcpy(&u32, &q[o4], sizeof(u32)); - inp[i].pi_u32 = CDF_TOLE4(u32); break; case CDF_SIGNED64: - if (inp[i].pi_type & CDF_VECTOR) - goto unknown; - (void)memcpy(&s64, &q[o4], sizeof(s64)); - inp[i].pi_s64 = CDF_TOLE8((uint64_t)s64); - break; case 
CDF_UNSIGNED64: - if (inp[i].pi_type & CDF_VECTOR) - goto unknown; - (void)memcpy(&u64, &q[o4], sizeof(u64)); - inp[i].pi_u64 = CDF_TOLE8((uint64_t)u64); - break; - case CDF_FLOAT: - if (inp[i].pi_type & CDF_VECTOR) - goto unknown; - (void)memcpy(&u32, &q[o4], sizeof(u32)); - u32 = CDF_TOLE4(u32); - memcpy(&inp[i].pi_f, &u32, sizeof(inp[i].pi_f)); - break; case CDF_DOUBLE: - if (inp[i].pi_type & CDF_VECTOR) + case CDF_FILETIME: + if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int64_t))) goto unknown; - (void)memcpy(&u64, &q[o4], sizeof(u64)); - u64 = CDF_TOLE8((uint64_t)u64); - memcpy(&inp[i].pi_d, &u64, sizeof(inp[i].pi_d)); break; case CDF_LENGTH32_STRING: case CDF_LENGTH32_WSTRING: if (nelements > 1) { size_t nelem = inp - *info; - if (*maxcount > CDF_PROP_LIMIT - || nelements > CDF_PROP_LIMIT) - goto out; - *maxcount += nelements; - inp = CAST(cdf_property_info_t *, - realloc(*info, *maxcount * sizeof(*inp))); + inp = cdf_grow_info(info, maxcount, nelements); if (inp == NULL) - goto out1; - *info = inp; - inp = *info + nelem; + goto out; + inp += nelem; } DPRINTF(("nelements = %" SIZE_T_FORMAT "u\n", nelements)); for (j = 0; j < nelements && i < sh.sh_properties; j++, i++) { - uint32_t l = CDF_GETUINT32(q, o); + uint32_t l; + + if (o4 + sizeof(uint32_t) > left) + goto out; + + l = CDF_GETUINT32(q, slen); + o4 += sizeof(uint32_t); + if (o4 + l > left) + goto out; + inp[i].pi_str.s_len = l; - inp[i].pi_str.s_buf = (const char *) - (const void *)(&q[o4 + sizeof(l)]); - DPRINTF(("l = %d, r = %" SIZE_T_FORMAT - "u, s = %s\n", l, - CDF_ROUND(l, sizeof(l)), + inp[i].pi_str.s_buf = CAST(const char *, + CAST(const void *, &q[o4])); + + DPRINTF(("o=%zu l=%d(%" SIZE_T_FORMAT + "u), t=%zu s=%s\n", o4, l, + CDF_ROUND(l, sizeof(l)), left, inp[i].pi_str.s_buf)); + if (l & 1) l++; - o += l >> 1; - if (q + o >= e) - goto out; - o4 = o * sizeof(uint32_t); + + slen += l >> 1; + o4 = slen * sizeof(uint32_t); } i--; break; - case CDF_FILETIME: - if (inp[i].pi_type & CDF_VECTOR) - 
goto unknown; - (void)memcpy(&tp, &q[o4], sizeof(tp)); - inp[i].pi_tp = CDF_TOLE8((uint64_t)tp); - break; case CDF_CLIPBOARD: if (inp[i].pi_type & CDF_VECTOR) goto unknown; break; default: unknown: - DPRINTF(("Don't know how to deal with %x\n", + memset(&inp[i].pi_val, 0, sizeof(inp[i].pi_val)); + DPRINTF(("Don't know how to deal with %#x\n", inp[i].pi_type)); break; } } return 0; out: - errno = EFTYPE; -out1: free(*info); + *info = NULL; + *count = 0; + *maxcount = 0; + errno = EFTYPE; return -1; } @@ -1053,7 +1142,7 @@ cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst, { size_t ss = cdf_check_stream(sst, h); const char *b = CAST(const char *, sst->sst_tab); - const char *eb = b + ss * sst->sst_len; + const char *nb, *eb = b + ss * sst->sst_len; size_t nr, i, j, k; cdf_catalog_entry_t *ce; uint16_t reclen; @@ -1072,7 +1161,7 @@ cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst, return -1; nr--; *cat = CAST(cdf_catalog_t *, - malloc(sizeof(cdf_catalog_t) + nr * sizeof(*ce))); + CDF_MALLOC(sizeof(cdf_catalog_t) + nr * sizeof(*ce))); if (*cat == NULL) return -1; ce = (*cat)->cat_e; @@ -1098,7 +1187,9 @@ cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst, cep->ce_namlen = rlen; np = CAST(const uint16_t *, CAST(const void *, (b + 16))); - if (CAST(const char *, np + cep->ce_namlen) > eb) { + nb = CAST(const char *, CAST(const void *, + (np + cep->ce_namlen))); + if (nb > eb) { cep->ce_namlen = 0; break; } @@ -1157,7 +1248,7 @@ cdf_print_property_name(char *buf, size_t bufsiz, uint32_t p) for (i = 0; i < __arraycount(vn); i++) if (vn[i].v == p) return snprintf(buf, bufsiz, "%s", vn[i].n); - return snprintf(buf, bufsiz, "0x%x", p); + return snprintf(buf, bufsiz, "%#x", p); } int @@ -1216,7 +1307,7 @@ cdf_dump_header(const cdf_header_t *h) h->h_ ## b, 1 << h->h_ ## b) DUMP("%d", revision); DUMP("%d", version); - DUMP("0x%x", byte_order); + DUMP("%#x", byte_order); DUMP2("%d", sec_size_p2); DUMP2("%d", short_sec_size_p2); 
DUMP("%d", num_sectors_in_sat); @@ -1275,7 +1366,7 @@ cdf_dump(const void *v, size_t len) } void -cdf_dump_stream(const cdf_header_t *h, const cdf_stream_t *sst) +cdf_dump_stream(const cdf_stream_t *sst) { size_t ss = sst->sst_ss; cdf_dump(sst->sst_tab, ss * sst->sst_len); @@ -1310,7 +1401,7 @@ cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h, d->d_color ? "black" : "red"); (void)fprintf(stderr, "Left child: %d\n", d->d_left_child); (void)fprintf(stderr, "Right child: %d\n", d->d_right_child); - (void)fprintf(stderr, "Flags: 0x%x\n", d->d_flags); + (void)fprintf(stderr, "Flags: %#x\n", d->d_flags); cdf_timestamp_to_timespec(&ts, d->d_created); (void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec, buf)); cdf_timestamp_to_timespec(&ts, d->d_modified); @@ -1331,7 +1422,7 @@ cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h, name, d->d_stream_first_sector, d->d_size); break; } - cdf_dump_stream(h, &scn); + cdf_dump_stream(&scn); free(scn.sst_tab); break; default: @@ -1403,7 +1494,7 @@ cdf_dump_property_info(const cdf_property_info_t *info, size_t count) (void)fprintf(stderr, "CLIPBOARD %u\n", info[i].pi_u32); break; default: - DPRINTF(("Don't know how to deal with %x\n", + DPRINTF(("Don't know how to deal with %#x\n", info[i].pi_type)); break; } @@ -1422,7 +1513,7 @@ cdf_dump_summary_info(const cdf_header_t *h, const cdf_stream_t *sst) (void)&h; if (cdf_unpack_summary_info(sst, h, &ssi, &info, &count) == -1) return; - (void)fprintf(stderr, "Endian: %x\n", ssi.si_byte_order); + (void)fprintf(stderr, "Endian: %#x\n", ssi.si_byte_order); (void)fprintf(stderr, "Os Version %d.%d\n", ssi.si_os_version & 0xff, ssi.si_os_version >> 8); (void)fprintf(stderr, "Os %d\n", ssi.si_os); @@ -1507,7 +1598,7 @@ main(int argc, char *argv[]) == -1) err(1, "Cannot read short stream"); #ifdef CDF_DEBUG - cdf_dump_stream(&h, &sst); + cdf_dump_stream(&sst); #endif #ifdef CDF_DEBUG diff --git a/deps/libmagic/src/cdf.h b/deps/libmagic/src/cdf.h index 853a719..f2df830 
100644 --- a/deps/libmagic/src/cdf.h +++ b/deps/libmagic/src/cdf.h @@ -127,9 +127,9 @@ typedef struct { typedef struct { void *sst_tab; - size_t sst_len; - size_t sst_dirlen; - size_t sst_ss; + size_t sst_len; /* Number of sectors */ + size_t sst_dirlen; /* Directory sector size */ + size_t sst_ss; /* Sector size */ } cdf_stream_t; typedef struct { @@ -316,6 +316,10 @@ int cdf_read_user_stream(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *, const char *, cdf_stream_t *); int cdf_find_stream(const cdf_dir_t *, const char *, int); +int cdf_zero_stream(cdf_stream_t *); +int cdf_read_doc_summary_info(const cdf_info_t *, const cdf_header_t *, + const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, + const cdf_dir_t *, cdf_stream_t *); int cdf_read_summary_info(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *, cdf_stream_t *); @@ -336,7 +340,7 @@ char *cdf_u16tos8(char *, size_t, const uint16_t *); void cdf_dump_header(const cdf_header_t *); void cdf_dump_sat(const char *, const cdf_sat_t *, size_t); void cdf_dump(const void *, size_t); -void cdf_dump_stream(const cdf_header_t *, const cdf_stream_t *); +void cdf_dump_stream(const cdf_stream_t *); void cdf_dump_dir(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *); void cdf_dump_property_info(const cdf_property_info_t *, size_t); diff --git a/deps/libmagic/src/cdf_time.c b/deps/libmagic/src/cdf_time.c index 1e572de..2bdcd2a 100644 --- a/deps/libmagic/src/cdf_time.c +++ b/deps/libmagic/src/cdf_time.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: cdf_time.c,v 1.15 2014/05/14 23:15:42 christos Exp $") +FILE_RCSID("@(#)$File: cdf_time.c,v 1.16 2017/03/29 15:57:48 christos Exp $") #endif #include @@ -171,7 +171,7 @@ cdf_ctime(const time_t *sec, char *buf) char *ptr = ctime_r(sec, 
buf); if (ptr != NULL) return buf; - (void)snprintf(buf, 26, "*Bad* 0x%16.16" INT64_T_FORMAT "x\n", + (void)snprintf(buf, 26, "*Bad* %#16.16" INT64_T_FORMAT "x\n", (long long)*sec); return buf; } diff --git a/deps/libmagic/src/compress.c b/deps/libmagic/src/compress.c index 1988fe7..2f789cd 100644 --- a/deps/libmagic/src/compress.c +++ b/deps/libmagic/src/compress.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: compress.c,v 1.97 2016/05/13 23:02:28 christos Exp $") +FILE_RCSID("@(#)$File: compress.c,v 1.105 2017/05/25 00:13:03 christos Exp $") #endif #include "magic.h" @@ -62,7 +62,7 @@ typedef void (*sig_t)(int); #if defined(HAVE_SYS_TIME_H) #include #endif -#if defined(HAVE_ZLIB_H) +#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) #define BUILTIN_DECOMPRESS #include #endif @@ -83,6 +83,7 @@ int tty = -1; /* * The following python code is not really used because ZLIBSUPPORT is only * defined if we have a built-in zlib, and the built-in zlib handles that. + * That is not true for android where we have zlib.h and not -lz. 
*/ static const char zlibcode[] = "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; @@ -93,7 +94,7 @@ static int zlibcmp(const unsigned char *buf) { unsigned short x = 1; - unsigned char *s = (unsigned char *)&x; + unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) return 0; @@ -132,6 +133,9 @@ static const char *lrzip_args[] = { static const char *lz4_args[] = { "lz4", "-cd", NULL }; +static const char *zstd_args[] = { + "zstd", "-cd", NULL +}; private const struct { const void *magic; @@ -154,8 +158,9 @@ private const struct { { "\3757zXZ\0", 6, xz_args }, /* XZ Utils */ { "LRZI", 4, lrzip_args }, /* LRZIP */ { "\004\"M\030",4, lz4_args }, /* LZ4 */ + { "\x28\xB5\x2F\xFD", 4, zstd_args }, /* zstd */ #ifdef ZLIBSUPPORT - { zlibcmp, 0, zlib_args }, /* zlib */ + { RCAST(const void *, zlibcmp), 0, zlib_args }, /* zlib */ #endif }; @@ -204,7 +209,7 @@ file_zmagic(struct magic_set *ms, int fd, const char *name, continue; #ifdef ZLIBSUPPORT if (compr[i].maglen == 0) - zm = (CAST(int (*)(const unsigned char *), + zm = (RCAST(int (*)(const unsigned char *), CCAST(void *, compr[i].magic)))(buf); else #endif @@ -362,7 +367,7 @@ sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) return rn - n; default: n -= rv; - buf = ((char *)buf) + rv; + buf = CAST(char *, CCAST(void *, buf)) + rv; break; } while (n > 0); @@ -493,7 +498,7 @@ uncompresszlib(const unsigned char *old, unsigned char **newch, z.next_in = CCAST(Bytef *, old); z.avail_in = CAST(uint32_t, *n); z.next_out = *newch; - z.avail_out = bytes_max; + z.avail_out = CAST(unsigned int, bytes_max); z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL; @@ -517,7 +522,7 @@ uncompresszlib(const unsigned char *old, unsigned char **newch, return OKDATA; err: - strlcpy((char *)*newch, z.msg, bytes_max); + strlcpy((char *)*newch, z.msg ? 
z.msg : zError(rc), bytes_max); *n = strlen((char *)*newch); return ERRDATA; } @@ -628,7 +633,7 @@ filter_error(unsigned char *ubuf, ssize_t n) while (isspace((unsigned char)*p)) p++; n = strlen(p); - memmove(ubuf, p, n + 1); + memmove(ubuf, p, CAST(size_t, n + 1)); } DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); if (islower(*ubuf)) @@ -684,7 +689,7 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, } for (i = 0; i < __arraycount(fdp); i++) - copydesc(i, fdp[i]); + copydesc(CAST(int, i), fdp[i]); (void)execvp(compr[method].argv[0], (char *const *)(intptr_t)compr[method].argv); @@ -744,9 +749,9 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); DPRINTF("Child wait return %#x\n", status); } else if (!WIFEXITED(status)) { - DPRINTF("Child not exited (0x%x)\n", status); + DPRINTF("Child not exited (%#x)\n", status); } else if (WEXITSTATUS(status) != 0) { - DPRINTF("Child exited (0x%d)\n", WEXITSTATUS(status)); + DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); } closefd(fdp[STDIN_FILENO], 0); diff --git a/deps/libmagic/src/der.c b/deps/libmagic/src/der.c index 025c390..e7ca16e 100644 --- a/deps/libmagic/src/der.c +++ b/deps/libmagic/src/der.c @@ -35,22 +35,13 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: der.c,v 1.7 2016/06/01 22:01:15 christos Exp $") +FILE_RCSID("@(#)$File: der.c,v 1.12 2017/02/10 18:14:01 christos Exp $") #endif #endif #include -#include -// XXX: change by mscdex -#ifdef TEST_DER -# include -#endif #include -// XXX: change by mscdex -#ifdef TEST_DER -# include -#endif #include #include #include @@ -64,6 +55,10 @@ FILE_RCSID("@(#)$File: der.c,v 1.7 2016/06/01 22:01:15 christos Exp $") #ifndef TEST_DER #include "magic.h" #include "der.h" +#else +#include +#include +#include #endif #define DER_BAD ((uint32_t)-1) @@ -169,29 +164,49 @@ gettag(const uint8_t *c, size_t *p, size_t l) return tag; } +/* + * 
Read the length of a DER tag from the input. + * + * `c` is the input, `p` is an output parameter that specifies how much of the + * input we consumed, and `l` is the maximum input length. + * + * Returns the length, or DER_BAD if the end of the input is reached or the + * length exceeds the remaining input. + */ static uint32_t getlength(const uint8_t *c, size_t *p, size_t l) { uint8_t digits, i; size_t len; + int is_onebyte_result; if (*p >= l) return DER_BAD; - digits = c[(*p)++]; + /* + * Digits can either be 0b0 followed by the result, or 0b1 + * followed by the number of digits of the result. In either case, + * we verify that we can read so many bytes from the input. + */ + is_onebyte_result = (c[*p] & 0x80) == 0; + digits = c[(*p)++] & 0x7f; + if (*p + digits >= l) + return DER_BAD; - if ((digits & 0x80) == 0) + if (is_onebyte_result) return digits; - digits &= 0x7f; + /* + * Decode len. We've already verified that we're allowed to read + * `digits` bytes. + */ len = 0; - - if (*p + digits >= l) - return DER_BAD; - for (i = 0; i < digits; i++) len = (len << 8) | c[(*p)++]; - return len; + + if (*p + len >= l) + return DER_BAD; + return CAST(uint32_t, len); } static const char * @@ -208,7 +223,7 @@ der_tag(char *buf, size_t len, uint32_t tag) static int der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len) { - const uint8_t *d = q; + const uint8_t *d = CAST(const uint8_t *, q); switch (tag) { case DER_TAG_PRINTABLE_STRING: case DER_TAG_UTF8_STRING: @@ -230,7 +245,7 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len) int32_t der_offs(struct magic_set *ms, struct magic *m, size_t nbytes) { - const uint8_t *b = CAST(const void *, ms->search.s); + const uint8_t *b = RCAST(const uint8_t *, ms->search.s); size_t offs = 0, len = ms->search.s_len ? 
ms->search.s_len : nbytes; if (gettag(b, &offs, len) == DER_BAD) @@ -250,18 +265,18 @@ der_offs(struct magic_set *ms, struct magic *m, size_t nbytes) #endif if (m->cont_level != 0) { if (offs + tlen > nbytes) - return DER_BAD; - ms->c.li[m->cont_level - 1].off = offs + tlen; + return -1; + ms->c.li[m->cont_level - 1].off = CAST(int, offs + tlen); DPRINTF(("cont_level[%u] = %u\n", m->cont_level - 1, ms->c.li[m->cont_level - 1].off)); } - return offs; + return CAST(int32_t, offs); } int der_cmp(struct magic_set *ms, struct magic *m) { - const uint8_t *b = CAST(const void *, ms->search.s); + const uint8_t *b = RCAST(const uint8_t *, ms->search.s); const char *s = m->value.s; size_t offs = 0, len = ms->search.s_len; uint32_t tag, tlen; diff --git a/deps/libmagic/src/file.c b/deps/libmagic/src/file.c index dc5f24b..861a824 100644 --- a/deps/libmagic/src/file.c +++ b/deps/libmagic/src/file.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.171 2016/05/17 15:52:45 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.172 2016/10/24 15:21:07 christos Exp $") #endif /* lint */ #include "magic.h" @@ -438,6 +438,8 @@ private struct magic_set * load(const char *magicfile, int flags) { struct magic_set *magic = magic_open(flags); + const char *e; + if (magic == NULL) { (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); return NULL; @@ -448,6 +450,8 @@ load(const char *magicfile, int flags) magic_close(magic); return NULL; } + if ((e = magic_error(magic)) != NULL) + (void)fprintf(stderr, "%s: Warning: %s\n", progname, e); return magic; } diff --git a/deps/libmagic/src/file.h b/deps/libmagic/src/file.h index 22a7f66..9930710 100644 --- a/deps/libmagic/src/file.h +++ b/deps/libmagic/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.178 2016/03/31 17:51:12 christos Exp $ + * @(#)$File: file.h,v 1.183 2017/08/28 13:39:18 christos Exp $ */ #ifndef __file_h__ @@ -36,6 +36,10 @@ #ifdef 
HAVE_CONFIG_H #include #endif +#ifdef HAVE_STDINT_H +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif // XXX: change by mscdex #ifdef _MSC_VER @@ -84,21 +88,16 @@ typedef unsigned int mode_t; #define INT64_T_FORMAT "ll" #define INTMAX_T_FORMAT "j" #endif +#include +#endif #include /* Include that here, to make sure __P gets defined */ #include #include /* For open and flags */ -#ifdef HAVE_STDINT_H -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS -#endif -#include -#endif #ifdef HAVE_INTTYPES_H #include #endif -// XXX: change by mscdex -#include +#include #include #include // XXX: change by mscdex @@ -170,11 +169,11 @@ typedef unsigned int mode_t; or directory */ #define MAXDESC 64 /* max len of text description/MIME type */ #define MAXMIME 80 /* max len of text MIME type */ -#define MAXstring 64 /* max len of "string" types */ +#define MAXstring 96 /* max len of "string" types */ #define MAGICNO 0xF11E041C -#define VERSIONNO 13 -#define FILE_MAGICSIZE 312 +#define VERSIONNO 14 +#define FILE_MAGICSIZE 344 #define FILE_LOAD 0 #define FILE_CHECK 1 @@ -311,7 +310,7 @@ struct magic { #define FILE_OPS_MASK 0x07 /* mask for above ops */ #define FILE_UNUSED_1 0x08 #define FILE_UNUSED_2 0x10 -#define FILE_UNUSED_3 0x20 +#define FILE_OPSIGNED 0x20 #define FILE_OPINVERSE 0x40 #define FILE_OPINDIRECT 0x80 @@ -483,7 +482,7 @@ protected size_t file_printedlen(const struct magic_set *); protected int file_replace(struct magic_set *, const char *, const char *); protected int file_printf(struct magic_set *, const char *, ...) 
__attribute__((__format__(__printf__, 2, 3))); -protected int file_reset(struct magic_set *); +protected int file_reset(struct magic_set *, int); protected int file_tryelf(struct magic_set *, int, const unsigned char *, size_t); protected int file_trycdf(struct magic_set *, int, const unsigned char *, diff --git a/deps/libmagic/src/fsmagic.c b/deps/libmagic/src/fsmagic.c index 60e0dea..f84a10e 100644 --- a/deps/libmagic/src/fsmagic.c +++ b/deps/libmagic/src/fsmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: fsmagic.c,v 1.76 2015/04/09 20:01:41 christos Exp $") +FILE_RCSID("@(#)$File: fsmagic.c,v 1.77 2017/05/24 19:17:50 christos Exp $") #endif /* lint */ #include "magic.h" @@ -106,14 +106,13 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) { int ret, did = 0; int mime = ms->flags & MAGIC_MIME; + int silent = ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION); #ifdef S_IFLNK char buf[BUFSIZ+4]; ssize_t nch; struct stat tstatbuf; #endif - if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) - return 0; if (fn == NULL) return 0; @@ -170,7 +169,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) } ret = 1; - if (!mime) { + if (!mime && !silent) { #ifdef S_ISUID if (sb->st_mode & S_ISUID) if (file_printf(ms, "%ssetuid", COMMA) == -1) @@ -193,6 +192,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "directory") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%sdirectory", COMMA) == -1) return -1; break; @@ -210,6 +210,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "chardevice") == -1) return -1; + } else if (silent) { } else { #ifdef HAVE_STRUCT_STAT_ST_RDEV # ifdef dv_unit @@ -244,6 +245,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "blockdevice") == -1) return -1; + } else if (silent) { } else { #ifdef 
HAVE_STRUCT_STAT_ST_RDEV # ifdef dv_unit @@ -272,6 +274,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "fifo") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%sfifo (named pipe)", COMMA) == -1) return -1; break; @@ -281,6 +284,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "door") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%sdoor", COMMA) == -1) return -1; break; @@ -296,6 +300,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "symlink") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%sunreadable symlink `%s' (%s)", COMMA, fn, strerror(errno)) == -1) @@ -325,6 +330,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (handle_mime(ms, mime, "x-path-too-long") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%spath too long: `%s'", COMMA, fn) == -1) @@ -354,6 +360,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "symlink") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%ssymbolic link to %s", COMMA, buf) == -1) return -1; @@ -366,6 +373,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "socket") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%ssocket", COMMA) == -1) return -1; break; @@ -388,6 +396,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) if (mime) { if (handle_mime(ms, mime, "x-empty") == -1) return -1; + } else if (silent) { } else if (file_printf(ms, "%sempty", COMMA) == -1) return -1; break; @@ -401,7 +410,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) /*NOTREACHED*/ } - if (!mime && did && ret == 0) { + if (!silent && !mime && did && ret == 0) { 
if (file_printf(ms, " ") == -1) return -1; } diff --git a/deps/libmagic/src/funcs.c b/deps/libmagic/src/funcs.c index df8dbae..d7a18f4 100644 --- a/deps/libmagic/src/funcs.c +++ b/deps/libmagic/src/funcs.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.89 2016/03/21 15:56:53 christos Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.93 2017/08/28 13:39:18 christos Exp $") #endif /* lint */ #include "magic.h" @@ -76,7 +76,7 @@ file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) ms->o.buf = buf; return 0; out: - file_error(ms, errno, "vasprintf failed"); + fprintf(stderr, "vasprintf failed (%s)", strerror(errno)); return -1; } @@ -250,7 +250,7 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u } /* try soft magic tests */ - if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) + if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { m = file_softmagic(ms, ubuf, nb, NULL, NULL, BINTEST, looks_text); if ((ms->flags & MAGIC_DEBUG) != 0) @@ -277,6 +277,7 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u if (checkdone(ms, &rv)) goto done; } + } /* try text properties */ if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { @@ -327,9 +328,9 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u #endif protected int -file_reset(struct magic_set *ms) +file_reset(struct magic_set *ms, int checkloaded) { - if (ms->mlist[0] == NULL) { + if (checkloaded && ms->mlist[0] == NULL) { file_error(ms, 0, "no magic files loaded"); return -1; } @@ -508,6 +509,8 @@ file_regexec(file_regex_t *rx, const char *str, size_t nmatch, regmatch_t* pmatch, int eflags) { assert(rx->rc == 0); + /* XXX: force initialization because glibc does not always do this */ + memset(pmatch, 0, nmatch * sizeof(*pmatch)); return regexec(&rx->rx, str, nmatch, pmatch, eflags); } diff --git a/deps/libmagic/src/is_tar.c b/deps/libmagic/src/is_tar.c index a3e5dbf..1953a7f 100644 --- 
a/deps/libmagic/src/is_tar.c +++ b/deps/libmagic/src/is_tar.c @@ -40,7 +40,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: is_tar.c,v 1.38 2015/04/09 20:01:41 christos Exp $") +FILE_RCSID("@(#)$File: is_tar.c,v 1.39 2017/03/17 20:45:01 christos Exp $") #endif #include "magic.h" @@ -51,7 +51,7 @@ FILE_RCSID("@(#)$File: is_tar.c,v 1.38 2015/04/09 20:01:41 christos Exp $") #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) private int is_tar(const unsigned char *, size_t); -private int from_oct(int, const char *); /* Decode octal number */ +private int from_oct(const char *, size_t); /* Decode octal number */ static const char tartype[][32] = { "tar archive", @@ -93,31 +93,35 @@ private int is_tar(const unsigned char *buf, size_t nbytes) { const union record *header = (const union record *)(const void *)buf; - int i; - int sum, recsum; - const unsigned char *p; + size_t i; + int sum, recsum; + const unsigned char *p, *ep; - if (nbytes < sizeof(union record)) + if (nbytes < sizeof(*header)) return 0; - recsum = from_oct(8, header->header.chksum); + recsum = from_oct(header->header.chksum, sizeof(header->header.chksum)); sum = 0; p = header->charptr; - for (i = sizeof(union record); --i >= 0;) + ep = header->charptr + sizeof(*header); + while (p < ep) sum += *p++; /* Adjust checksum to count the "chksum" field as blanks. 
*/ - for (i = sizeof(header->header.chksum); --i >= 0;) + for (i = 0; i < sizeof(header->header.chksum); i++) sum -= header->header.chksum[i]; - sum += ' ' * sizeof header->header.chksum; + sum += ' ' * sizeof(header->header.chksum); if (sum != recsum) return 0; /* Not a tar archive */ - if (strcmp(header->header.magic, GNUTMAGIC) == 0) + if (strncmp(header->header.magic, GNUTMAGIC, + sizeof(header->header.magic)) == 0) return 3; /* GNU Unix Standard tar archive */ - if (strcmp(header->header.magic, TMAGIC) == 0) + + if (strncmp(header->header.magic, TMAGIC, + sizeof(header->header.magic)) == 0) return 2; /* Unix Standard tar archive */ return 1; /* Old fashioned tar archive */ @@ -130,19 +134,22 @@ is_tar(const unsigned char *buf, size_t nbytes) * Result is -1 if the field is invalid (all blank, or non-octal). */ private int -from_oct(int digs, const char *where) +from_oct(const char *where, size_t digs) { int value; + if (digs == 0) + return -1; + while (isspace((unsigned char)*where)) { /* Skip spaces */ where++; - if (--digs <= 0) + if (digs-- == 0) return -1; /* All blank field */ } value = 0; while (digs > 0 && isodigit(*where)) { /* Scan til non-octal */ value = (value << 3) | (*where++ - '0'); - --digs; + digs--; } if (digs > 0 && *where && !isspace((unsigned char)*where)) diff --git a/deps/libmagic/src/magic.c b/deps/libmagic/src/magic.c index 1d7962a..91c9d82 100644 --- a/deps/libmagic/src/magic.c +++ b/deps/libmagic/src/magic.c @@ -34,7 +34,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: magic.c,v 1.99 2016/05/03 16:09:38 christos Exp $") +FILE_RCSID("@(#)$File: magic.c,v 1.102 2017/08/28 13:39:18 christos Exp $") #endif /* lint */ #include "magic.h" @@ -171,7 +171,7 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, { if (fdwReason == DLL_PROCESS_ATTACH) _w32_dll_instance = hinstDLL; - return TRUE; + return 1; } #endif @@ -413,7 +413,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) int ispipe = 0; off_t pos = (off_t)-1; - if 
(file_reset(ms) == -1) + if (file_reset(ms, 1) == -1) goto out; /* @@ -496,7 +496,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) if (r < PIPE_BUF) break; } - if (nbytes == 0) { + if (nbytes == 0 && inname) { /* We can not read it, but we were able to stat it. */ if (unreadable_info(ms, sb.st_mode, inname) == -1) goto done; @@ -542,7 +542,7 @@ magic_buffer(struct magic_set *ms, const void *buf, size_t nb) { if (ms == NULL) return NULL; - if (file_reset(ms) == -1) + if (file_reset(ms, 1) == -1) return NULL; /* * The main work is done here! @@ -571,6 +571,15 @@ magic_errno(struct magic_set *ms) return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0; } +public int +magic_getflags(struct magic_set *ms) +{ + if (ms == NULL) + return -1; + + return ms->flags; +} + public int magic_setflags(struct magic_set *ms, int flags) { diff --git a/deps/libmagic/src/magic.h b/deps/libmagic/src/magic.h index af6b5b6..2d707d5 100644 --- a/deps/libmagic/src/magic.h +++ b/deps/libmagic/src/magic.h @@ -73,6 +73,35 @@ 0 \ ) +#define MAGIC_SNPRINTB "\177\020\ +b\0debug\0\ +b\1symlink\0\ +b\2compress\0\ +b\3devices\0\ +b\4mime_type\0\ +b\5continue\0\ +b\6check\0\ +b\7preserve_atime\0\ +b\10raw\0\ +b\11error\0\ +b\12mime_encoding\0\ +b\13apple\0\ +b\14no_check_compress\0\ +b\15no_check_tar\0\ +b\16no_check_soft\0\ +b\17no_check_sapptype\0\ +b\20no_check_elf\0\ +b\21no_check_text\0\ +b\22no_check_cdf\0\ +b\23no_check_reserved0\0\ +b\24no_check_tokens\0\ +b\25no_check_encoding\0\ +b\26no_check_reserved1\0\ +b\27no_check_reserved2\0\ +b\30extension\0\ +b\31transp_compression\0\ +" + /* Defined for backwards compatibility (renamed) */ #define MAGIC_NO_CHECK_ASCII MAGIC_NO_CHECK_TEXT @@ -80,7 +109,7 @@ #define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */ #define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */ -#define MAGIC_VERSION 525 /* This implementation */ +#define MAGIC_VERSION 532 /* This implementation */ #ifdef __cplusplus @@ -97,6 
+126,7 @@ const char *magic_descriptor(magic_t, int); const char *magic_buffer(magic_t, const void *, size_t); const char *magic_error(magic_t); +int magic_getflags(magic_t); int magic_setflags(magic_t, int); int magic_version(void); @@ -114,7 +144,7 @@ int magic_errno(magic_t); #define MAGIC_PARAM_ELF_SHNUM_MAX 3 #define MAGIC_PARAM_ELF_NOTES_MAX 4 #define MAGIC_PARAM_REGEX_MAX 5 -#define MAGIC_PARAM_BYTES_MAX 6 +#define MAGIC_PARAM_BYTES_MAX 6 int magic_setparam(magic_t, int, const void *); int magic_getparam(magic_t, int, void *); diff --git a/deps/libmagic/src/print.c b/deps/libmagic/src/print.c index a0221b1..0b91863 100644 --- a/deps/libmagic/src/print.c +++ b/deps/libmagic/src/print.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: print.c,v 1.81 2016/01/19 15:09:03 christos Exp $") +FILE_RCSID("@(#)$File: print.c,v 1.82 2017/02/10 18:14:01 christos Exp $") #endif /* lint */ #include @@ -238,7 +238,7 @@ file_fmttime(uint64_t v, int flags, char *buf) if (flags & FILE_T_WINDOWS) { struct timespec ts; - cdf_timestamp_to_timespec(&ts, v); + cdf_timestamp_to_timespec(&ts, CAST(cdf_timestamp_t, v)); t = ts.tv_sec; } else { // XXX: perhaps detect and print something if overflow diff --git a/deps/libmagic/src/readcdf.c b/deps/libmagic/src/readcdf.c index 5d20aad..3d251d4 100644 --- a/deps/libmagic/src/readcdf.c +++ b/deps/libmagic/src/readcdf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2008 Christos Zoulas + * Copyright (c) 2008, 2016 Christos Zoulas * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -26,7 +26,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $") +FILE_RCSID("@(#)$File: readcdf.c,v 1.65 2017/04/08 20:58:03 christos Exp $") #endif #include @@ -155,7 +155,7 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, struct timespec ts; char buf[64]; const char *str = NULL; - const char *s; + const char *s, *e; int len; if (!NOTMIME(ms) && root_storage) @@ -202,7 +202,9 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, if (info[i].pi_type == CDF_LENGTH32_WSTRING) k++; s = info[i].pi_str.s_buf; - for (j = 0; j < sizeof(vbuf) && len--; s += k) { + e = info[i].pi_str.s_buf + len; + for (j = 0; s < e && j < sizeof(vbuf) + && len--; s += k) { if (*s == '\0') break; if (isprint((unsigned char)*s)) @@ -376,13 +378,61 @@ cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, dir, "Catalog", scn)) == -1) return i; #ifdef CDF_DEBUG - cdf_dump_catalog(&h, scn); + cdf_dump_catalog(h, scn); #endif if ((i = cdf_file_catalog(ms, h, scn)) == -1) return -1; return i; } +private int +cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, + const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, + const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, + const cdf_directory_t *root_storage, const char **expn) +{ + int i; + const char *str = NULL; + cdf_directory_t *d; + char name[__arraycount(d->d_name)]; + size_t j, k; + +#ifdef CDF_DEBUG + cdf_dump_summary_info(h, scn); +#endif + if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { + *expn = "Can't expand summary_info"; + return i; + } + if (i == 1) + return i; + for (j = 0; str == NULL && j < dir->dir_len; j++) { + d = &dir->dir_tab[j]; + for (k = 0; k < sizeof(name); k++) + name[k] = (char)cdf_tole2(d->d_name[k]); + str = cdf_app_to_mime(name, + NOTMIME(ms) ? 
name2desc : name2mime); + } + if (NOTMIME(ms)) { + if (str != NULL) { + if (file_printf(ms, "%s", str) == -1) + return -1; + i = 1; + } + } else { + if (str == NULL) + str = "vnd.ms-office"; + if (file_printf(ms, "application/%s", str) == -1) + return -1; + i = 1; + } + if (i <= 0) { + i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, + dir, scn); + } + return i; +} + private struct sinfo { const char *name; const char *mime; @@ -391,10 +441,13 @@ private struct sinfo { } sectioninfo[] = { { "Encrypted", "encrypted", { - "EncryptedPackage", NULL, NULL, NULL, NULL, + "EncryptedPackage", "EncryptedSummary", + NULL, NULL, NULL, }, { - CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, + CDF_DIR_TYPE_USER_STREAM, + CDF_DIR_TYPE_USER_STREAM, + 0, 0, 0, }, }, @@ -415,6 +468,46 @@ private struct sinfo { 0, 0, 0, 0 }, }, + { "Microsoft Excel", "vnd.ms-excel", + { + "Book", "Workbook", NULL, NULL, NULL, + }, + { + CDF_DIR_TYPE_USER_STREAM, + CDF_DIR_TYPE_USER_STREAM, + 0, 0, 0, + }, + }, + { "Microsoft Word", "msword", + { + "WordDocument", NULL, NULL, NULL, NULL, + }, + { + CDF_DIR_TYPE_USER_STREAM, + 0, 0, 0, 0, + }, + }, + { "Microsoft PowerPoint", "vnd.ms-powerpoint", + { + "PowerPoint", NULL, NULL, NULL, NULL, + }, + { + CDF_DIR_TYPE_USER_STREAM, + 0, 0, 0, 0, + }, + }, + { "Microsoft Outlook Message", "vnd.ms-outlook", + { + "__properties_version1.0", + "__recip_version1.0_#00000000", + NULL, NULL, NULL, + }, + { + CDF_DIR_TYPE_USER_STREAM, + CDF_DIR_TYPE_USER_STORAGE, + 0, 0, 0, + }, + }, }; private int @@ -426,22 +519,19 @@ cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) const struct sinfo *si = §ioninfo[sd]; for (j = 0; si->sections[j]; j++) { if (cdf_find_stream(dir, si->sections[j], si->types[j]) - <= 0) { + > 0) + break; #ifdef CDF_DEBUG - fprintf(stderr, "Can't read %s\n", - si->sections[j]); + fprintf(stderr, "Can't read %s\n", si->sections[j]); #endif - break; - } } - if (si->sections[j] != NULL) + if (si->sections[j] == NULL) continue; if (NOTMIME(ms)) 
{ if (file_printf(ms, "CDFV2 %s", si->name) == -1) return -1; } else { - if (file_printf(ms, "application/CDFV2-%s", - si->mime) == -1) + if (file_printf(ms, "application/%s", si->mime) == -1) return -1; } return 1; @@ -462,6 +552,7 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, const char *expn = ""; const cdf_directory_t *root_storage; + scn.sst_tab = NULL; info.i_fd = fd; info.i_buf = buf; info.i_len = nbytes; @@ -517,7 +608,7 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, "FileHeader", &scn)) != -1) { #define HWP5_SIGNATURE "HWP Document File" - if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 + if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1 && memcmp(scn.sst_tab, HWP5_SIGNATURE, sizeof(HWP5_SIGNATURE) - 1) == 0) { if (NOTMIME(ms)) { @@ -531,10 +622,7 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, i = 1; goto out5; } else { - free(scn.sst_tab); - scn.sst_tab = NULL; - scn.sst_len = 0; - scn.sst_dirlen = 0; + cdf_zero_stream(&scn); } } @@ -542,56 +630,31 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, &scn)) == -1) { if (errno != ESRCH) { expn = "Cannot read summary info"; - goto out4; } - i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst, - &dir, &scn); - if (i > 0) - goto out4; - i = cdf_file_dir_info(ms, &dir); - if (i < 0) - expn = "Cannot read section info"; - goto out4; + } else { + i = cdf_check_summary_info(ms, &info, &h, + &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); + cdf_zero_stream(&scn); } - - -#ifdef CDF_DEBUG - cdf_dump_summary_info(&h, &scn); -#endif - if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) - expn = "Can't expand summary_info"; - - if (i == 0) { - const char *str = NULL; - cdf_directory_t *d; - char name[__arraycount(d->d_name)]; - size_t j, k; - - for (j = 0; str == NULL && j < dir.dir_len; j++) { - d = &dir.dir_tab[j]; - for (k = 0; 
k < sizeof(name); k++) - name[k] = (char)cdf_tole2(d->d_name[k]); - str = cdf_app_to_mime(name, - NOTMIME(ms) ? name2desc : name2mime); - } - if (NOTMIME(ms)) { - if (str != NULL) { - if (file_printf(ms, "%s", str) == -1) - return -1; - i = 1; + if (i <= 0) { + if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, + &sst, &dir, &scn)) == -1) { + if (errno != ESRCH) { + expn = "Cannot read summary info"; } } else { - if (str == NULL) - str = "vnd.ms-office"; - if (file_printf(ms, "application/%s", str) == -1) - return -1; - i = 1; + i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, + &sst, &dir, &scn, root_storage, &expn); } } + if (i <= 0) { + i = cdf_file_dir_info(ms, &dir); + if (i < 0) + expn = "Cannot read section info"; + } out5: - free(scn.sst_tab); -out4: - free(sst.sst_tab); + cdf_zero_stream(&scn); + cdf_zero_stream(&sst); out3: free(dir.dir_tab); out2: @@ -608,7 +671,7 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, if (file_printf(ms, ", %s", expn) == -1) return -1; } else { - if (file_printf(ms, "application/CDFV2-unknown") == -1) + if (file_printf(ms, "application/CDFV2") == -1) return -1; } i = 1; diff --git a/deps/libmagic/src/readelf.c b/deps/libmagic/src/readelf.c index 39598f7..5f425c9 100644 --- a/deps/libmagic/src/readelf.c +++ b/deps/libmagic/src/readelf.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: readelf.c,v 1.127 2015/11/18 12:29:29 christos Exp $") +FILE_RCSID("@(#)$File: readelf.c,v 1.138 2017/08/27 07:55:02 christos Exp $") #endif #ifdef BUILTIN_ELF @@ -310,16 +310,18 @@ private const char os_style_names[][8] = { "NetBSD", }; -#define FLAGS_DID_CORE 0x001 -#define FLAGS_DID_OS_NOTE 0x002 -#define FLAGS_DID_BUILD_ID 0x004 -#define FLAGS_DID_CORE_STYLE 0x008 -#define FLAGS_DID_NETBSD_PAX 0x010 -#define FLAGS_DID_NETBSD_MARCH 0x020 -#define FLAGS_DID_NETBSD_CMODEL 0x040 -#define FLAGS_DID_NETBSD_UNKNOWN 0x080 -#define FLAGS_IS_CORE 0x100 -#define FLAGS_DID_AUXV 0x200 +#define 
FLAGS_CORE_STYLE 0x003 + +#define FLAGS_DID_CORE 0x004 +#define FLAGS_DID_OS_NOTE 0x008 +#define FLAGS_DID_BUILD_ID 0x010 +#define FLAGS_DID_CORE_STYLE 0x020 +#define FLAGS_DID_NETBSD_PAX 0x040 +#define FLAGS_DID_NETBSD_MARCH 0x080 +#define FLAGS_DID_NETBSD_CMODEL 0x100 +#define FLAGS_DID_NETBSD_UNKNOWN 0x200 +#define FLAGS_IS_CORE 0x400 +#define FLAGS_DID_AUXV 0x800 private int dophn_core(struct magic_set *ms, int clazz, int swap, int fd, off_t off, @@ -509,12 +511,26 @@ do_bid_note(struct magic_set *ms, unsigned char *nbuf, uint32_t type, size_t noff, size_t doff, int *flags) { if (namesz == 4 && strcmp((char *)&nbuf[noff], "GNU") == 0 && - type == NT_GNU_BUILD_ID && (descsz == 16 || descsz == 20)) { + type == NT_GNU_BUILD_ID && (descsz >= 4 && descsz <= 20)) { uint8_t desc[20]; + const char *btype; uint32_t i; *flags |= FLAGS_DID_BUILD_ID; - if (file_printf(ms, ", BuildID[%s]=", descsz == 16 ? "md5/uuid" : - "sha1") == -1) + switch (descsz) { + case 8: + btype = "xxHash"; + break; + case 16: + btype = "md5/uuid"; + break; + case 20: + btype = "sha1"; + break; + default: + btype = "unknown"; + break; + } + if (file_printf(ms, ", BuildID[%s]=", btype) == -1) return 1; (void)memcpy(desc, &nbuf[doff], descsz); for (i = 0; i < descsz; i++) @@ -695,32 +711,30 @@ do_core_note(struct magic_set *ms, unsigned char *nbuf, uint32_t type, == -1) return 1; *flags |= FLAGS_DID_CORE_STYLE; + *flags |= os_style; } switch (os_style) { case OS_STYLE_NETBSD: if (type == NT_NETBSD_CORE_PROCINFO) { char sbuf[512]; - uint32_t signo; - /* - * Extract the program name. It is at - * offset 0x7c, and is up to 32-bytes, - * including the terminating NUL. 
- */ - if (file_printf(ms, ", from '%.31s'", + struct NetBSD_elfcore_procinfo pi; + memset(&pi, 0, sizeof(pi)); + memcpy(&pi, nbuf + doff, descsz); + + if (file_printf(ms, ", from '%.31s', pid=%u, uid=%u, " + "gid=%u, nlwps=%u, lwp=%u (signal %u/code %u)", file_printable(sbuf, sizeof(sbuf), - (const char *)&nbuf[doff + 0x7c])) == -1) - return 1; - - /* - * Extract the signal number. It is at - * offset 0x08. - */ - (void)memcpy(&signo, &nbuf[doff + 0x08], - sizeof(signo)); - if (file_printf(ms, " (signal %u)", - elf_getu32(swap, signo)) == -1) + CAST(char *, pi.cpi_name)), + elf_getu32(swap, pi.cpi_pid), + elf_getu32(swap, pi.cpi_euid), + elf_getu32(swap, pi.cpi_egid), + elf_getu32(swap, pi.cpi_nlwps), + elf_getu32(swap, pi.cpi_siglwp), + elf_getu32(swap, pi.cpi_signo), + elf_getu32(swap, pi.cpi_sigcode)) == -1) return 1; + *flags |= FLAGS_DID_CORE; return 1; } @@ -876,7 +890,7 @@ get_string_on_virtaddr(struct magic_set *ms, offset = get_offset_from_virtaddr(ms, swap, clazz, fd, ph_off, ph_num, fsize, virtaddr); - if ((buflen = pread(fd, buf, buflen, offset)) <= 0) { + if ((buflen = pread(fd, buf, CAST(size_t, buflen), offset)) <= 0) { file_badread(ms); return 0; } @@ -910,8 +924,28 @@ do_auxv_note(struct magic_set *ms, unsigned char *nbuf, uint32_t type, int is_string; size_t nval; - if (type != NT_AUXV || (*flags & FLAGS_IS_CORE) == 0) + if ((*flags & (FLAGS_IS_CORE|FLAGS_DID_CORE_STYLE)) != + (FLAGS_IS_CORE|FLAGS_DID_CORE_STYLE)) + return 0; + + switch (*flags & FLAGS_CORE_STYLE) { + case OS_STYLE_SVR4: + if (type != NT_AUXV) + return 0; + break; +#ifdef notyet + case OS_STYLE_NETBSD: + if (type != NT_NETBSD_CORE_AUXV) + return 0; + break; + case OS_STYLE_FREEBSD: + if (type != NT_FREEBSD_PROCSTAT_AUXV) + return 0; + break; +#endif + default: return 0; + } *flags |= FLAGS_DID_AUXV; @@ -1017,13 +1051,13 @@ donote(struct magic_set *ms, void *vbuf, size_t offset, size_t size, } if (namesz & 0x80000000) { - (void)file_printf(ms, ", bad note name size 0x%lx", + 
(void)file_printf(ms, ", bad note name size %#lx", (unsigned long)namesz); return 0; } if (descsz & 0x80000000) { - (void)file_printf(ms, ", bad note description size 0x%lx", + (void)file_printf(ms, ", bad note description size %#lx", (unsigned long)descsz); return 0; } @@ -1171,12 +1205,12 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, { Elf32_Shdr sh32; Elf64_Shdr sh64; - int stripped = 1; + int stripped = 1, has_debug_info = 0; size_t nbadcap = 0; void *nbuf; off_t noff, coff, name_off; - uint64_t cap_hw1 = 0; /* SunOS 5.x hardware capabilites */ - uint64_t cap_sf1 = 0; /* SunOS 5.x software capabilites */ + uint64_t cap_hw1 = 0; /* SunOS 5.x hardware capabilities */ + uint64_t cap_sf1 = 0; /* SunOS 5.x software capabilities */ char name[50]; ssize_t namesize; @@ -1189,8 +1223,9 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, /* Read offset of name section to be able to read section names later */ if (pread(fd, xsh_addr, xsh_sizeof, CAST(off_t, (off + size * strtab))) < (ssize_t)xsh_sizeof) { - file_badread(ms); - return -1; + if (file_printf(ms, ", missing section headers") == -1) + return -1; + return 0; } name_off = xsh_offset; @@ -1201,8 +1236,10 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, return -1; } name[namesize] = '\0'; - if (strcmp(name, ".debug_info") == 0) + if (strcmp(name, ".debug_info") == 0) { + has_debug_info = 1; stripped = 0; + } if (pread(fd, xsh_addr, xsh_sizeof, off) < (ssize_t)xsh_sizeof) { file_badread(ms); @@ -1233,9 +1270,9 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, if ((uintmax_t)(xsh_size + xsh_offset) > (uintmax_t)fsize) { if (file_printf(ms, - ", note offset/size 0x%" INTMAX_T_FORMAT - "x+0x%" INTMAX_T_FORMAT "x exceeds" - " file size 0x%" INTMAX_T_FORMAT "x", + ", note offset/size %#" INTMAX_T_FORMAT + "x+%#" INTMAX_T_FORMAT "x exceeds" + " file size %#" INTMAX_T_FORMAT "x", 
(uintmax_t)xsh_offset, (uintmax_t)xsh_size, (uintmax_t)fsize) == -1) return -1; @@ -1339,7 +1376,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, default: if (file_printf(ms, ", with unknown capability " - "0x%" INT64_T_FORMAT "x = 0x%" + "%#" INT64_T_FORMAT "x = %#" INT64_T_FORMAT "x", (unsigned long long)xcap_tag, (unsigned long long)xcap_val) == -1) @@ -1356,6 +1393,10 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, } } + if (has_debug_info) { + if (file_printf(ms, ", with debug_info") == -1) + return -1; + } if (file_printf(ms, ", %sstripped", stripped ? "" : "not ") == -1) return -1; if (cap_hw1) { @@ -1389,13 +1430,13 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, } if (cap_hw1) if (file_printf(ms, - " unknown hardware capability 0x%" + " unknown hardware capability %#" INT64_T_FORMAT "x", (unsigned long long)cap_hw1) == -1) return -1; } else { if (file_printf(ms, - " hardware capability 0x%" INT64_T_FORMAT "x", + " hardware capability %#" INT64_T_FORMAT "x", (unsigned long long)cap_hw1) == -1) return -1; } @@ -1411,7 +1452,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num, cap_sf1 &= ~SF1_SUNW_MASK; if (cap_sf1) if (file_printf(ms, - ", with unknown software capability 0x%" + ", with unknown software capability %#" INT64_T_FORMAT "x", (unsigned long long)cap_sf1) == -1) return -1; @@ -1465,7 +1506,7 @@ dophn_exec(struct magic_set *ms, int clazz, int swap, int fd, off_t off, if (((align = xph_align) & 0x80000000UL) != 0 || align < 4) { if (file_printf(ms, - ", invalid note alignment 0x%lx", + ", invalid note alignment %#lx", (unsigned long)align) == -1) return -1; align = 4; diff --git a/deps/libmagic/src/readelf.h b/deps/libmagic/src/readelf.h index f443b29..ef880b9 100644 --- a/deps/libmagic/src/readelf.h +++ b/deps/libmagic/src/readelf.h @@ -141,7 +141,7 @@ typedef struct { #define SHT_SYMTAB 2 #define SHT_NOTE 7 #define 
SHT_DYNSYM 11 -#define SHT_SUNW_cap 0x6ffffff5 /* SunOS 5.x hw/sw capabilites */ +#define SHT_SUNW_cap 0x6ffffff5 /* SunOS 5.x hw/sw capabilities */ /* elf type */ #define ELFDATANONE 0 /* e_ident[EI_DATA] */ @@ -230,6 +230,33 @@ typedef struct { } Elf64_Shdr; #define NT_NETBSD_CORE_PROCINFO 1 +#define NT_NETBSD_CORE_AUXV 2 + +struct NetBSD_elfcore_procinfo { + /* Version 1 fields start here. */ + uint32_t cpi_version; /* our version */ + uint32_t cpi_cpisize; /* sizeof(this struct) */ + uint32_t cpi_signo; /* killing signal */ + uint32_t cpi_sigcode; /* signal code */ + uint32_t cpi_sigpend[4]; /* pending signals */ + uint32_t cpi_sigmask[4]; /* blocked signals */ + uint32_t cpi_sigignore[4]; /* ignored signals */ + uint32_t cpi_sigcatch[4]; /* caught signals */ + int32_t cpi_pid; /* process ID */ + int32_t cpi_ppid; /* parent process ID */ + int32_t cpi_pgrp; /* process group ID */ + int32_t cpi_sid; /* session ID */ + uint32_t cpi_ruid; /* real user ID */ + uint32_t cpi_euid; /* effective user ID */ + uint32_t cpi_svuid; /* saved user ID */ + uint32_t cpi_rgid; /* real group ID */ + uint32_t cpi_egid; /* effective group ID */ + uint32_t cpi_svgid; /* saved group ID */ + uint32_t cpi_nlwps; /* number of LWPs */ + int8_t cpi_name[32]; /* copy of p->p_comm */ + /* Add version 2 fields below here. 
*/ + int32_t cpi_siglwp; /* LWP target of killing signal */ +}; /* Note header in a PT_NOTE section */ typedef struct elf_note { @@ -328,6 +355,11 @@ typedef struct { */ #define NT_NETBSD_CMODEL 6 +/* + * FreeBSD specific notes + */ +#define NT_FREEBSD_PROCSTAT_AUXV 16 + #if !defined(ELFSIZE) && defined(ARCH_ELFSIZE) #define ELFSIZE ARCH_ELFSIZE #endif diff --git a/deps/libmagic/src/softmagic.c b/deps/libmagic/src/softmagic.c index 644c9d9..b9e9753 100644 --- a/deps/libmagic/src/softmagic.c +++ b/deps/libmagic/src/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.234 2016/06/13 12:02:06 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.249 2017/06/19 18:30:25 christos Exp $") #endif /* lint */ #include "magic.h" @@ -57,13 +57,13 @@ private int mcopy(struct magic_set *, union VALUETYPE *, int, int, const unsigned char *, uint32_t, size_t, struct magic *); private int mconvert(struct magic_set *, struct magic *, int); private int print_sep(struct magic_set *, int); -private int handle_annotation(struct magic_set *, struct magic *); +private int handle_annotation(struct magic_set *, struct magic *, int); private int cvt_8(union VALUETYPE *, const struct magic *); private int cvt_16(union VALUETYPE *, const struct magic *); private int cvt_32(union VALUETYPE *, const struct magic *); private int cvt_64(union VALUETYPE *, const struct magic *); -#define OFFSET_OOB(n, o, i) ((n) < (o) || (i) > ((n) - (o))) +#define OFFSET_OOB(n, o, i) ((n) < (uint32_t)(o) || (i) > ((n) - (o))) #define BE64(p) (((uint64_t)(p)->hq[0]<<56)|((uint64_t)(p)->hq[1]<<48)| \ ((uint64_t)(p)->hq[2]<<40)|((uint64_t)(p)->hq[3]<<32)| \ ((uint64_t)(p)->hq[4]<<24)|((uint64_t)(p)->hq[5]<<16)| \ @@ -80,6 +80,7 @@ private int cvt_64(union VALUETYPE *, const struct magic *); ((uint32_t)(p)->hl[3]<<8)|((uint32_t)(p)->hl[2])) #define BE16(p) (((uint16_t)(p)->hs[0]<<8)|((uint16_t)(p)->hs[1])) #define LE16(p) 
(((uint16_t)(p)->hs[1]<<8)|((uint16_t)(p)->hs[0])) +#define SEXT(s,v,p) ((s)?(intmax_t)(int##v##_t)(p):(intmax_t)(uint##v##_t)(p)) /* * softmagic - lookup one file in parsed, in-memory copy of database @@ -191,6 +192,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, while (magindex < nmagic - 1 && magic[magindex + 1].cont_level != 0) magindex++; + cont_level = 0; continue; /* Skip to next top-level test*/ } @@ -230,7 +232,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, goto flush; } - if ((e = handle_annotation(ms, m)) != 0) { + if ((e = handle_annotation(ms, m, firstline)) != 0) { *need_separator = 1; *printed_something = 1; *returnval = 1; @@ -328,7 +330,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, } else ms->c.li[cont_level].got_match = 1; - if ((e = handle_annotation(ms, m)) != 0) { + if ((e = handle_annotation(ms, m, firstline)) != 0) { *need_separator = 1; *printed_something = 1; *returnval = 1; @@ -369,6 +371,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, case -1: case 0: flush = 1; + cont_level--; break; default: break; @@ -1016,9 +1019,8 @@ private int mconvert(struct magic_set *ms, struct magic *m, int flip) { union VALUETYPE *p = &ms->ms_value; - uint8_t type; - switch (type = cvt_flip(m->type, flip)) { + switch (cvt_flip(m->type, flip)) { case FILE_BYTE: if (cvt_8(p, m) == -1) goto out; @@ -1183,7 +1185,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, case FILE_DER: case FILE_SEARCH: if (offset > nbytes) - offset = nbytes; + offset = CAST(uint32_t, nbytes); ms->search.s = RCAST(const char *, s) + offset; ms->search.s_len = nbytes - offset; ms->search.offset = offset; @@ -1197,7 +1199,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, const char *end; size_t lines, linecnt, bytecnt; - if (s == NULL) { + if (s == NULL || nbytes < offset) { ms->search.s_len = 0; ms->search.s = NULL; return 0; @@ -1226,7 +1228,7 @@ 
mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, memchr(c, '\r', CAST(size_t, (end - c)))))); lines--, b++) { last = b; - if (b[0] == '\r' && b[1] == '\n') + if (b < end - 1 && b[0] == '\r' && b[1] == '\n') b++; } if (lines) @@ -1259,7 +1261,8 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, if (*dst == '\0') { if (type == FILE_BESTRING16 ? *(src - 1) != '\0' : - *(src + 1) != '\0') + ((src + 1 < esrc) && + *(src + 1) != '\0')) *dst = ' '; } } @@ -1294,6 +1297,45 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, return 0; } +private uint32_t +do_ops(struct magic *m, intmax_t lhs, intmax_t off) +{ + intmax_t offset; + if (off) { + switch (m->in_op & FILE_OPS_MASK) { + case FILE_OPAND: + offset = lhs & off; + break; + case FILE_OPOR: + offset = lhs | off; + break; + case FILE_OPXOR: + offset = lhs ^ off; + break; + case FILE_OPADD: + offset = lhs + off; + break; + case FILE_OPMINUS: + offset = lhs - off; + break; + case FILE_OPMULTIPLY: + offset = lhs * off; + break; + case FILE_OPDIVIDE: + offset = lhs / off; + break; + case FILE_OPMODULO: + offset = lhs % off; + break; + } + } else + offset = lhs; + if (m->in_op & FILE_OPINVERSE) + offset = ~offset; + + return (uint32_t)offset; +} + private int mget(struct magic_set *ms, const unsigned char *s, struct magic *m, size_t nbytes, size_t o, unsigned int cont_level, int mode, int text, @@ -1301,7 +1343,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, int *printed_something, int *need_separator, int *returnval) { uint32_t offset = ms->offset; - uint32_t lhs; + intmax_t lhs; file_pushbuf_t *pb; int rv, oneed_separator, in_type; char *rbuf; @@ -1325,7 +1367,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, return -1; if ((ms->flags & MAGIC_DEBUG) != 0) { - fprintf(stderr, "mget(type=%d, flag=%x, offset=%u, o=%" + fprintf(stderr, "mget(type=%d, flag=%#x, offset=%u, o=%" SIZE_T_FORMAT "u, " "nbytes=%" SIZE_T_FORMAT 
"u, il=%hu, nc=%hu)\n", m->type, m->flag, offset, o, nbytes, @@ -1337,7 +1379,8 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, } if (m->flag & INDIR) { - int off = m->in_offset; + intmax_t off = m->in_offset; + const int sgn = m->in_op & FILE_OPSIGNED; if (m->in_op & FILE_OPINDIRECT) { const union VALUETYPE *q = CAST(const union VALUETYPE *, ((const void *)(s + offset + off))); @@ -1345,178 +1388,55 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, return 0; switch (cvt_flip(m->in_type, flip)) { case FILE_BYTE: - off = q->b; + off = SEXT(sgn,8,q->b); break; case FILE_SHORT: - off = q->h; + off = SEXT(sgn,16,q->h); break; case FILE_BESHORT: - off = (short)BE16(q); + off = SEXT(sgn,16,BE16(q)); break; case FILE_LESHORT: - off = (short)LE16(q); + off = SEXT(sgn,16,LE16(q)); break; case FILE_LONG: - off = q->l; + off = SEXT(sgn,32,q->l); break; case FILE_BELONG: case FILE_BEID3: - off = (int32_t)BE32(q); + off = SEXT(sgn,32,BE32(q)); break; case FILE_LEID3: case FILE_LELONG: - off = (int32_t)LE32(q); + off = SEXT(sgn,32,LE32(q)); break; case FILE_MELONG: - off = (int32_t)ME32(q); + off = SEXT(sgn,32,ME32(q)); break; } if ((ms->flags & MAGIC_DEBUG) != 0) - fprintf(stderr, "indirect offs=%u\n", off); + fprintf(stderr, "indirect offs=%jd\n", off); } switch (in_type = cvt_flip(m->in_type, flip)) { case FILE_BYTE: if (OFFSET_OOB(nbytes, offset, 1)) return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->b & off; - break; - case FILE_OPOR: - offset = p->b | off; - break; - case FILE_OPXOR: - offset = p->b ^ off; - break; - case FILE_OPADD: - offset = p->b + off; - break; - case FILE_OPMINUS: - offset = p->b - off; - break; - case FILE_OPMULTIPLY: - offset = p->b * off; - break; - case FILE_OPDIVIDE: - offset = p->b / off; - break; - case FILE_OPMODULO: - offset = p->b % off; - break; - } - } else - offset = p->b; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, 
SEXT(sgn,8,p->b), off); break; case FILE_BESHORT: if (OFFSET_OOB(nbytes, offset, 2)) return 0; - lhs = (p->hs[0] << 8) | p->hs[1]; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = lhs & off; - break; - case FILE_OPOR: - offset = lhs | off; - break; - case FILE_OPXOR: - offset = lhs ^ off; - break; - case FILE_OPADD: - offset = lhs + off; - break; - case FILE_OPMINUS: - offset = lhs - off; - break; - case FILE_OPMULTIPLY: - offset = lhs * off; - break; - case FILE_OPDIVIDE: - offset = lhs / off; - break; - case FILE_OPMODULO: - offset = lhs % off; - break; - } - } else - offset = lhs; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, SEXT(sgn,16,BE16(p)), off); break; case FILE_LESHORT: if (OFFSET_OOB(nbytes, offset, 2)) return 0; - lhs = (p->hs[1] << 8) | p->hs[0]; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = lhs & off; - break; - case FILE_OPOR: - offset = lhs | off; - break; - case FILE_OPXOR: - offset = lhs ^ off; - break; - case FILE_OPADD: - offset = lhs + off; - break; - case FILE_OPMINUS: - offset = lhs - off; - break; - case FILE_OPMULTIPLY: - offset = lhs * off; - break; - case FILE_OPDIVIDE: - offset = lhs / off; - break; - case FILE_OPMODULO: - offset = lhs % off; - break; - } - } else - offset = lhs; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, SEXT(sgn,16,LE16(p)), off); break; case FILE_SHORT: if (OFFSET_OOB(nbytes, offset, 2)) return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->h & off; - break; - case FILE_OPOR: - offset = p->h | off; - break; - case FILE_OPXOR: - offset = p->h ^ off; - break; - case FILE_OPADD: - offset = p->h + off; - break; - case FILE_OPMINUS: - offset = p->h - off; - break; - case FILE_OPMULTIPLY: - offset = p->h * off; - break; - case FILE_OPDIVIDE: - offset = p->h / off; - break; - case FILE_OPMODULO: - offset = p->h % off; - break; - } - } - else - offset = p->h; - 
if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, SEXT(sgn,16,p->h), off); break; case FILE_BELONG: case FILE_BEID3: @@ -1524,38 +1444,8 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, return 0; lhs = BE32(p); if (in_type == FILE_BEID3) - lhs = cvt_id3(ms, lhs); - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = lhs & off; - break; - case FILE_OPOR: - offset = lhs | off; - break; - case FILE_OPXOR: - offset = lhs ^ off; - break; - case FILE_OPADD: - offset = lhs + off; - break; - case FILE_OPMINUS: - offset = lhs - off; - break; - case FILE_OPMULTIPLY: - offset = lhs * off; - break; - case FILE_OPDIVIDE: - offset = lhs / off; - break; - case FILE_OPMODULO: - offset = lhs % off; - break; - } - } else - offset = lhs; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + lhs = cvt_id3(ms, (uint32_t)lhs); + offset = do_ops(m, SEXT(sgn,32,lhs), off); break; case FILE_LELONG: case FILE_LEID3: @@ -1563,109 +1453,18 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m, return 0; lhs = LE32(p); if (in_type == FILE_LEID3) - lhs = cvt_id3(ms, lhs); - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = lhs & off; - break; - case FILE_OPOR: - offset = lhs | off; - break; - case FILE_OPXOR: - offset = lhs ^ off; - break; - case FILE_OPADD: - offset = lhs + off; - break; - case FILE_OPMINUS: - offset = lhs - off; - break; - case FILE_OPMULTIPLY: - offset = lhs * off; - break; - case FILE_OPDIVIDE: - offset = lhs / off; - break; - case FILE_OPMODULO: - offset = lhs % off; - break; - } - } else - offset = lhs; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + lhs = cvt_id3(ms, (uint32_t)lhs); + offset = do_ops(m, SEXT(sgn,32,lhs), off); break; case FILE_MELONG: if (OFFSET_OOB(nbytes, offset, 4)) return 0; - lhs = ME32(p); - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = lhs & off; - break; - case FILE_OPOR: - offset = lhs | 
off; - break; - case FILE_OPXOR: - offset = lhs ^ off; - break; - case FILE_OPADD: - offset = lhs + off; - break; - case FILE_OPMINUS: - offset = lhs - off; - break; - case FILE_OPMULTIPLY: - offset = lhs * off; - break; - case FILE_OPDIVIDE: - offset = lhs / off; - break; - case FILE_OPMODULO: - offset = lhs % off; - break; - } - } else - offset = lhs; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, SEXT(sgn,32,ME32(p)), off); break; case FILE_LONG: if (OFFSET_OOB(nbytes, offset, 4)) return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->l & off; - break; - case FILE_OPOR: - offset = p->l | off; - break; - case FILE_OPXOR: - offset = p->l ^ off; - break; - case FILE_OPADD: - offset = p->l + off; - break; - case FILE_OPMINUS: - offset = p->l - off; - break; - case FILE_OPMULTIPLY: - offset = p->l * off; - break; - case FILE_OPDIVIDE: - offset = p->l / off; - break; - case FILE_OPMODULO: - offset = p->l % off; - break; - } - } else - offset = p->l; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; + offset = do_ops(m, SEXT(sgn,32,p->l), off); break; default: break; @@ -1835,6 +1634,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) */ const unsigned char *a = (const unsigned char *)s1; const unsigned char *b = (const unsigned char *)s2; + const unsigned char *eb = b + len; uint64_t v; /* @@ -1849,6 +1649,10 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) } else { /* combine the others */ while (len-- > 0) { + if (b >= eb) { + v = 1; + break; + } if ((flags & STRING_IGNORE_LOWERCASE) && islower(*a)) { if ((v = tolower(*b++) - *a++) != '\0') @@ -1864,7 +1668,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) a++; if (isspace(*b++)) { if (!isspace(*a)) - while (isspace(*b)) + while (b < eb && isspace(*b)) b++; } else { @@ -1875,7 +1679,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) else if 
((flags & STRING_COMPACT_OPTIONAL_WHITESPACE) && isspace(*a)) { a++; - while (isspace(*b)) + while (b < eb && isspace(*b)) b++; } else { @@ -2046,13 +1850,13 @@ magiccheck(struct magic_set *ms, struct magic *m) for (idx = 0; m->str_range == 0 || idx < m->str_range; idx++) { if (slen + idx > ms->search.s_len) - break; + return 0; v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags); if (v == 0) { /* found match */ ms->search.offset += idx; - ms->search.rm_len = m->str_range - idx; + ms->search.rm_len = ms->search.s_len - idx; break; } } @@ -2078,7 +1882,7 @@ magiccheck(struct magic_set *ms, struct magic *m) size_t slen = ms->search.s_len; char *copy; if (slen != 0) { - copy = malloc(slen); + copy = CAST(char *, malloc(slen)); if (copy == NULL) { file_regfree(&rx); file_error(ms, errno, @@ -2090,7 +1894,7 @@ magiccheck(struct magic_set *ms, struct magic *m) copy[--slen] = '\0'; search = copy; } else { - search = ms->search.s; + search = CCAST(char *, ""); copy = NULL; } rc = file_regexec(&rx, (const char *)search, @@ -2231,19 +2035,25 @@ magiccheck(struct magic_set *ms, struct magic *m) } private int -handle_annotation(struct magic_set *ms, struct magic *m) +handle_annotation(struct magic_set *ms, struct magic *m, int firstline) { if ((ms->flags & MAGIC_APPLE) && m->apple[0]) { + if (!firstline && file_printf(ms, "\n- ") == -1) + return -1; if (file_printf(ms, "%.8s", m->apple) == -1) return -1; return 1; } if ((ms->flags & MAGIC_EXTENSION) && m->ext[0]) { + if (!firstline && file_printf(ms, "\n- ") == -1) + return -1; if (file_printf(ms, "%s", m->ext) == -1) return -1; return 1; } if ((ms->flags & MAGIC_MIME_TYPE) && m->mimetype[0]) { + if (!firstline && file_printf(ms, "\n- ") == -1) + return -1; if (file_printf(ms, "%s", m->mimetype) == -1) return -1; return 1; @@ -2254,8 +2064,8 @@ handle_annotation(struct magic_set *ms, struct magic *m) private int print_sep(struct magic_set *ms, int firstline) { - if (ms->flags & MAGIC_NODESC) - return 0; +// if 
(ms->flags & MAGIC_NODESC) +// return 0; if (firstline) return 0; /* diff --git a/magic/magic.mgc b/magic/magic.mgc index 69f4df4..2fdc46d 100644 Binary files a/magic/magic.mgc and b/magic/magic.mgc differ diff --git a/package.json b/package.json index 37a7377..87d9b1c 100644 --- a/package.json +++ b/package.json @@ -1,17 +1,17 @@ { "name": "mmmagic", - "version": "0.4.6", + "version": "0.5.1", "author": "Brian White ", "contributors": "Roee Kasher ", "description": "An async libmagic binding for node.js for detecting content types by data inspection", "main": "./lib/index", "dependencies": { - "nan": "^2.4.0" + "nan": "^2.8.0" }, "scripts": { "install": "node-gyp rebuild", "test": "node test/test.js" }, - "engines": { "node": ">=0.10.0" }, + "engines": { "node": ">=4.0.0" }, "keywords": [ "magic", "libmagic", "file", "filetype", "mime", "mimetype" ], "licenses": [ { "type": "MIT", "url": "http://github.com/mscdex/mmmagic/raw/master/LICENSE" } ], "repository" : { "type": "git", "url": "http://github.com/mscdex/mmmagic.git" }