XMPP reverse proxy and outgoing proxy https://github.com/moparisthebest/xmpp-proxy
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

265 lines
9.0 KiB

  1. use anyhow::{bail, Result};
  2. use crate::stanzafilter::StanzaState::*;
  3. use crate::to_str;
  4. #[derive(Debug)]
  5. enum StanzaState {
  6. OutsideStanza,
  7. StanzaFirstChar,
  8. InsideTagFirstChar,
  9. InsideTag,
  10. InsideAttribute(u8),
  11. BetweenTags,
  12. ExclamationTag(usize),
  13. InsideCDATA,
  14. QuestionTag(usize),
  15. InsideXmlTag,
  16. EndStream,
  17. }
  18. pub struct StanzaFilter {
  19. buf_size: usize,
  20. pub buf: Vec<u8>,
  21. cnt: usize,
  22. tag_cnt: usize,
  23. state: StanzaState,
  24. }
  25. #[inline(always)]
  26. fn checked_sub(i: usize, s: usize) -> Result<usize> {
  27. // i.checked_sub(s).ok_or_else(||anyhow::anyhow!("invalid stanza"))
  28. if s > i {
  29. bail!("invalid stanza")
  30. } else {
  31. Ok(i - s)
  32. }
  33. }
  34. impl StanzaFilter {
  35. pub fn new(buf_size: usize) -> StanzaFilter {
  36. StanzaFilter {
  37. buf_size,
  38. buf: vec![0u8; buf_size],
  39. cnt: 0,
  40. tag_cnt: 0,
  41. state: OutsideStanza,
  42. }
  43. }
  44. #[inline(always)]
  45. pub fn current_buf(&mut self) -> &mut [u8] {
  46. &mut self.buf[self.cnt..(self.cnt + 1)]
  47. }
  48. #[allow(dead_code)]
  49. pub fn process_next_byte(&mut self) -> Result<Option<&[u8]>> {
  50. if let Some(idx) = self.process_next_byte_idx()? {
  51. return Ok(Some(&self.buf[0..idx]));
  52. }
  53. Ok(None)
  54. }
  55. pub fn process_next_byte_idx(&mut self) -> Result<Option<usize>> {
  56. let b = self.buf[self.cnt];
  57. //print!("b: '{}', cnt: {}, tag_cnt: {}, state: {:?}; ", b as char, self.cnt, self.tag_cnt, self.state);
  58. match self.state {
  59. OutsideStanza => {
  60. if b == b'<' {
  61. self.tag_cnt += 1;
  62. self.state = StanzaFirstChar;
  63. } else {
  64. // outside of stanzas, let's ignore all characters except <
  65. // prosody does this, and since things do whitespace pings, it's good
  66. return Ok(None);
  67. }
  68. }
  69. BetweenTags => {
  70. if b == b'<' {
  71. self.tag_cnt += 1;
  72. self.state = InsideTagFirstChar;
  73. }
  74. }
  75. StanzaFirstChar => match b {
  76. b'/' => self.state = EndStream,
  77. b'!' | b'>' | b'\'' | b'"' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])),
  78. b'?' => self.state = QuestionTag(self.cnt + 4), // 4 is length of b"xml "
  79. _ => self.state = InsideTag,
  80. },
  81. InsideTagFirstChar => match b {
  82. b'/' => self.tag_cnt = checked_sub(self.tag_cnt, 2)?,
  83. b'!' => self.state = ExclamationTag(self.cnt + 7), // 7 is length of b"[CDATA["
  84. b'?' | b'>' | b'\'' | b'"' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])),
  85. _ => self.state = InsideTag,
  86. },
  87. InsideTag => match b {
  88. b'>' => {
  89. if self.buf[self.cnt - 1] == b'/' {
  90. // state can't be InsideTag unless we are on at least the second character, so can't go out of range
  91. // self-closing tag
  92. self.tag_cnt = checked_sub(self.tag_cnt, 1)?;
  93. }
  94. if self.tag_cnt == 0 {
  95. return self.stanza_end();
  96. }
  97. // now special case <stream:stream ...> which we want to send stand-alone:
  98. if self.tag_cnt == 1 && self.buf.len() >= 15 && b"<stream:stream " == &self.buf[0..15] {
  99. return self.stanza_end();
  100. }
  101. self.state = BetweenTags;
  102. }
  103. b'\'' | b'"' => self.state = InsideAttribute(b),
  104. _ => {}
  105. },
  106. InsideAttribute(end) => {
  107. if b == end {
  108. self.state = InsideTag;
  109. }
  110. }
  111. QuestionTag(idx) => {
  112. if idx == self.cnt {
  113. if self.last_equals(b"xml ")? {
  114. self.state = InsideXmlTag;
  115. } else {
  116. bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)]));
  117. }
  118. }
  119. }
  120. InsideXmlTag => {
  121. if b == b'>' {
  122. return self.stanza_end();
  123. }
  124. }
  125. ExclamationTag(idx) => {
  126. if idx == self.cnt {
  127. if self.last_equals(b"[CDATA[")? {
  128. self.state = InsideCDATA;
  129. self.tag_cnt = checked_sub(self.tag_cnt, 1)?; // cdata not a tag
  130. } else {
  131. bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)]));
  132. }
  133. }
  134. }
  135. InsideCDATA => {
  136. if b == b'>' && self.last_equals(b"]]>")? {
  137. self.state = BetweenTags;
  138. }
  139. }
  140. EndStream => {
  141. if b == b'>' {
  142. if self.last_equals(b"</stream:stream>")? {
  143. return self.stanza_end();
  144. } else {
  145. bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)]));
  146. }
  147. }
  148. }
  149. }
  150. //println!("cnt: {}, tag_cnt: {}, state: {:?}", self.cnt, self.tag_cnt, self.state);
  151. self.cnt += 1;
  152. if self.cnt == self.buf_size {
  153. bail!("stanza too big: {}", to_str(&self.buf));
  154. }
  155. Ok(None)
  156. }
  157. fn stanza_end(&mut self) -> Result<Option<usize>> {
  158. let ret = Ok(Some(self.cnt + 1));
  159. self.tag_cnt = 0;
  160. self.cnt = 0;
  161. self.state = OutsideStanza;
  162. //println!("cnt: {}, tag_cnt: {}, state: {:?}", self.cnt, self.tag_cnt, self.state);
  163. return ret;
  164. }
  165. fn last_equals(&self, needle: &[u8]) -> Result<bool> {
  166. Ok(needle == self.last_num_bytes(needle.len())?)
  167. }
  168. fn last_num_bytes(&self, num: usize) -> Result<&[u8]> {
  169. let num = num - 1;
  170. if num <= self.cnt {
  171. Ok(&self.buf[(self.cnt - num)..(self.cnt + 1)])
  172. } else {
  173. bail!("expected {} bytes only have {} bytes", num, (self.cnt + 1))
  174. }
  175. }
  176. }
  177. // this would be better as an async trait, but that doesn't work yet...
  178. pub struct StanzaReader<T>(pub T);
  179. impl<T: tokio::io::AsyncRead + Unpin> StanzaReader<T> {
  180. pub async fn next<'a>(&'a mut self, filter: &'a mut StanzaFilter) -> Result<Option<&'a [u8]>> {
  181. use tokio::io::AsyncReadExt;
  182. loop {
  183. let n = self.0.read(filter.current_buf()).await?;
  184. if n == 0 {
  185. return Ok(None);
  186. }
  187. if let Some(idx) = filter.process_next_byte_idx()? {
  188. return Ok(Some(&filter.buf[0..idx]));
  189. }
  190. }
  191. }
  192. }
  193. #[cfg(test)]
  194. mod tests {
  195. use crate::stanzafilter::*;
  196. use std::io::Cursor;
  197. impl<T: tokio::io::AsyncRead + Unpin> StanzaReader<T> {
  198. async fn to_vec<'a>(&'a mut self, filter: &'a mut StanzaFilter) -> Result<Vec<String>> {
  199. let mut ret = Vec::new();
  200. while let Some(stanza) = self.next(filter).await? {
  201. ret.push(to_str(stanza).to_string());
  202. }
  203. return Ok(ret);
  204. }
  205. }
  206. #[tokio::test]
  207. async fn process_next_byte() -> Result<()> {
  208. let mut filter = StanzaFilter::new(262_144);
  209. //todo: <x a='/>'>This is going to be fun.</x>
  210. assert_eq!(
  211. StanzaReader(Cursor::new(
  212. br###"
  213. <?xml version='1.0'?>
  214. <stream:stream xmlns='jabber:server' xmlns:stream='http://etherx.jabber.org/streams' xmlns:db='jabber:server:dialback' version='1.0' to='example.org' from='example.com' xml:lang='en'>
  215. <a/><b>inside b before c<c>inside c</c></b></stream:stream>
  216. <q>bla<![CDATA[<this>is</not><xml/>]]>bloo</q>
  217. <x><![CDATA[ lol</x> ]]></x>
  218. <z><x><![CDATA[ lol</x> ]]></x></z>
  219. <a a='![CDATA['/>
  220. <x a='/>'>This is going to be fun.</x>
  221. <z><x a='/>'>This is going to be fun.</x></y>
  222. <d></d><e><![CDATA[what]>]]]]></e></stream:stream>
  223. "###,
  224. ))
  225. .to_vec(&mut filter)
  226. .await?,
  227. vec![
  228. "<?xml version='1.0'?>",
  229. "<stream:stream xmlns='jabber:server' xmlns:stream='http://etherx.jabber.org/streams' xmlns:db='jabber:server:dialback' version='1.0' to='example.org' from='example.com' xml:lang='en'>",
  230. "<a/>",
  231. "<b>inside b before c<c>inside c</c></b>",
  232. "</stream:stream>",
  233. "<q>bla<![CDATA[<this>is</not><xml/>]]>bloo</q>",
  234. "<x><![CDATA[ lol</x> ]]></x>",
  235. "<z><x><![CDATA[ lol</x> ]]></x></z>",
  236. "<a a='![CDATA['/>",
  237. "<x a='/>'>This is going to be fun.</x>",
  238. "<z><x a='/>'>This is going to be fun.</x></y>",
  239. "<d></d>",
  240. "<e><![CDATA[what]>]]]]></e>",
  241. "</stream:stream>",
  242. ]
  243. );
  244. Ok(())
  245. }
  246. }